From f0dce905083a69988d3d6f68a45e37dc9e815c7e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 17 Jul 2017 09:59:52 +0300 Subject: [PATCH 001/141] ipvs: SNAT packet replies only for NATed connections commit 3c5ab3f395d66a9e4e937fcfdf6ebc63894f028b upstream. We do not check if packet from real server is for NAT connection before performing SNAT. This causes problems for setups that use DR/TUN and allow local clients to access the real server directly, for example: - local client in director creates IPVS-DR/TUN connection CIP->VIP and the request packets are routed to RIP. Talks are finished but IPVS connection is not expired yet. - second local client creates non-IPVS connection CIP->RIP with same reply tuple RIP->CIP and when replies are received on LOCAL_IN we wrongly assign them for the first client connection because RIP->CIP matches the reply direction. As result, IPVS SNATs replies for non-IPVS connections. The problem is more visible to local UDP clients but in rare cases it can happen also for TCP or remote clients when the real server sends the reply traffic via the director. So, better to be more precise for the reply traffic. As replies are not expected for DR/TUN connections, better to not touch them. Reported-by: Nick Moriarty Signed-off-by: Julian Anastasov Signed-off-by: Willy Tarreau --- net/netfilter/ipvs/ip_vs_core.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1c6a71c41e6..ca66520b894 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -795,10 +795,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, { unsigned int verdict = NF_DROP; - if (IP_VS_FWD_METHOD(cp) != 0) { - pr_err("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -832,6 +830,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); + +ignore_cp: verdict = NF_ACCEPT; out: @@ -1182,8 +1182,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) */ cp = pp->conn_out_get(af, skb, &iph, 0); - if (likely(cp)) + if (likely(cp)) { + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); + } if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || @@ -1225,9 +1228,15 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) } } } + +out: IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; + +ignore_cp: + __ip_vs_conn_put(cp); + goto out; } /* From 1bff8a2960fc6617cf10c5ac050958275aba6e58 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2017 10:20:32 -0800 Subject: [PATCH 002/141] net: reduce skb_warn_bad_offload() noise commit b2504a5dbef3305ef41988ad270b0e8ec289331c upstream. Dmitry reported warnings occurring in __skb_gso_segment() [1] All SKB_GSO_DODGY producers can allow user space to feed packets that trigger the current check. We could prevent them from doing so, rejecting packets, but this might add regressions to existing programs. It turns out our SKB_GSO_DODGY handlers properly set up checksum information that is needed anyway when packets needs to be segmented. By checking again skb_needs_check() after skb_mac_gso_segment(), we should remove these pesky warnings, at a very minor cost. With help from Willem de Bruijn [1] WARNING: CPU: 1 PID: 6768 at net/core/dev.c:2439 skb_warn_bad_offload+0x2af/0x390 net/core/dev.c:2434 lo: caps=(0x000000a2803b7c69, 0x0000000000000000) len=138 data_len=0 gso_size=15883 gso_type=4 ip_summed=0 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 6768 Comm: syz-executor1 Not tainted 4.9.0 #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ffff8801c063ecd8 ffffffff82346bdf ffffffff00000001 1ffff100380c7d2e ffffed00380c7d26 0000000041b58ab3 ffffffff84b37e38 ffffffff823468f1 ffffffff84820740 ffffffff84f289c0 dffffc0000000000 ffff8801c063ee20 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 [] panic+0x1fb/0x412 kernel/panic.c:179 [] __warn+0x1c4/0x1e0 kernel/panic.c:542 [] warn_slowpath_fmt+0xc5/0x100 kernel/panic.c:565 [] skb_warn_bad_offload+0x2af/0x390 net/core/dev.c:2434 [] __skb_gso_segment+0x482/0x780 net/core/dev.c:2706 [] skb_gso_segment include/linux/netdevice.h:3985 [inline] [] validate_xmit_skb+0x5c9/0xc20 net/core/dev.c:2969 [] __dev_queue_xmit+0xe6b/0x1e70 net/core/dev.c:3383 [] dev_queue_xmit+0x17/0x20 net/core/dev.c:3424 [] packet_snd net/packet/af_packet.c:2930 [inline] [] packet_sendmsg+0x32ed/0x4d30 net/packet/af_packet.c:2955 [] sock_sendmsg_nosec net/socket.c:621 [inline] [] sock_sendmsg+0xca/0x110 net/socket.c:631 [] ___sys_sendmsg+0x8fa/0x9f0 net/socket.c:1954 [] __sys_sendmsg+0x138/0x300 net/socket.c:1988 [] SYSC_sendmsg net/socket.c:1999 [inline] [] SyS_sendmsg+0x2d/0x50 net/socket.c:1995 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/core/dev.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 682bf5ad63a..c637a2dc287 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2361,11 +2361,12 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) { + struct sk_buff *segs; + if (unlikely(skb_needs_check(skb, tx_path))) { int err; - skb_warn_bad_offload(skb); - + /* We're going to init ->check field in TCP or UDP header */ if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); @@ -2375,7 +2376,12 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_reset_mac_len(skb); - return skb_mac_gso_segment(skb, features); + segs = skb_mac_gso_segment(skb, features); + + if (unlikely(skb_needs_check(skb, tx_path))) + skb_warn_bad_offload(skb); + + return segs; } EXPORT_SYMBOL(__skb_gso_segment); From bf98a1be38f65c5dfd27af5e7a04f50afa2cb088 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Feb 2017 14:29:42 -0800 Subject: [PATCH 003/141] net: skb_needs_check() accepts CHECKSUM_NONE for tx commit 6e7bc478c9a006c701c14476ec9d389a484b4864 upstream. My recent change missed fact that UFO would perform a complete UDP checksum before segmenting in frags. In this case skb->ip_summed is set to CHECKSUM_NONE. We need to add this valid case to skb_needs_check() Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/core/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index c637a2dc287..c8842036508 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2342,9 +2342,10 @@ EXPORT_SYMBOL(skb_mac_gso_segment); static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL; - else - return skb->ip_summed == CHECKSUM_NONE; + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_NONE; + + return skb->ip_summed == CHECKSUM_NONE; } /** From b673e043f91c569cfe55241fa0b95c9007a9459b Mon Sep 17 00:00:00 2001 From: Cheah Kok Cheong Date: Thu, 3 Aug 2017 13:14:41 +0100 Subject: [PATCH 004/141] Staging: comedi: comedi_fops: Avoid orphaned proc entry commit bf279ece37d2a3eaaa9813fcd7a1d8a81eb29c20 upstream. Move comedi_proc_init to the end to avoid orphaned proc entry if module loading failed. Signed-off-by: Cheah Kok Cheong Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/staging/comedi/comedi_fops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 0ae406a4750..57457011ded 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2557,9 +2557,6 @@ static int __init comedi_init(void) comedi_class->dev_attrs = comedi_dev_attrs; - /* XXX requires /proc interface */ - comedi_proc_init(); - /* create devices files for legacy/manual use */ for (i = 0; i < comedi_num_legacy_minors; i++) { struct comedi_device *dev; @@ -2576,6 +2573,9 @@ static int __init comedi_init(void) } } + /* XXX requires /proc interface */ + comedi_proc_init(); + return 0; } module_init(comedi_init); From 69ddef7b41fdad8123a9a4a6eb2a1aa57aafc92b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Aug 2017 12:29:19 -0400 Subject: [PATCH 005/141] udp: consistently apply ufo or fragmentation commit 85f1bd9a7b5a79d5baa8bf44af19658f7bf77bfa upstream. When iteratively building a UDP datagram with MSG_MORE and that datagram exceeds MTU, consistently choose UFO or fragmentation. Once skb_is_gso, always apply ufo. Conversely, once a datagram is split across multiple skbs, do not consider ufo. Sendpage already maintains the first invariant, only add the second. IPv6 does not have a sendpage implementation to modify. A gso skb must have a partial checksum, do not follow sk_no_check_tx in udp_send_skb. Found by syzkaller. [gregkh - tweaks for 3.18 for ipv6, hopefully they are correct...] [wt: s/skb_is_gso/skb_has_frags for 3.10] Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Reported-by: Andrey Konovalov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- net/ipv4/ip_output.c | 7 +++++-- net/ipv4/udp.c | 2 +- net/ipv6/ip6_output.c | 7 ++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5f077efad29..40faf48cb10 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -846,10 +846,12 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if (((length > mtu) || (skb && skb_has_frags(skb))) && + if ((skb && skb_has_frags(skb)) || + ((length > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM))) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1160,6 +1162,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, cork->length += size; if ((size + skb->len > mtu) && + (skb_queue_len(&sk->sk_write_queue) == 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 68174e4d88c..882b23e8e77 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -763,7 +763,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + else if (sk->sk_no_check == UDP_CSUM_NOXMIT && !skb_has_frags(skb)) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 17a88ebcc84..3a65b9a9cb4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1288,11 +1288,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; - if (((length > mtu) || - (skb && skb_has_frags(skb))) && + if ((skb && skb_has_frags(skb)) || + (((length + fragheaderlen) > mtu) && + (skb_queue_len(&sk->sk_write_queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM))) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); From b8d57eb2f414876510ffffa88105a9d4f350f575 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Dec 2014 06:20:59 +0000 Subject: [PATCH 006/141] Bluetooth: bnep: bnep_add_connection() should verify that it's dealing with l2cap socket commit 71bb99a02b32b4cc4265118e85f6035ca72923f0 upstream. same story as cmtp Signed-off-by: Al Viro Signed-off-by: Marcel Holtmann Signed-off-by: Willy Tarreau --- net/bluetooth/bnep/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index e430b1abcd2..e387e6719fa 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -32,6 +32,7 @@ #include #include +#include #include #include "bnep.h" @@ -539,6 +540,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) BT_DBG(""); + if (!l2cap_is_socket(sock)) + return -EBADFD; + baswap((void *) dst, &bt_sk(sock->sk)->dst); baswap((void *) src, &bt_sk(sock->sk)->src); From ecce864b83040205ad7b7b1d44c73ad51f316914 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Dec 2014 06:20:58 +0000 Subject: [PATCH 007/141] Bluetooth: cmtp: cmtp_add_connection() should verify that it's dealing with l2cap socket commit 96c26653ce65bf84f3212f8b00d4316c1efcbf4c upstream. ... rather than relying on ciptool(8) never passing it anything else. Give it e.g. an AF_UNIX connected socket (from socketpair(2)) and it'll oops, trying to evaluate &l2cap_pi(sock->sk)->chan->dst... Signed-off-by: Al Viro Signed-off-by: Marcel Holtmann Signed-off-by: Willy Tarreau --- net/bluetooth/cmtp/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index e0a6ebf2baa..84460f623fc 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -334,6 +334,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) BT_DBG(""); + if (!l2cap_is_socket(sock)) + return -EBADFD; + session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL); if (!session) return -ENOMEM; From 42a858e036bb26cb559157393b7890cabe70bfc2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 27 Jul 2017 10:01:20 -0400 Subject: [PATCH 008/141] tcp: introduce tcp_rto_delta_us() helper for xmit timer fix commit e1a10ef7fa876f8510aaec36ea5c0cf34baba410 upstream. Pure refactor. This helper will be required in the xmit timer fix later in the patch series. (Because the TLP logic will want to make this calculation.) [This version of the commit was compiled and briefly tested based on top of v3.10.107.] Change-Id: I1ccfba0b00465454bf5ce22e6fef5f7b5dd94d15 Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: Willy Tarreau --- include/net/tcp.h | 10 ++++++++++ net/ipv4/tcp_input.c | 4 +--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 79cd118d599..c4db9acefa9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1592,4 +1592,14 @@ struct tcp_request_sock_ops { extern void tcp_v4_init(void); extern void tcp_init(void); +/* At how many jiffies into the future should the RTO fire? */ +static inline s32 tcp_rto_delta(const struct sock *sk) +{ + const struct sk_buff *skb = tcp_write_queue_head(sk); + const u32 rto = inet_csk(sk)->icsk_rto; + const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; + + return (s32)(rto_time_stamp - tcp_time_stamp); +} + #endif /* _TCP_H */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0680058fe69..a36b7c55b76 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2972,9 +2972,7 @@ void tcp_rearm_rto(struct sock *sk) /* Offset the time elapsed after installing regular RTO */ if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { - struct sk_buff *skb = tcp_write_queue_head(sk); - const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; - s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); + s32 delta = tcp_rto_delta(sk); /* delta may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ From aa923a7b8f795640fa307494881823f7712e96aa Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 27 Jul 2017 10:09:30 -0400 Subject: [PATCH 009/141] tcp: enable xmit timer fix by having TLP use time when RTO should fire commit a2815817ffa68c7933a43eb55836d6e789bd4389 upstream. Have tcp_schedule_loss_probe() base the TLP scheduling decision based on when the RTO *should* fire. This is to enable the upcoming xmit timer fix in this series, where tcp_schedule_loss_probe() cannot assume that the last timer installed was an RTO timer (because we are no longer doing the "rearm RTO, rearm RTO, rearm TLP" dance on every ACK). So tcp_schedule_loss_probe() must independently figure out when an RTO would want to fire. In the new TLP implementation following in this series, we cannot assume that icsk_timeout was set based on an RTO; after processing a cumulative ACK the icsk_timeout we see can be from a previous TLP or RTO. So we need to independently recalculate the RTO time (instead of reading it out of icsk_timeout). Removing this dependency on the nature of icsk_timeout makes things a little easier to reason about anyway. Note that the old and new code should be equivalent, since they are both saying: "if the RTO is in the future, but at an earlier time than the normal TLP time, then set the TLP timer to fire when the RTO would have fired". [This version of the commit was compiled and briefly tested based on top of v3.10.107.] Change-Id: I597ad6446edde15bf2cea8e56d603a2c52f8221b Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: Willy Tarreau --- net/ipv4/tcp_output.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8729a934124..135440283f2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1947,8 +1947,8 @@ bool tcp_schedule_loss_probe(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 timeout, tlp_time_stamp, rto_time_stamp; u32 rtt = tp->srtt >> 3; + u32 timeout, rto_delta; if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) return false; @@ -1987,14 +1987,10 @@ bool tcp_schedule_loss_probe(struct sock *sk) (rtt + (rtt >> 1) + TCP_DELACK_MAX)); timeout = max_t(u32, timeout, msecs_to_jiffies(10)); - /* If RTO is shorter, just schedule TLP in its place. */ - tlp_time_stamp = tcp_time_stamp + timeout; - rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; - if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { - s32 delta = rto_time_stamp - tcp_time_stamp; - if (delta > 0) - timeout = delta; - } + /* If the RTO formula yields an earlier time, then use that time. */ + rto_delta = tcp_rto_delta(sk); /* How far in future is RTO? */ + if (rto_delta > 0) + timeout = min_t(u32, timeout, rto_delta); inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); From 01c577c922f9ad4ec15553d0acd0e94952ab1fed Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 26 Jul 2017 21:43:27 -0400 Subject: [PATCH 010/141] tcp: fix xmit timer to only be reset if data ACKed/SACKed commit df92c8394e6ea0469e8056946ef8add740ab8046 upstream. Fix a TCP loss recovery performance bug raised recently on the netdev list, in two threads: (i) July 26, 2017: netdev thread "TCP fast retransmit issues" (ii) July 26, 2017: netdev thread: "[PATCH V2 net-next] TLP: Don't reschedule PTO when there's one outstanding TLP retransmission" The basic problem is that incoming TCP packets that did not indicate forward progress could cause the xmit timer (TLP or RTO) to be rearmed and pushed back in time. In certain corner cases this could result in the following problems noted in these threads: - Repeated ACKs coming in with bogus SACKs corrupted by middleboxes could cause TCP to repeatedly schedule TLPs forever. We kept sending TLPs after every ~200ms, which elicited bogus SACKs, which caused more TLPs, ad infinitum; we never fired an RTO to fill in the holes. - Incoming data segments could, in some cases, cause us to reschedule our RTO or TLP timer further out in time, for no good reason. This could cause repeated inbound data to result in stalls in outbound data, in the presence of packet loss. This commit fixes these bugs by changing the TLP and RTO ACK processing to: (a) Only reschedule the xmit timer once per ACK. (b) Only reschedule the xmit timer if tcp_clean_rtx_queue() deems the ACK indicates sufficient forward progress (a packet was cumulatively ACKed, or we got a SACK for a packet that was sent before the most recent retransmit of the write queue head). This brings us back into closer compliance with the RFCs, since, as the comment for tcp_rearm_rto() notes, we should only restart the RTO timer after forward progress on the connection. Previously we were restarting the xmit timer even in these cases where there was no forward progress. As a side benefit, this commit simplifies and speeds up the TCP timer arming logic. We had been calling inet_csk_reset_xmit_timer() three times on normal ACKs that cumulatively acknowledged some data: 1) Once near the top of tcp_ack() to switch from TLP timer to RTO: if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); 2) Once in tcp_clean_rtx_queue(), to update the RTO: if (flag & FLAG_ACKED) { tcp_rearm_rto(sk); 3) Once in tcp_ack() after tcp_fastretrans_alert() to switch from RTO to TLP: if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); This commit, by only rescheduling the xmit timer once per ACK, simplifies the code and reduces CPU overhead. This commit was tested in an A/B test with Google web server traffic. SNMP stats and request latency metrics were within noise levels, substantiating that for normal web traffic patterns this is a rare issue. This commit was also tested with packetdrill tests to verify that it fixes the timer behavior in the corner cases discussed in the netdev threads mentioned above. This patch is a bug fix patch intended to be queued for -stable relases. [This version of the commit was compiled and briefly tested based on top of v3.10.107.] Change-Id: If0417380fd59290b65cf04a415373aa13dd1dad7 Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Reported-by: Klavs Klavsen Reported-by: Mao Wenan Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: Willy Tarreau --- net/ipv4/tcp_input.c | 25 +++++++++++++++---------- net/ipv4/tcp_output.c | 12 ------------ 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a36b7c55b76..70f217cb3a1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -111,6 +111,7 @@ int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ +#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ @@ -3002,6 +3003,13 @@ void tcp_resume_early_retransmit(struct sock *sk) tcp_xmit_retransmit_queue(sk); } +/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */ +static void tcp_set_xmit_timer(struct sock *sk) +{ + if (!tcp_schedule_loss_probe(sk)) + tcp_rearm_rto(sk); +} + /* If we get here, the whole TSO packet has not been acked. */ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { @@ -3132,7 +3140,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } tcp_ack_update_rtt(sk, flag, seq_rtt); - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ if (tcp_is_reno(tp)) { tcp_remove_reno_sacks(sk, pkts_acked); @@ -3392,10 +3400,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) - tcp_rearm_rto(sk); - if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; @@ -3452,6 +3456,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) pkts_acked = previous_packets_out - tp->packets_out; + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + /* If needed, reset TLP/RTO timer; RACK may later override this. */ + if (flag & FLAG_SET_XMIT_TIMER) + tcp_set_xmit_timer(sk); + if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) @@ -3464,17 +3474,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_cong_avoid(sk, ack, prior_in_flight); } - if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) dst_confirm(dst); } - if (icsk->icsk_pending == ICSK_TIME_RETRANS) - tcp_schedule_loss_probe(sk); if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) tcp_update_pacing_rate(sk); return 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 135440283f2..f5d670ccd40 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1945,28 +1945,16 @@ repair: bool tcp_schedule_loss_probe(struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 rtt = tp->srtt >> 3; u32 timeout, rto_delta; - if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) - return false; - /* No consecutive loss probes. */ - if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { - tcp_rearm_rto(sk); - return false; - } /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ if (sk->sk_state == TCP_SYN_RECV) return false; - /* TLP is only scheduled when next timer event is RTO. */ - if (icsk->icsk_pending != ICSK_TIME_RETRANS) - return false; - /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ From 7c11e178a64d4ec02212cdfed6762a228734b35a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 25 Oct 2016 09:51:14 -0500 Subject: [PATCH 011/141] mm/page_alloc: Remove kernel address exposure in free_reserved_area() commit adb1fe9ae2ee6ef6bc10f3d5a588020e7664dfa7 upstream. Linus suggested we try to remove some of the low-hanging fruit related to kernel address exposure in dmesg. The only leaks I see on my local system are: Freeing SMP alternatives memory: 32K (ffffffff9e309000 - ffffffff9e311000) Freeing initrd memory: 10588K (ffffa0b736b42000 - ffffa0b737599000) Freeing unused kernel memory: 3592K (ffffffff9df87000 - ffffffff9e309000) Freeing unused kernel memory: 1352K (ffffa0b7288ae000 - ffffa0b728a00000) Freeing unused kernel memory: 632K (ffffa0b728d62000 - ffffa0b728e00000) Linus says: "I suspect we should just remove [the addresses in the 'Freeing' messages]. I'm sure they are useful in theory, but I suspect they were more useful back when the whole "free init memory" was originally done. These days, if we have a use-after-free, I suspect the init-mem situation is the easiest situation by far. Compared to all the dynamic allocations which are much more likely to show it anyway. So having debug output for that case is likely not all that productive." With this patch the freeing messages now look like this: Freeing SMP alternatives memory: 32K Freeing initrd memory: 10588K Freeing unused kernel memory: 3592K Freeing unused kernel memory: 1352K Freeing unused kernel memory: 632K Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/6836ff90c45b71d38e5d4405aec56fa9e5d1d4b2.1477405374.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Willy Tarreau --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e892753929..829ee76d552 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5177,8 +5177,8 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end, } if (pages && s) - pr_info("Freeing %s memory: %ldK (%lx - %lx)\n", - s, pages << (PAGE_SHIFT - 10), start, end); + pr_info("Freeing %s memory: %ldK\n", + s, pages << (PAGE_SHIFT - 10)); return pages; } From 75894adb0c708c9f3617f8921d98c750b821de1a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Sep 2017 05:28:05 +0100 Subject: [PATCH 012/141] leak in O_DIRECT readv past the EOF In all versions from 2.5.62 to 3.15, on each iteration through the loop by iovec array in do_blockdev_direct_IO() we used to do this: sdio.head = 0; sdio.tail = 0; ... retval = do_direct_IO(dio, &sdio, &map_bh); if (retval) { dio_cleanup(dio, &sdio); break; } with another dio_cleanup() done after the loop, catching the situation when retval had been 0. Consider the situation when e.g. the 3rd iovec in 4-iovec array passed to readv() has crossed the EOF. do_direct_IO() returns 0 and buggers off *without* exhausting the page array. The loop proceeds to the next iovec without calling dio_cleanup() and resets sdio.head and sdio.tail. That reset of sdio.{head,tail} has prevented the eventual dio_cleanup() from seeing anything and the page reference end up leaking. Commit 7b2c99d15559 (new helper: iov_iter_get_pages()) in 3.16 had eliminated the loop by iovec array, along with sdio.head and sdio.tail resets. Backporting that is too much work - the minimal fix is simply to make sure that the only case when do_direct_IO() buggers off early without returning non-zero will not skip dio_cleanup(). The fix applies to all versions from 2.5.62 to 3.15. Reported-and-tested-by: Venki Pallipadi Signed-off-by: Al Viro Signed-off-by: Willy Tarreau --- fs/direct-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/direct-io.c b/fs/direct-io.c index 7ab90f5081e..e17d919aa6d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -933,6 +933,7 @@ do_holes: i_size_aligned >> blkbits) { /* We hit eof */ page_cache_release(page); + dio_cleanup(dio, sdio); goto out; } zero_user(page, block_in_page << blkbits, From 58e47d437d911efef76319fed7578eb6a4f00a7a Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 22 Aug 2014 20:13:50 +0900 Subject: [PATCH 013/141] usb: renesas_usbhs: fix the behavior of some usbhs_pkt_handle commit 8355b2b3082d302091506703d2e4e239f7deed7f upstream. Some gadget drivers will call usb_ep_queue() more than once before the first queue doesn't finish. However, this driver didn't handle it correctly. So, this patch fixes the behavior of some usbhs_pkt_handle using the "running" flag. Otherwise, the oops below happens if we use g_ncm driver and when the "iperf -u -c host -b 200M" is running. Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 80000007 [#1] SMP ARM Modules linked in: usb_f_ncm g_ncm libcomposite u_ether CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 3.17.0-rc1-00008-g8b2be8a-dirty #20 task: c051c7e0 ti: c0512000 task.ti: c0512000 PC is at 0x0 LR is at usbhsf_pkt_handler+0xa8/0x114 pc : [<00000000>] lr : [] psr: 60000193 sp : c0513ce8 ip : c0513c58 fp : c0513d24 r10: 00000001 r9 : 00000193 r8 : eebec4a0 r7 : eebec410 r6 : eebe0c6c r5 : 00000000 r4 : ee4a2774 r3 : 00000000 r2 : ee251e00 r1 : c0513cf4 r0 : ee4a2774 Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/renesas_usbhs/fifo.c | 25 ++++++++++++++++++++++++- drivers/usb/renesas_usbhs/pipe.c | 13 +++++++++++++ drivers/usb/renesas_usbhs/pipe.h | 4 ++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 157a9f9afc2..c531026b826 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -545,6 +545,7 @@ static int usbhsf_pio_try_push(struct usbhs_pkt *pkt, int *is_done) usbhsf_send_terminator(pipe, fifo); usbhsf_tx_irq_ctrl(pipe, !*is_done); + usbhs_pipe_running(pipe, !*is_done); usbhs_pipe_enable(pipe); dev_dbg(dev, " send %d (%d/ %d/ %d/ %d)\n", @@ -571,12 +572,21 @@ usbhs_fifo_write_busy: * retry in interrupt */ usbhsf_tx_irq_ctrl(pipe, 1); + usbhs_pipe_running(pipe, 1); return ret; } +static int usbhsf_pio_prepare_push(struct usbhs_pkt *pkt, int *is_done) +{ + if (usbhs_pipe_is_running(pkt->pipe)) + return 0; + + return usbhsf_pio_try_push(pkt, is_done); +} + struct usbhs_pkt_handle usbhs_fifo_pio_push_handler = { - .prepare = usbhsf_pio_try_push, + .prepare = usbhsf_pio_prepare_push, .try_run = usbhsf_pio_try_push, }; @@ -590,6 +600,9 @@ static int usbhsf_prepare_pop(struct usbhs_pkt *pkt, int *is_done) if (usbhs_pipe_is_busy(pipe)) return 0; + if (usbhs_pipe_is_running(pipe)) + return 0; + /* * pipe enable to prepare packet receive */ @@ -598,6 +611,7 @@ static int usbhsf_prepare_pop(struct usbhs_pkt *pkt, int *is_done) usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->length); usbhs_pipe_enable(pipe); + usbhs_pipe_running(pipe, 1); usbhsf_rx_irq_ctrl(pipe, 1); return 0; @@ -643,6 +657,7 @@ static int usbhsf_pio_try_pop(struct usbhs_pkt *pkt, int *is_done) (total_len < maxp)) { /* short packet */ *is_done = 1; usbhsf_rx_irq_ctrl(pipe, 0); + usbhs_pipe_running(pipe, 0); usbhs_pipe_disable(pipe); /* disable pipe first */ } @@ -798,6 +813,7 @@ static void xfer_work(struct work_struct *work) dev_dbg(dev, " %s %d (%d/ %d)\n", fifo->name, usbhs_pipe_number(pipe), pkt->length, pkt->zero); + usbhs_pipe_running(pipe, 1); usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->trans); usbhs_pipe_enable(pipe); usbhsf_dma_start(pipe, fifo); @@ -829,6 +845,10 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done) if ((uintptr_t)(pkt->buf + pkt->actual) & 0x7) /* 8byte alignment */ goto usbhsf_pio_prepare_push; + /* return at this time if the pipe is running */ + if (usbhs_pipe_is_running(pipe)) + return 0; + /* get enable DMA fifo */ fifo = usbhsf_get_dma_fifo(priv, pkt); if (!fifo) @@ -866,6 +886,7 @@ static int usbhsf_dma_push_done(struct usbhs_pkt *pkt, int *is_done) pkt->actual = pkt->trans; *is_done = !pkt->zero; /* send zero packet ? */ + usbhs_pipe_running(pipe, !*is_done); usbhsf_dma_stop(pipe, pipe->fifo); usbhsf_dma_unmap(pkt); @@ -966,8 +987,10 @@ static int usbhsf_dma_pop_done(struct usbhs_pkt *pkt, int *is_done) if ((pkt->actual == pkt->length) || /* receive all data */ (pkt->trans < maxp)) { /* short packet */ *is_done = 1; + usbhs_pipe_running(pipe, 0); } else { /* re-enable */ + usbhs_pipe_running(pipe, 0); usbhsf_prepare_pop(pkt, is_done); } diff --git a/drivers/usb/renesas_usbhs/pipe.c b/drivers/usb/renesas_usbhs/pipe.c index 7926e1c700f..85e30e1d5e8 100644 --- a/drivers/usb/renesas_usbhs/pipe.c +++ b/drivers/usb/renesas_usbhs/pipe.c @@ -578,6 +578,19 @@ int usbhs_pipe_is_dir_host(struct usbhs_pipe *pipe) return usbhsp_flags_has(pipe, IS_DIR_HOST); } +int usbhs_pipe_is_running(struct usbhs_pipe *pipe) +{ + return usbhsp_flags_has(pipe, IS_RUNNING); +} + +void usbhs_pipe_running(struct usbhs_pipe *pipe, int running) +{ + if (running) + usbhsp_flags_set(pipe, IS_RUNNING); + else + usbhsp_flags_clr(pipe, IS_RUNNING); +} + void usbhs_pipe_data_sequence(struct usbhs_pipe *pipe, int sequence) { u16 mask = (SQCLR | SQSET); diff --git a/drivers/usb/renesas_usbhs/pipe.h b/drivers/usb/renesas_usbhs/pipe.h index b476fde955b..b18a794922d 100644 --- a/drivers/usb/renesas_usbhs/pipe.h +++ b/drivers/usb/renesas_usbhs/pipe.h @@ -36,6 +36,7 @@ struct usbhs_pipe { #define USBHS_PIPE_FLAGS_IS_USED (1 << 0) #define USBHS_PIPE_FLAGS_IS_DIR_IN (1 << 1) #define USBHS_PIPE_FLAGS_IS_DIR_HOST (1 << 2) +#define USBHS_PIPE_FLAGS_IS_RUNNING (1 << 3) struct usbhs_pkt_handle *handler; @@ -79,6 +80,9 @@ int usbhs_pipe_probe(struct usbhs_priv *priv); void usbhs_pipe_remove(struct usbhs_priv *priv); int usbhs_pipe_is_dir_in(struct usbhs_pipe *pipe); int usbhs_pipe_is_dir_host(struct usbhs_pipe *pipe); +int usbhs_pipe_is_running(struct usbhs_pipe *pipe); +void usbhs_pipe_running(struct usbhs_pipe *pipe, int running); + void usbhs_pipe_init(struct usbhs_priv *priv, int (*dma_map_ctrl)(struct usbhs_pkt *pkt, int map)); int usbhs_pipe_get_maxpacket(struct usbhs_pipe *pipe); From b2453bd445b0086f1b9b3dda10dca15588fac2c9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 12 Mar 2015 15:35:19 +0900 Subject: [PATCH 014/141] usb: renesas_usbhs: fix the sequence in xfer_work() commit 9b53d9af7aac09cf249d72bfbf15f08e47c4f7fe upstream. This patch fixes the setup sequence in xfer_work(). Otherwise, sometimes a usb transaction will get stuck. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/renesas_usbhs/fifo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index c531026b826..0c2825b3e2b 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -814,10 +814,10 @@ static void xfer_work(struct work_struct *work) fifo->name, usbhs_pipe_number(pipe), pkt->length, pkt->zero); usbhs_pipe_running(pipe, 1); - usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->trans); - usbhs_pipe_enable(pipe); usbhsf_dma_start(pipe, fifo); + usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->trans); dma_async_issue_pending(chan); + usbhs_pipe_enable(pipe); } /* From 9a95fe3d74d2317ed94bba7aaa7285fd92df8879 Mon Sep 17 00:00:00 2001 From: Kazuya Mizuguchi Date: Mon, 2 Oct 2017 14:01:41 +0900 Subject: [PATCH 015/141] usb: renesas_usbhs: Fix DMAC sequence for receiving zero-length packet commit 29c7f3e68eec4ae94d85ad7b5dfdafdb8089f513 upstream. The DREQE bit of the DnFIFOSEL should be set to 1 after the DE bit of USB-DMAC on R-Car SoCs is set to 1 after the USB-DMAC received a zero-length packet. Otherwise, a transfer completion interruption of USB-DMAC doesn't happen. Even if the driver changes the sequence, normal operations (transmit/receive without zero-length packet) will not cause any side-effects. So, this patch fixes the sequence anyway. Signed-off-by: Kazuya Mizuguchi [shimoda: revise the commit log] Fixes: e73a9891b3a1 ("usb: renesas_usbhs: add DMAEngine support") Cc: # v3.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/renesas_usbhs/fifo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 0c2825b3e2b..95647562fd6 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -814,9 +814,9 @@ static void xfer_work(struct work_struct *work) fifo->name, usbhs_pipe_number(pipe), pkt->length, pkt->zero); usbhs_pipe_running(pipe, 1); - usbhsf_dma_start(pipe, fifo); usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->trans); dma_async_issue_pending(chan); + usbhsf_dma_start(pipe, fifo); usbhs_pipe_enable(pipe); } From 4f54b2642d4f1f908bf240fe5040925672b51b77 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 23 Jun 2017 15:08:57 -0700 Subject: [PATCH 016/141] fs/exec.c: account for argv/envp pointers commit 98da7d08850fb8bdeb395d6368ed15753304aa0c upstream. When limiting the argv/envp strings during exec to 1/4 of the stack limit, the storage of the pointers to the strings was not included. This means that an exec with huge numbers of tiny strings could eat 1/4 of the stack limit in strings and then additional space would be later used by the pointers to the strings. For example, on 32-bit with a 8MB stack rlimit, an exec with 1677721 single-byte strings would consume less than 2MB of stack, the max (8MB / 4) amount allowed, but the pointers to the strings would consume the remaining additional stack space (1677721 * 4 == 6710884). The result (1677721 + 6710884 == 8388605) would exhaust stack space entirely. Controlling this stack exhaustion could result in pathological behavior in setuid binaries (CVE-2017-1000365). [akpm@linux-foundation.org: additional commenting from Kees] Fixes: b6a2fea39318 ("mm: variable length argument support") Link: http://lkml.kernel.org/r/20170622001720.GA32173@beast Signed-off-by: Kees Cook Acked-by: Rik van Riel Acked-by: Michal Hocko Cc: Alexander Viro Cc: Qualys Security Advisory Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- fs/exec.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index c945a555eb2..e3abc8e3d58 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -196,8 +196,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (write) { unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; + unsigned long ptr_size; struct rlimit *rlim; + /* + * Since the stack will hold pointers to the strings, we + * must account for them as well. + * + * The size calculation is the entire vma while each arg page is + * built, so each time we get here it's calculating how far it + * is currently (rather than each call being just the newly + * added size from the arg page). As a result, we need to + * always add the entire size of the pointers, so that on the + * last call to get_arg_page() we'll actually have the entire + * correct size. + */ + ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + if (ptr_size > ULONG_MAX - size) + goto fail; + size += ptr_size; + acct_arg_size(bprm, size / PAGE_SIZE); /* @@ -215,13 +233,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * to work from. */ rlim = current->signal->rlim; - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { - put_page(page); - return NULL; - } + if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) + goto fail; } return page; + +fail: + put_page(page); + return NULL; } static void put_arg_page(struct page *page) From 978e42bd2930a79556d539a488372d994206d413 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 15 Jun 2017 00:12:24 +0100 Subject: [PATCH 017/141] rxrpc: Fix several cases where a padded len isn't checked in ticket decode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5f2f97656ada8d811d3c1bef503ced266fcd53a0 upstream. This fixes CVE-2017-7482. When a kerberos 5 ticket is being decoded so that it can be loaded into an rxrpc-type key, there are several places in which the length of a variable-length field is checked to make sure that it's not going to overrun the available data - but the data is padded to the nearest four-byte boundary and the code doesn't check for this extra. This could lead to the size-remaining variable wrapping and the data pointer going over the end of the buffer. Fix this by making the various variable-length data checks use the padded length. Reported-by: 石磊 Signed-off-by: David Howells Reviewed-by: Marc Dionne Reviewed-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- net/rxrpc/ar-key.c | 64 ++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 7633a752c65..10e6e5de36e 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -213,7 +213,7 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, unsigned int *_toklen) { const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, n_parts, loop, tmp; + unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen; /* there must be at least one name, and at least #names+1 length * words */ @@ -243,16 +243,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, toklen -= 4; if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX) return -EINVAL; - if (tmp > toklen) + paddedlen = (tmp + 3) & ~3; + if (paddedlen > toklen) return -EINVAL; princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL); if (!princ->name_parts[loop]) return -ENOMEM; memcpy(princ->name_parts[loop], xdr, tmp); princ->name_parts[loop][tmp] = 0; - tmp = (tmp + 3) & ~3; - toklen -= tmp; - xdr += tmp >> 2; + toklen -= paddedlen; + xdr += paddedlen >> 2; } if (toklen < 4) @@ -261,16 +261,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, toklen -= 4; if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX) return -EINVAL; - if (tmp > toklen) + paddedlen = (tmp + 3) & ~3; + if (paddedlen > toklen) return -EINVAL; princ->realm = kmalloc(tmp + 1, GFP_KERNEL); if (!princ->realm) return -ENOMEM; memcpy(princ->realm, xdr, tmp); princ->realm[tmp] = 0; - tmp = (tmp + 3) & ~3; - toklen -= tmp; - xdr += tmp >> 2; + toklen -= paddedlen; + xdr += paddedlen >> 2; _debug("%s/...@%s", princ->name_parts[0], princ->realm); @@ -289,7 +289,7 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td, unsigned int *_toklen) { const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, len; + unsigned int toklen = *_toklen, len, paddedlen; /* there must be at least one tag and one length word */ if (toklen <= 8) @@ -303,15 +303,17 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td, toklen -= 8; if (len > max_data_size) return -EINVAL; + paddedlen = (len + 3) & ~3; + if (paddedlen > toklen) + return -EINVAL; td->data_len = len; if (len > 0) { td->data = kmemdup(xdr, len, GFP_KERNEL); if (!td->data) return -ENOMEM; - len = (len + 3) & ~3; - toklen -= len; - xdr += len >> 2; + toklen -= paddedlen; + xdr += paddedlen >> 2; } _debug("tag %x len %x", td->tag, td->data_len); @@ -383,7 +385,7 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, const __be32 **_xdr, unsigned int *_toklen) { const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, len; + unsigned int toklen = *_toklen, len, paddedlen; /* there must be at least one length word */ if (toklen <= 4) @@ -395,6 +397,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, toklen -= 4; if (len > AFSTOKEN_K5_TIX_MAX) return -EINVAL; + paddedlen = (len + 3) & ~3; + if (paddedlen > toklen) + return -EINVAL; *_tktlen = len; _debug("ticket len %u", len); @@ -403,9 +408,8 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, *_ticket = kmemdup(xdr, len, GFP_KERNEL); if (!*_ticket) return -ENOMEM; - len = (len + 3) & ~3; - toklen -= len; - xdr += len >> 2; + toklen -= paddedlen; + xdr += paddedlen >> 2; } *_xdr = xdr; @@ -549,7 +553,7 @@ static int rxrpc_instantiate_xdr(struct key *key, const void *data, size_t datal { const __be32 *xdr = data, *token; const char *cp; - unsigned int len, tmp, loop, ntoken, toklen, sec_ix; + unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix; int ret; _enter(",{%x,%x,%x,%x},%zu", @@ -574,22 +578,21 @@ static int rxrpc_instantiate_xdr(struct key *key, const void *data, size_t datal if (len < 1 || len > AFSTOKEN_CELL_MAX) goto not_xdr; datalen -= 4; - tmp = (len + 3) & ~3; - if (tmp > datalen) + paddedlen = (len + 3) & ~3; + if (paddedlen > datalen) goto not_xdr; cp = (const char *) xdr; for (loop = 0; loop < len; loop++) if (!isprint(cp[loop])) goto not_xdr; - if (len < tmp) - for (; loop < tmp; loop++) - if (cp[loop]) - goto not_xdr; + for (; loop < paddedlen; loop++) + if (cp[loop]) + goto not_xdr; _debug("cellname: [%u/%u] '%*.*s'", - len, tmp, len, len, (const char *) xdr); - datalen -= tmp; - xdr += tmp >> 2; + len, paddedlen, len, len, (const char *) xdr); + datalen -= paddedlen; + xdr += paddedlen >> 2; /* get the token count */ if (datalen < 12) @@ -610,10 +613,11 @@ static int rxrpc_instantiate_xdr(struct key *key, const void *data, size_t datal sec_ix = ntohl(*xdr); datalen -= 4; _debug("token: [%x/%zx] %x", toklen, datalen, sec_ix); - if (toklen < 20 || toklen > datalen) + paddedlen = (toklen + 3) & ~3; + if (toklen < 20 || toklen > datalen || paddedlen > datalen) goto not_xdr; - datalen -= (toklen + 3) & ~3; - xdr += (toklen + 3) >> 2; + datalen -= paddedlen; + xdr += paddedlen >> 2; } while (--loop > 0); From 7571b7478bc9cfecb7894d909bda5cbd0f4aae04 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Wed, 2 Aug 2017 19:50:14 +0200 Subject: [PATCH 018/141] xfrm: policy: check policy direction value commit 7bab09631c2a303f87a7eb7e3d69e888673b9b7e upstream. The 'dir' parameter in xfrm_migrate() is a user-controlled byte which is used as an array index. This can lead to an out-of-bound access, kernel lockup and DoS. Add a check for the 'dir' value. This fixes CVE-2017-11600. References: https://bugzilla.redhat.com/show_bug.cgi?id=1474928 Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)") Cc: # v2.6.21-rc1 Reported-by: "bo Zhang" Signed-off-by: Vladis Dronov Signed-off-by: Steffen Klassert Signed-off-by: Willy Tarreau --- net/xfrm/xfrm_policy.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ea970b8002a..10c556e373b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3201,9 +3201,15 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_state *x_new[XFRM_MAX_DEPTH]; struct xfrm_migrate *mp; + /* Stage 0 - sanity checks */ if ((err = xfrm_migrate_check(m, num_migrate)) < 0) goto out; + if (dir >= XFRM_POLICY_MAX) { + err = -EINVAL; + goto out; + } + /* Stage 1 - find policy */ if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { err = -ENOENT; From 44ec968933c5d5041fbdff2e93a2f769435f9607 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Wed, 13 Sep 2017 00:21:21 +0200 Subject: [PATCH 019/141] nl80211: check for the required netlink attributes presence commit e785fa0a164aa11001cba931367c7f94ffaff888 upstream. nl80211_set_rekey_data() does not check if the required attributes NL80211_REKEY_DATA_{REPLAY_CTR,KEK,KCK} are present when processing NL80211_CMD_SET_REKEY_OFFLOAD request. This request can be issued by users with CAP_NET_ADMIN privilege and may result in NULL dereference and a system crash. Add a check for the required attributes presence. This patch is based on the patch by bo Zhang. This fixes CVE-2017-12153. References: https://bugzilla.redhat.com/show_bug.cgi?id=1491046 Fixes: e5497d766ad ("cfg80211/nl80211: support GTK rekey offload") Cc: # v3.1-rc1 Reported-by: bo Zhang Signed-off-by: Vladis Dronov Signed-off-by: Johannes Berg Signed-off-by: Willy Tarreau --- net/wireless/nl80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dd3dbed89c8..32c5443514c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8010,6 +8010,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] || + !tb[NL80211_REKEY_DATA_KCK]) + return -EINVAL; if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN) return -ERANGE; if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN) From 3937c2c918c2ac71c064892d6631d0fc47c882c2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Oct 2017 11:09:20 +0200 Subject: [PATCH 020/141] ALSA: seq: Fix use-after-free at creating a port commit 71105998845fb012937332fe2e806d443c09e026 upstream. There is a potential race window opened at creating and deleting a port via ioctl, as spotted by fuzzing. snd_seq_create_port() creates a port object and returns its pointer, but it doesn't take the refcount, thus it can be deleted immediately by another thread. Meanwhile, snd_seq_ioctl_create_port() still calls the function snd_seq_system_client_ev_port_start() with the created port object that is being deleted, and this triggers use-after-free like: BUG: KASAN: use-after-free in snd_seq_ioctl_create_port+0x504/0x630 [snd_seq] at addr ffff8801f2241cb1 ============================================================================= BUG kmalloc-512 (Tainted: G B ): kasan: bad access detected ----------------------------------------------------------------------------- INFO: Allocated in snd_seq_create_port+0x94/0x9b0 [snd_seq] age=1 cpu=3 pid=4511 ___slab_alloc+0x425/0x460 __slab_alloc+0x20/0x40 kmem_cache_alloc_trace+0x150/0x190 snd_seq_create_port+0x94/0x9b0 [snd_seq] snd_seq_ioctl_create_port+0xd1/0x630 [snd_seq] snd_seq_do_ioctl+0x11c/0x190 [snd_seq] snd_seq_ioctl+0x40/0x80 [snd_seq] do_vfs_ioctl+0x54b/0xda0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x16/0x75 INFO: Freed in port_delete+0x136/0x1a0 [snd_seq] age=1 cpu=2 pid=4717 __slab_free+0x204/0x310 kfree+0x15f/0x180 port_delete+0x136/0x1a0 [snd_seq] snd_seq_delete_port+0x235/0x350 [snd_seq] snd_seq_ioctl_delete_port+0xc8/0x180 [snd_seq] snd_seq_do_ioctl+0x11c/0x190 [snd_seq] snd_seq_ioctl+0x40/0x80 [snd_seq] do_vfs_ioctl+0x54b/0xda0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x16/0x75 Call Trace: [] dump_stack+0x63/0x82 [] print_trailer+0xfb/0x160 [] object_err+0x34/0x40 [] kasan_report.part.2+0x223/0x520 [] ? snd_seq_ioctl_create_port+0x504/0x630 [snd_seq] [] __asan_report_load1_noabort+0x2e/0x30 [] snd_seq_ioctl_create_port+0x504/0x630 [snd_seq] [] ? snd_seq_ioctl_delete_port+0x180/0x180 [snd_seq] [] ? taskstats_exit+0xbc0/0xbc0 [] snd_seq_do_ioctl+0x11c/0x190 [snd_seq] [] snd_seq_ioctl+0x40/0x80 [snd_seq] [] ? acct_account_cputime+0x63/0x80 [] do_vfs_ioctl+0x54b/0xda0 ..... We may fix this in a few different ways, and in this patch, it's fixed simply by taking the refcount properly at snd_seq_create_port() and letting the caller unref the object after use. Also, there is another potential use-after-free by sprintf() call in snd_seq_create_port(), and this is moved inside the lock. This fix covers CVE-2017-15265. Reported-and-tested-by: Michael23 Yu Suggested-by: Linus Torvalds Cc: Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/core/seq/seq_clientmgr.c | 6 +++++- sound/core/seq/seq_ports.c | 7 +++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index d449dde1bf5..7b5a7902b7a 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1248,6 +1248,7 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, struct snd_seq_client_port *port; struct snd_seq_port_info info; struct snd_seq_port_callback *callback; + int port_idx; if (copy_from_user(&info, arg, sizeof(info))) return -EFAULT; @@ -1261,7 +1262,9 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, return -ENOMEM; if (client->type == USER_CLIENT && info.kernel) { - snd_seq_delete_port(client, port->addr.port); + port_idx = port->addr.port; + snd_seq_port_unlock(port); + snd_seq_delete_port(client, port_idx); return -EINVAL; } if (client->type == KERNEL_CLIENT) { @@ -1283,6 +1286,7 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, snd_seq_set_port_info(port, &info); snd_seq_system_client_ev_port_start(port->addr.client, port->addr.port); + snd_seq_port_unlock(port); if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index ee0522a8f73..a28d1acad57 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -122,7 +122,9 @@ static void port_subs_info_init(struct snd_seq_port_subs_info *grp) } -/* create a port, port number is returned (-1 on failure) */ +/* create a port, port number is returned (-1 on failure); + * the caller needs to unref the port via snd_seq_port_unlock() appropriately + */ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, int port) { @@ -153,6 +155,7 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, snd_use_lock_init(&new_port->use_lock); port_subs_info_init(&new_port->c_src); port_subs_info_init(&new_port->c_dest); + snd_use_lock_use(&new_port->use_lock); num = port >= 0 ? port : 0; mutex_lock(&client->ports_mutex); @@ -167,9 +170,9 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, list_add_tail(&new_port->list, &p->list); client->num_ports++; new_port->addr.port = num; /* store the port number in the port */ + sprintf(new_port->name, "port-%d", num); write_unlock_irqrestore(&client->ports_lock, flags); mutex_unlock(&client->ports_mutex); - sprintf(new_port->name, "port-%d", num); return new_port; } From 6c176369e70376d26e2de758401176a8342d7af5 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:08:29 +0100 Subject: [PATCH 021/141] MIPS: Send SIGILL for BPOSGE32 in `__compute_return_epc_for_insn' commit 7b82c1058ac1f8f8b9f2b8786b1f710a57a870a8 upstream. Fix commit e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") and send SIGILL rather than SIGBUS whenever an unimplemented BPOSGE32 DSP ASE instruction has been encountered in `__compute_return_epc_for_insn' as our Reserved Instruction exception handler would in response to an attempt to actually execute the instruction. Sending SIGBUS only makes sense for the unaligned PC case, since moved to `__compute_return_epc'. Adjust function documentation accordingly, correct formatting and use `pr_info' rather than `printk' as the other exit path already does. Fixes: e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 2.6.14+ Patchwork: https://patchwork.linux-mips.org/patch/16396/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/branch.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 46c2ad0703a..92509962897 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -200,7 +200,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) * * @regs: Pointer to pt_regs * @insn: branch instruction to decode - * @returns: -EFAULT on error and forces SIGBUS, and on success + * @returns: -EFAULT on error and forces SIGILL, and on success * returns 0 or BRANCH_LIKELY_TAKEN as appropriate after * evaluating the branch. */ @@ -436,8 +436,9 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, return ret; sigill: - printk("%s: DSP branch but not DSP ASE - sending SIGBUS.\n", current->comm); - force_sig(SIGBUS, current); + pr_info("%s: DSP branch but not DSP ASE - sending SIGILL.\n", + current->comm); + force_sig(SIGILL, current); return -EFAULT; } EXPORT_SYMBOL_GPL(__compute_return_epc_for_insn); From e044ff5c05d3d24f1f0d70122b535d8390010b05 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 26 Apr 2017 12:24:21 +0200 Subject: [PATCH 022/141] serial: ifx6x60: fix use-after-free on module unload commit 1e948479b3d63e3ac0ecca13cbf4921c7d17c168 upstream. Make sure to deregister the SPI driver before releasing the tty driver to avoid use-after-free in the SPI remove callback where the tty devices are deregistered. Fixes: 72d4724ea54c ("serial: ifx6x60: Add modem power off function in the platform reboot process") Cc: stable # 3.8 Cc: Jun Chen Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/tty/serial/ifx6x60.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c index 8b1534c424a..be3dc751dfb 100644 --- a/drivers/tty/serial/ifx6x60.c +++ b/drivers/tty/serial/ifx6x60.c @@ -1379,9 +1379,9 @@ static struct spi_driver ifx_spi_driver = { static void __exit ifx_spi_exit(void) { /* unregister */ + spi_unregister_driver((void *)&ifx_spi_driver); tty_unregister_driver(tty_drv); put_tty_driver(tty_drv); - spi_unregister_driver((void *)&ifx_spi_driver); unregister_reboot_notifier(&ifx_modem_reboot_notifier_block); } From f862c9436454a5b78d29ebaebc547aa6d1474f7a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Jun 2017 14:48:40 +0100 Subject: [PATCH 023/141] KEYS: fix dereferencing NULL payload with nonzero length commit 5649645d725c73df4302428ee4e02c869248b4c5 upstream. sys_add_key() and the KEYCTL_UPDATE operation of sys_keyctl() allowed a NULL payload with nonzero length to be passed to the key type's ->preparse(), ->instantiate(), and/or ->update() methods. Various key types including asymmetric, cifs.idmap, cifs.spnego, and pkcs7_test did not handle this case, allowing an unprivileged user to trivially cause a NULL pointer dereference (kernel oops) if one of these key types was present. Fix it by doing the copy_from_user() when 'plen' is nonzero rather than when '_payload' is non-NULL, causing the syscall to fail with EFAULT as expected when an invalid buffer is specified. Cc: stable@vger.kernel.org # 2.6.10+ Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Willy Tarreau --- security/keys/keyctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 066baa1926b..7576f49eeb3 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -93,7 +93,7 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type, payload = NULL; vm = false; - if (_payload) { + if (plen) { ret = -ENOMEM; payload = kmalloc(plen, GFP_KERNEL | __GFP_NOWARN); if (!payload) { @@ -327,7 +327,7 @@ long keyctl_update_key(key_serial_t id, /* pull the payload in if one was supplied */ payload = NULL; - if (_payload) { + if (plen) { ret = -ENOMEM; payload = kmalloc(plen, GFP_KERNEL); if (!payload) From 54fd65c07635e1fc5d263c169a970c9ceea1598d Mon Sep 17 00:00:00 2001 From: Michael Thalmeier Date: Thu, 18 May 2017 16:14:14 +0200 Subject: [PATCH 024/141] usb: chipidea: debug: check before accessing ci_role commit 0340ff83cd4475261e7474033a381bc125b45244 upstream. ci_role BUGs when the role is >= CI_ROLE_END. Cc: stable@vger.kernel.org #v3.10+ Signed-off-by: Michael Thalmeier Signed-off-by: Peter Chen Signed-off-by: Willy Tarreau --- drivers/usb/chipidea/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c index 36a7063a6cb..5a38ca87f40 100644 --- a/drivers/usb/chipidea/debug.c +++ b/drivers/usb/chipidea/debug.c @@ -203,7 +203,8 @@ static int ci_role_show(struct seq_file *s, void *data) { struct ci13xxx *ci = s->private; - seq_printf(s, "%s\n", ci_role(ci)->name); + if (ci->role != CI_ROLE_END) + seq_printf(s, "%s\n", ci_role(ci)->name); return 0; } From 69e0576f17049ea34477290c3a0a40787ccd8564 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Wilczy=C5=84ski?= Date: Sun, 11 Jun 2017 17:28:39 +0900 Subject: [PATCH 025/141] cpufreq: conservative: Allow down_threshold to take values from 1 to 10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b8e11f7d2791bd9320be1c6e772a60b2aa093e45 upstream. Commit 27ed3cd2ebf4 (cpufreq: conservative: Fix the logic in frequency decrease checking) removed the 10 point substraction when comparing the load against down_threshold but did not remove the related limit for the down_threshold value. As a result, down_threshold lower than 11 is not allowed even though values from 1 to 10 do work correctly too. The comment ("cannot be lower than 11 otherwise freq will not fall") also does not apply after removing the substraction. For this reason, allow down_threshold to take any value from 1 to 99 and fix the related comment. Fixes: 27ed3cd2ebf4 (cpufreq: conservative: Fix the logic in frequency decrease checking) Signed-off-by: Tomasz Wilczyński Acked-by: Viresh Kumar Cc: 3.10+ # 3.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Willy Tarreau --- drivers/cpufreq/cpufreq_conservative.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index f97cb3d8c5a..f34e8191665 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -212,8 +212,8 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, int ret; ret = sscanf(buf, "%u", &input); - /* cannot be lower than 11 otherwise freq will not fall */ - if (ret != 1 || input < 11 || input > 100 || + /* cannot be lower than 1 otherwise freq will not fall */ + if (ret != 1 || input < 1 || input > 100 || input >= cs_tuners->up_threshold) return -EINVAL; From 790d869bd9dec1d7d101bdd05026f6a5bfa21222 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 1 Jun 2017 16:18:15 +0530 Subject: [PATCH 026/141] powerpc/kprobes: Pause function_graph tracing during jprobes handling commit a9f8553e935f26cb5447f67e280946b0923cd2dc upstream. This fixes a crash when function_graph and jprobes are used together. This is essentially commit 237d28db036e ("ftrace/jprobes/x86: Fix conflict between jprobes and function graph tracing"), but for powerpc. Jprobes breaks function_graph tracing since the jprobe hook needs to use jprobe_return(), which never returns back to the hook, but instead to the original jprobe'd function. The solution is to momentarily pause function_graph tracing before invoking the jprobe hook and re-enable it when returning back to the original jprobe'd function. Fixes: 6794c78243bf ("powerpc64: port of the function graph tracer") Cc: stable@vger.kernel.org # v2.6.30+ Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Acked-by: Steven Rostedt (VMware) Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/kernel/kprobes.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 11f5b03a0b0..762c10d46d6 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -529,6 +529,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); + return 1; } @@ -551,6 +560,8 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) * saved regs... */ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); preempt_enable_no_resched(); return 1; } From fe7e5549d05c83b2ffd4cad1eec2156f03f859d2 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 16 Jun 2017 19:35:34 +0100 Subject: [PATCH 027/141] staging: comedi: fix clean-up of comedi_class in comedi_init() commit a9332e9ad09c2644c99058fcf6ae2f355e93ce74 upstream. There is a clean-up bug in the core comedi module initialization functions, `comedi_init()`. If the `comedi_num_legacy_minors` module parameter is non-zero (and valid), it creates that many "legacy" devices and registers them in SysFS. A failure causes the function to clean up and return an error. Unfortunately, it fails to destroy the "comedi" class that was created earlier. Fix it by adding a call to `class_destroy(comedi_class)` at the appropriate place in the clean-up sequence. Signed-off-by: Ian Abbott Cc: # 3.9+ Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/staging/comedi/comedi_fops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 57457011ded..2aba2f75fb8 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2563,6 +2563,7 @@ static int __init comedi_init(void) dev = comedi_alloc_board_minor(NULL); if (IS_ERR(dev)) { comedi_cleanup_board_minors(); + class_destroy(comedi_class); cdev_del(&comedi_cdev); unregister_chrdev_region(MKDEV(COMEDI_MAJOR, 0), COMEDI_NUM_MINORS); From 7136ca73ff3496758b56f60b6fe76d675e69cd21 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Fri, 7 Jul 2017 21:09:06 +0100 Subject: [PATCH 028/141] brcmfmac: fix possible buffer overflow in brcmf_cfg80211_mgmt_tx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8f44c9a41386729fea410e688959ddaa9d51be7c upstream. The lower level nl80211 code in cfg80211 ensures that "len" is between 25 and NL80211_ATTR_FRAME (2304). We subtract DOT11_MGMT_HDR_LEN (24) from "len" so thats's max of 2280. However, the action_frame->data[] buffer is only BRCMF_FIL_ACTION_FRAME_SIZE (1800) bytes long so this memcpy() can overflow. memcpy(action_frame->data, &buf[DOT11_MGMT_HDR_LEN], le16_to_cpu(action_frame->len)); Cc: stable@vger.kernel.org # 3.9.x Fixes: 18e2f61db3b70 ("brcmfmac: P2P action frame tx.") Reported-by: "freenerguo(郭大兴)" Signed-off-by: Arend van Spriel Signed-off-by: David S. Miller [wt: s/cfg80211.c/wl_cfg80211.c in 3.10] Signed-off-by: Willy Tarreau --- drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index 2c524305589..8afb6092533 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -4019,6 +4019,11 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, true, GFP_KERNEL); } else if (ieee80211_is_action(mgmt->frame_control)) { + if (len > BRCMF_FIL_ACTION_FRAME_SIZE + DOT11_MGMT_HDR_LEN) { + brcmf_err("invalid action frame length\n"); + err = -EINVAL; + goto exit; + } af_params = kzalloc(sizeof(*af_params), GFP_KERNEL); if (af_params == NULL) { brcmf_err("unable to allocate frame\n"); From 1fc0b43a1df72704b4d5f6cd5a2ca6c09736d67f Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sat, 3 Jun 2017 09:35:06 +0200 Subject: [PATCH 029/141] vt: fix unchecked __put_user() in tioclinux ioctls commit 6987dc8a70976561d22450b5858fc9767788cc1c upstream. Only read access is checked before this call. Actually, at the moment this is not an issue, as every in-tree arch does the same manual checks for VERIFY_READ vs VERIFY_WRITE, relying on the MMU to tell them apart, but this wasn't the case in the past and may happen again on some odd arch in the future. If anyone cares about 3.7 and earlier, this is a security hole (untested) on real 80386 CPUs. Signed-off-by: Adam Borowski CC: stable@vger.kernel.org # v3.7- Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/tty/vt/vt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 010ec70d59f..3390a39f5a7 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2601,13 +2601,13 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) * related to the kernel should not use this. */ data = vt_get_shift_state(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_GETMOUSEREPORTING: console_lock(); /* May be overkill */ data = mouse_reporting(); console_unlock(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_SETVESABLANK: console_lock(); @@ -2616,7 +2616,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) break; case TIOCL_GETKMSGREDIRECT: data = vt_get_kmsg_redirect(); - ret = __put_user(data, p); + ret = put_user(data, p); break; case TIOCL_SETKMSGREDIRECT: if (!capable(CAP_SYS_ADMIN)) { From f9c9c73b86b0950f670f5df311fc94910c4cd489 Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Tue, 2 May 2017 09:38:35 -0400 Subject: [PATCH 030/141] crypto: talitos - Extend max key length for SHA384/512-HMAC and AEAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 03d2c5114c95797c0aa7d9f463348b171a274fd4 upstream. An updated patch that also handles the additional key length requirements for the AEAD algorithms. The max keysize is not 96. For SHA384/512 it's 128, and for the AEAD algorithms it's longer still. Extend the max keysize for the AEAD size for AES256 + HMAC(SHA512). Cc: # 3.6+ Fixes: 357fb60502ede ("crypto: talitos - add sha224, sha384 and sha512 to existing AEAD algorithms") Signed-off-by: Martin Hicks Acked-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Willy Tarreau --- drivers/crypto/talitos.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 057d894eee6..6e5ba44dfaa 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -623,7 +623,7 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 -#define TALITOS_MAX_KEY_SIZE 96 +#define TALITOS_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + SHA512_BLOCK_SIZE) #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ #define MD5_BLOCK_SIZE 64 @@ -1380,6 +1380,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, { struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); + if (keylen > TALITOS_MAX_KEY_SIZE) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + memcpy(&ctx->key, key, keylen); ctx->keylen = keylen; From c11c8fcaa2e91f3a579fabbc56ef07a11d4db2c0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 Jun 2017 16:56:18 +0200 Subject: [PATCH 031/141] PM / Domains: Fix unsafe iteration over modified list of device links commit c6e83cac3eda5f7dd32ee1453df2f7abb5c6cd46 upstream. pm_genpd_remove_subdomain() iterates over domain's master_links list and removes matching element thus it has to use safe version of list iteration. Fixes: f721889ff65a ("PM / Domains: Support for generic I/O PM domains (v8)") Cc: 3.1+ # 3.1+ Signed-off-by: Krzysztof Kozlowski Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Signed-off-by: Willy Tarreau --- drivers/base/power/domain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 7072404c8b6..8d73f999f40 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1692,7 +1692,7 @@ int pm_genpd_add_subdomain_names(const char *master_name, int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *subdomain) { - struct gpd_link *link; + struct gpd_link *l, *link; int ret = -EINVAL; if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) @@ -1701,7 +1701,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, start: genpd_acquire_lock(genpd); - list_for_each_entry(link, &genpd->master_links, master_node) { + list_for_each_entry_safe(link, l, &genpd->master_links, master_node) { if (link->slave != subdomain) continue; From 8e38d086b8b623ec1fad6290fcb807dc530a1b1e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 11 Jul 2017 22:10:54 +1000 Subject: [PATCH 032/141] powerpc/64: Fix atomic64_inc_not_zero() to return an int commit 01e6a61aceb82e13bec29502a8eb70d9574f97ad upstream. Although it's not documented anywhere, there is an expectation that atomic64_inc_not_zero() returns a result which fits in an int. This is the behaviour implemented on all arches except powerpc. This has caused at least one bug in practice, in the percpu-refcount code, where the long result from our atomic64_inc_not_zero() was truncated to an int leading to lost references and stuck systems. That was worked around in that code in commit 966d2b04e070 ("percpu-refcount: fix reference leak during percpu-atomic transition"). To the best of my grepping abilities there are no other callers in-tree which truncate the value, but we should fix it anyway. Because the breakage is subtle and potentially very harmful I'm also tagging it for stable. Code generation is largely unaffected because in most cases the callers are just using the result for a test anyway. In particular the case of fget() that was mentioned in commit a6cf7ed5119f ("powerpc/atomic: Implement atomic*_inc_not_zero") generates exactly the same code. Fixes: a6cf7ed5119f ("powerpc/atomic: Implement atomic*_inc_not_zero") Cc: stable@vger.kernel.org # v3.4 Noticed-by: Linus Torvalds Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/include/asm/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index e3b1d41c89b..84bcdfa410f 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -501,7 +501,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) * Atomically increments @v by 1, so long as @v is non-zero. * Returns non-zero if @v was non-zero, and zero otherwise. */ -static __inline__ long atomic64_inc_not_zero(atomic64_t *v) +static __inline__ int atomic64_inc_not_zero(atomic64_t *v) { long t1, t2; @@ -520,7 +520,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v) : "r" (&v->counter) : "cc", "xer", "memory"); - return t1; + return t1 != 0; } #endif /* __powerpc64__ */ From 733c88d4da7984ec1ce65d6628ad0da25816b7a2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 15 Jun 2017 09:46:39 +1000 Subject: [PATCH 033/141] powerpc: Fix emulation of mfocrf in emulate_step() commit 64e756c55aa46fc18fd53e8f3598b73b528d8637 upstream. From POWER4 onwards, mfocrf() only places the specified CR field into the destination GPR, and the rest of it is set to 0. The PowerPC AS from version 3.0 now requires this behaviour. The emulation code currently puts the entire CR into the destination GPR. Fix it. Fixes: 6888199f7fe5 ("[POWERPC] Emulate more instructions in software") Cc: stable@vger.kernel.org # v2.6.22+ Signed-off-by: Anton Blanchard Acked-by: Naveen N. Rao Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/lib/sstep.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 08490ecc465..23da15ff779 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -863,6 +863,19 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; #endif case 19: /* mfcr */ + if ((instr >> 20) & 1) { + imm = 0xf0000000UL; + for (sh = 0; sh < 8; ++sh) { + if (instr & (0x80000 >> sh)) { + regs->gpr[rd] = regs->ccr & imm; + break; + } + imm >>= 4; + } + + goto instr_done; + } + regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; From ecac824c4d0711df071a47cd8eb374ce53ce8727 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 6 Jul 2017 18:46:43 +1000 Subject: [PATCH 034/141] powerpc/asm: Mark cr0 as clobbered in mftb() commit 2400fd822f467cb4c886c879d8ad99feac9cf319 upstream. The workaround for the CELL timebase bug does not correctly mark cr0 as being clobbered. This means GCC doesn't know that the asm block changes cr0 and might leave the result of an unrelated comparison in cr0 across the block, which we then trash, leading to basically random behaviour. Fixes: 859deea949c3 ("[POWERPC] Cell timebase bug workaround") Cc: stable@vger.kernel.org # v2.6.19+ Signed-off-by: Oliver O'Halloran [mpe: Tweak change log and flag for stable] Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/include/asm/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 469d7715d6a..954168be787 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1136,7 +1136,7 @@ " .llong 0\n" \ " .llong 0\n" \ ".previous" \ - : "=r" (rval) : "i" (CPU_FTR_CELL_TB_BUG)); rval;}) + : "=r" (rval) : "i" (CPU_FTR_CELL_TB_BUG) : "cr0"); rval;}) #else #define mftb() ({unsigned long rval; \ asm volatile("mftb %0" : "=r" (rval)); rval;}) From a2555cd3e61b44f4f451989bd21299c546ff635c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 19 Jul 2017 16:16:54 +0900 Subject: [PATCH 035/141] usb: renesas_usbhs: fix usbhsc_resume() for !USBHSF_RUNTIME_PWCTRL commit 59a0879a0e17b2e43ecdc5e3299da85b8410d7ce upstream. This patch fixes an issue that some registers may be not initialized after resume if the USBHSF_RUNTIME_PWCTRL is not set. Otherwise, if a cable is not connected, the driver will not enable INTENB0.VBSE after resume. And then, the driver cannot detect the VBUS. Fixes: ca8a282a5373 ("usb: gadget: renesas_usbhs: add suspend/resume support") Cc: # v3.2+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/renesas_usbhs/common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index cfd205036ab..a4b02734258 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -600,8 +600,10 @@ static int usbhsc_resume(struct device *dev) struct usbhs_priv *priv = dev_get_drvdata(dev); struct platform_device *pdev = usbhs_priv_to_pdev(priv); - if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) + if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) { usbhsc_power_ctrl(priv, 1); + usbhs_mod_autonomy_mode(priv); + } usbhs_platform_call(priv, phy_reset, pdev); From 6aa23d16b4dab21974ac5fcf46ac75a888006bc7 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:06:19 +0100 Subject: [PATCH 036/141] MIPS: Actually decode JALX in `__compute_return_epc_for_insn' commit a9db101b735a9d49295326ae41f610f6da62b08c upstream. Complement commit fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.") and actually decode the regular MIPS JALX major instruction opcode, the handling of which has been added with the said commit for EPC calculation in `__compute_return_epc_for_insn'. Fixes: fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 3.9+ Patchwork: https://patchwork.linux-mips.org/patch/16394/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/branch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 92509962897..63b942f613c 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -297,6 +297,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, /* * These are unconditional and in j_format. */ + case jalx_op: case jal_op: regs->regs[31] = regs->cp0_epc + 8; case j_op: From 2121b630f51d64af5ce46722d9f419093de80366 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:07:34 +0100 Subject: [PATCH 037/141] MIPS: Fix unaligned PC interpretation in `compute_return_epc' commit 11a3799dbeb620bf0400b1fda5cc2c6bea55f20a upstream. Fix a regression introduced with commit fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.") and defer to `__compute_return_epc' if the ISA bit is set in EPC with non-MIPS16, non-microMIPS hardware, which will then arrange for a SIGBUS due to an unaligned instruction reference. Returning EPC here is never correct as the API defines this function's result to be either a negative error code on failure or one of 0 and BRANCH_LIKELY_TAKEN on success. Fixes: fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 3.9+ Patchwork: https://patchwork.linux-mips.org/patch/16395/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/include/asm/branch.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h index e28a3e0eb3c..582d8b61ce5 100644 --- a/arch/mips/include/asm/branch.h +++ b/arch/mips/include/asm/branch.h @@ -44,10 +44,7 @@ static inline int compute_return_epc(struct pt_regs *regs) return __microMIPS_compute_return_epc(regs); if (cpu_has_mips16) return __MIPS16e_compute_return_epc(regs); - return regs->cp0_epc; - } - - if (!delay_slot(regs)) { + } else if (!delay_slot(regs)) { regs->cp0_epc += 4; return 0; } From 4748ac3f70a2b3663bd59b3dcb00e265fcde1a9f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 16 Jun 2017 00:05:08 +0100 Subject: [PATCH 038/141] MIPS: math-emu: Prevent wrong ISA mode instruction emulation commit 13769ebad0c42738831787e27c7c7f982e7da579 upstream. Terminate FPU emulation immediately whenever an ISA mode switch has been observed. This is so that we do not interpret machine code in the wrong mode, for example when a regular MIPS FPU instruction has been placed in a delay slot of a jump that switches into the MIPS16 mode, as with the following code (taken from a GCC test suite case): 00400650 : 400650: 3c020100 lui v0,0x100 400654: 03e00008 jr ra 400658: 44c2f800 ctc1 v0,c1_fcsr 40065c: 00000000 nop [...] 004012d0 <__libc_csu_init>: 4012d0: f000 6a02 li v0,2 4012d4: f150 0b1c la v1,3f9430 <_DYNAMIC-0x6df0> 4012d8: f400 3240 sll v0,16 4012dc: e269 addu v0,v1 4012de: 659a move gp,v0 4012e0: f00c 64f6 save a0-a2,48,ra,s0-s1 4012e4: 673c move s1,gp 4012e6: f010 9978 lw v1,-32744(s1) 4012ea: d204 sw v0,16(sp) 4012ec: eb40 jalr v1 4012ee: 653b move t9,v1 4012f0: f010 997c lw v1,-32740(s1) 4012f4: f030 9920 lw s1,-32736(s1) 4012f8: e32f subu v1,s1 4012fa: 326b sra v0,v1,2 4012fc: d206 sw v0,24(sp) 4012fe: 220c beqz v0,401318 <__libc_csu_init+0x48> 401300: 6800 li s0,0 401302: 99e0 lw a3,0(s1) 401304: 4801 addiu s0,1 401306: 960e lw a2,56(sp) 401308: 4904 addiu s1,4 40130a: 950d lw a1,52(sp) 40130c: 940c lw a0,48(sp) 40130e: ef40 jalr a3 401310: 653f move t9,a3 401312: 9206 lw v0,24(sp) 401314: ea0a cmp v0,s0 401316: 61f5 btnez 401302 <__libc_csu_init+0x32> 401318: 6476 restore 48,ra,s0-s1 40131a: e8a0 jrc ra Here `set_fast_math' is called from `40130e' (`40130f' with the ISA bit) and emulation triggers for the CTC1 instruction. As it is in a jump delay slot emulation continues from `401312' (`401313' with the ISA bit). However we have no path to handle MIPS16 FPU code emulation, because there are no MIPS16 FPU instructions. So the default emulation path is taken, interpreting a 32-bit word fetched by `get_user' from `401313' as a regular MIPS instruction, which is: 401313: f5ea0a92 sdc1 $f10,2706(t7) This makes the FPU emulator proceed with the supposed SDC1 instruction and consequently makes the program considered here terminate with SIGSEGV. A similar although less severe issue exists with pure-microMIPS processors in the case where similarly an FPU instruction is emulated in a delay slot of a register jump that (incorrectly) switches into the regular MIPS mode. A subsequent instruction fetch from the jump's target is supposed to cause an Address Error exception, however instead we proceed with regular MIPS FPU emulation. For simplicity then, always terminate the emulation loop whenever a mode change is detected, denoted by an ISA mode bit flip. As from commit 377cb1b6c16a ("MIPS: Disable MIPS16/microMIPS crap for platforms not supporting these ASEs.") the result of `get_isa16_mode' can be hardcoded to 0, so we need to examine the ISA mode bit by hand. This complements commit 102cedc32a6e ("MIPS: microMIPS: Floating point support.") which added JALX decoding to FPU emulation. Fixes: 102cedc32a6e ("MIPS: microMIPS: Floating point support.") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 3.9+ Patchwork: https://patchwork.linux-mips.org/patch/16393/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/math-emu/cp1emu.c | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 3d492a823a5..dbddc9ccf27 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -2002,6 +2002,35 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, return 0; } +/* + * Emulate FPU instructions. + * + * If we use FPU hardware, then we have been typically called to handle + * an unimplemented operation, such as where an operand is a NaN or + * denormalized. In that case exit the emulation loop after a single + * iteration so as to let hardware execute any subsequent instructions. + * + * If we have no FPU hardware or it has been disabled, then continue + * emulating floating-point instructions until one of these conditions + * has occurred: + * + * - a non-FPU instruction has been encountered, + * + * - an attempt to emulate has ended with a signal, + * + * - the ISA mode has been switched. + * + * We need to terminate the emulation loop if we got switched to the + * MIPS16 mode, whether supported or not, so that we do not attempt + * to emulate a MIPS16 instruction as a regular MIPS FPU instruction. + * Similarly if we got switched to the microMIPS mode and only the + * regular MIPS mode is supported, so that we do not attempt to emulate + * a microMIPS instruction as a regular MIPS FPU instruction. Or if + * we got switched to the regular MIPS mode and only the microMIPS mode + * is supported, so that we do not attempt to emulate a regular MIPS + * instruction that should cause an Address Error exception instead. + * For simplicity we always terminate upon an ISA mode switch. + */ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr) { @@ -2093,6 +2122,15 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; if (sig) break; + /* + * We have to check for the ISA bit explicitly here, + * because `get_isa16_mode' may return 0 if support + * for code compression has been globally disabled, + * or otherwise we may produce the wrong signal or + * even proceed successfully where we must not. + */ + if ((xcp->cp0_epc ^ prevepc) & 0x1) + break; cond_resched(); } while (xcp->cp0_epc > prevepc); From 7e20e7b9910eede5c21c7badcadb4d3830f8d69d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 Jul 2017 13:06:41 +0300 Subject: [PATCH 039/141] libata: array underflow in ata_find_dev() commit 59a5e266c3f5c1567508888dd61a45b86daed0fa upstream. My static checker complains that "devno" can be negative, meaning that we read before the start of the loop. I've looked at the code, and I think the warning is right. This come from /proc so it's root only or it would be quite a quite a serious bug. The call tree looks like this: proc_scsi_write() <- gets id and channel from simple_strtoul() -> scsi_add_single_device() <- calls shost->transportt->user_scan() -> ata_scsi_user_scan() -> ata_find_dev() Signed-off-by: Dan Carpenter Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # all versions at this point Signed-off-by: Willy Tarreau --- drivers/ata/libata-scsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index f3f0801a0e8..aa4e36b3a59 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2794,10 +2794,12 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) static struct ata_device *ata_find_dev(struct ata_port *ap, int devno) { if (!sata_pmp_attached(ap)) { - if (likely(devno < ata_link_max_devices(&ap->link))) + if (likely(devno >= 0 && + devno < ata_link_max_devices(&ap->link))) return &ap->link.device[devno]; } else { - if (likely(devno < ap->nr_pmp_links)) + if (likely(devno >= 0 && + devno < ap->nr_pmp_links)) return &ap->pmp_link[devno].device[0]; } From ae49cb71c5f4a51dded50f66c2dcf9a764285148 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jul 2017 18:41:52 -0400 Subject: [PATCH 040/141] workqueue: restore WQ_UNBOUND/max_active==1 to be ordered commit 5c0338c68706be53b3dc472e4308961c36e4ece1 upstream. The combination of WQ_UNBOUND and max_active == 1 used to imply ordered execution. After NUMA affinity 4c16bd327c74 ("workqueue: implement NUMA affinity for unbound workqueues"), this is no longer true due to per-node worker pools. While the right way to create an ordered workqueue is alloc_ordered_workqueue(), the documentation has been misleading for a long time and people do use WQ_UNBOUND and max_active == 1 for ordered workqueues which can lead to subtle bugs which are very difficult to trigger. It's unlikely that we'd see noticeable performance impact by enforcing ordering on WQ_UNBOUND / max_active == 1 workqueues. Let's automatically set __WQ_ORDERED for those workqueues. Signed-off-by: Tejun Heo Reported-by: Christoph Hellwig Reported-by: Alexei Potashnik Fixes: 4c16bd327c74 ("workqueue: implement NUMA affinity for unbound workqueues") Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Willy Tarreau --- kernel/workqueue.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 66972ac0c6c..3f8558f85a4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4213,6 +4213,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, struct workqueue_struct *wq; struct pool_workqueue *pwq; + /* + * Unbound && max_active == 1 used to imply ordered, which is no + * longer the case on NUMA machines due to per-node pools. While + * alloc_ordered_workqueue() is the right way to create an ordered + * workqueue, keep the previous behavior to avoid subtle breakages + * on NUMA. + */ + if ((flags & WQ_UNBOUND) && max_active == 1) + flags |= __WQ_ORDERED; + /* allocate wq and format name */ if (flags & WQ_UNBOUND) tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]); From bdc5fbb365b3d7baf9eb783562288946c4a72219 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 5 Aug 2017 17:43:24 -0400 Subject: [PATCH 041/141] ext4: fix SEEK_HOLE/SEEK_DATA for blocksize < pagesize commit fcf5ea10992fbac3c7473a1db33d56a139333cd1 upstream. ext4_find_unwritten_pgoff() does not properly handle a situation when starting index is in the middle of a page and blocksize < pagesize. The following command shows the bug on filesystem with 1k blocksize: xfs_io -f -c "falloc 0 4k" \ -c "pwrite 1k 1k" \ -c "pwrite 3k 1k" \ -c "seek -a -r 0" foo In this example, neither lseek(fd, 1024, SEEK_HOLE) nor lseek(fd, 2048, SEEK_DATA) will return the correct result. Fix the problem by neglecting buffers in a page before starting offset. Reported-by: Andreas Gruenbacher Signed-off-by: Theodore Ts'o Signed-off-by: Jan Kara CC: stable@vger.kernel.org # 3.8+ Signed-off-by: Willy Tarreau --- fs/ext4/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ec9770f4253..d490d6ec396 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -382,6 +382,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, lastoff = page_offset(page); bh = head = page_buffers(page); do { + if (lastoff + bh->b_size <= startoff) + goto next; if (buffer_uptodate(bh) || buffer_unwritten(bh)) { if (whence == SEEK_DATA) @@ -396,6 +398,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); goto out; } +next: lastoff += bh->b_size; bh = bh->b_this_page; } while (bh != head); From c39dcbde3872cb39e1dbff940c610963aad96518 Mon Sep 17 00:00:00 2001 From: Jerry Lee Date: Sun, 6 Aug 2017 01:18:31 -0400 Subject: [PATCH 042/141] ext4: fix overflow caused by missing cast in ext4_resize_fs() commit aec51758ce10a9c847a62a48a168f8c804c6e053 upstream. On a 32-bit platform, the value of n_blcoks_count may be wrong during the file system is resized to size larger than 2^32 blocks. This may caused the superblock being corrupted with zero blocks count. Fixes: 1c6bd7173d66 Signed-off-by: Jerry Lee Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org # 3.7+ Signed-off-by: Willy Tarreau --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index cf0a7048661..f6190fdfd8c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1911,7 +1911,8 @@ retry: n_desc_blocks = o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); - n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_blocks_count = (ext4_fsblk_t)n_group * + EXT4_BLOCKS_PER_GROUP(sb); n_group--; /* set to last group number */ } From a6a256f6125b80f980e8add0d72153c2426adad3 Mon Sep 17 00:00:00 2001 From: Prabhakar Lad Date: Thu, 20 Jul 2017 08:02:09 -0400 Subject: [PATCH 043/141] media: platform: davinci: return -EINVAL for VPFE_CMD_S_CCDC_RAW_PARAMS ioctl commit da05d52d2f0f6bd61094a0cd045fed94bf7d673a upstream. this patch makes sure VPFE_CMD_S_CCDC_RAW_PARAMS ioctl no longer works for vpfe_capture driver with a minimal patch suitable for backporting. - This ioctl was never in public api and was only defined in kernel header. - The function set_params constantly mixes up pointers and phys_addr_t numbers. - This is part of a 'VPFE_CMD_S_CCDC_RAW_PARAMS' ioctl command that is described as an 'experimental ioctl that will change in future kernels'. - The code to allocate the table never gets called after we copy_from_user the user input over the kernel settings, and then compare them for inequality. - We then go on to use an address provided by user space as both the __user pointer for input and pass it through phys_to_virt to come up with a kernel pointer to copy the data to. This looks like a trivially exploitable root hole. Due to these reasons we make sure this ioctl now returns -EINVAL and backport this patch as far as possible. Fixes: 5f15fbb68fd7 ("V4L/DVB (12251): v4l: dm644x ccdc module for vpfe capture driver") Signed-off-by: Lad, Prabhakar Cc: # for v3.7 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/platform/davinci/vpfe_capture.c | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/media/platform/davinci/vpfe_capture.c b/drivers/media/platform/davinci/vpfe_capture.c index 93609091cb2..9dad717fb78 100644 --- a/drivers/media/platform/davinci/vpfe_capture.c +++ b/drivers/media/platform/davinci/vpfe_capture.c @@ -1706,27 +1706,9 @@ static long vpfe_param_handler(struct file *file, void *priv, switch (cmd) { case VPFE_CMD_S_CCDC_RAW_PARAMS: + ret = -EINVAL; v4l2_warn(&vpfe_dev->v4l2_dev, - "VPFE_CMD_S_CCDC_RAW_PARAMS: experimental ioctl\n"); - if (ccdc_dev->hw_ops.set_params) { - ret = ccdc_dev->hw_ops.set_params(param); - if (ret) { - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "Error setting parameters in CCDC\n"); - goto unlock_out; - } - ret = vpfe_get_ccdc_image_format(vpfe_dev, - &vpfe_dev->fmt); - if (ret < 0) { - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "Invalid image format at CCDC\n"); - goto unlock_out; - } - } else { - ret = -EINVAL; - v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, - "VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n"); - } + "VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n"); break; default: ret = -ENOTTY; From 003bea4a702279fc339a0d2f1262e907364a2c73 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 27 Mar 2017 16:12:43 -0700 Subject: [PATCH 044/141] target: Avoid mappedlun symlink creation during lun shutdown commit 49cb77e297dc611a1b795cfeb79452b3002bd331 upstream. This patch closes a race between se_lun deletion during configfs unlink in target_fabric_port_unlink() -> core_dev_del_lun() -> core_tpg_remove_lun(), when transport_clear_lun_ref() blocks waiting for percpu_ref RCU grace period to finish, but a new NodeACL mappedlun is added before the RCU grace period has completed. This can happen in target_fabric_mappedlun_link() because it only checks for se_lun->lun_se_dev, which is not cleared until after transport_clear_lun_ref() percpu_ref RCU grace period finishes. This bug originally manifested as NULL pointer dereference OOPsen in target_stat_scsi_att_intr_port_show_attr_dev() on v4.1.y code, because it dereferences lun->lun_se_dev without a explicit NULL pointer check. In post v4.1 code with target-core RCU conversion, the code in target_stat_scsi_att_intr_port_show_attr_dev() no longer uses se_lun->lun_se_dev, but the same race still exists. To address the bug, go ahead and set se_lun>lun_shutdown as early as possible in core_tpg_remove_lun(), and ensure new NodeACL mappedlun creation in target_fabric_mappedlun_link() fails during se_lun shutdown. Reported-by: James Shen Cc: James Shen Tested-by: James Shen Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger Signed-off-by: Willy Tarreau --- drivers/target/target_core_fabric_configfs.c | 5 +++++ drivers/target/target_core_tpg.c | 3 +++ include/target/target_core_base.h | 1 + 3 files changed, 9 insertions(+) diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 04c775cb3e6..179f7810d39 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -84,6 +84,11 @@ static int target_fabric_mappedlun_link( "_tpg does not exist\n"); return -EINVAL; } + if (lun->lun_shutdown) { + pr_err("Unable to create mappedlun symlink because" + " lun->lun_shutdown=true\n"); + return -EINVAL; + } se_tpg = lun->lun_sep->sep_tpg; nacl_ci = &lun_acl_ci->ci_parent->ci_group->cg_item; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 8572207e3d4..bc3092f032b 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -839,6 +839,8 @@ static void core_tpg_shutdown_lun( struct se_portal_group *tpg, struct se_lun *lun) { + lun->lun_shutdown = true; + core_clear_lun_from_tpg(lun, tpg); transport_clear_lun_from_sessions(lun); } @@ -868,6 +870,7 @@ struct se_lun *core_tpg_pre_dellun( spin_unlock(&tpg->tpg_lun_lock); return ERR_PTR(-ENODEV); } + lun->lun_shutdown = false; spin_unlock(&tpg->tpg_lun_lock); return lun; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7d99c0b5b78..8e271438f77 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -729,6 +729,7 @@ struct se_port_stat_grps { struct se_lun { #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; + bool lun_shutdown; /* See transport_lun_status_table */ enum transport_lun_status_table lun_status; u32 lun_access; From c2ace3043febefbd2fec2964eb5d8a2db99039a4 Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Wed, 7 Jun 2017 12:26:49 +0200 Subject: [PATCH 045/141] fuse: initialize the flock flag in fuse_file on allocation commit 68227c03cba84a24faf8a7277d2b1a03c8959c2c upstream. Before the patch, the flock flag could remain uninitialized for the lifespan of the fuse_file allocation. Unless set to true in fuse_file_flock(), it would remain in an indeterminate state until read in an if statement in fuse_release_common(). This could consequently lead to taking an unexpected branch in the code. The bug was discovered by a runtime instrumentation designed to detect use of uninitialized memory in the kernel. Signed-off-by: Mateusz Jurczyk Fixes: 37fb3a30b462 ("fuse: fix flock") Cc: # v3.1+ Signed-off-by: Miklos Szeredi Signed-off-by: Willy Tarreau --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1dce9304101..ee5c3e9a598 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -54,7 +54,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; - ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); + ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL); if (unlikely(!ff)) return NULL; From f609dac6507b615e43dd59b7c49760ba6ec651a9 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:51 +0200 Subject: [PATCH 046/141] scsi: zfcp: fix queuecommand for scsi_eh commands when DIX enabled commit 71b8e45da51a7b64a23378221c0a5868bd79da4f upstream. Since commit db007fc5e20c ("[SCSI] Command protection operation"), scsi_eh_prep_cmnd() saves scmd->prot_op and temporarily resets it to SCSI_PROT_NORMAL. Other FCP LLDDs such as qla2xxx and lpfc shield their queuecommand() to only access any of scsi_prot_sg...() if (scsi_get_prot_op(cmd) != SCSI_PROT_NORMAL). Do the same thing for zfcp, which introduced DIX support with commit ef3eb71d8ba4 ("[SCSI] zfcp: Introduce experimental support for DIF/DIX"). Otherwise, TUR SCSI commands as part of scsi_eh likely fail in zfcp, because the regular SCSI command with DIX protection data, that scsi_eh re-uses in scsi_send_eh_cmnd(), of course still has (scsi_prot_sg_count() != 0) and so zfcp sends down bogus requests to the FCP channel hardware. This causes scsi_eh_test_devices() to have (finish_cmds == 0) [not SCSI device is online or not scsi_eh_tur() failed] so regular SCSI commands, that caused / were affected by scsi_eh, are moved to work_q and scsi_eh_test_devices() itself returns false. In turn, it unnecessarily escalates in our case in scsi_eh_ready_devs() beyond host reset to finally scsi_eh_offline_sdevs() which sets affected SCSI devices offline with the following kernel message: "kernel: sd H:0:T:L: Device offlined - not ready after error recovery" Signed-off-by: Steffen Maier Fixes: ef3eb71d8ba4 ("[SCSI] zfcp: Introduce experimental support for DIF/DIX") Cc: #2.6.36+ Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_fsf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index ad5718401ea..d27b49194d6 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2286,7 +2286,8 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd) fcp_cmnd = (struct fcp_cmnd *) &req->qtcb->bottom.io.fcp_cmnd; zfcp_fc_scsi_to_fcp(fcp_cmnd, scsi_cmnd, 0); - if (scsi_prot_sg_count(scsi_cmnd)) { + if ((scsi_get_prot_op(scsi_cmnd) != SCSI_PROT_NORMAL) && + scsi_prot_sg_count(scsi_cmnd)) { zfcp_qdio_set_data_div(qdio, &req->qdio_req, scsi_prot_sg_count(scsi_cmnd)); retval = zfcp_qdio_sbals_from_sg(qdio, &req->qdio_req, From e7ab3bd99e221e848fd01df00629c8d4c808c5f6 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 28 Jul 2017 12:30:52 +0200 Subject: [PATCH 047/141] scsi: zfcp: add handling for FCP_RESID_OVER to the fcp ingress path commit a099b7b1fc1f0418ab8d79ecf98153e1e134656e upstream. Up until now zfcp would just ignore the FCP_RESID_OVER flag in the FCP response IU. When this flag is set, it is possible, in regards to the FCP standard, that the storage-server processes the command normally, up to the point where data is missing and simply ignores those. In this case no CHECK CONDITION would be set, and because we ignored the FCP_RESID_OVER flag we resulted in at least a data loss or even -corruption as a follow-up error, depending on how the applications/layers on top behave. To prevent this, we now set the host-byte of the corresponding scsi_cmnd to DID_ERROR. Other storage-behaviors, where the same condition results in a CHECK CONDITION set in the answer, don't need to be changed as they are handled in the mid-layer already. Following is an example trace record decoded with zfcpdbf from the s390-tools package. We forcefully injected a fc_dl which is one byte too small: Timestamp : ... Area : SCSI Subarea : 00 Level : 3 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : rsl_err Request ID : 0x... SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x00070000 ^^DID_ERROR SCSI retries : 0x.. SCSI allowed : 0x.. SCSI scribble : 0x... SCSI opcode : 2a000000 00000000 08000000 00000000 FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000400 00000001 ^^fr_flags==FCP_RESID_OVER ^^fr_status==SAM_STAT_GOOD ^^^^^^^^fr_resid 00000000 00000000 As of now, we don't actively handle to possibility that a response IU has both flags - FCP_RESID_OVER and FCP_RESID_UNDER - set at once. Reported-by: Luke M. Hopkins Reviewed-by: Steffen Maier Fixes: 553448f6c483 ("[SCSI] zfcp: Message cleanup") Fixes: ea127f975424 ("[PATCH] s390 (7/7): zfcp host adapter.") (tglx/history.git) Cc: #2.6.33+ Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_fc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_fc.h b/drivers/s390/scsi/zfcp_fc.h index b1d2024ed51..c2e40e10b29 100644 --- a/drivers/s390/scsi/zfcp_fc.h +++ b/drivers/s390/scsi/zfcp_fc.h @@ -4,7 +4,7 @@ * Fibre Channel related definitions and inline functions for the zfcp * device driver * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009, 2017 */ #ifndef ZFCP_FC_H @@ -291,6 +291,10 @@ void zfcp_fc_eval_fcp_rsp(struct fcp_resp_with_ext *fcp_rsp, !(rsp_flags & FCP_SNS_LEN_VAL) && fcp_rsp->resp.fr_status == SAM_STAT_GOOD) set_host_byte(scsi, DID_ERROR); + } else if (unlikely(rsp_flags & FCP_RESID_OVER)) { + /* FCP_DL was not sufficient for SCSI data length */ + if (fcp_rsp->resp.fr_status == SAM_STAT_GOOD) + set_host_byte(scsi, DID_ERROR); } } From 2c7bfb3408afee40c339d34f6bf4284fcfd2dcac Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:55 +0200 Subject: [PATCH 048/141] scsi: zfcp: fix missing trace records for early returns in TMF eh handlers commit 1a5d999ebfc7bfe28deb48931bb57faa8e4102b6 upstream. For problem determination we need to see that we were in scsi_eh as well as whether and why we were successful or not. The following commits introduced new early returns without adding a trace record: v2.6.35 commit a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh") on fc_block_scsi_eh() returning != 0 which is FAST_IO_FAIL, v2.6.30 commit 63caf367e1c9 ("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp") on not having gotten an FSF request after the maximum number of retry attempts and thus could not issue a TMF and has to return FAILED. Signed-off-by: Steffen Maier Fixes: a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh") Fixes: 63caf367e1c9 ("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp") Cc: #2.6.38+ Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_scsi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 66c37e77ac7..8ec101a4a5e 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -294,8 +294,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) zfcp_erp_wait(adapter); ret = fc_block_scsi_eh(scpnt); - if (ret) + if (ret) { + zfcp_dbf_scsi_devreset("fiof", scpnt, tm_flags); return ret; + } if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_RUNNING)) { @@ -303,8 +305,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) return SUCCESS; } } - if (!fsf_req) + if (!fsf_req) { + zfcp_dbf_scsi_devreset("reqf", scpnt, tm_flags); return FAILED; + } wait_for_completion(&fsf_req->completion); From bb5cf573ea98352ec337a537ea1199b69d001307 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:56 +0200 Subject: [PATCH 049/141] scsi: zfcp: fix payload with full FCP_RSP IU in SCSI trace records commit 12c3e5754c8022a4f2fd1e9f00d19e99ee0d3cc1 upstream. If the FCP_RSP UI has optional parts (FCP_SNS_INFO or FCP_RSP_INFO) and thus does not fit into the fsp_rsp field built into a SCSI trace record, trace the full FCP_RSP UI with all optional parts as payload record instead of just FCP_SNS_INFO as payload and a 1 byte RSP_INFO_CODE part of FCP_RSP_INFO built into the SCSI record. That way we would also get the full FCP_SNS_INFO in case a target would ever send more than min(SCSI_SENSE_BUFFERSIZE==96, ZFCP_DBF_PAY_MAX_REC==256)==96. The mandatory part of FCP_RSP IU is only 24 bytes. PAYload costs at least one full PAY record of 256 bytes anyway. We cap to the hardware response size which is only FSF_FCP_RSP_SIZE==128. So we can just put the whole FCP_RSP IU with any optional parts into PAYload similarly as we do for SAN PAY since v4.9 commit aceeffbb59bb ("zfcp: trace full payload of all SAN records (req,resp,iels)"). This does not cause any additional trace records wasting memory. Decoded trace records were confusing because they showed a hard-coded sense data length of 96 even if the FCP_RSP_IU field FCP_SNS_LEN showed actually less. Since the same commit, we set pl_len for SAN traces to the full length of a request/response even if we cap the corresponding trace. In contrast, here for SCSI traces we set pl_len to the pre-computed length of FCP_RSP IU considering SNS_LEN or RSP_LEN if valid. Nonetheless we trace a hardcoded payload of length FSF_FCP_RSP_SIZE==128 if there were optional parts. This makes it easier for the zfcpdbf tool to format only the relevant part of the long FCP_RSP UI buffer. And any trailing information is still available in the payload trace record just in case. Rename the payload record tag from "fcp_sns" to "fcp_riu" to make the new content explicit to zfcpdbf which can then pick a suitable field name such as "FCP rsp IU all:" instead of "Sense info :" Also, the same zfcpdbf can still be backwards compatible with "fcp_sns". Old example trace record before this fix, formatted with the tool zfcpdbf from s390-tools: Timestamp : ... Area : SCSI Subarea : 00 Level : 3 Exception : - CPU id : .. Caller : 0x... Record id : 1 Tag : rsl_err Request id : 0x SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x00000002 SCSI retries : 0x00 SCSI allowed : 0x05 SCSI scribble : 0x SCSI opcode : 00000000 00000000 00000000 00000000 FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000202 00000000 ^^==FCP_SNS_LEN_VALID 00000020 00000000 ^^^^^^^^==FCP_SNS_LEN==32 Sense len : 96 <==min(SCSI_SENSE_BUFFERSIZE,ZFCP_DBF_PAY_MAX_REC) Sense info : 70000600 00000018 00000000 29000000 00000400 00000000 00000000 00000000 00000000 00000000 00000000 00000000<==superfluous 00000000 00000000 00000000 00000000<==superfluous 00000000 00000000 00000000 00000000<==superfluous 00000000 00000000 00000000 00000000<==superfluous New example trace records with this fix: Timestamp : ... Area : SCSI Subarea : 00 Level : 3 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : rsl_err Request ID : 0x SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x00000002 SCSI retries : 0x00 SCSI allowed : 0x03 SCSI scribble : 0x SCSI opcode : a30c0112 00000000 02000000 00000000 FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000a02 00000200 00000020 00000000 FCP rsp IU len : 56 FCP rsp IU all : 00000000 00000000 00000a02 00000200 ^^=FCP_RESID_UNDER|FCP_SNS_LEN_VALID 00000020 00000000 70000500 00000018 ^^^^^^^^==FCP_SNS_LEN ^^^^^^^^^^^^^^^^^ 00000000 240000cb 00011100 00000000 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 00000000 00000000 ^^^^^^^^^^^^^^^^^==FCP_SNS_INFO Timestamp : ... Area : SCSI Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : 0x... Record ID : 1 Tag : lr_okay Request ID : 0x SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x00000000 SCSI retries : 0x00 SCSI allowed : 0x05 SCSI scribble : 0x SCSI opcode : FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000100 00000000 00000000 00000008 FCP rsp IU len : 32 FCP rsp IU all : 00000000 00000000 00000100 00000000 ^^==FCP_RSP_LEN_VALID 00000000 00000008 00000000 00000000 ^^^^^^^^==FCP_RSP_LEN ^^^^^^^^^^^^^^^^^==FCP_RSP_INFO Signed-off-by: Steffen Maier Fixes: 250a1352b95e ("[SCSI] zfcp: Redesign of the debug tracing for SCSI records.") Cc: #2.6.38+ Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_dbf.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index bf13e73ecab..0f3581b7a2e 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -556,19 +556,32 @@ void zfcp_dbf_scsi(char *tag, int level, struct scsi_cmnd *sc, if (fsf) { rec->fsf_req_id = fsf->req_id; + rec->pl_len = FCP_RESP_WITH_EXT; fcp_rsp = (struct fcp_resp_with_ext *) &(fsf->qtcb->bottom.io.fcp_rsp); + /* mandatory parts of FCP_RSP IU in this SCSI record */ memcpy(&rec->fcp_rsp, fcp_rsp, FCP_RESP_WITH_EXT); if (fcp_rsp->resp.fr_flags & FCP_RSP_LEN_VAL) { fcp_rsp_info = (struct fcp_resp_rsp_info *) &fcp_rsp[1]; rec->fcp_rsp_info = fcp_rsp_info->rsp_code; + rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_rsp_len); } if (fcp_rsp->resp.fr_flags & FCP_SNS_LEN_VAL) { - rec->pl_len = min((u16)SCSI_SENSE_BUFFERSIZE, - (u16)ZFCP_DBF_PAY_MAX_REC); - zfcp_dbf_pl_write(dbf, sc->sense_buffer, rec->pl_len, - "fcp_sns", fsf->req_id); + rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_sns_len); } + /* complete FCP_RSP IU in associated PAYload record + * but only if there are optional parts + */ + if (fcp_rsp->resp.fr_flags != 0) + zfcp_dbf_pl_write( + dbf, fcp_rsp, + /* at least one full PAY record + * but not beyond hardware response field + */ + min_t(u16, max_t(u16, rec->pl_len, + ZFCP_DBF_PAY_MAX_REC), + FSF_FCP_RSP_SIZE), + "fcp_riu", fsf->req_id); } debug_event(dbf->scsi, level, rec, sizeof(*rec)); From 4b106b91a0c954ec2aac206c485654372532cb16 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 28 Jul 2017 12:30:57 +0200 Subject: [PATCH 050/141] scsi: zfcp: trace HBA FSF response by default on dismiss or timedout late response commit fdb7cee3b9e3c561502e58137a837341f10cbf8b upstream. At the default trace level, we only trace unsuccessful events including FSF responses. zfcp_dbf_hba_fsf_response() only used protocol status and FSF status to decide on an unsuccessful response. However, this is only one of multiple possible sources determining a failed struct zfcp_fsf_req. An FSF request can also "fail" if its response runs into an ERP timeout or if it gets dismissed because a higher level recovery was triggered [trace tags "erscf_1" or "erscf_2" in zfcp_erp_strategy_check_fsfreq()]. FSF requests with ERP timeout are: FSF_QTCB_EXCHANGE_CONFIG_DATA, FSF_QTCB_EXCHANGE_PORT_DATA, FSF_QTCB_OPEN_PORT_WITH_DID or FSF_QTCB_CLOSE_PORT or FSF_QTCB_CLOSE_PHYSICAL_PORT for target ports, FSF_QTCB_OPEN_LUN, FSF_QTCB_CLOSE_LUN. One example is slow queue processing which can cause follow-on errors, e.g. FSF_PORT_ALREADY_OPEN after FSF_QTCB_OPEN_PORT_WITH_DID timed out. In order to see the root cause, we need to see late responses even if the channel presented them successfully with FSF_PROT_GOOD and FSF_GOOD. Example trace records formatted with zfcpdbf from the s390-tools package: Timestamp : ... Area : REC Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : ... Record ID : 1 Tag : fcegpf1 LUN : 0xffffffffffffffff WWPN : 0x D_ID : 0x00 Adapter status : 0x5400050b Port status : 0x41200000 LUN status : 0x00000000 Ready count : 0x00000001 Running count : 0x... ERP want : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT ERP need : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT | Timestamp : ... 30 seconds later Area : REC Subarea : 00 Level : 1 Exception : - CPU ID : .. Caller : ... Record ID : 2 Tag : erscf_2 LUN : 0xffffffffffffffff WWPN : 0x D_ID : 0x00 Adapter status : 0x5400050b Port status : 0x41200000 LUN status : 0x00000000 Request ID : 0x ERP status : 0x10000000 ZFCP_STATUS_ERP_TIMEDOUT ERP step : 0x0800 ZFCP_ERP_STEP_PORT_OPENING ERP action : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT ERP count : 0x00 | Timestamp : ... later than previous record Area : HBA Subarea : 00 Level : 5 > default level => 3 <= default level Exception : - CPU ID : 00 Caller : ... Record ID : 1 Tag : fs_qtcb => fs_rerr Request ID : 0x Request status : 0x00001010 ZFCP_STATUS_FSFREQ_DISMISSED | ZFCP_STATUS_FSFREQ_CLEANUP FSF cmnd : 0x00000005 FSF sequence no: 0x... FSF issued : ... > 30 seconds ago FSF stat : 0x00000000 FSF_GOOD FSF stat qual : 00000000 00000000 00000000 00000000 Prot stat : 0x00000001 FSF_PROT_GOOD Prot stat qual : 00000000 00000000 00000000 00000000 Port handle : 0x... LUN handle : 0x00000000 QTCB log length: ... QTCB log info : ... In case of problems detecting that new responses are waiting on the input queue, we sooner or later trigger adapter recovery due to an FSF request timeout (trace tag "fsrth_1"). FSF requests with FSF request timeout are: typically FSF_QTCB_ABORT_FCP_CMND; but theoretically also FSF_QTCB_EXCHANGE_CONFIG_DATA or FSF_QTCB_EXCHANGE_PORT_DATA via sysfs, FSF_QTCB_OPEN_PORT_WITH_DID or FSF_QTCB_CLOSE_PORT for WKA ports, FSF_QTCB_FCP_CMND for task management function (LUN / target reset). One or more pending requests can meanwhile have FSF_PROT_GOOD and FSF_GOOD because the channel filled in the response via DMA into the request's QTCB. In a theroretical case, inject code can create an erroneous FSF request on purpose. If data router is enabled, it uses deferred error reporting. A READ SCSI command can succeed with FSF_PROT_GOOD, FSF_GOOD, and SAM_STAT_GOOD. But on writing the read data to host memory via DMA, it can still fail, e.g. if an intentionally wrong scatter list does not provide enough space. Rather than getting an unsuccessful response, we get a QDIO activate check which in turn triggers adapter recovery. One or more pending requests can meanwhile have FSF_PROT_GOOD and FSF_GOOD because the channel filled in the response via DMA into the request's QTCB. Example trace records formatted with zfcpdbf from the s390-tools package: Timestamp : ... Area : HBA Subarea : 00 Level : 6 > default level => 3 <= default level Exception : - CPU ID : .. Caller : ... Record ID : 1 Tag : fs_norm => fs_rerr Request ID : 0x Request status : 0x00001010 ZFCP_STATUS_FSFREQ_DISMISSED | ZFCP_STATUS_FSFREQ_CLEANUP FSF cmnd : 0x00000001 FSF sequence no: 0x... FSF issued : ... FSF stat : 0x00000000 FSF_GOOD FSF stat qual : 00000000 00000000 00000000 00000000 Prot stat : 0x00000001 FSF_PROT_GOOD Prot stat qual : ........ ........ 00000000 00000000 Port handle : 0x... LUN handle : 0x... | Timestamp : ... Area : SCSI Subarea : 00 Level : 3 Exception : - CPU ID : .. Caller : ... Record ID : 1 Tag : rsl_err Request ID : 0x SCSI ID : 0x... SCSI LUN : 0x... SCSI result : 0x000e0000 DID_TRANSPORT_DISRUPTED SCSI retries : 0x00 SCSI allowed : 0x05 SCSI scribble : 0x SCSI opcode : 28... Read(10) FCP rsp inf cod: 0x00 FCP rsp IU : 00000000 00000000 00000000 00000000 ^^ SAM_STAT_GOOD 00000000 00000000 Only with luck in both above cases, we could see a follow-on trace record of an unsuccesful event following a successful but late FSF response with FSF_PROT_GOOD and FSF_GOOD. Typically this was the case for I/O requests resulting in a SCSI trace record "rsl_err" with DID_TRANSPORT_DISRUPTED [On ZFCP_STATUS_FSFREQ_DISMISSED, zfcp_fsf_protstatus_eval() sets ZFCP_STATUS_FSFREQ_ERROR seen by the request handler functions as failure]. However, the reason for this follow-on trace was invisible because the corresponding HBA trace record was missing at the default trace level (by default hidden records with tags "fs_norm", "fs_qtcb", or "fs_open"). On adapter recovery, after we had shut down the QDIO queues, we perform unsuccessful pseudo completions with flag ZFCP_STATUS_FSFREQ_DISMISSED for each pending FSF request in zfcp_fsf_req_dismiss_all(). In order to find the root cause, we need to see all pseudo responses even if the channel presented them successfully with FSF_PROT_GOOD and FSF_GOOD. Therefore, check zfcp_fsf_req.status for ZFCP_STATUS_FSFREQ_DISMISSED or ZFCP_STATUS_FSFREQ_ERROR and trace with a new tag "fs_rerr". It does not matter that there are numerous places which set ZFCP_STATUS_FSFREQ_ERROR after the location where we trace an FSF response early. These cases are based on protocol status != FSF_PROT_GOOD or == FSF_PROT_FSF_STATUS_PRESENTED and are thus already traced by default as trace tag "fs_perr" or "fs_ferr" respectively. NB: The trace record with tag "fssrh_1" for status read buffers on dismiss all remains. zfcp_fsf_req_complete() handles this and returns early. All other FSF request types are handled separately and as described above. Signed-off-by: Steffen Maier Fixes: 8a36e4532ea1 ("[SCSI] zfcp: enhancement of zfcp debug features") Fixes: 2e261af84cdb ("[SCSI] zfcp: Only collect FSF/HBA debug data for matching trace levels") Cc: #2.6.38+ Reviewed-by: Benjamin Block Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_dbf.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index a8165f14255..712a8484a7b 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -323,7 +323,11 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req) { struct fsf_qtcb *qtcb = req->qtcb; - if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && + if (unlikely(req->status & (ZFCP_STATUS_FSFREQ_DISMISSED | + ZFCP_STATUS_FSFREQ_ERROR))) { + zfcp_dbf_hba_fsf_resp("fs_rerr", 3, req); + + } else if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && (qtcb->prefix.prot_status != FSF_PROT_FSF_STATUS_PRESENTED)) { zfcp_dbf_hba_fsf_resp("fs_perr", 1, req); From b974ab50791088d1d69c53715306729f01c15a4d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 27 Sep 2017 18:47:12 +0900 Subject: [PATCH 051/141] usb: renesas_usbhs: fix the BCLR setting condition for non-DCP pipe commit 6124607acc88fffeaadf3aacfeb3cc1304c87387 upstream. This patch fixes an issue that the driver sets the BCLR bit of {C,Dn}FIFOCTR register to 1 even when it's non-DCP pipe and the FRDY bit of {C,Dn}FIFOCTR register is set to 1. Fixes: e8d548d54968 ("usb: renesas_usbhs: fifo became independent from pipe.") Cc: # v3.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/renesas_usbhs/fifo.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 95647562fd6..47a6ffab57c 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -261,11 +261,17 @@ static void usbhsf_fifo_clear(struct usbhs_pipe *pipe, struct usbhs_fifo *fifo) { struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); + int ret = 0; if (!usbhs_pipe_is_dcp(pipe)) - usbhsf_fifo_barrier(priv, fifo); + ret = usbhsf_fifo_barrier(priv, fifo); - usbhs_write(priv, fifo->ctr, BCLR); + /* + * if non-DCP pipe, this driver should set BCLR when + * usbhsf_fifo_barrier() returns 0. + */ + if (!ret) + usbhs_write(priv, fifo->ctr, BCLR); } static int usbhsf_fifo_rcv_len(struct usbhs_priv *priv, From 5abdd6e8304150f7939bb10a27eec38d2483cd9d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 27 Sep 2017 18:47:13 +0900 Subject: [PATCH 052/141] usb: renesas_usbhs: fix usbhsf_fifo_clear() for RX direction commit 0a2ce62b61f2c76d0213edf4e37aaf54a8ddf295 upstream. This patch fixes an issue that the usbhsf_fifo_clear() is possible to cause 10 msec delay if the pipe is RX direction and empty because the FRDY bit will never be set to 1 in such case. Fixes: e8d548d54968 ("usb: renesas_usbhs: fifo became independent from pipe.") Cc: # v3.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/renesas_usbhs/fifo.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 47a6ffab57c..0c962ff5eed 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -263,8 +263,17 @@ static void usbhsf_fifo_clear(struct usbhs_pipe *pipe, struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); int ret = 0; - if (!usbhs_pipe_is_dcp(pipe)) - ret = usbhsf_fifo_barrier(priv, fifo); + if (!usbhs_pipe_is_dcp(pipe)) { + /* + * This driver checks the pipe condition first to avoid -EBUSY + * from usbhsf_fifo_barrier() with about 10 msec delay in + * the interrupt handler if the pipe is RX direction and empty. + */ + if (usbhs_pipe_is_dir_in(pipe)) + ret = usbhs_pipe_is_accessible(pipe); + if (!ret) + ret = usbhsf_fifo_barrier(priv, fifo); + } /* * if non-DCP pipe, this driver should set BCLR when From 544d7ba2c8304b1db4a12a39225443f9f689985d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 13 Oct 2017 14:32:37 +0200 Subject: [PATCH 053/141] iommu/amd: Finish TLB flush in amd_iommu_unmap() commit ce76353f169a6471542d999baf3d29b121dce9c0 upstream. The function only sends the flush command to the IOMMU(s), but does not wait for its completion when it returns. Fix that. Fixes: 601367d76bd1 ('x86/amd-iommu: Remove iommu_flush_domain function') Cc: stable@vger.kernel.org # >= 2.6.33 Signed-off-by: Joerg Roedel Signed-off-by: Willy Tarreau --- drivers/iommu/amd_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 0e7cd14bf7b..88ba9649bd1 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3402,6 +3402,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, mutex_unlock(&domain->api_lock); domain_flush_tlb_pde(domain); + domain_flush_complete(domain); return unmap_size; } From d75e8540fd8bedc8ee8d11941a7aed3a86735c47 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 9 Oct 2017 11:13:18 +0200 Subject: [PATCH 054/141] direct-io: Prevent NULL pointer access in submit_page_section commit 899f0429c7d3eed886406cd72182bee3b96aa1f9 upstream. In the code added to function submit_page_section by commit b1058b981, sdio->bio can currently be NULL when calling dio_bio_submit. This then leads to a NULL pointer access in dio_bio_submit, so check for a NULL bio in submit_page_section before trying to submit it instead. Fixes xfstest generic/250 on gfs2. Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Andreas Gruenbacher Reviewed-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Willy Tarreau --- fs/direct-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index e17d919aa6d..4007749a478 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -759,7 +759,8 @@ out: */ if (sdio->boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); - dio_bio_submit(dio, sdio); + if (sdio->bio) + dio_bio_submit(dio, sdio); page_cache_release(sdio->cur_page); sdio->cur_page = NULL; } From be3e7b176c28cb919f81c3fa2ff63148be7a74e5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Oct 2017 11:01:13 +0200 Subject: [PATCH 055/141] USB: serial: console: fix use-after-free after failed setup commit 299d7572e46f98534033a9e65973f13ad1ce9047 upstream. Make sure to reset the USB-console port pointer when console setup fails in order to avoid having the struct usb_serial be prematurely freed by the console code when the device is later disconnected. Fixes: 73e487fdb75f ("[PATCH] USB console: fix disconnection issues") Cc: stable # 2.6.18 Acked-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/console.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 5f3bcd31e20..f3bbe210119 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -188,6 +188,7 @@ static int usb_console_setup(struct console *co, char *options) kfree(tty); reset_open_count: port->port.count = 0; + info->port = NULL; usb_autopm_put_interface(serial->interface); error_get_interface: usb_serial_put(serial); From 20d94bc94065e6fc3f82b7d39b9a2011696aa074 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 12 Oct 2017 16:00:41 +0100 Subject: [PATCH 056/141] KEYS: don't let add_key() update an uninstantiated key commit 60ff5b2f547af3828aebafd54daded44cfb0807a upstream. Currently, when passed a key that already exists, add_key() will call the key's ->update() method if such exists. But this is heavily broken in the case where the key is uninstantiated because it doesn't call __key_instantiate_and_link(). Consequently, it doesn't do most of the things that are supposed to happen when the key is instantiated, such as setting the instantiation state, clearing KEY_FLAG_USER_CONSTRUCT and awakening tasks waiting on it, and incrementing key->user->nikeys. It also never takes key_construction_mutex, which means that ->instantiate() can run concurrently with ->update() on the same key. In the case of the "user" and "logon" key types this causes a memory leak, at best. Maybe even worse, the ->update() methods of the "encrypted" and "trusted" key types actually just dereference a NULL pointer when passed an uninstantiated key. Change key_create_or_update() to wait interruptibly for the key to finish construction before continuing. This patch only affects *uninstantiated* keys. For now we still allow a negatively instantiated key to be updated (thereby positively instantiating it), although that's broken too (the next patch fixes it) and I'm not sure that anyone actually uses that functionality either. Here is a simple reproducer for the bug using the "encrypted" key type (requires CONFIG_ENCRYPTED_KEYS=y), though as noted above the bug pertained to more than just the "encrypted" key type: #include #include #include int main(void) { int ringid = keyctl_join_session_keyring(NULL); if (fork()) { for (;;) { const char payload[] = "update user:foo 32"; usleep(rand() % 10000); add_key("encrypted", "desc", payload, sizeof(payload), ringid); keyctl_clear(ringid); } } else { for (;;) request_key("encrypted", "desc", "callout_info", ringid); } } It causes: BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: encrypted_update+0xb0/0x170 PGD 7a178067 P4D 7a178067 PUD 77269067 PMD 0 PREEMPT SMP CPU: 0 PID: 340 Comm: reproduce Tainted: G D 4.14.0-rc1-00025-g428490e38b2e #796 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8a467a39a340 task.stack: ffffb15c40770000 RIP: 0010:encrypted_update+0xb0/0x170 RSP: 0018:ffffb15c40773de8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8a467a275b00 RCX: 0000000000000000 RDX: 0000000000000005 RSI: ffff8a467a275b14 RDI: ffffffffb742f303 RBP: ffffb15c40773e20 R08: 0000000000000000 R09: ffff8a467a275b17 R10: 0000000000000020 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8a4677057180 R15: ffff8a467a275b0f FS: 00007f5d7fb08700(0000) GS:ffff8a467f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000077262005 CR4: 00000000001606f0 Call Trace: key_create_or_update+0x2bc/0x460 SyS_add_key+0x10c/0x1d0 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x7f5d7f211259 RSP: 002b:00007ffed03904c8 EFLAGS: 00000246 ORIG_RAX: 00000000000000f8 RAX: ffffffffffffffda RBX: 000000003b2a7955 RCX: 00007f5d7f211259 RDX: 00000000004009e4 RSI: 00000000004009ff RDI: 0000000000400a04 RBP: 0000000068db8bad R08: 000000003b2a7955 R09: 0000000000000004 R10: 000000000000001a R11: 0000000000000246 R12: 0000000000400868 R13: 00007ffed03905d0 R14: 0000000000000000 R15: 0000000000000000 Code: 77 28 e8 64 34 1f 00 45 31 c0 31 c9 48 8d 55 c8 48 89 df 48 8d 75 d0 e8 ff f9 ff ff 85 c0 41 89 c4 0f 88 84 00 00 00 4c 8b 7d c8 <49> 8b 75 18 4c 89 ff e8 24 f8 ff ff 85 c0 41 89 c4 78 6d 49 8b RIP: encrypted_update+0xb0/0x170 RSP: ffffb15c40773de8 CR2: 0000000000000018 Cc: # v2.6.12+ Reported-by: Eric Biggers Signed-off-by: David Howells cc: Eric Biggers Signed-off-by: Willy Tarreau --- security/keys/key.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/security/keys/key.c b/security/keys/key.c index 6595b2dd89f..6373ff18b8e 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -897,6 +897,16 @@ error: */ __key_link_end(keyring, ktype, prealloc); + key = key_ref_to_ptr(key_ref); + if (test_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) { + ret = wait_for_key_construction(key, true); + if (ret < 0) { + key_ref_put(key_ref); + key_ref = ERR_PTR(ret); + goto error_free_prep; + } + } + key_ref = __key_update(key_ref, &prep); goto error_free_prep; } From ba850927667f326ec400f3bb4e268a101639de1a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:40:00 -0700 Subject: [PATCH 057/141] FS-Cache: fix dereference of NULL user_key_payload commit d124b2c53c7bee6569d2a2d0b18b4a1afde00134 upstream. When the file /proc/fs/fscache/objects (available with CONFIG_FSCACHE_OBJECT_LIST=y) is opened, we request a user key with description "fscache:objlist", then access its payload. However, a revoked key has a NULL payload, and we failed to check for this. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we access its payload. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. Fixes: 4fbf4291aa15 ("FS-Cache: Allow the current state of all objects to be dumped") Reviewed-by: James Morris Cc: [v2.6.32+] Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Willy Tarreau --- fs/fscache/object-list.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index f27c89d1788..e7cf8c5f267 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -338,6 +338,13 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) rcu_read_lock(); confkey = key->payload.data; + if (!confkey) { + /* key was revoked */ + rcu_read_unlock(); + key_put(key); + goto no_config; + } + buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { From 9c66c82c5fd868ffd3b6a848ecc487f1061cdc36 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 21 May 2017 22:36:23 -0400 Subject: [PATCH 058/141] ext4: keep existing extra fields when inode expands commit 887a9730614727c4fff7cb756711b190593fc1df upstream. ext4_expand_extra_isize() should clear only space between old and new size. Fixes: 6dd4ee7cab7e # v2.6.23 Cc: stable@vger.kernel.org Signed-off-by: Konstantin Khlebnikov Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1095d77c2a9..1094017220d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5045,8 +5045,9 @@ static int ext4_expand_extra_isize(struct inode *inode, /* No extended attributes present */ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { - memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, - new_extra_isize); + memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + + EXT4_I(inode)->i_extra_isize, 0, + new_extra_isize - EXT4_I(inode)->i_extra_isize); EXT4_I(inode)->i_extra_isize = new_extra_isize; return 0; } From f9e779c5387d091d0d8cbe9241914d1dc43f9bf2 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 31 May 2017 16:19:47 +0100 Subject: [PATCH 059/141] MIPS: Fix mips_atomic_set() retry condition commit 2ec420b26f7b6ff332393f0bb5a7d245f7ad87f0 upstream. The inline asm retry check in the MIPS_ATOMIC_SET operation of the sysmips system call has been backwards since commit f1e39a4a616c ("MIPS: Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler") merged in v2.6.32, resulting in the non R10000_LLSC_WAR case retrying until the operation was inatomic, before returning the new value that was probably just written multiple times instead of the old value. Invert the branch condition to fix that particular issue. Fixes: f1e39a4a616c ("MIPS: Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler") Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16148/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index b79d13f95bf..eb0f4dfb385 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -140,7 +140,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) "1: ll %[old], (%[addr]) \n" " move %[tmp], %[new] \n" "2: sc %[tmp], (%[addr]) \n" - " bnez %[tmp], 4f \n" + " beqz %[tmp], 4f \n" "3: \n" " .subsection 2 \n" "4: b 1b \n" From e8705c4233f26a460b4d67f6bbf48e16f8261006 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Sep 2017 11:37:03 -0700 Subject: [PATCH 060/141] KEYS: prevent creating a different user's keyrings commit 237bbd29f7a049d310d907f4b2716a7feef9abf3 upstream. It was possible for an unprivileged user to create the user and user session keyrings for another user. For example: sudo -u '#3000' sh -c 'keyctl add keyring _uid.4000 "" @u keyctl add keyring _uid_ses.4000 "" @u sleep 15' & sleep 1 sudo -u '#4000' keyctl describe @u sudo -u '#4000' keyctl describe @us This is problematic because these "fake" keyrings won't have the right permissions. In particular, the user who created them first will own them and will have full access to them via the possessor permissions, which can be used to compromise the security of a user's keys: -4: alswrv-----v------------ 3000 0 keyring: _uid.4000 -5: alswrv-----v------------ 3000 0 keyring: _uid_ses.4000 Fix it by marking user and user session keyrings with a flag KEY_FLAG_UID_KEYRING. Then, when searching for a user or user session keyring by name, skip all keyrings that don't have the flag set. Fixes: 69664cf16af4 ("keys: don't generate user and user session keyrings unless they're accessed") Cc: [v2.6.26+] Signed-off-by: Eric Biggers Signed-off-by: David Howells [wt: adjust context] Signed-off-by: Willy Tarreau --- include/linux/key.h | 2 ++ security/keys/internal.h | 2 +- security/keys/key.c | 2 ++ security/keys/keyring.c | 23 ++++++++++++++--------- security/keys/process_keys.c | 8 ++++++-- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index 4dfde1161c5..66633b5f2f6 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -162,6 +162,7 @@ struct key { #define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ +#define KEY_FLAG_UID_KEYRING 11 /* set if key is a user or user session keyring */ /* the description string * - this is used to match a key against search criteria @@ -203,6 +204,7 @@ extern struct key *key_alloc(struct key_type *type, #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ #define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ +#define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); diff --git a/security/keys/internal.h b/security/keys/internal.h index d4f1468b9b5..ce6d4634a84 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -126,7 +126,7 @@ extern key_ref_t search_process_keyrings(struct key_type *type, key_match_func_t match, const struct cred *cred); -extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); +extern struct key *find_keyring_by_name(const char *name, bool uid_keyring); extern int install_user_keyrings(void); extern int install_thread_keyring_to_cred(struct cred *); diff --git a/security/keys/key.c b/security/keys/key.c index 6373ff18b8e..248c2e73137 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -299,6 +299,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; + if (flags & KEY_ALLOC_UID_KEYRING) + key->flags |= 1 << KEY_FLAG_UID_KEYRING; memset(&key->type_data, 0, sizeof(key->type_data)); diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 6ece7f2e570..b0cabf68c67 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -583,15 +583,15 @@ found: /* * Find a keyring with the specified name. * - * All named keyrings in the current user namespace are searched, provided they - * grant Search permission directly to the caller (unless this check is - * skipped). Keyrings whose usage points have reached zero or who have been - * revoked are skipped. + * Only keyrings that have nonzero refcount, are not revoked, and are owned by a + * user in the current user namespace are considered. If @uid_keyring is %true, + * the keyring additionally must have been allocated as a user or user session + * keyring; otherwise, it must grant Search permission directly to the caller. * * Returns a pointer to the keyring with the keyring's refcount having being * incremented on success. -ENOKEY is returned if a key could not be found. */ -struct key *find_keyring_by_name(const char *name, bool skip_perm_check) +struct key *find_keyring_by_name(const char *name, bool uid_keyring) { struct key *keyring; int bucket; @@ -619,10 +619,15 @@ struct key *find_keyring_by_name(const char *name, bool skip_perm_check) if (strcmp(keyring->description, name) != 0) continue; - if (!skip_perm_check && - key_permission(make_key_ref(keyring, 0), - KEY_SEARCH) < 0) - continue; + if (uid_keyring) { + if (!test_bit(KEY_FLAG_UID_KEYRING, + &keyring->flags)) + continue; + } else { + if (key_permission(make_key_ref(keyring, 0), + KEY_SEARCH) < 0) + continue; + } /* we've got a match but we might end up racing with * key_cleanup() if the keyring is currently 'dead' diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 33384662fc8..f58a5aa05fa 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -76,7 +76,9 @@ int install_user_keyrings(void) if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID, cred, user_keyring_perm, - KEY_ALLOC_IN_QUOTA, NULL); + KEY_ALLOC_UID_KEYRING | + KEY_ALLOC_IN_QUOTA, + NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); goto error; @@ -92,7 +94,9 @@ int install_user_keyrings(void) session_keyring = keyring_alloc(buf, user->uid, INVALID_GID, cred, user_keyring_perm, - KEY_ALLOC_IN_QUOTA, NULL); + KEY_ALLOC_UID_KEYRING | + KEY_ALLOC_IN_QUOTA, + NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; From a45d36761bc230da5dc37741364608bb91bdfad1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:37:49 -0700 Subject: [PATCH 061/141] KEYS: encrypted: fix dereference of NULL user_key_payload commit 13923d0865ca96312197962522e88bc0aedccd74 upstream. A key of type "encrypted" references a "master key" which is used to encrypt and decrypt the encrypted key's payload. However, when we accessed the master key's payload, we failed to handle the case where the master key has been revoked, which sets the payload pointer to NULL. Note that request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire its semaphore. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. This was an issue for master keys of type "user" only. Master keys can also be of type "trusted", but those cannot be revoked. Fixes: 7e70cb497850 ("keys: add new key-type encrypted") Reviewed-by: James Morris Cc: [v2.6.38+] Cc: Mimi Zohar Cc: David Safford Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Willy Tarreau --- security/keys/encrypted-keys/encrypted.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index c4c8df4b214..7bef30b50d9 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -315,6 +315,13 @@ static struct key *request_user_key(const char *master_desc, u8 **master_key, down_read(&ukey->sem); upayload = ukey->payload.data; + if (!upayload) { + /* key was revoked before we acquired its semaphore */ + up_read(&ukey->sem); + key_put(ukey); + ukey = ERR_PTR(-EKEYREVOKED); + goto error; + } *master_key = upayload->data; *master_keylen = upayload->datalen; error: From 9ac5390b0288726726deab05c891c19eafda7ed3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 Aug 2017 10:23:25 +1000 Subject: [PATCH 062/141] md/bitmap: disable bitmap_resize for file-backed bitmaps. commit e8a27f836f165c26f867ece7f31eb5c811692319 upstream. bitmap_resize() does not work for file-backed bitmaps. The buffer_heads are allocated and initialized when the bitmap is read from the file, but resize doesn't read from the file, it loads from the internal bitmap. When it comes time to write the new bitmap, the bh is non-existent and we crash. The common case when growing an array involves making the array larger, and that normally means making the bitmap larger. Doing that inside the kernel is possible, but would need more code. It is probably easier to require people who use file-backed bitmaps to remove them and re-add after a reshape. So this patch disables the resizing of arrays which have file-backed bitmaps. This is better than crashing. Reported-by: Zhilong Liu Fixes: d60b479d177a ("md/bitmap: add bitmap_resize function to allow bitmap resizing.") Cc: stable@vger.kernel.org (v3.5+). Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Willy Tarreau --- drivers/md/bitmap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 37470ee7c85..4d874427101 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1806,6 +1806,11 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, long pages; struct bitmap_page *new_bp; + if (bitmap->storage.file && !init) { + pr_info("md: cannot resize file-based bitmap\n"); + return -EINVAL; + } + if (chunksize == 0) { /* If there is enough space, leave the chunk size unchanged, * else increase by factor of two until there is enough space. From a77cf1b983190f7cefffb5ff5cf358d7944d04b0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:43:20 -0700 Subject: [PATCH 063/141] lib/digsig: fix dereference of NULL user_key_payload commit 192cabd6a296cbc57b3d8c05c4c89d87fc102506 upstream. digsig_verify() requests a user key, then accesses its payload. However, a revoked key has a NULL payload, and we failed to check for this. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire its semaphore. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. Fixes: 051dbb918c7f ("crypto: digital signature verification support") Reviewed-by: James Morris Cc: [v3.3+] Cc: Dmitry Kasatkin Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Willy Tarreau --- lib/digsig.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/digsig.c b/lib/digsig.c index 2f31e6a45f0..ae703dfc973 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -86,6 +86,12 @@ static int digsig_verify_rsa(struct key *key, down_read(&key->sem); ukp = key->payload.data; + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + err = -EKEYREVOKED; + goto err1; + } + if (ukp->datalen < sizeof(*pkh)) goto err1; From 09ab11ebb7c1f9a66507e382aea696338304f83a Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 08:53:12 +0800 Subject: [PATCH 064/141] netfilter: invoke synchronize_rcu after set the _hook_ to NULL commit 3b7dabf029478bb80507a6c4500ca94132a2bc0b upstream. Otherwise, another CPU may access the invalid pointer. For example: CPU0 CPU1 - rcu_read_lock(); - pfunc = _hook_; _hook_ = NULL; - mod unload - - pfunc(); // invalid, panic - rcu_read_unlock(); So we must call synchronize_rcu() to wait the rcu reader to finish. Also note, in nf_nat_snmp_basic_fini, synchronize_rcu() will be invoked by later nf_conntrack_helper_unregister, but I'm inclined to add a explicit synchronize_rcu after set the nf_nat_snmp_hook to NULL. Depend on such obscure assumptions is not a good idea. Last, in nfnetlink_cttimeout, we use kfree_rcu to free the time object, so in cttimeout_exit, invoking rcu_barrier() is not necessary at all, remove it too. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 + net/netfilter/nf_conntrack_ecache.c | 2 ++ net/netfilter/nf_conntrack_netlink.c | 1 + net/netfilter/nf_nat_core.c | 2 ++ net/netfilter/nfnetlink_cttimeout.c | 1 + 5 files changed, 7 insertions(+) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5f011cc89cd..1e82bdb0f07 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1305,6 +1305,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 1df17614656..c9f131fc4bf 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -116,6 +116,7 @@ void nf_conntrack_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -152,6 +153,7 @@ void nf_ct_expect_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ecf065f9403..df65d52ba76 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3132,6 +3132,7 @@ static void __exit ctnetlink_exit(void) #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT RCU_INIT_POINTER(nfq_ct_hook, NULL); #endif + synchronize_rcu(); } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 2bb801e3ee8..7658d018105 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -853,6 +853,8 @@ static void __exit nf_nat_cleanup(void) #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif + synchronize_rcu(); + for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); synchronize_net(); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 65074dfb938..10d78dc0d2c 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -431,6 +431,7 @@ static void __exit cttimeout_exit(void) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + synchronize_rcu(); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ } From 634fd953630e9ea59072e0d44c614d8b6446d93c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Feb 2017 12:26:41 -0800 Subject: [PATCH 065/141] md/raid10: submit bio directly to replacement disk commit 6d399783e9d4e9bd44931501948059d24ad96ff8 upstream. Commit 57c67df(md/raid10: submit IO from originating thread instead of md thread) submits bio directly for normal disks but not for replacement disks. There is no point we shouldn't do this for replacement disks. Cc: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Willy Tarreau --- drivers/md/raid10.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f53f4f89550..b4de9c3e5ca 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1569,11 +1569,24 @@ retry_write: mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); + + cb = blk_check_plugged(raid10_unplug, mddev, + sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid10_plug_cb, + cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } } From 7719043b415b16e1d8ced517bc581375693d070b Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:27:23 +0800 Subject: [PATCH 066/141] md: fix super_offset endianness in super_1_rdev_size_change commit 3fb632e40d7667d8bedfabc28850ac06d5493f54 upstream. The sb->super_offset should be big-endian, but the rdev->sb_start is in host byte order, so fix this by adding cpu_to_le64. Signed-off-by: Jason Yan Signed-off-by: Shaohua Li Signed-off-by: Willy Tarreau --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 7c45286e266..95eb53f6841 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1898,7 +1898,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); - sb->super_offset = rdev->sb_start; + sb->super_offset = cpu_to_le64(rdev->sb_start); sb->sb_csum = calc_sb_1_csum(sb); md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); From 303177699c7faa2e27db63c796b8bbba4baeb6ac Mon Sep 17 00:00:00 2001 From: Ilya Matveychikov Date: Fri, 23 Jun 2017 15:08:49 -0700 Subject: [PATCH 067/141] lib/cmdline.c: fix get_options() overflow while parsing ranges commit a91e0f680bcd9e10c253ae8b62462a38bd48f09f upstream. When using get_options() it's possible to specify a range of numbers, like 1-100500. The problem is that it doesn't track array size while calling internally to get_range() which iterates over the range and fills the memory with numbers. Link: http://lkml.kernel.org/r/2613C75C-B04D-4BFF-82A6-12F97BA0F620@gmail.com Signed-off-by: Ilya V. Matveychikov Cc: Jonathan Corbet Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- lib/cmdline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index eb6791188cf..efc35fbce78 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -22,14 +22,14 @@ * the values[M, M+1, ..., N] into the ints array in get_options. */ -static int get_range(char **str, int *pint) +static int get_range(char **str, int *pint, int n) { int x, inc_counter, upper_range; (*str)++; upper_range = simple_strtol((*str), NULL, 0); inc_counter = upper_range - *pint; - for (x = *pint; x < upper_range; x++) + for (x = *pint; n && x < upper_range; x++, n--) *pint++ = x; return inc_counter; } @@ -95,7 +95,7 @@ char *get_options(const char *str, int nints, int *ints) break; if (res == 3) { int range_nums; - range_nums = get_range((char **)&str, ints + i); + range_nums = get_range((char **)&str, ints + i, nints - i); if (range_nums < 0) break; /* From 2f34aab1a8758d956926d623823bab8af326668e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 21 May 2017 22:33:23 -0400 Subject: [PATCH 068/141] ext4: fix SEEK_HOLE commit 7d95eddf313c88b24f99d4ca9c2411a4b82fef33 upstream. Currently, SEEK_HOLE implementation in ext4 may both return that there's a hole at some offset although that offset already has data and skip some holes during a search for the next hole. The first problem is demostrated by: xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "seek -h 0" file wrote 57344/57344 bytes at offset 0 56 KiB, 14 ops; 0.0000 sec (2.054 GiB/sec and 538461.5385 ops/sec) Whence Result HOLE 0 Where we can see that SEEK_HOLE wrongly returned offset 0 as containing a hole although we have written data there. The second problem can be demonstrated by: xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "pwrite 128k 8k" -c "seek -h 0" file wrote 57344/57344 bytes at offset 0 56 KiB, 14 ops; 0.0000 sec (1.978 GiB/sec and 518518.5185 ops/sec) wrote 8192/8192 bytes at offset 131072 8 KiB, 2 ops; 0.0000 sec (2 GiB/sec and 500000.0000 ops/sec) Whence Result HOLE 139264 Where we can see that hole at offsets 56k..128k has been ignored by the SEEK_HOLE call. The underlying problem is in the ext4_find_unwritten_pgoff() which is just buggy. In some cases it fails to update returned offset when it finds a hole (when no pages are found or when the first found page has higher index than expected), in some cases conditions for detecting hole are just missing (we fail to detect a situation where indices of returned pages are not contiguous). Fix ext4_find_unwritten_pgoff() to properly detect non-contiguous page indices and also handle all cases where we got less pages then expected in one place and handle it properly there. Fixes: c8c0df241cc2719b1262e627f999638411934f60 CC: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- fs/ext4/file.c | 50 ++++++++++++++------------------------------------ 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d490d6ec396..dfba7b38706 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -325,47 +325,27 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); - if (nr_pages == 0) { - if (whence == SEEK_DATA) - break; - - BUG_ON(whence != SEEK_HOLE); - /* - * If this is the first time to go into the loop and - * offset is not beyond the end offset, it will be a - * hole at this offset - */ - if (lastoff == startoff || lastoff < endoff) - found = 1; + if (nr_pages == 0) break; - } - - /* - * If this is the first time to go into the loop and - * offset is smaller than the first page offset, it will be a - * hole at this offset. - */ - if (lastoff == startoff && whence == SEEK_HOLE && - lastoff < page_offset(pvec.pages[0])) { - found = 1; - break; - } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; struct buffer_head *bh, *head; /* - * If the current offset is not beyond the end of given - * range, it will be a hole. + * If current offset is smaller than the page offset, + * there is a hole at this offset. */ - if (lastoff < endoff && whence == SEEK_HOLE && - page->index > end) { + if (whence == SEEK_HOLE && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { found = 1; *offset = lastoff; goto out; } + if (page->index > end) + goto out; + lock_page(page); if (unlikely(page->mapping != inode->i_mapping)) { @@ -408,20 +388,18 @@ next: unlock_page(page); } - /* - * The no. of pages is less than our desired, that would be a - * hole in there. - */ - if (nr_pages < num && whence == SEEK_HOLE) { - found = 1; - *offset = lastoff; + /* The no. of pages is less than our desired, we are done. */ + if (nr_pages < num) break; - } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); + if (whence == SEEK_HOLE && lastoff < endoff) { + found = 1; + *offset = lastoff; + } out: pagevec_release(&pvec); return found; From 4ef41964ac05a4e3c34173f40aa6e2ecbb978a16 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Jun 2017 07:02:20 -0700 Subject: [PATCH 069/141] net: prevent sign extension in dev_get_stats() commit 6f64ec74515925cced6df4571638b5a099a49aae upstream. Similar to the fix provided by Dominik Heidler in commit 9b3dc0a17d73 ("l2tp: cast l2tp traffic counter to unsigned") we need to take care of 32bit kernels in dev_get_stats(). When using atomic_long_read(), we add a 'long' to u64 and might misinterpret high order bit, unless we cast to unsigned. Fixes: caf586e5f23ce ("net: add a core netdev->rx_dropped counter") Fixes: 015f0688f57ca ("net: net: add a core netdev->tx_dropped counter") Fixes: 6e7333d315a76 ("net: add rx_nohandler stat counter") Signed-off-by: Eric Dumazet Cc: Jarod Wilson Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index c8842036508..d69d8ec1138 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5643,7 +5643,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, } else { netdev_stats_to_stats64(storage, &dev->stats); } - storage->rx_dropped += atomic_long_read(&dev->rx_dropped); + storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped); return storage; } EXPORT_SYMBOL(dev_get_stats); From 7af6952b1858bf13abb625a99f34bcdd3104b446 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 6 Jul 2017 15:35:31 -0700 Subject: [PATCH 070/141] kernel/extable.c: mark core_kernel_text notrace commit c0d80ddab89916273cb97114889d3f337bc370ae upstream. core_kernel_text is used by MIPS in its function graph trace processing, so having this method traced leads to an infinite set of recursive calls such as: Call Trace: ftrace_return_to_handler+0x50/0x128 core_kernel_text+0x10/0x1b8 prepare_ftrace_return+0x6c/0x114 ftrace_graph_caller+0x20/0x44 return_to_handler+0x10/0x30 return_to_handler+0x0/0x30 return_to_handler+0x0/0x30 ftrace_ops_no_ops+0x114/0x1bc core_kernel_text+0x10/0x1b8 core_kernel_text+0x10/0x1b8 core_kernel_text+0x10/0x1b8 ftrace_ops_no_ops+0x114/0x1bc core_kernel_text+0x10/0x1b8 prepare_ftrace_return+0x6c/0x114 ftrace_graph_caller+0x20/0x44 (...) Mark the function notrace to avoid it being traced. Link: http://lkml.kernel.org/r/1498028607-6765-1-git-send-email-marcin.nowakowski@imgtec.com Signed-off-by: Marcin Nowakowski Reviewed-by: Masami Hiramatsu Cc: Peter Zijlstra Cc: Thomas Meyer Cc: Ingo Molnar Cc: Steven Rostedt Cc: Daniel Borkmann Cc: Paul Gortmaker Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- kernel/extable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/extable.c b/kernel/extable.c index 67460b93b1a..5ec4b6f861d 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -66,7 +66,7 @@ static inline int init_kernel_text(unsigned long addr) return 0; } -int core_kernel_text(unsigned long addr) +int notrace core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) From 19cc6d98a6ad6899ccc70cffb39365e7e7cdca93 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:35:25 +0100 Subject: [PATCH 071/141] wext: handle NULL extra data in iwe_stream_add_point better commit 93be2b74279c15c2844684b1a027fdc71dd5d9bf upstream. gcc-7 complains that wl3501_cs passes NULL into a function that then uses the argument as the input for memcpy: drivers/net/wireless/wl3501_cs.c: In function 'wl3501_get_scan': include/net/iw_handler.h:559:3: error: argument 2 null where non-null expected [-Werror=nonnull] memcpy(stream + point_len, extra, iwe->u.data.length); This works fine here because iwe->u.data.length is guaranteed to be 0 and the memcpy doesn't actually have an effect. Making the length check explicit avoids the warning and should have no other effect here. Also check the pointer itself, since otherwise we get warnings elsewhere in the code. Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg Signed-off-by: Willy Tarreau --- include/net/iw_handler.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 5d5a6a4732e..5af07a1ab0c 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -551,7 +551,8 @@ iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends, memcpy(stream + lcp_len, ((char *) &iwe->u) + IW_EV_POINT_OFF, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); - memcpy(stream + point_len, extra, iwe->u.data.length); + if (iwe->u.data.length && extra) + memcpy(stream + point_len, extra, iwe->u.data.length); stream += event_len; } return stream; From 159c95bdb52696fd12c9238d9c301e9ed98b398f Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 16:35:29 +0800 Subject: [PATCH 072/141] netfilter: nf_ct_ext: fix possible panic after nf_ct_extend_unregister commit 9c3f3794926a997b1cab6c42480ff300efa2d162 upstream. If one cpu is doing nf_ct_extend_unregister while another cpu is doing __nf_ct_ext_add_length, then we may hit BUG_ON(t == NULL). Moreover, there's no synchronize_rcu invocation after set nf_ct_ext_types[id] to NULL, so it's possible that we may access invalid pointer. But actually, most of the ct extends are built-in, so the problem listed above will not happen. However, there are two exceptions: NF_CT_EXT_NAT and NF_CT_EXT_SYNPROXY. For _EXT_NAT, the panic will not happen, since adding the nat extend and unregistering the nat extend are located in the same file(nf_nat_core.c), this means that after the nat module is removed, we cannot add the nat extend too. For _EXT_SYNPROXY, synproxy extend may be added by init_conntrack, while synproxy extend unregister will be done by synproxy_core_exit. So after nf_synproxy_core.ko is removed, we may still try to add the synproxy extend, then kernel panic may happen. I know it's very hard to reproduce this issue, but I can play a tricky game to make it happen very easily :) Step 1. Enable SYNPROXY for tcp dport 1234 at FORWARD hook: # iptables -I FORWARD -p tcp --dport 1234 -j SYNPROXY Step 2. Queue the syn packet to the userspace at raw table OUTPUT hook. Also note, in the userspace we only add a 20s' delay, then reinject the syn packet to the kernel: # iptables -t raw -I OUTPUT -p tcp --syn -j NFQUEUE --queue-num 1 Step 3. Using "nc 2.2.2.2 1234" to connect the server. Step 4. Now remove the nf_synproxy_core.ko quickly: # iptables -F FORWARD # rmmod ipt_SYNPROXY # rmmod nf_synproxy_core Step 5. After 20s' delay, the syn packet is reinjected to the kernel. Now you will see the panic like this: kernel BUG at net/netfilter/nf_conntrack_extend.c:91! Call Trace: ? __nf_ct_ext_add_length+0x53/0x3c0 [nf_conntrack] init_conntrack+0x12b/0x600 [nf_conntrack] nf_conntrack_in+0x4cc/0x580 [nf_conntrack] ipv4_conntrack_local+0x48/0x50 [nf_conntrack_ipv4] nf_reinject+0x104/0x270 nfqnl_recv_verdict+0x3e1/0x5f9 [nfnetlink_queue] ? nfqnl_recv_verdict+0x5/0x5f9 [nfnetlink_queue] ? nla_parse+0xa0/0x100 nfnetlink_rcv_msg+0x175/0x6a9 [nfnetlink] [...] One possible solution is to make NF_CT_EXT_SYNPROXY extend built-in, i.e. introduce nf_conntrack_synproxy.c and only do ct extend register and unregister in it, similar to nf_conntrack_timeout.c. But having such a obscure restriction of nf_ct_extend_unregister is not a good idea, so we should invoke synchronize_rcu after set nf_ct_ext_types to NULL, and check the NULL pointer when do __nf_ct_ext_add_length. Then it will be easier if we add new ct extend in the future. Last, we use kfree_rcu to free nf_ct_ext, so rcu_barrier() is unnecessary anymore, remove it too. Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/netfilter/nf_conntrack_extend.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 1a9545965c0..531ca55f1af 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } + off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len + var_alloc_len; alloc_size = t->alloc_size + var_alloc_len; @@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } newoff = ALIGN(old->len, t->align); newlen = newoff + t->len + var_alloc_len; @@ -186,6 +193,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - rcu_barrier(); /* Wait for completion of call_rcu()'s */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); From ebb33aff58b3899842e5b7d89be8bf1222a943e6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 24 Aug 2017 13:22:06 -0400 Subject: [PATCH 073/141] ext4: in ext4_seek_{hole,data}, return -ENXIO for negative offsets commit 1bd8d6cd3e413d64e543ec3e69ff43e75a1cf1ea upstream. In the ext4 implementations of SEEK_HOLE and SEEK_DATA, make sure we return -ENXIO for negative offsets instead of banging around inside the extent code and returning -EFSCORRUPTED. Reported-by: Mateusz S Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org # 4.6 Signed-off-by: Willy Tarreau --- fs/ext4/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index dfba7b38706..ed2badabebf 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -421,7 +421,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) mutex_lock(&inode->i_mutex); isize = i_size_read(inode); - if (offset >= isize) { + if (offset < 0 || offset >= isize) { mutex_unlock(&inode->i_mutex); return -ENXIO; } @@ -504,7 +504,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) mutex_lock(&inode->i_mutex); isize = i_size_read(inode); - if (offset >= isize) { + if (offset < 0 || offset >= isize) { mutex_unlock(&inode->i_mutex); return -ENXIO; } From 9f75306bea1ce5be9b5278fe3cd9affba5ce0fd8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 11 Aug 2016 12:38:55 -0400 Subject: [PATCH 074/141] ext4: avoid deadlock when expanding inode size commit 2e81a4eeedcaa66e35f58b81e0755b87057ce392 upstream. When we need to move xattrs into external xattr block, we call ext4_xattr_block_set() from ext4_expand_extra_isize_ea(). That may end up calling ext4_mark_inode_dirty() again which will recurse back into the inode expansion code leading to deadlocks. Protect from recursion using EXT4_STATE_NO_EXPAND inode flag and move its management into ext4_expand_extra_isize_ea() since its manipulation is safe there (due to xattr_sem) from possible races with ext4_xattr_set_handle() which plays with it as well. CC: stable@vger.kernel.org # 4.4.x Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/inode.c | 2 -- fs/ext4/xattr.c | 19 +++++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1094017220d..26054c19e6c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5098,8 +5098,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) sbi->s_want_extra_isize, iloc, handle); if (ret) { - ext4_set_inode_state(inode, - EXT4_STATE_NO_EXPAND); if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { ext4_warning(inode->i_sb, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 92850bab451..dde00d1e299 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1266,11 +1266,13 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); down_write(&EXT4_I(inode)->xattr_sem); + /* + * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty + */ + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); retry: - if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { - up_write(&EXT4_I(inode)->xattr_sem); - return 0; - } + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) + goto out; header = IHDR(inode, raw_inode); entry = IFIRST(header); @@ -1295,8 +1297,7 @@ retry: (void *)header, total_ino, inode->i_sb->s_blocksize); EXT4_I(inode)->i_extra_isize = new_extra_isize; - error = 0; - goto cleanup; + goto out; } /* @@ -1457,6 +1458,8 @@ retry: kfree(bs); } brelse(bh); +out: + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); up_write(&EXT4_I(inode)->xattr_sem); return 0; @@ -1468,6 +1471,10 @@ cleanup: kfree(is); kfree(bs); brelse(bh); + /* + * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode + * size expansion failed. + */ up_write(&EXT4_I(inode)->xattr_sem); return error; } From 3cb637d2295a3f05c7fac17ccb9429f5b180772b Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Jul 2017 18:32:45 +0200 Subject: [PATCH 075/141] sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}() commit b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a upstream. If the length field of the iterator (|pos.p| or |err|) is past the end of the chunk, we shouldn't access it. This bug has been detected by KMSAN. For the following pair of system calls: socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3 sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 1 the tool has reported a use of uninitialized memory: ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0 CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 __sctp_rcv_init_lookup net/sctp/input.c:1074 __sctp_rcv_lookup_harder net/sctp/input.c:1233 __sctp_rcv_lookup net/sctp/input.c:1255 sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170 sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984 ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 NF_HOOK ./include/linux/netfilter.h:257 ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 dst_input ./include/net/dst.h:492 ip6_rcv_finish net/ipv6/ip6_input.c:69 NF_HOOK ./include/linux/netfilter.h:257 ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 __netif_receive_skb net/core/dev.c:4246 process_backlog+0x667/0xba0 net/core/dev.c:4866 napi_poll net/core/dev.c:5268 net_rx_action+0xc95/0x1590 net/core/dev.c:5333 __do_softirq+0x485/0x942 kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq kernel/softirq.c:328 __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181 local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31 rcu_read_unlock_bh ./include/linux/rcupdate.h:931 ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124 ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149 NF_HOOK_COND ./include/linux/netfilter.h:246 ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163 dst_output ./include/net/dst.h:486 NF_HOOK ./include/linux/netfilter.h:257 ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261 sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225 sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632 sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 sctp_side_effects net/sctp/sm_sideeffect.c:1773 sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246 RIP: 0033:0x401133 RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133 RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003 RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211 slab_alloc_node mm/slub.c:2743 __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 __alloc_skb+0x26b/0x840 net/core/skbuff.c:231 alloc_skb ./include/linux/skbuff.h:933 sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570 sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 sctp_side_effects net/sctp/sm_sideeffect.c:1773 sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246 ================================================================== Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- include/net/sctp/sctp.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 845ab6decc4..b72cba7221f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -555,6 +555,8 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ pos.v += WORD_ROUND(ntohs(pos.p->length))) @@ -565,6 +567,8 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) From 2630e5a878f730bfd14af7ecf78189a6387d303d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 16:24:59 +0800 Subject: [PATCH 076/141] sctp: fix the check for _sctp_walk_params and _sctp_walk_errors commit 6b84202c946cd3da3a8daa92c682510e9ed80321 upstream. Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") tried to fix the issue that it may overstep the chunk end for _sctp_walk_{params, errors} with 'chunk_end > offset(length) + sizeof(length)'. But it introduced a side effect: When processing INIT, it verifies the chunks with 'param.v == chunk_end' after iterating all params by sctp_walk_params(). With the check 'chunk_end > offset(length) + sizeof(length)', it would return when the last param is not yet accessed. Because the last param usually is fwdtsn supported param whose size is 4 and 'chunk_end == offset(length) + sizeof(length)' This is a badly issue even causing sctp couldn't process 4-shakes. Client would always get abort when connecting to server, due to the failure of INIT chunk verification on server. The patch is to use 'chunk_end <= offset(length) + sizeof(length)' instead of 'chunk_end < offset(length) + sizeof(length)' for both _sctp_walk_params and _sctp_walk_errors. Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- include/net/sctp/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index b72cba7221f..ee81c68f24a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -555,7 +555,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ - (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ @@ -567,7 +567,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ - ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ From faa132ab55776ed0d38105f50525f0388cb47d0b Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 16 Aug 2017 20:16:40 +0200 Subject: [PATCH 077/141] sctp: fully initialize the IPv6 address in sctp_v6_to_addr() commit 15339e441ec46fbc3bf3486bb1ae4845b0f1bb8d upstream. KMSAN reported use of uninitialized sctp_addr->v4.sin_addr.s_addr and sctp_addr->v6.sin6_scope_id in sctp_v6_cmp_addr() (see below). Make sure all fields of an IPv6 address are initialized, which guarantees that the IPv4 fields are also initialized. ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x172/0x1c0 lib/dump_stack.c:42 is_logbuf_locked mm/kmsan/kmsan.c:59 [inline] kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938 native_save_fl arch/x86/include/asm/irqflags.h:18 [inline] arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline] arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline] __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467 sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651 sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg net/socket.c:643 [inline] SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x44b479 RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479 RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006 RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000 origin description: ----dst_saddr@sctp_v6_get_dst local variable created at: sk_fullsock include/net/sock.h:2321 [inline] inet6_sk include/linux/ipv6.h:309 [inline] sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x172/0x1c0 lib/dump_stack.c:42 is_logbuf_locked mm/kmsan/kmsan.c:59 [inline] kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938 native_save_fl arch/x86/include/asm/irqflags.h:18 [inline] arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline] arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline] __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467 sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651 sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg net/socket.c:643 [inline] SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x44b479 RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479 RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006 RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000 origin description: ----dst_saddr@sctp_v6_get_dst local variable created at: sk_fullsock include/net/sock.h:2321 [inline] inet6_sk include/linux/ipv6.h:309 [inline] sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 ================================================================== Signed-off-by: Alexander Potapenko Reviewed-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- net/sctp/ipv6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 10d3e2874dd..7c2fea69c83 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -492,7 +492,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, { addr->sa.sa_family = AF_INET6; addr->v6.sin6_port = port; + addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = *saddr; + addr->v6.sin6_scope_id = 0; } /* Compare addresses exactly. From 08bd34b7527fe6b056928ad9cb02b4ab4c0d1011 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 Sep 2017 02:00:54 +0300 Subject: [PATCH 078/141] sctp: potential read out of bounds in sctp_ulpevent_type_enabled() commit fa5f7b51fc3080c2b195fa87c7eca7c05e56f673 upstream. This code causes a static checker warning because Smatch doesn't trust anything that comes from skb->data. I've reviewed this code and I do think skb->data can be controlled by the user here. The sctp_event_subscribe struct has 13 __u8 fields and we want to see if ours is non-zero. sn_type can be any value in the 0-USHRT_MAX range. We're subtracting SCTP_SN_TYPE_BASE which is 1 << 15 so we could read either before the start of the struct or after the end. This is a very old bug and it's surprising that it would go undetected for so long but my theory is that it just doesn't have a big impact so it would be hard to notice. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- include/net/sctp/ulpevent.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index ca4693b4e09..00c0e5bf5d3 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -143,8 +143,12 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); static inline int sctp_ulpevent_type_enabled(__u16 sn_type, struct sctp_event_subscribe *mask) { + int offset = sn_type - SCTP_SN_TYPE_BASE; char *amask = (char *) mask; - return amask[sn_type - SCTP_SN_TYPE_BASE]; + + if (offset >= sizeof(struct sctp_event_subscribe)) + return 0; + return amask[offset]; } /* Given an event subscription, is this event enabled? */ From 946272f8a5f73b249808d1fb8f30a25724864466 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 31 May 2017 11:21:27 -0700 Subject: [PATCH 079/141] tcp: disallow cwnd undo when switching congestion control commit 44abafc4cc094214a99f860f778c48ecb23422fc upstream. When the sender switches its congestion control during loss recovery, if the recovery is spurious then it may incorrectly revert cwnd and ssthresh to the older values set by a previous congestion control. Consider a congestion control (like BBR) that does not use ssthresh and keeps it infinite: the connection may incorrectly revert cwnd to an infinite value when switching from BBR to another congestion control. This patch fixes it by disallowing such cwnd undo operation upon switching congestion control. Note that undo_marker is not reset s.t. the packets that were incorrectly marked lost would be corrected. We only avoid undoing the cwnd in tcp_undo_cwnd_reduction(). Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/tcp_cong.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 019c2389a34..2ca6c080a4b 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -95,6 +95,7 @@ void tcp_init_congestion_control(struct sock *sk) rcu_read_unlock(); } + tcp_sk(sk)->prior_ssthresh = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); } From 72d7b83afbc4659de5948c4cc33f5d7b47f09aa4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Apr 2017 10:55:11 -0700 Subject: [PATCH 080/141] netfilter: xt_TCPMSS: add more sanity tests on tcph->doff commit 2638fd0f92d4397884fd991d8f4925cb3f081901 upstream. Denys provided an awesome KASAN report pointing to an use after free in xt_TCPMSS I have provided three patches to fix this issue, either in xt_TCPMSS or in xt_tcpudp.c. It seems xt_TCPMSS patch has the smallest possible impact. Signed-off-by: Eric Dumazet Reported-by: Denys Fedoryshchenko Signed-off-by: Pablo Neira Ayuso [wt: adjust context] Signed-off-by: Willy Tarreau --- net/netfilter/xt_TCPMSS.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 7011c71646f..c656269c4cf 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -68,7 +68,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); /* Header cannot be larger than the packet */ - if (tcplen < tcph->doff*4) + if (tcplen < tcph->doff*4 || tcph->doff*4 < sizeof(struct tcphdr)) return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { @@ -117,6 +117,10 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (tcplen > tcph->doff*4) return 0; + /* tcph->doff has 4 bits, do not wrap it to 0 */ + if (tcph->doff >= 15) + return 0; + /* * MSS Option not found ?! add it.. */ From 6f85a8873040becd7d779ad5f510f1b73d04f59b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 24 Jun 2017 23:50:30 -0700 Subject: [PATCH 081/141] tcp: reset sk_rx_dst in tcp_disconnect() commit d747a7a51b00984127a88113cdbbc26f91e9d815 upstream. We have to reset the sk->sk_rx_dst when we disconnect a TCP connection, because otherwise when we re-connect it this dst reference is simply overridden in tcp_finish_connect(). This fixes a dst leak which leads to a loopback dev refcnt leak. It is a long-standing bug, Kevin reported a very similar (if not same) bug before. Thanks to Andrei for providing such a reliable reproducer which greatly narrows down the problem. Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Reported-by: Andrei Vagin Reported-by: Kevin Xu Signed-off-by: Cong Wang Signed-off-by: David S. Miller [wt: adjusted context] Signed-off-by: Willy Tarreau --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d1e04221c27..446dc4fe28c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2316,6 +2316,8 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + dst_release(sk->sk_rx_dst); + sk->sk_rx_dst = NULL; WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); From 37cb36243061666f8bd44fdc51229ca6896effa3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 1 Aug 2017 13:22:32 -0700 Subject: [PATCH 082/141] tcp: avoid setting cwnd to invalid ssthresh after cwnd reduction states commit ed254971edea92c3ac5c67c6a05247a92aa6075e upstream. If the sender switches the congestion control during ECN-triggered cwnd-reduction state (CA_CWR), upon exiting recovery cwnd is set to the ssthresh value calculated by the previous congestion control. If the previous congestion control is BBR that always keep ssthresh to TCP_INIFINITE_SSTHRESH, cwnd ends up being infinite. The safe step is to avoid assigning invalid ssthresh value when recovery ends. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 70f217cb3a1..828835cb070 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2554,8 +2554,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } From 81d2ec2013886082bb8bb4163c9424b138b0d267 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 16 Aug 2017 17:53:36 -0400 Subject: [PATCH 083/141] tcp: when rearming RTO, if RTO time is in past then fire RTO ASAP commit cdbeb633ca71a02b7b63bfeb94994bf4e1a0b894 upstream. In some situations tcp_send_loss_probe() can realize that it's unable to send a loss probe (TLP), and falls back to calling tcp_rearm_rto() to schedule an RTO timer. In such cases, sometimes tcp_rearm_rto() realizes that the RTO was eligible to fire immediately or at some point in the past (delta_us <= 0). Previously in such cases tcp_rearm_rto() was scheduling such "overdue" RTOs to happen at now + icsk_rto, which caused needless delays of hundreds of milliseconds (and non-linear behavior that made reproducible testing difficult). This commit changes the logic to schedule "overdue" RTOs ASAP, rather than at now + icsk_rto. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Suggested-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller [wt: no need for usec_to_jiffies conversion in 3.10] Signed-off-by: Willy Tarreau --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 828835cb070..85dd09be161 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2977,8 +2977,7 @@ void tcp_rearm_rto(struct sock *sk) /* delta may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ - if (delta > 0) - rto = delta; + rto = max_t(int, delta, 1); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); From 83fef5222fb220e232d106704d9e0abe838d8b0b Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 18 May 2017 11:22:33 -0700 Subject: [PATCH 084/141] tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0 commit 499350a5a6e7512d9ed369ed63a4244b6536f4f8 upstream. When tcp_disconnect() is called, inet_csk_delack_init() sets icsk->icsk_ack.rcv_mss to 0. This could potentially cause tcp_recvmsg() => tcp_cleanup_rbuf() => __tcp_select_window() call path to have division by 0 issue. So this patch initializes rcv_mss to TCP_MIN_MSS instead of 0. Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 446dc4fe28c..b80b399f237 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2313,6 +2313,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() + */ + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); From 055ad9609c623acfc679a3e9049774f414e3a189 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 25 Apr 2017 18:51:46 +0200 Subject: [PATCH 085/141] net/packet: check length in getsockopt() called with PACKET_HDRLEN commit fd2c83b35752f0a8236b976978ad4658df14a59f upstream. In the case getsockopt() is called with PACKET_HDRLEN and optlen < 4 |val| remains uninitialized and the syscall may behave differently depending on its value, and even copy garbage to userspace on certain architectures. To fix this we now return -EINVAL if optlen is too small. This bug has been detected with KMSAN. Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0bbb3470fa7..b915d011287 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3338,6 +3338,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_HDRLEN: if (len > sizeof(int)) len = sizeof(int); + if (len < sizeof(int)) + return -EINVAL; if (copy_from_user(&val, optval, len)) return -EFAULT; switch (val) { From 78da094deb3ba652c17f4e83b16ac9317f45c6ed Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 26 Sep 2017 17:38:50 -0700 Subject: [PATCH 086/141] net: Set sk_prot_creator when cloning sockets to the right proto commit 9d538fa60bad4f7b23193c89e843797a1cf71ef3 upstream. sk->sk_prot and sk->sk_prot_creator can differ when the app uses IPV6_ADDRFORM (transforming an IPv6-socket to an IPv4-one). Which is why sk_prot_creator is there to make sure that sk_prot_free() does the kmem_cache_free() on the right kmem_cache slab. Now, if such a socket gets transformed back to a listening socket (using connect() with AF_UNSPEC) we will allocate an IPv4 tcp_sock through sk_clone_lock() when a new connection comes in. But sk_prot_creator will still point to the IPv6 kmem_cache (as everything got copied in sk_clone_lock()). When freeing, we will thus put this memory back into the IPv6 kmem_cache although it was allocated in the IPv4 cache. I have seen memory corruption happening because of this. With slub-debugging and MEMCG_KMEM enabled this gives the warning "cache_from_obj: Wrong slab cache. TCPv6 but object is from TCP" A C-program to trigger this: void main(void) { int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP); int new_fd, newest_fd, client_fd; struct sockaddr_in6 bind_addr; struct sockaddr_in bind_addr4, client_addr1, client_addr2; struct sockaddr unsp; int val; memset(&bind_addr, 0, sizeof(bind_addr)); bind_addr.sin6_family = AF_INET6; bind_addr.sin6_port = ntohs(42424); memset(&client_addr1, 0, sizeof(client_addr1)); client_addr1.sin_family = AF_INET; client_addr1.sin_port = ntohs(42424); client_addr1.sin_addr.s_addr = inet_addr("127.0.0.1"); memset(&client_addr2, 0, sizeof(client_addr2)); client_addr2.sin_family = AF_INET; client_addr2.sin_port = ntohs(42421); client_addr2.sin_addr.s_addr = inet_addr("127.0.0.1"); memset(&unsp, 0, sizeof(unsp)); unsp.sa_family = AF_UNSPEC; bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr)); listen(fd, 5); client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); connect(client_fd, (struct sockaddr *)&client_addr1, sizeof(client_addr1)); new_fd = accept(fd, NULL, NULL); close(fd); val = AF_INET; setsockopt(new_fd, SOL_IPV6, IPV6_ADDRFORM, &val, sizeof(val)); connect(new_fd, &unsp, sizeof(unsp)); memset(&bind_addr4, 0, sizeof(bind_addr4)); bind_addr4.sin_family = AF_INET; bind_addr4.sin_port = ntohs(42421); bind(new_fd, (struct sockaddr *)&bind_addr4, sizeof(bind_addr4)); listen(new_fd, 5); client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); connect(client_fd, (struct sockaddr *)&client_addr2, sizeof(client_addr2)); newest_fd = accept(new_fd, NULL, NULL); close(new_fd); close(client_fd); close(new_fd); } As far as I can see, this bug has been there since the beginning of the git-days. Signed-off-by: Christoph Paasch Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/core/sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/sock.c b/net/core/sock.c index 96e12591932..104784ee4bb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1470,6 +1470,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); + newsk->sk_prot_creator = sk->sk_prot; + /* SANITY */ get_net(sock_net(newsk)); sk_node_init(&newsk->sk_node); From bc3eedee113e4069762023a0e1dbed9647ffbcfa Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 23 Feb 2017 12:02:43 +0200 Subject: [PATCH 087/141] net/mlx4_core: Fix VF overwrite of module param which disables DMFS on new probed PFs commit 95f1ba9a24af9769f6e20dfe9a77c863f253f311 upstream. In the VF driver, module parameter mlx4_log_num_mgm_entry_size was mistakenly overwritten -- and in a manner which overrode the device-managed flow steering option encoded in the parameter. log_num_mgm_entry_size is a global module parameter which affects all ConnectX-3 PFs installed on that host. If a VF changes log_num_mgm_entry_size, this will affect all PFs which are probed subsequent to the change (by disabling DMFS for those PFs). Fixes: 3c439b5586e9 ("mlx4_core: Allow choosing flow steering mode") Signed-off-by: Majd Dibbiny Reviewed-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 3fb2643d05b..8c58001aff1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -511,8 +511,6 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENOSYS; } - mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; - dev->caps.hca_core_clock = hca_param.hca_core_clock; memset(&dev_cap, 0, sizeof(dev_cap)); From 61eea2752a19dbf4749e6300093188401ad53fd3 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 16 Jun 2017 15:00:02 +0800 Subject: [PATCH 088/141] net: 8021q: Fix one possible panic caused by BUG_ON in free_netdev commit 9745e362add89432d2c951272a99b0a5fe4348a9 upstream. The register_vlan_device would invoke free_netdev directly, when register_vlan_dev failed. It would trigger the BUG_ON in free_netdev if the dev was already registered. In this case, the netdev would be freed in netdev_run_todo later. So add one condition check now. Only when dev is not registered, then free it directly. The following is the part coredump when netdev_upper_dev_link failed in register_vlan_dev. I removed the lines which are too long. [ 411.237457] ------------[ cut here ]------------ [ 411.237458] kernel BUG at net/core/dev.c:7998! [ 411.237484] invalid opcode: 0000 [#1] SMP [ 411.237705] [last unloaded: 8021q] [ 411.237718] CPU: 1 PID: 12845 Comm: vconfig Tainted: G E 4.12.0-rc5+ #6 [ 411.237737] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 411.237764] task: ffff9cbeb6685580 task.stack: ffffa7d2807d8000 [ 411.237782] RIP: 0010:free_netdev+0x116/0x120 [ 411.237794] RSP: 0018:ffffa7d2807dbdb0 EFLAGS: 00010297 [ 411.237808] RAX: 0000000000000002 RBX: ffff9cbeb6ba8fd8 RCX: 0000000000001878 [ 411.237826] RDX: 0000000000000001 RSI: 0000000000000282 RDI: 0000000000000000 [ 411.237844] RBP: ffffa7d2807dbdc8 R08: 0002986100029841 R09: 0002982100029801 [ 411.237861] R10: 0004000100029980 R11: 0004000100029980 R12: ffff9cbeb6ba9000 [ 411.238761] R13: ffff9cbeb6ba9060 R14: ffff9cbe60f1a000 R15: ffff9cbeb6ba9000 [ 411.239518] FS: 00007fb690d81700(0000) GS:ffff9cbebb640000(0000) knlGS:0000000000000000 [ 411.239949] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 411.240454] CR2: 00007f7115624000 CR3: 0000000077cdf000 CR4: 00000000003406e0 [ 411.240936] Call Trace: [ 411.241462] vlan_ioctl_handler+0x3f1/0x400 [8021q] [ 411.241910] sock_ioctl+0x18b/0x2c0 [ 411.242394] do_vfs_ioctl+0xa1/0x5d0 [ 411.242853] ? sock_alloc_file+0xa6/0x130 [ 411.243465] SyS_ioctl+0x79/0x90 [ 411.243900] entry_SYSCALL_64_fastpath+0x1e/0xa9 [ 411.244425] RIP: 0033:0x7fb69089a357 [ 411.244863] RSP: 002b:00007ffcd04e0fc8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 411.245445] RAX: ffffffffffffffda RBX: 00007ffcd04e2884 RCX: 00007fb69089a357 [ 411.245903] RDX: 00007ffcd04e0fd0 RSI: 0000000000008983 RDI: 0000000000000003 [ 411.246527] RBP: 00007ffcd04e0fd0 R08: 0000000000000000 R09: 1999999999999999 [ 411.246976] R10: 000000000000053f R11: 0000000000000202 R12: 0000000000000004 [ 411.247414] R13: 00007ffcd04e1128 R14: 00007ffcd04e2888 R15: 0000000000000001 [ 411.249129] RIP: free_netdev+0x116/0x120 RSP: ffffa7d2807dbdb0 Signed-off-by: Gao Feng Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/8021q/vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 86abb2e59ae..82fdb35154f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -274,7 +274,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - free_netdev(new_dev); + if (new_dev->reg_state == NETREG_UNINITIALIZED) + free_netdev(new_dev); return err; } From 28968bccd81010f0ab4c198849aaed6fa20658a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jul 2017 14:53:02 +0200 Subject: [PATCH 089/141] x86/io: Add "memory" clobber to insb/insw/insl/outsb/outsw/outsl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7206f9bf108eb9513d170c73f151367a1bdf3dbf upstream. The x86 version of insb/insw/insl uses an inline assembly that does not have the target buffer listed as an output. This can confuse the compiler, leading it to think that a subsequent access of the buffer is uninitialized: drivers/net/wireless/wl3501_cs.c: In function ‘wl3501_mgmt_scan_confirm’: drivers/net/wireless/wl3501_cs.c:665:9: error: ‘sig.status’ is used uninitialized in this function [-Werror=uninitialized] drivers/net/wireless/wl3501_cs.c:668:12: error: ‘sig.cap_info’ may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/sb1000.c: In function 'sb1000_rx': drivers/net/sb1000.c:775:9: error: 'st[0]' is used uninitialized in this function [-Werror=uninitialized] drivers/net/sb1000.c:776:10: error: 'st[1]' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/net/sb1000.c:784:11: error: 'st[1]' may be used uninitialized in this function [-Werror=maybe-uninitialized] I tried to mark the exact input buffer as an output here, but couldn't figure it out. As suggested by Linus, marking all memory as clobbered however is good enough too. For the outs operations, I also add the memory clobber, to force the input to be written to local variables. This is probably already guaranteed by the "asm volatile", but it can't hurt to do this for symmetry. Suggested-by: Linus Torvalds Signed-off-by: Arnd Bergmann Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Link: http://lkml.kernel.org/r/20170719125310.2487451-5-arnd@arndb.de Link: https://lkml.org/lkml/2017/7/12/605 Signed-off-by: Ingo Molnar Signed-off-by: Willy Tarreau --- arch/x86/include/asm/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d8e8eefbe24..86ec87dc1f5 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -296,13 +296,13 @@ static inline unsigned type in##bwl##_p(int port) \ static inline void outs##bwl(int port, const void *addr, unsigned long count) \ { \ asm volatile("rep; outs" #bwl \ - : "+S"(addr), "+c"(count) : "d"(port)); \ + : "+S"(addr), "+c"(count) : "d"(port) : "memory"); \ } \ \ static inline void ins##bwl(int port, void *addr, unsigned long count) \ { \ asm volatile("rep; ins" #bwl \ - : "+D"(addr), "+c"(count) : "d"(port)); \ + : "+D"(addr), "+c"(count) : "d"(port) : "memory"); \ } BUILDIO(b, b, char) From ca58e3129147cd80996c9a7cde5faee28f9981ec Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Apr 2017 16:56:26 +0200 Subject: [PATCH 090/141] kvm: async_pf: fix rcu_irq_enter() with irqs enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bbaf0e2b1c1b4f88abd6ef49576f0efb1734eae5 upstream. native_safe_halt enables interrupts, and you just shouldn't call rcu_irq_enter() with interrupts enabled. Reorder the call with the following local_irq_disable() to respect the invariant. Reported-by: Ross Zwisler Signed-off-by: Paolo Bonzini Acked-by: Paul E. McKenney Tested-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/x86/kernel/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c4ff2a91613..c95ece93f35 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -159,8 +159,8 @@ void kvm_async_pf_task_wait(u32 token) */ rcu_irq_exit(); native_safe_halt(); - rcu_irq_enter(); local_irq_disable(); + rcu_irq_enter(); } } if (!n.halted) From f7eb769a8db069e8a999cea74457c24d813b7c38 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Jun 2017 09:29:25 -0700 Subject: [PATCH 091/141] net: ping: do not abuse udp_poll() commit 77d4b1d36926a9b8387c6b53eeba42bcaaffcea3 upstream. Alexander reported various KASAN messages triggered in recent kernels The problem is that ping sockets should not use udp_poll() in the first place, and recent changes in UDP stack finally exposed this old bug. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Fixes: 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.") Signed-off-by: Eric Dumazet Reported-by: Sasha Levin Cc: Solar Designer Cc: Vasiliy Kulikov Cc: Lorenzo Colitti Acked-By: Lorenzo Colitti Tested-By: Lorenzo Colitti Signed-off-by: David S. Miller [wt: removed the parts related to ping6 as 6d0bfe226116 is not in 3.10] Signed-off-by: Willy Tarreau --- include/net/ipv6.h | 1 + net/ipv4/af_inet.c | 2 +- net/ipv6/raw.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 413e23be60d..1c96547c2a3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -822,6 +822,7 @@ extern int inet6_hash_connect(struct inet_timewait_death_row *death_row, */ extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; +extern const struct proto_ops inet6_sockraw_ops; struct group_source_req; struct group_filter; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 975c369d4e6..03610ebd8d0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1053,7 +1053,7 @@ static struct inet_protosw inetsw_array[] = .type = SOCK_DGRAM, .protocol = IPPROTO_ICMP, .prot = &ping_prot, - .ops = &inet_dgram_ops, + .ops = &inet_sockraw_ops, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c7ce2be09d9..a05e1f1a1a3 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1319,7 +1319,7 @@ void raw6_proc_exit(void) #endif /* CONFIG_PROC_FS */ /* Same as inet6_dgram_ops, sans udp_poll. */ -static const struct proto_ops inet6_sockraw_ops = { +const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, From a72c279dfbfa5b6c5832f16960d8dce31f27645b Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 23 May 2017 16:50:47 +0200 Subject: [PATCH 092/141] scsi: qla2xxx: don't disable a not previously enabled PCI device commit ddff7ed45edce4a4c92949d3c61cd25d229c4a14 upstream. When pci_enable_device() or pci_enable_device_mem() fail in qla2x00_probe_one() we bail out but do a call to pci_disable_device(). This causes the dev_WARN_ON() in pci_disable_device() to trigger, as the device wasn't enabled previously. So instead of taking the 'probe_out' error path we can directly return *iff* one of the pci_enable_device() calls fails. Additionally rename the 'probe_out' goto label's name to the more descriptive 'disable_device'. Signed-off-by: Johannes Thumshirn Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring") Cc: Reviewed-by: Bart Van Assche Reviewed-by: Giridhar Malavali Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/qla2xxx/qla_os.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 40fe8a77236..c11b82e7095 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2342,10 +2342,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (mem_only) { if (pci_enable_device_mem(pdev)) - goto probe_out; + return ret; } else { if (pci_enable_device(pdev)) - goto probe_out; + return ret; } /* This may fail but that's ok */ @@ -2355,7 +2355,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (!ha) { ql_log_pci(ql_log_fatal, pdev, 0x0009, "Unable to allocate memory for ha.\n"); - goto probe_out; + goto disable_device; } ql_dbg_pci(ql_dbg_init, pdev, 0x000a, "Memory allocated for ha=%p.\n", ha); @@ -2899,7 +2899,7 @@ iospace_config_failed: kfree(ha); ha = NULL; -probe_out: +disable_device: pci_disable_device(pdev); return ret; } From 7e25c935ec7b43c088d21b9c8e4a6b970375d933 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 27 Apr 2017 12:12:08 +0300 Subject: [PATCH 093/141] drm/vmwgfx: Handle vmalloc() failure in vmw_local_fifo_reserve() commit f0c62e9878024300319ba2438adc7b06c6b9c448 upstream. If vmalloc() fails then we need to a bit of cleanup before returning. Cc: Fixes: fb1d9738ca05 ("drm/vmwgfx: Add DRM driver for VMware Virtual GPU") Signed-off-by: Dan Carpenter Reviewed-by: Sinclair Yeh Signed-off-by: Willy Tarreau --- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index 89664933861..a3a70283bde 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -368,6 +368,8 @@ void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes) return fifo_state->static_buffer; else { fifo_state->dynamic_buffer = vmalloc(bytes); + if (!fifo_state->dynamic_buffer) + goto out_err; return fifo_state->dynamic_buffer; } } From f6b525b94bda339cf0064e2561fc7e7747bb46a8 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Tue, 14 Feb 2017 19:11:44 +0200 Subject: [PATCH 094/141] net: xilinx_emaclite: fix receive buffer overflow commit cd224553641848dd17800fe559e4ff5d208553e8 upstream. xilinx_emaclite looks at the received data to try to determine the Ethernet packet length but does not properly clamp it if proto_type == ETH_P_IP or 1500 < proto_type <= 1518, causing a buffer overflow and a panic via skb_panic() as the length exceeds the allocated skb size. Fix those cases. Also add an additional unconditional check with WARN_ON() at the end. Signed-off-by: Anssi Hannula Fixes: bb81b2ddfa19 ("net: add Xilinx emac lite device driver") Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index b7268b3dae7..5f5f84ad069 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -398,7 +398,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, * * Return: Total number of bytes received */ -static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) +static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) { void __iomem *addr; u16 length, proto_type; @@ -438,7 +438,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) /* Check if received ethernet frame is a raw ethernet frame * or an IP packet or an ARP packet */ - if (proto_type > (ETH_FRAME_LEN + ETH_FCS_LEN)) { + if (proto_type > ETH_DATA_LEN) { if (proto_type == ETH_P_IP) { length = ((ntohl(in_be32(addr + @@ -446,6 +446,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) XEL_RXBUFF_OFFSET)) >> XEL_HEADER_SHIFT) & XEL_RPLR_LENGTH_MASK); + length = min_t(u16, length, ETH_DATA_LEN); length += ETH_HLEN + ETH_FCS_LEN; } else if (proto_type == ETH_P_ARP) @@ -458,6 +459,9 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) /* Use the length in the frame, plus the header and trailer */ length = proto_type + ETH_HLEN + ETH_FCS_LEN; + if (WARN_ON(length > maxlen)) + length = maxlen; + /* Read from the EmacLite device */ xemaclite_aligned_read((u32 __force *) (addr + XEL_RXBUFF_OFFSET), data, length); @@ -632,7 +636,7 @@ static void xemaclite_rx_handler(struct net_device *dev) skb_reserve(skb, 2); - len = xemaclite_recv_data(lp, (u8 *) skb->data); + len = xemaclite_recv_data(lp, (u8 *) skb->data, len); if (!len) { dev->stats.rx_errors++; From 12d805b590300156e3d9381b6e7f0a549248265c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 12 May 2017 16:35:45 +0200 Subject: [PATCH 095/141] serial: efm32: Fix parity management in 'efm32_uart_console_get_options()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit be40597a1bc173bf9dadccdf5388b956f620ae8f upstream. UARTn_FRAME_PARITY_ODD is 0x0300 UARTn_FRAME_PARITY_EVEN is 0x0200 So if the UART is configured for EVEN parity, it would be reported as ODD. Fix it by correctly testing if the 2 bits are set. Fixes: 3afbd89c9639 ("serial/efm32: add new driver") Signed-off-by: Christophe JAILLET Acked-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/tty/serial/efm32-uart.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/efm32-uart.c b/drivers/tty/serial/efm32-uart.c index 7d199c8e1a7..c9635f1c710 100644 --- a/drivers/tty/serial/efm32-uart.c +++ b/drivers/tty/serial/efm32-uart.c @@ -27,6 +27,7 @@ #define UARTn_FRAME 0x04 #define UARTn_FRAME_DATABITS__MASK 0x000f #define UARTn_FRAME_DATABITS(n) ((n) - 3) +#define UARTn_FRAME_PARITY__MASK 0x0300 #define UARTn_FRAME_PARITY_NONE 0x0000 #define UARTn_FRAME_PARITY_EVEN 0x0200 #define UARTn_FRAME_PARITY_ODD 0x0300 @@ -578,12 +579,16 @@ static void efm32_uart_console_get_options(struct efm32_uart_port *efm_port, 16 * (4 + (clkdiv >> 6))); frame = efm32_uart_read32(efm_port, UARTn_FRAME); - if (frame & UARTn_FRAME_PARITY_ODD) + switch (frame & UARTn_FRAME_PARITY__MASK) { + case UARTn_FRAME_PARITY_ODD: *parity = 'o'; - else if (frame & UARTn_FRAME_PARITY_EVEN) + break; + case UARTn_FRAME_PARITY_EVEN: *parity = 'e'; - else + break; + default: *parity = 'n'; + } *bits = (frame & UARTn_FRAME_DATABITS__MASK) - UARTn_FRAME_DATABITS(4) + 4; From e52eca82d17c9f093b6a9daa5d3e9462459404fc Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 8 May 2017 14:23:16 -0700 Subject: [PATCH 096/141] x86/mm/32: Set the '__vmalloc_start_set' flag in initmem_init() commit 861ce4a3244c21b0af64f880d5bfe5e6e2fb9e4a upstream. '__vmalloc_start_set' currently only gets set in initmem_init() when !CONFIG_NEED_MULTIPLE_NODES. This breaks detection of vmalloc address with virt_addr_valid() with CONFIG_NEED_MULTIPLE_NODES=y, causing a kernel crash: [mm/usercopy] 517e1fbeb6: kernel BUG at arch/x86/mm/physaddr.c:78! Set '__vmalloc_start_set' appropriately for that case as well. Reported-by: kbuild test robot Signed-off-by: Laura Abbott Reviewed-by: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: dc16ecf7fd1f ("x86-32: use specific __vmalloc_start_set flag in __virt_addr_valid") Link: http://lkml.kernel.org/r/1494278596-30373-1-git-send-email-labbott@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Willy Tarreau --- arch/x86/mm/numa_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 73a6d7395bd..58e7e9d4bbc 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -100,5 +100,6 @@ void __init initmem_init(void) printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); + __vmalloc_start_set = true; setup_bootmem_allocator(); } From 1e7ae91989929cbbb3b505514df2e3410a1757f2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 15 Apr 2017 10:05:08 -0700 Subject: [PATCH 097/141] mfd: omap-usb-tll: Fix inverted bit use for USB TLL mode commit 8b8a84c54aff4256d592dc18346c65ecf6811b45 upstream. Commit 16fa3dc75c22 ("mfd: omap-usb-tll: HOST TLL platform driver") added support for USB TLL, but uses OMAP_TLL_CHANNEL_CONF_ULPINOBITSTUFF bit the wrong way. The comments in the code are correct, but the inverted use of OMAP_TLL_CHANNEL_CONF_ULPINOBITSTUFF causes the register to be enabled instead of disabled unlike what the comments say. Without this change the Wrigley 3G LTE modem on droid 4 EHCI bus can be only pinged few times before it stops responding. Fixes: 16fa3dc75c22 ("mfd: omap-usb-tll: HOST TLL platform driver") Signed-off-by: Tony Lindgren Acked-by: Roger Quadros Signed-off-by: Lee Jones Signed-off-by: Willy Tarreau --- drivers/mfd/omap-usb-tll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c index c7576a503e5..2afadd00d3f 100644 --- a/drivers/mfd/omap-usb-tll.c +++ b/drivers/mfd/omap-usb-tll.c @@ -380,8 +380,8 @@ int omap_tll_init(struct usbhs_omap_platform_data *pdata) * and use SDR Mode */ reg &= ~(OMAP_TLL_CHANNEL_CONF_UTMIAUTOIDLE - | OMAP_TLL_CHANNEL_CONF_ULPINOBITSTUFF | OMAP_TLL_CHANNEL_CONF_ULPIDDRMODE); + reg |= OMAP_TLL_CHANNEL_CONF_ULPINOBITSTUFF; } else if (pdata->port_mode[i] == OMAP_EHCI_PORT_MODE_HSIC) { /* From 655e5ff7d64930835bf42a3be85b8eec0e40ddbd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 12:53:04 -0200 Subject: [PATCH 098/141] pvrusb2: reduce stack usage pvr2_eeprom_analyze() commit 6830733d53a4517588e56227b9c8538633f0c496 upstream. The driver uses a relatively large data structure on the stack, which showed up on my radar as we get a warning with the "latent entropy" GCC plugin: drivers/media/usb/pvrusb2/pvrusb2-eeprom.c:153:1: error: the frame size of 1376 bytes is larger than 1152 bytes [-Werror=frame-larger-than=] The warning is usually hidden as we raise the warning limit to 2048 when the plugin is enabled, but I'd like to lower that again in the future, and making this function smaller helps to do that without build regressions. Further analysis shows that putting an 'i2c_client' structure on the stack is not really supported, as the embedded 'struct device' is not initialized here, and we are only saved by the fact that the function that is called here does not use the pointer at all. Fixes: d855497edbfb ("V4L/DVB (4228a): pvrusb2 to kernel 2.6.18") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/usb/pvrusb2/pvrusb2-eeprom.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-eeprom.c b/drivers/media/usb/pvrusb2/pvrusb2-eeprom.c index 9515f3a68f8..122815e1cb6 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-eeprom.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-eeprom.c @@ -123,15 +123,10 @@ int pvr2_eeprom_analyze(struct pvr2_hdw *hdw) memset(&tvdata,0,sizeof(tvdata)); eeprom = pvr2_eeprom_fetch(hdw); - if (!eeprom) return -EINVAL; + if (!eeprom) + return -EINVAL; - { - struct i2c_client fake_client; - /* Newer version expects a useless client interface */ - fake_client.addr = hdw->eeprom_addr; - fake_client.adapter = &hdw->i2c_adap; - tveeprom_hauppauge_analog(&fake_client,&tvdata,eeprom); - } + tveeprom_hauppauge_analog(NULL, &tvdata, eeprom); trace_eeprom("eeprom assumed v4l tveeprom module"); trace_eeprom("eeprom direct call results:"); From e1bdf20be95bdf5ca4c4c63159965e8ccbca048d Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Thu, 27 Apr 2017 12:12:02 -0700 Subject: [PATCH 099/141] usb: r8a66597-hcd: select a different endpoint on timeout commit 1f873d857b6c2fefb4dada952674aa01bcfb92bd upstream. If multiple endpoints on a single device have pending IN URBs and one endpoint times out due to NAKs (perfectly legal), select a different endpoint URB to try. The existing code only checked to see another device address has pending URBs and ignores other IN endpoints on the current device address. This leads to endpoints never getting serviced if one endpoint is using NAK as a flow control method. Fixes: 5d3043586db4 ("usb: r8a66597-hcd: host controller driver for R8A6659") Signed-off-by: Chris Brandt Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/usb/host/r8a66597-hcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 6656dfda566..4105f7f6caf 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1786,6 +1786,7 @@ static void r8a66597_td_timer(unsigned long _r8a66597) pipe = td->pipe; pipe_stop(r8a66597, pipe); + /* Select a different address or endpoint */ new_td = td; do { list_move_tail(&new_td->queue, @@ -1795,7 +1796,8 @@ static void r8a66597_td_timer(unsigned long _r8a66597) new_td = td; break; } - } while (td != new_td && td->address == new_td->address); + } while (td != new_td && td->address == new_td->address && + td->pipe->info.epnum == new_td->pipe->info.epnum); start_transfer(r8a66597, new_td); From 779ec20b9519444ac3bd4ec6821402948119db4d Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Thu, 27 Apr 2017 12:12:49 -0700 Subject: [PATCH 100/141] usb: r8a66597-hcd: decrease timeout commit dd14a3e9b92ac6f0918054f9e3477438760a4fa6 upstream. The timeout for BULK packets was 300ms which is a long time if other endpoints or devices are waiting for their turn. Changing it to 50ms greatly increased the overall performance for multi-endpoint devices. Fixes: 5d3043586db4 ("usb: r8a66597-hcd: host controller driver for R8A6659") Signed-off-by: Chris Brandt Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/usb/host/r8a66597-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 4105f7f6caf..0fa139081b1 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1270,7 +1270,7 @@ static void set_td_timer(struct r8a66597 *r8a66597, struct r8a66597_td *td) time = 30; break; default: - time = 300; + time = 50; break; } From 240607692cd84b2ed16b0f346bdaa4f5a787322b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 May 2017 15:55:17 -0700 Subject: [PATCH 101/141] drivers/misc/c2port/c2port-duramar2150.c: checking for NULL instead of IS_ERR() commit 8128a31eaadbcdfa37774bbd28f3f00bac69996a upstream. c2port_device_register() never returns NULL, it uses error pointers. Link: http://lkml.kernel.org/r/20170412083321.GC3250@mwanda Fixes: 65131cd52b9e ("c2port: add c2port support for Eurotech Duramar 2150") Signed-off-by: Dan Carpenter Acked-by: Rodolfo Giometti Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- drivers/misc/c2port/c2port-duramar2150.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/c2port/c2port-duramar2150.c b/drivers/misc/c2port/c2port-duramar2150.c index 5484301d57d..3dc61ea7dc6 100644 --- a/drivers/misc/c2port/c2port-duramar2150.c +++ b/drivers/misc/c2port/c2port-duramar2150.c @@ -129,8 +129,8 @@ static int __init duramar2150_c2port_init(void) duramar2150_c2port_dev = c2port_device_register("uc", &duramar2150_c2port_ops, NULL); - if (!duramar2150_c2port_dev) { - ret = -ENODEV; + if (IS_ERR(duramar2150_c2port_dev)) { + ret = PTR_ERR(duramar2150_c2port_dev); goto free_region; } From 18144201ec726dffd07cebf3df0cd2b799fc9fc0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 30 May 2017 16:21:51 +0100 Subject: [PATCH 102/141] net: phy: fix marvell phy status reading commit 898805e0cdf7fd860ec21bf661d3a0285a3defbd upstream. The Marvell driver incorrectly provides phydev->lp_advertising as the logical and of the link partner's advert and our advert. This is incorrect - this field is supposed to store the link parter's unmodified advertisment. This allows ethtool to report the correct link partner auto-negotiation status. Fixes: be937f1f89ca ("Marvell PHY m88e1111 driver fix") Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/phy/marvell.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 202fe1ff198..b23f36a5b0d 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -656,8 +656,6 @@ static int marvell_read_status(struct phy_device *phydev) if (adv < 0) return adv; - lpa &= adv; - if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) phydev->duplex = DUPLEX_FULL; else From 6ac502ceabf86463c4a35166851edf497960262f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 23 Dec 2016 19:56:56 -0800 Subject: [PATCH 103/141] net: korina: Fix NAPI versus resources freeing commit e6afb1ad88feddf2347ea779cfaf4d03d3cd40b6 upstream. Commit beb0babfb77e ("korina: disable napi on close and restart") introduced calls to napi_disable() that were missing before, unfortunately this leaves a small window during which NAPI has a chance to run, yet we just freed resources since korina_free_ring() has been called: Fix this by disabling NAPI first then freeing resource, and make sure that we also cancel the restart task before doing the resource freeing. Fixes: beb0babfb77e ("korina: disable napi on close and restart") Reported-by: Alexandros C. Couloumbis Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/ethernet/korina.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 5409fe876a4..69bc0a0eb42 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -905,10 +905,10 @@ static void korina_restart_task(struct work_struct *work) DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); + korina_free_ring(dev); + if (korina_init(dev) < 0) { printk(KERN_ERR "%s: cannot restart device\n", dev->name); return; @@ -1069,12 +1069,12 @@ static int korina_close(struct net_device *dev) tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR; writel(tmp, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); cancel_work_sync(&lp->restart_task); + korina_free_ring(dev); + free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); free_irq(lp->ovr_irq, dev); From f478b4264a3c62b6b2d2b63a707e56c2f0cfdc6c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Jun 2017 13:35:37 +0300 Subject: [PATCH 104/141] xfrm: NULL dereference on allocation failure commit e747f64336fc15e1c823344942923195b800aa1e upstream. The default error code in pfkey_msg2xfrm_state() is -ENOBUFS. We added a new call to security_xfrm_state_alloc() which sets "err" to zero so there several places where we can return ERR_PTR(0) if kmalloc() fails. The caller is expecting error pointers so it leads to a NULL dereference. Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") Signed-off-by: Dan Carpenter Signed-off-by: Steffen Klassert Signed-off-by: Willy Tarreau --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 66f51c5a8a3..b3191ef1b43 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1135,6 +1135,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } + err = -ENOBUFS; key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; From 04d006963029d0346be966d1c5e6147995d08b8d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Jun 2017 13:34:05 +0300 Subject: [PATCH 105/141] xfrm: Oops on error in pfkey_msg2xfrm_state() commit 1e3d0c2c70cd3edb5deed186c5f5c75f2b84a633 upstream. There are some missing error codes here so we accidentally return NULL instead of an error pointer. It results in a NULL pointer dereference. Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") Signed-off-by: Dan Carpenter Signed-off-by: Steffen Klassert Signed-off-by: Willy Tarreau --- net/key/af_key.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index b3191ef1b43..3ff567fb90e 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1147,8 +1147,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (key) keysize = (key->sadb_key_bits + 7) / 8; x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); - if (!x->aalg) + if (!x->aalg) { + err = -ENOMEM; goto out; + } strcpy(x->aalg->alg_name, a->name); x->aalg->alg_key_len = 0; if (key) { @@ -1167,8 +1169,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); - if (!x->calg) + if (!x->calg) { + err = -ENOMEM; goto out; + } strcpy(x->calg->alg_name, a->name); x->props.calgo = sa->sadb_sa_encrypt; } else { @@ -1182,8 +1186,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (key) keysize = (key->sadb_key_bits + 7) / 8; x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); - if (!x->ealg) + if (!x->ealg) { + err = -ENOMEM; goto out; + } strcpy(x->ealg->alg_name, a->name); x->ealg->alg_key_len = 0; if (key) { @@ -1231,8 +1237,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, struct xfrm_encap_tmpl *natt; x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); - if (!x->encap) + if (!x->encap) { + err = -ENOMEM; goto out; + } natt = x->encap; n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]; From 40cfe451debb10d983378f72868359a17656be8a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Feb 2017 16:19:06 +0300 Subject: [PATCH 106/141] cpufreq: s3c2416: double free on driver init error path commit a69261e4470d680185a15f748d9cdafb37c57a33 upstream. The "goto err_armclk;" error path already does a clk_put(s3c_freq->hclk); so this is a double free. Fixes: 34ee55075265 ([CPUFREQ] Add S3C2416/S3C2450 cpufreq driver) Signed-off-by: Dan Carpenter Reviewed-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Willy Tarreau --- drivers/cpufreq/s3c2416-cpufreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/s3c2416-cpufreq.c b/drivers/cpufreq/s3c2416-cpufreq.c index 4f1881eee3f..6da4fbd4eef 100644 --- a/drivers/cpufreq/s3c2416-cpufreq.c +++ b/drivers/cpufreq/s3c2416-cpufreq.c @@ -434,7 +434,6 @@ static int __init s3c2416_cpufreq_driver_init(struct cpufreq_policy *policy) rate = clk_get_rate(s3c_freq->hclk); if (rate < 133 * 1000 * 1000) { pr_err("cpufreq: HCLK not at 133MHz\n"); - clk_put(s3c_freq->hclk); ret = -EINVAL; goto err_armclk; } From fe77accbd15d54708f5f4f23b585f6ca6225f9a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 18 May 2017 19:37:30 +0200 Subject: [PATCH 107/141] KVM: x86: zero base3 of unusable segments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f0367ee1d64d27fa08be2407df5c125442e885e3 upstream. Static checker noticed that base3 could be used uninitialized if the segment was not present (useable). Random stack values probably would not pass VMCS entry checks. Reported-by: Dan Carpenter Fixes: 1aa366163b8b ("KVM: x86 emulator: consolidate segment accessors") Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Willy Tarreau --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b70b67bde90..3d316cafff9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4596,6 +4596,8 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, if (var.unusable) { memset(desc, 0, sizeof(*desc)); + if (base3) + *base3 = 0; return false; } From 98b9e94ba66c9907ff8f7276c6f9bb71cce1c4ae Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 9 Feb 2017 17:17:52 +0000 Subject: [PATCH 108/141] KEYS: Fix an error code in request_master_key() commit 57cb17e764ba0aaa169d07796acce54ccfbc6cae upstream. This function has two callers and neither are able to handle a NULL return. Really, -EINVAL is the correct thing return here anyway. This fixes some static checker warnings like: security/keys/encrypted-keys/encrypted.c:709 encrypted_key_decrypt() error: uninitialized symbol 'master_key'. Fixes: 7e70cb497850 ("keys: add new key-type encrypted") Signed-off-by: Dan Carpenter Acked-by: Mimi Zohar Signed-off-by: James Morris Signed-off-by: Willy Tarreau --- security/keys/encrypted-keys/encrypted.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 7bef30b50d9..b7d7cffe734 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -435,7 +435,7 @@ static int init_blkcipher_desc(struct blkcipher_desc *desc, const u8 *key, static struct key *request_master_key(struct encrypted_key_payload *epayload, u8 **master_key, size_t *master_keylen) { - struct key *mkey = NULL; + struct key *mkey = ERR_PTR(-EINVAL); if (!strncmp(epayload->master_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { From fa69abfe6a07ae3c009b30271966b871799077b6 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 21 Jun 2017 14:34:58 -0700 Subject: [PATCH 109/141] ipv6: avoid unregistering inet6_dev for loopback commit 60abc0be96e00ca71bac083215ac91ad2e575096 upstream. The per netns loopback_dev->ip6_ptr is unregistered and set to NULL when its mtu is set to smaller than IPV6_MIN_MTU, this leads to that we could set rt->rt6i_idev NULL after a rt6_uncached_list_flush_dev() and then crash after another call. In this case we should just bring its inet6_dev down, rather than unregistering it, at least prior to commit 176c39af29bc ("netns: fix addrconf_ifdown kernel panic") we always override the case for loopback. Thanks a lot to Andrey for finding a reliable reproducer. Fixes: 176c39af29bc ("netns: fix addrconf_ifdown kernel panic") Reported-by: Andrey Konovalov Cc: Andrey Konovalov Cc: Daniel Lezcano Cc: David Ahern Signed-off-by: Cong Wang Acked-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9c4aa2e2244..5ea5f77c0ec 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2892,6 +2892,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *) data; struct inet6_dev *idev = __in6_dev_get(dev); + struct net *net = dev_net(dev); int run_pending = 0; int err; @@ -2988,7 +2989,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); } break; From d47408188914e18508116fe473c7889208a59a7d Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Fri, 7 Jul 2017 01:43:42 +0300 Subject: [PATCH 110/141] cfg80211: Validate frequencies nested in NL80211_ATTR_SCAN_FREQUENCIES commit d7f13f7450369281a5d0ea463cc69890a15923ae upstream. validate_scan_freqs() retrieves frequencies from attributes nested in the attribute NL80211_ATTR_SCAN_FREQUENCIES with nla_get_u32(), which reads 4 bytes from each attribute without validating the size of data received. Attributes nested in NL80211_ATTR_SCAN_FREQUENCIES don't have an nla policy. Validate size of each attribute before parsing to avoid potential buffer overread. Fixes: 2a519311926 ("cfg80211/nl80211: scanning (and mac80211 update to use it)") Cc: stable@vger.kernel.org Signed-off-by: Srinivas Dasari Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Willy Tarreau --- net/wireless/nl80211.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 32c5443514c..67705d7a33f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5044,6 +5044,10 @@ static int validate_scan_freqs(struct nlattr *freqs) struct nlattr *attr1, *attr2; int n_channels = 0, tmp1, tmp2; + nla_for_each_nested(attr1, freqs, tmp1) + if (nla_len(attr1) != sizeof(u32)) + return 0; + nla_for_each_nested(attr1, freqs, tmp1) { n_channels++; /* From 9efa3d527965d5d6772332841923df8d62810bfe Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Fri, 7 Jul 2017 01:43:39 +0300 Subject: [PATCH 111/141] cfg80211: Check if PMKID attribute is of expected size commit 9361df14d1cbf966409d5d6f48bb334384fbe138 upstream. nla policy checks for only maximum length of the attribute data when the attribute type is NLA_BINARY. If userspace sends less data than specified, the wireless drivers may access illegal memory. When type is NLA_UNSPEC, nla policy check ensures that userspace sends minimum specified length number of bytes. Remove type assignment to NLA_BINARY from nla_policy of NL80211_ATTR_PMKID to make this NLA_UNSPEC and to make sure minimum WLAN_PMKID_LEN bytes are received from userspace with NL80211_ATTR_PMKID. Fixes: 67fbb16be69d ("nl80211: PMKSA caching support") Cc: stable@vger.kernel.org Signed-off-by: Srinivas Dasari Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Willy Tarreau --- net/wireless/nl80211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 67705d7a33f..da79f9b86df 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -310,8 +310,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, - [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, - .len = WLAN_PMKID_LEN }, + [NL80211_ATTR_PMKID] = { .len = WLAN_PMKID_LEN }, [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, From b569326cedfc6e0f2dc1c3cc237ad717e4a5b0f4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 14 Jul 2017 14:49:38 -0700 Subject: [PATCH 112/141] mm: fix overflow check in expand_upwards() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 37511fb5c91db93d8bd6e3f52f86e5a7ff7cfcdf upstream. Jörn Engel noticed that the expand_upwards() function might not return -ENOMEM in case the requested address is (unsigned long)-PAGE_SIZE and if the architecture didn't defined TASK_SIZE as multiple of PAGE_SIZE. Affected architectures are arm, frv, m68k, blackfin, h8300 and xtensa which all define TASK_SIZE as 0xffffffff, but since none of those have an upwards-growing stack we currently have no actual issue. Nevertheless let's fix this just in case any of the architectures with an upward-growing stack (currently parisc, metag and partly ia64) define TASK_SIZE similar. Link: http://lkml.kernel.org/r/20170702192452.GA11868@p100.box Fixes: bd726c90b6b8 ("Allow stack to grow up to address space limit") Signed-off-by: Helge Deller Reported-by: Jörn Engel Cc: Hugh Dickins Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index 3c4e4d7ae54..d042e254a16 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2132,7 +2132,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* Guard against exceeding limits of the address space. */ address &= PAGE_MASK; - if (address >= TASK_SIZE) + if (address >= (TASK_SIZE & PAGE_MASK)) return -ENOMEM; address += PAGE_SIZE; From 7fb443fa86195a13277194e5f112445d982e639e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 7 Jul 2017 16:57:06 +0300 Subject: [PATCH 113/141] crypto: caam - fix signals handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7459e1d25ffefa2b1be799477fcc1f6c62f6cec7 upstream. Driver does not properly handle the case when signals interrupt wait_for_completion_interruptible(): -it does not check for return value -completion structure is allocated on stack; in case a signal interrupts the sleep, it will go out of scope, causing the worker thread (caam_jr_dequeue) to fail when it accesses it wait_for_completion_interruptible() is replaced with uninterruptable wait_for_completion(). We choose to block all signals while waiting for I/O (device executing the split key generation job descriptor) since the alternative - in order to have a deterministic device state - would be to flush the job ring (aborting *all* in-progress jobs). Cc: Fixes: 045e36780f115 ("crypto: caam - ahash hmac support") Fixes: 4c1ec1f930154 ("crypto: caam - refactor key_gen, sg") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Willy Tarreau --- drivers/crypto/caam/caamhash.c | 2 +- drivers/crypto/caam/key_gen.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index e9d8b235f68..34815a74d90 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -476,7 +476,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); if (!ret) { /* in progress */ - wait_for_completion_interruptible(&result.completion); + wait_for_completion(&result.completion); ret = result.err; #ifdef DEBUG print_hex_dump(KERN_ERR, "digested key@"xstr(__LINE__)": ", diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index 87138d2adb5..fd6bc0bc56c 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -107,7 +107,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); if (!ret) { /* in progress */ - wait_for_completion_interruptible(&result.completion); + wait_for_completion(&result.completion); ret = result.err; #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", From ad0871670732e6094c796e0d3062c7b7843891fd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 08:46:44 -0300 Subject: [PATCH 114/141] ir-core: fix gcc-7 warning on bool arithmetic commit bd7e31bbade02bc1e92aa00d5cf2cee2da66838a upstream. gcc-7 suggests that an expression using a bitwise not and a bitmask on a 'bool' variable is better written using boolean logic: drivers/media/rc/imon.c: In function 'imon_incoming_scancode': drivers/media/rc/imon.c:1725:22: error: '~' on a boolean expression [-Werror=bool-operation] ictx->pad_mouse = ~(ictx->pad_mouse) & 0x1; ^ drivers/media/rc/imon.c:1725:22: note: did you mean to use logical not? I agree. Fixes: 21677cfc562a ("V4L/DVB: ir-core: add imon driver") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/rc/imon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index 72e3fa65248..257bb7a6ff5 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -1530,7 +1530,7 @@ static void imon_incoming_packet(struct imon_context *ictx, if (kc == KEY_KEYBOARD && !ictx->release_code) { ictx->last_keycode = kc; if (!nomouse) { - ictx->pad_mouse = ~(ictx->pad_mouse) & 0x1; + ictx->pad_mouse = !ictx->pad_mouse; dev_dbg(dev, "toggling to %s mode\n", ictx->pad_mouse ? "mouse" : "keyboard"); spin_unlock_irqrestore(&ictx->kc_lock, flags); From 3a7578afc3ad93519ac925f823b541b9490433f5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 Jun 2017 16:20:25 +0200 Subject: [PATCH 115/141] udf: Fix deadlock between writeback and udf_setsize() commit f2e95355891153f66d4156bf3a142c6489cd78c6 upstream. udf_setsize() called truncate_setsize() with i_data_sem held. Thus truncate_pagecache() called from truncate_setsize() could lock a page under i_data_sem which can deadlock as page lock ranks below i_data_sem - e. g. writeback can hold page lock and try to acquire i_data_sem to map a block. Fix the problem by moving truncate_setsize() calls from under i_data_sem. It is safe for us to change i_size without holding i_data_sem as all the places that depend on i_size being stable already hold inode_lock. CC: stable@vger.kernel.org Fixes: 7e49b6f2480cb9a9e7322a91592e56a5c85361f5 Signed-off-by: Jan Kara Signed-off-by: Willy Tarreau --- fs/udf/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 5c1120a5fa4..0ead8bed774 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1237,8 +1237,8 @@ int udf_setsize(struct inode *inode, loff_t newsize) return err; } set_size: - truncate_setsize(inode, newsize); up_write(&iinfo->i_data_sem); + truncate_setsize(inode, newsize); } else { if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { down_write(&iinfo->i_data_sem); @@ -1255,9 +1255,9 @@ set_size: udf_get_block); if (err) return err; + truncate_setsize(inode, newsize); down_write(&iinfo->i_data_sem); udf_clear_extent_cache(inode); - truncate_setsize(inode, newsize); udf_truncate_extents(inode); up_write(&iinfo->i_data_sem); } From c1327b455fda6390d2b1b199ce35b20d2bd3dea8 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 8 Jun 2017 14:01:44 +0800 Subject: [PATCH 116/141] perf annotate: Fix broken arrow at row 0 connecting jmp instruction to its target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 80f62589fa52f530cffc50e78c0b5a2ae572d61e upstream. When the jump instruction is displayed at the row 0 in annotate view, the arrow is broken. An example: 16.86 │ ┌──je 82 0.01 │ movsd (%rsp),%xmm0 │ movsd 0x8(%rsp),%xmm4 │ movsd 0x8(%rsp),%xmm1 │ movsd (%rsp),%xmm3 │ divsd %xmm4,%xmm0 │ divsd %xmm3,%xmm1 │ movsd (%rsp),%xmm2 │ addsd %xmm1,%xmm0 │ addsd %xmm2,%xmm0 │ movsd %xmm0,(%rsp) │82: sub $0x1,%ebx 83.03 │ ↑ jne 38 │ add $0x10,%rsp │ xor %eax,%eax │ pop %rbx │ ← retq The patch increments the row number before checking with 0. Signed-off-by: Yao Jin Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: stable@vger.kernel.org Fixes: 944e1abed9e1 ("perf ui browser: Add method to draw up/down arrow line") Link: http://lkml.kernel.org/r/1496901704-30275-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Willy Tarreau --- tools/perf/ui/browser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index bbc782e364b..9118fb8cc10 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -672,7 +672,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser, ui_browser__gotorc(browser, row, column + 1); SLsmg_draw_hline(2); - if (row++ == 0) + if (++row == 0) goto out; } else row = 0; From 50602d3e499b2419b7ff0eda022525625f90e53c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Dec 2016 18:37:11 +0200 Subject: [PATCH 117/141] net/mlx4: Remove BUG_ON from ICM allocation routine commit c1d5f8ff80ea84768f5fae1ca9d1abfbb5e6bbaa upstream. This patch removes BUG_ON() macro from mlx4_alloc_icm_coherent() by checking DMA address alignment in advance and performing proper folding in case of error. Fixes: 5b0bf5e25efe ("mlx4_core: Support ICM tables in coherent memory") Reported-by: Ozgur Karatas Signed-off-by: Leon Romanovsky Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 31d02649be4..d22482b4974 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -113,8 +113,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, if (!buf) return -ENOMEM; + if (offset_in_page(buf)) { + dma_free_coherent(dev, PAGE_SIZE << order, + buf, sg_dma_address(mem)); + return -ENOMEM; + } + sg_set_buf(mem, buf, PAGE_SIZE << order); - BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } From ef6fa3f56c073d7d136f7a318088a030c904f4aa Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 19 Jul 2017 15:41:33 -0700 Subject: [PATCH 118/141] ipv4: initialize fib_trie prior to register_netdev_notifier call. commit 8799a221f5944a7d74516ecf46d58c28ec1d1f75 upstream. Net stack initialization currently initializes fib-trie after the first call to netdevice_notifier() call. In fact fib_trie initialization needs to happen before first rtnl_register(). It does not cause any problem since there are no devices UP at this moment, but trying to bring 'lo' UP at initialization would make this assumption wrong and exposes the issue. Fixes following crash Call Trace: ? alternate_node_alloc+0x76/0xa0 fib_table_insert+0x1b7/0x4b0 fib_magic.isra.17+0xea/0x120 fib_add_ifaddr+0x7b/0x190 fib_netdev_event+0xc0/0x130 register_netdevice_notifier+0x1c1/0x1d0 ip_fib_init+0x72/0x85 ip_rt_init+0x187/0x1e9 ip_init+0xe/0x1a inet_init+0x171/0x26c ? ipv4_offload_init+0x66/0x66 do_one_initcall+0x43/0x160 kernel_init_freeable+0x191/0x219 ? rest_init+0x80/0x80 kernel_init+0xe/0x150 ret_from_fork+0x22/0x30 Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08 RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28 CR2: 0000000000000014 Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.") Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization") Signed-off-by: Mahesh Bandewar Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/fib_frontend.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 017b4792cd4..bcd0a05d600 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1170,13 +1170,14 @@ static struct pernet_operations fib_net_ops = { void __init ip_fib_init(void) { - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + fib_trie_init(); register_pernet_subsys(&fib_net_ops); + register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - fib_trie_init(); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); } From 162f50e6914ff75f11d15e52507aa88875878fd8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jul 2017 08:36:15 -0400 Subject: [PATCH 119/141] workqueue: implicit ordered attribute should be overridable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0a94efb5acbb6980d7c9ab604372d93cd507e4d8 upstream. 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") automatically enabled ordered attribute for unbound workqueues w/ max_active == 1. Because ordered workqueues reject max_active and some attribute changes, this implicit ordered mode broke cases where the user creates an unbound workqueue w/ max_active == 1 and later explicitly changes the related attributes. This patch distinguishes explicit and implicit ordered setting and overrides from attribute changes if implict. Signed-off-by: Tejun Heo Fixes: 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") Cc: Holger Hoffstätte Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- include/linux/workqueue.h | 4 +++- kernel/workqueue.c | 13 +++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 120dd354849..4dab847a1d7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -306,6 +306,7 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ + __WQ_ORDERED_EXPLICIT = 1 << 18, /* internal: alloc_ordered_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -408,7 +409,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ + __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue((name), WQ_MEM_RECLAIM, 1) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3f8558f85a4..f55fbfa7fed 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3399,7 +3399,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) * attributes breaks ordering guarantee. Disallow exposing ordered * workqueues. */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return -EINVAL; wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); @@ -3964,8 +3964,12 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, return -EINVAL; /* creating multiple pwqs breaks ordering guarantee */ - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) - return -EINVAL; + if (!list_empty(&wq->pwqs)) { + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) + return -EINVAL; + + wq->flags &= ~__WQ_ORDERED; + } pwq_tbl = kzalloc(wq_numa_tbl_len * sizeof(pwq_tbl[0]), GFP_KERNEL); new_attrs = alloc_workqueue_attrs(GFP_KERNEL); @@ -4411,13 +4415,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) struct pool_workqueue *pwq; /* disallow meddling with max_active for ordered workqueues */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) return; max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); mutex_lock(&wq->mutex); + wq->flags &= ~__WQ_ORDERED; wq->saved_max_active = max_active; for_each_pwq(pwq, wq) From fe1fd359c1dbde1c1561802f62a16ddc67c9ac07 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Aug 2017 12:41:58 -0400 Subject: [PATCH 120/141] packet: fix tp_reserve race in packet_set_ring commit c27927e372f0785f3303e8fad94b85945e2c97b7 upstream. Updates to tp_reserve can race with reads of the field in packet_set_ring. Avoid this by holding the socket lock during updates in setsockopt PACKET_RESERVE. This bug was discovered by syzkaller. Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt") Reported-by: Andrey Konovalov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/packet/af_packet.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b915d011287..2f22b0759f2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3183,14 +3183,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; - po->tp_reserve = val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_reserve = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_LOSS: { From b4824101fca1c3bb111f3839c0fbc08082e9a26b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 11:31:03 +0200 Subject: [PATCH 121/141] staging:iio:resolver:ad2s1210 fix negative IIO_ANGL_VEL read commit 105967ad68d2eb1a041bc041f9cf96af2a653b65 upstream. gcc-7 points out an older regression: drivers/staging/iio/resolver/ad2s1210.c: In function 'ad2s1210_read_raw': drivers/staging/iio/resolver/ad2s1210.c:515:42: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context] The original code had 'unsigned short' here, but incorrectly got converted to 'bool'. This reverts the regression and uses a normal type instead. Fixes: 29148543c521 ("staging:iio:resolver:ad2s1210 minimal chan spec conversion.") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Jonathan Cameron Signed-off-by: Willy Tarreau --- drivers/staging/iio/resolver/ad2s1210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/resolver/ad2s1210.c b/drivers/staging/iio/resolver/ad2s1210.c index 0d3356d4b7d..3c0b16fe172 100644 --- a/drivers/staging/iio/resolver/ad2s1210.c +++ b/drivers/staging/iio/resolver/ad2s1210.c @@ -477,7 +477,7 @@ static int ad2s1210_read_raw(struct iio_dev *indio_dev, long m) { struct ad2s1210_state *st = iio_priv(indio_dev); - bool negative; + u16 negative; int ret = 0; u16 pos; s16 vel; From 136211c28496f8c7ca1f3e70e637cdbba7e491da Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Aug 2017 08:15:13 +0200 Subject: [PATCH 122/141] ALSA: core: Fix unexpected error at replacing user TLV commit 88c54cdf61f508ebcf8da2d819f5dfc03e954d1d upstream. When user tries to replace the user-defined control TLV, the kernel checks the change of its content via memcmp(). The problem is that the kernel passes the return value from memcmp() as is. memcmp() gives a non-zero negative value depending on the comparison result, and this shall be recognized as an error code. The patch covers that corner-case, return 1 properly for the changed TLV. Fixes: 8aa9b586e420 ("[ALSA] Control API - more robust TLV implementation") Cc: Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index 251bc575f5c..c3928261136 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1088,7 +1088,7 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol, mutex_lock(&ue->card->user_ctl_lock); change = ue->tlv_data_size != size; if (!change) - change = memcmp(ue->tlv_data, new_data, size); + change = memcmp(ue->tlv_data, new_data, size) != 0; kfree(ue->tlv_data); ue->tlv_data = new_data; ue->tlv_data_size = size; From 71bec91ec426598cdf2509164e713675a32e1f5b Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 16 Mar 2017 14:30:39 +0000 Subject: [PATCH 123/141] ACPI / APEI: Add missing synchronize_rcu() on NOTIFY_SCI removal commit 7d64f82cceb21e6d95db312d284f5f195e120154 upstream. When removing a GHES device notified by SCI, list_del_rcu() is used, ghes_remove() should call synchronize_rcu() before it goes on to call kfree(ghes), otherwise concurrent RCU readers may still hold this list entry after it has been freed. Signed-off-by: James Morse Reviewed-by: "Huang, Ying" Fixes: 81e88fdc432a (ACPI, APEI, Generic Hardware Error Source POLL/IRQ/NMI notification type support) Signed-off-by: Rafael J. Wysocki Signed-off-by: Willy Tarreau --- drivers/acpi/apei/ghes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 070b843c37e..8cff7cae733 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -988,6 +988,7 @@ static int ghes_remove(struct platform_device *ghes_dev) if (list_empty(&ghes_sci)) unregister_acpi_hed_notifier(&ghes_notifier_sci); mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); break; case ACPI_HEST_NOTIFY_NMI: mutex_lock(&ghes_list_mutex); From dd3e5010b97a24dd08dfefce517e06dc91fb01ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Aug 2017 15:59:49 +0200 Subject: [PATCH 124/141] qlge: avoid memcpy buffer overflow commit e58f95831e7468d25eb6e41f234842ecfe6f014f upstream. gcc-8.0.0 (snapshot) points out that we copy a variable-length string into a fixed length field using memcpy() with the destination length, and that ends up copying whatever follows the string: inlined from 'ql_core_dump' at drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:1106:2: drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:708:2: error: 'memcpy' reading 15 bytes from a region of size 14 [-Werror=stringop-overflow=] memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); Changing it to use strncpy() will instead zero-pad the destination, which seems to be the right thing to do here. The bug is probably harmless, but it seems like a good idea to address it in stable kernels as well, if only for the purpose of building with gcc-8 without warnings. Fixes: a61f80261306 ("qlge: Add ethtool register dump function.") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/ethernet/qlogic/qlge/qlge_dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c index 10093f0c4c0..00a80587b47 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c @@ -724,7 +724,7 @@ static void ql_build_coredump_seg_header( seg_hdr->cookie = MPI_COREDUMP_COOKIE; seg_hdr->segNum = seg_number; seg_hdr->segSize = seg_size; - memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); + strncpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1); } /* From 4e39d5e48bac267582da35d420bf67f985105fd7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 8 Sep 2017 10:26:19 +0200 Subject: [PATCH 125/141] ipv6: fix memory leak with multiple tables during netns destruction commit ba1cc08d9488c94cb8d94f545305688b72a2a300 upstream. fib6_net_exit only frees the main and local tables. If another table was created with fib6_alloc_table, we leak it when the netns is destroyed. Fix this in the same way ip_fib_net_exit cleans up tables, by walking through the whole hashtable of fib6_table's. We can get rid of the special cases for local and main, since they're also part of the hashtable. Reproducer: ip netns add x ip -net x -6 rule add from 6003:1::/64 table 100 ip netns del x Reported-by: Jianlin Shi Fixes: 58f09b78b730 ("[NETNS][IPV6] ip6_fib - make it per network namespace") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- net/ipv6/ip6_fib.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 46458ee3193..8a023b5ea19 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -167,6 +167,12 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->dst); } +static void fib6_free_table(struct fib6_table *table) +{ + inetpeer_invalidate_tree(&table->tb6_peers); + kfree(table); +} + static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; @@ -1738,15 +1744,22 @@ out_timer: static void fib6_net_exit(struct net *net) { + unsigned int i; + rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer); -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); - kfree(net->ipv6.fib6_local_tbl); -#endif - inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); - kfree(net->ipv6.fib6_main_tbl); + for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[i]; + struct hlist_node *tmp; + struct fib6_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) { + hlist_del(&tb->tb6_hlist); + fib6_free_table(tb); + } + } + kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); } From 488ec7b227e442ec3f288ad9245d9a78ec3df57b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Sep 2017 15:48:47 -0700 Subject: [PATCH 126/141] ipv6: fix typo in fib6_net_exit() commit 32a805baf0fb70b6dbedefcd7249ac7f580f9e3b upstream. IPv6 FIB should use FIB6_TABLE_HASHSZ, not FIB_TABLE_HASHSZ. Fixes: ba1cc08d9488 ("ipv6: fix memory leak with multiple tables during netns destruction") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/ip6_fib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 8a023b5ea19..6de0d44a942 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1749,7 +1749,7 @@ static void fib6_net_exit(struct net *net) rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer); - for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { struct hlist_head *head = &net->ipv6.fib_table_hash[i]; struct hlist_node *tmp; struct fib6_table *tb; From 6f47a87d1540a9f7c294b5e79e7dcf0827840060 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 4 Feb 2015 15:25:09 +0100 Subject: [PATCH 127/141] ip6_gre: fix endianness errors in ip6gre_err commit d1e158e2d7a0a91110b206653f0e02376e809150 upstream. info is in network byte order, change it back to host byte order before use. In particular, the current code sets the MTU of the tunnel to a wrong (too big) value. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Sabrina Dubroca Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/ip6_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ae88e17f5c7..529348e6a98 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -419,7 +419,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (code == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); - if (teli && teli == info - 2) { + if (teli && teli == be32_to_cpu(info) - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", @@ -431,7 +431,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } break; case ICMPV6_PKT_TOOBIG: - mtu = info - offset; + mtu = be32_to_cpu(info) - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; From 92e84b6aeccac20f9e665ec515c648a29017be68 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 21 Sep 2017 10:16:53 +0200 Subject: [PATCH 128/141] crypto: AF_ALG - remove SGL terminator indicator when chaining commit 1d4ba7f963a93a2207fd103d4a36df1b5aeefea2 upstream. Fixed differently upstream as commit 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code") The SGL is MAX_SGL_ENTS + 1 in size. The last SG entry is used for the chaining and is properly updated with the sg_chain invocation. During the filling-in of the initial SG entries, sg_mark_end is called for each SG entry. This is appropriate as long as no additional SGL is chained with the current SGL. However, when a new SGL is chained and the last SG entry is updated with sg_chain, the last but one entry still contains the end marker from the sg_mark_end. This end marker must be removed as otherwise a walk of the chained SGLs will cause a NULL pointer dereference at the last but one SG entry, because sg_next will return NULL. The patch only applies to all kernels up to and including 4.13. The patch 2d97591ef43d0587be22ad1b0d758d6df4999a0b added to 4.14-rc1 introduced a complete new code base which addresses this bug in a different way. Yet, that patch is too invasive for stable kernels and was therefore not marked for stable. Fixes: 8ff590903d5fc ("crypto: algif_skcipher - User-space interface for skcipher operations") Signed-off-by: Stephan Mueller Acked-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- crypto/algif_skcipher.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index ea05c531db2..8e2747401d3 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -92,8 +92,10 @@ static int skcipher_alloc_sgl(struct sock *sk) sg_init_table(sgl->sg, MAX_SGL_ENTS + 1); sgl->cur = 0; - if (sg) + if (sg) { scatterwalk_sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg); + sg_unmark_end(sg + (MAX_SGL_ENTS - 1)); + } list_add_tail(&sgl->list, &ctx->tsgl); } From c5378d27adda7f15e3465594694bf9688ebe970c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Aug 2017 16:30:35 +0300 Subject: [PATCH 129/141] scsi: qla2xxx: Fix an integer overflow in sysfs code commit e6f77540c067b48dee10f1e33678415bfcc89017 upstream. The value of "size" comes from the user. When we add "start + size" it could lead to an integer overflow bug. It means we vmalloc() a lot more memory than we had intended. I believe that on 64 bit systems vmalloc() can succeed even if we ask it to allocate huge 4GB buffers. So we would get memory corruption and likely a crash when we call ha->isp_ops->write_optrom() and ->read_optrom(). Only root can trigger this bug. Link: https://bugzilla.kernel.org/show_bug.cgi?id=194061 Cc: Fixes: b7cc176c9eb3 ("[SCSI] qla2xxx: Allow region-based flash-part accesses.") Reported-by: shqking Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/qla2xxx/qla_attr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index bf60c631abb..3b0f02c146d 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -299,6 +299,8 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, return -EINVAL; if (start > ha->optrom_size) return -EINVAL; + if (size > ha->optrom_size - start) + size = ha->optrom_size - start; switch (val) { case 0: @@ -320,8 +322,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, return -EINVAL; ha->optrom_region_start = start; - ha->optrom_region_size = start + size > ha->optrom_size ? - ha->optrom_size - start : size; + ha->optrom_region_size = start + size; ha->optrom_state = QLA_SREADING; ha->optrom_buffer = vmalloc(ha->optrom_region_size); @@ -388,8 +389,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, } ha->optrom_region_start = start; - ha->optrom_region_size = start + size > ha->optrom_size ? - ha->optrom_size - start : size; + ha->optrom_region_size = start + size; ha->optrom_state = QLA_SWRITING; ha->optrom_buffer = vmalloc(ha->optrom_region_size); From ec8c69ea4541f353451dffefd4fe1ef0f4640f3b Mon Sep 17 00:00:00 2001 From: Baohong Liu Date: Tue, 5 Sep 2017 16:57:19 -0500 Subject: [PATCH 130/141] tracing: Apply trace_clock changes to instance max buffer commit 170b3b1050e28d1ba0700e262f0899ffa4fccc52 upstream. Currently trace_clock timestamps are applied to both regular and max buffers only for global trace. For instance trace, trace_clock timestamps are applied only to regular buffer. But, regular and max buffers can be swapped, for example, following a snapshot. So, for instance trace, bad timestamps can be seen following a snapshot. Let's apply trace_clock timestamps to instance max buffer as well. Link: http://lkml.kernel.org/r/ebdb168d0be042dcdf51f81e696b17fabe3609c1.1504642143.git.tom.zanussi@linux.intel.com Cc: stable@vger.kernel.org Fixes: 277ba0446 ("tracing: Add interface to allow multiple trace buffers") Signed-off-by: Baohong Liu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Willy Tarreau --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index edffb6781c0..df820d9ebc8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4654,7 +4654,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) + if (tr->max_buffer.buffer) ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); tracing_reset_online_cpus(&tr->max_buffer); #endif From 7210062e8fc3e0f9519382c0ad2df3266989a96d Mon Sep 17 00:00:00 2001 From: Bo Yan Date: Mon, 18 Sep 2017 10:03:35 -0700 Subject: [PATCH 131/141] tracing: Erase irqsoff trace with empty write commit 8dd33bcb7050dd6f8c1432732f930932c9d3a33e upstream. One convenient way to erase trace is "echo > trace". However, this is currently broken if the current tracer is irqsoff tracer. This is because irqsoff tracer use max_buffer as the default trace buffer. Set the max_buffer as the one to be cleared when it's the trace buffer currently in use. Link: http://lkml.kernel.org/r/1505754215-29411-1-git-send-email-byan@nvidia.com Cc: Cc: stable@vger.kernel.org Fixes: 4acd4d00f ("tracing: give easy way to clear trace buffer") Signed-off-by: Bo Yan Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Willy Tarreau --- kernel/trace/trace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index df820d9ebc8..359fbd32dc9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3061,11 +3061,17 @@ static int tracing_open(struct inode *inode, struct file *file) /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); + struct trace_buffer *trace_buf = &tr->trace_buffer; + +#ifdef CONFIG_TRACER_MAX_TRACE + if (tr->current_trace->print_max) + trace_buf = &tr->max_buffer; +#endif if (cpu == RING_BUFFER_ALL_CPUS) - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(trace_buf); else - tracing_reset(&tr->trace_buffer, cpu); + tracing_reset(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { From 827a5cb0a0ad3a74426a4e0d12cbc59917785fa2 Mon Sep 17 00:00:00 2001 From: satoru takeuchi Date: Tue, 12 Sep 2017 22:42:52 +0900 Subject: [PATCH 132/141] btrfs: prevent to set invalid default subvolid commit 6d6d282932d1a609e60dc4467677e0e863682f57 upstream. `btrfs sub set-default` succeeds to set an ID which isn't corresponding to any fs/file tree. If such the bad ID is set to a filesystem, we can't mount this filesystem without specifying `subvol` or `subvolid` mount options. Fixes: 6ef5ed0d386b ("Btrfs: add ioctl and incompat flag to set the default mount subvol") Cc: Signed-off-by: Satoru Takeuchi Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Willy Tarreau --- fs/btrfs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 296cc1b4944..7831e6865f1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2974,6 +2974,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) ret = PTR_ERR(new_root); goto out; } + if (!is_fstree(new_root->objectid)) { + ret = -ENOENT; + goto out; + } if (btrfs_root_refs(&new_root->root_item) == 0) { ret = -ENOENT; From 90326945a4f4939725d32e62b8b31d0375950958 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Wed, 28 Dec 2016 14:47:24 +0200 Subject: [PATCH 133/141] IB/ipoib: rtnl_unlock can not come after free_netdev commit 89a3987ab7a923c047c6dec008e60ad6f41fac22 upstream. The ipoib_vlan_add function calls rtnl_unlock after free_netdev, rtnl_unlock not only releases the lock, but also calls netdev_run_todo. The latter function browses the net_todo_list array and completes the unregistration of all its net_device instances. If we call free_netdev before rtnl_unlock, then netdev_run_todo call over the freed device causes panic. To fix, move rtnl_unlock call before free_netdev call. Fixes: 9baa0b036410 ("IB/ipoib: Add rtnl_link_ops support") Cc: Or Gerlitz Signed-off-by: Feras Daoud Signed-off-by: Erez Shitrit Reviewed-by: Yuval Shaia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 8292554bccb..7604ae54d7b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -165,11 +165,11 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) out: mutex_unlock(&ppriv->vlan_mutex); + rtnl_unlock(); + if (result) free_netdev(priv->dev); - rtnl_unlock(); - return result; } From f63d10b98de38d8264ed248221fb371e38b3b0b9 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Mon, 24 Apr 2017 18:29:16 +0800 Subject: [PATCH 134/141] team: fix memory leaks commit 72ec0bc64b9a5d8e0efcb717abfc757746b101b7 upstream. In functions team_nl_send_port_list_get() and team_nl_send_options_get(), pointer skb keeps the return value of nlmsg_new(). When the call to genlmsg_put() fails, the memory is not freed(). This will result in memory leak bugs. Fixes: 9b00cf2d1024 ("team: implement multipart netlink messages for options transfers") Signed-off-by: Pan Bian Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/team/team.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 5225d4321e7..0a3ad7ba2be 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2121,8 +2121,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2389,8 +2391,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_PORT_LIST_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; From 75840f10809bea0e3eb963d5a2fdb827044577a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:18:45 +0100 Subject: [PATCH 135/141] IB/qib: fix false-postive maybe-uninitialized warning commit f6aafac184a3e46e919769dd4faa8bf0dc436534 upstream. aarch64-linux-gcc-7 complains about code it doesn't fully understand: drivers/infiniband/hw/qib/qib_iba7322.c: In function 'qib_7322_txchk_change': include/asm-generic/bitops/non-atomic.h:105:35: error: 'shadow' may be used uninitialized in this function [-Werror=maybe-uninitialized] The code is right, and despite trying hard, I could not come up with a version that I liked better than just adding a fake initialization here to shut up the warning. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Signed-off-by: Arnd Bergmann Acked-by: Ira Weiny Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/hw/qib/qib_iba7322.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 5f5f20f4223..c61f3e7aa35 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6670,7 +6670,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start, unsigned long flags; while (wait) { - unsigned long shadow; + unsigned long shadow = 0; int cstart, previ = -1; /* From 49218ad1217e047e8c1cdfda7c7d40ff864c8682 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 10 Oct 2017 15:01:22 +0800 Subject: [PATCH 136/141] KVM: nVMX: fix guest CR4 loading when emulating L2 to L1 exit commit 8eb3f87d903168bdbd1222776a6b1e281f50513e upstream. When KVM emulates an exit from L2 to L1, it loads L1 CR4 into the guest CR4. Before this CR4 loading, the guest CR4 refers to L2 CR4. Because these two CR4's are in different levels of guest, we should vmx_set_cr4() rather than kvm_set_cr4() here. The latter, which is used to handle guest writes to its CR4, checks the guest change to CR4 and may fail if the change is invalid. The failure may cause trouble. Consider we start a L1 guest with non-zero L1 PCID in use, (i.e. L1 CR4.PCIDE == 1 && L1 CR3.PCID != 0) and a L2 guest with L2 PCID disabled, (i.e. L2 CR4.PCIDE == 0) and following events may happen: 1. If kvm_set_cr4() is used in load_vmcs12_host_state() to load L1 CR4 into guest CR4 (in VMCS01) for L2 to L1 exit, it will fail because of PCID check. As a result, the guest CR4 recorded in L0 KVM (i.e. vcpu->arch.cr4) is left to the value of L2 CR4. 2. Later, if L1 attempts to change its CR4, e.g., clearing VMXE bit, kvm_set_cr4() in L0 KVM will think L1 also wants to enable PCID, because the wrong L2 CR4 is used by L0 KVM as L1 CR4. As L1 CR3.PCID != 0, L0 KVM will inject GP to L1 guest. Fixes: 4704d0befb072 ("KVM: nVMX: Exiting from L2 to L1") Cc: qemu-stable@nongnu.org Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d9016e4a80f..be138952728 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8014,7 +8014,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask(); */ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); - kvm_set_cr4(vcpu, vmcs12->host_cr4); + vmx_set_cr4(vcpu, vmcs12->host_cr4); /* shadow page tables on either EPT or shadow page tables */ kvm_set_cr3(vcpu, vmcs12->host_cr3); From 40216270c67f9dcc5d73adbdcc65dd317cd4aa1d Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Sat, 30 Sep 2017 08:55:55 -0700 Subject: [PATCH 137/141] usb: gadget: composite: Fix use-after-free in usb_composite_overwrite_options commit aec17e1e249567e82b26dafbb86de7d07fde8729 upstream. KASAN enabled configuration reports an error BUG: KASAN: use-after-free in usb_composite_overwrite_options+... [libcomposite] at addr ... Read of size 1 by task ... when some driver is un-bound and then bound again. For example, this happens with FunctionFS driver when "ffs-test" test application is run several times in a row. If the driver has empty manufacturer ID string in initial static data, it is then replaced with generated string. After driver unbinding the generated string is freed, but the driver data still keep that pointer. And if the driver is then bound again, that pointer is re-used for string emptiness check. The fix is to clean up the driver string data upon its unbinding to drop the pointer to freed memory. Fixes: cc2683c318a5 ("usb: gadget: Provide a default implementation of default manufacturer string") Cc: stable@vger.kernel.org Signed-off-by: Andrew Gabbasov Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/gadget/composite.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index a9142a46ae8..2cbb26c2008 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1522,6 +1522,8 @@ static DEVICE_ATTR(suspended, 0444, composite_show_suspended, NULL); static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) { struct usb_composite_dev *cdev = get_gadget_data(gadget); + struct usb_gadget_strings *gstr = cdev->driver->strings[0]; + struct usb_string *dev_str = gstr->strings; /* composite_disconnect() must already have been called * by the underlying peripheral controller driver! @@ -1541,6 +1543,9 @@ static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) composite_dev_cleanup(cdev); + if (dev_str[USB_GADGET_MANUFACTURER_IDX].s == cdev->def_manufacturer) + dev_str[USB_GADGET_MANUFACTURER_IDX].s = ""; + kfree(cdev->def_manufacturer); kfree(cdev); set_gadget_data(gadget, NULL); From 4de0e6b506b3ec5ce4d0517dafff4aea24c6ce73 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Feb 2017 21:46:37 +0300 Subject: [PATCH 138/141] scsi: scsi_dh_emc: return success in clariion_std_inquiry() commit 4d7d39a18b8b81511f0b893b7d2203790bf8a58b upstream. We accidentally return an uninitialized variable on success. Fixes: b6ff1b14cdf4 ("[SCSI] scsi_dh: Update EMC handler") Signed-off-by: Dan Carpenter Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/device_handler/scsi_dh_emc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index e1c8be06de9..f94fcda1285 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -464,7 +464,7 @@ static int clariion_prep_fn(struct scsi_device *sdev, struct request *req) static int clariion_std_inquiry(struct scsi_device *sdev, struct clariion_dh_data *csdev) { - int err; + int err = SCSI_DH_OK; char *sp_model; err = send_inquiry_cmd(sdev, 0, csdev); From 541cbdc60ac9ca34b21db8e6e57e4d58eee66bc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20M=C3=A4tje?= Date: Wed, 18 Oct 2017 13:25:17 +0200 Subject: [PATCH 139/141] can: esd_usb2: Fix can_dlc value for received RTR, frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 72d92e865d1560723e1957ee3f393688c49ca5bf upstream. The dlc member of the struct rx_msg contains also the ESD_RTR flag to mark received RTR frames. Without the fix the can_dlc value for received RTR frames would always be set to 8 by get_can_dlc() instead of the received value. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Signed-off-by: Stefan Mätje Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Willy Tarreau --- drivers/net/can/usb/esd_usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index d5455c76061..503f37850fb 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -335,7 +335,7 @@ static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, } cf->can_id = id & ESD_IDMASK; - cf->can_dlc = get_can_dlc(msg->msg.rx.dlc); + cf->can_dlc = get_can_dlc(msg->msg.rx.dlc & ~ESD_RTR); if (id & ESD_EXTID) cf->can_id |= CAN_EFF_FLAG; From 68cbe93962196f08a1a52e81dc6d5bedaca09b06 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 2 Nov 2017 23:22:31 +0100 Subject: [PATCH 140/141] x86/apic: fix build breakage caused by incomplete backport to 3.10 Commit 928a277 ("x86/apic: Do not init irq remapping if ioapic is disabled") introduced in 3.10.105 introduced an implicit dependency of CONFIG_X86_LOCAL_APIC to CONFIG_X86_IO_APIC which was later solved as part of simplifications on the config dependencies in more recent kernels. This dependency results in build failure when CONFIG_X86_LOCAL_APIC is set without CONFIG_X86_IO_APIC (this setup requires CONFIG_SMP=n). The reason is that skip_ioapic_setup is declared in apic.c and that the backported code was picked from a context where the #ifdef surrounding the function used to cover this condition. Let's just add the appropriate #ifdef to fix the 3.10 backport. Thanks to Christoph Biedl for reporting and diagnosing this one. Reported-by: Christoph Biedl Cc: Christoph Biedl Cc: Jan Beulich Cc: Wanpeng Li Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Willy Tarreau --- arch/x86/kernel/apic/apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3cd8bfc3c4b..bc37ddeaa62 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1581,8 +1581,10 @@ void __init enable_IR_x2apic(void) int ret, x2apic_enabled = 0; int hardware_init_ret; +#ifdef CONFIG_X86_IO_APIC if (skip_ioapic_setup) return; +#endif /* Make sure irq_remap_ops are initialized */ setup_irq_remapping_ops(); From e7a59c7f266809d17dcde20fd2055e23e7eb6895 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 4 Nov 2017 23:34:48 +0100 Subject: [PATCH 141/141] Linux 3.10.108 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 752b1c67daa..924f98a4bc0 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 107 +SUBLEVEL = 108 EXTRAVERSION = -NAME = TOSSUG Baby Fish +NAME = END-OF-LIFE # *DOCUMENTATION* # To see a list of typical targets execute "make help"