From 4bfb6dd77e79a9b70bb574bb4b9192a17c3351c1 Mon Sep 17 00:00:00 2001 From: Philip Pettersson Date: Wed, 30 Nov 2016 14:55:36 -0800 Subject: [PATCH 001/252] packet: fix race condition in packet_set_ring commit 84ac7260236a49c79eede91617700174c2c19b0c upstream. When packet_set_ring creates a ring buffer it will initialize a struct timer_list if the packet version is TPACKET_V3. This value can then be raced by a different thread calling setsockopt to set the version to TPACKET_V1 before packet_set_ring has finished. This leads to a use-after-free on a function pointer in the struct timer_list when the socket is closed as the previously initialized timer will not be deleted. The bug is fixed by taking lock_sock(sk) in packet_setsockopt when changing the packet version while also taking the lock at the start of packet_set_ring. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") Signed-off-by: Philip Pettersson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Cc: gregkh@linuxfoundation.org Signed-off-by: Willy Tarreau --- net/packet/af_packet.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 24f006623f7..e38c69969c3 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3135,19 +3135,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: - po->tp_version = val; - return 0; + break; default: return -EINVAL; } + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_version = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_RESERVE: { @@ -3603,6 +3609,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; + lock_sock(sk); /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { WARN(1, "Tx-ring is not supported.\n"); @@ -3684,7 +3691,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; } - lock_sock(sk); /* Detach socket from network */ spin_lock(&po->bind_lock); @@ -3733,11 +3739,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!tx_ring) prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue); } - release_sock(sk); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: + release_sock(sk); return err; } From 5fd53819a37e243f368376d70260873448b83df8 Mon Sep 17 00:00:00 2001 From: James Yonan Date: Thu, 26 Sep 2013 02:20:39 -0600 Subject: [PATCH 002/252] crypto: crypto_memneq - add equality testing of memory regions w/o timing leaks commit 6bf37e5aa90f18baf5acf4874bca505dd667c37f upstream. When comparing MAC hashes, AEAD authentication tags, or other hash values in the context of authentication or integrity checking, it is important not to leak timing information to a potential attacker, i.e. when communication happens over a network. Bytewise memory comparisons (such as memcmp) are usually optimized so that they return a nonzero value as soon as a mismatch is found. E.g, on x86_64/i5 for 512 bytes this can be ~50 cyc for a full mismatch and up to ~850 cyc for a full match (cold). This early-return behavior can leak timing information as a side channel, allowing an attacker to iteratively guess the correct result. This patch adds a new method crypto_memneq ("memory not equal to each other") to the crypto API that compares memory areas of the same length in roughly "constant time" (cache misses could change the timing, but since they don't reveal information about the content of the strings being compared, they are effectively benign). Iow, best and worst case behaviour take the same amount of time to complete (in contrast to memcmp). Note that crypto_memneq (unlike memcmp) can only be used to test for equality or inequality, NOT for lexicographical order. This, however, is not an issue for its use-cases within the crypto API. We tried to locate all of the places in the crypto API where memcmp was being used for authentication or integrity checking, and convert them over to crypto_memneq. crypto_memneq is declared noinline, placed in its own source file, and compiled with optimizations that might increase code size disabled ("Os") because a smart compiler (or LTO) might notice that the return value is always compared against zero/nonzero, and might then reintroduce the same early-return optimization that we are trying to avoid. Using #pragma or __attribute__ optimization annotations of the code for disabling optimization was avoided as it seems to be considered broken or unmaintained for long time in GCC [1]. Therefore, we work around that by specifying the compile flag for memneq.o directly in the Makefile. We found that this seems to be most appropriate. As we use ("Os"), this patch also provides a loop-free "fast-path" for frequently used 16 byte digests. Similarly to kernel library string functions, leave an option for future even further optimized architecture specific assembler implementations. This was a joint work of James Yonan and Daniel Borkmann. Also thanks for feedback from Florian Weimer on this and earlier proposals [2]. [1] http://gcc.gnu.org/ml/gcc/2012-07/msg00211.html [2] https://lkml.org/lkml/2013/2/10/131 Signed-off-by: James Yonan Signed-off-by: Daniel Borkmann Cc: Florian Weimer Signed-off-by: Herbert Xu Cc: Jason A. Donenfeld Signed-off-by: Willy Tarreau --- crypto/Makefile | 7 +- crypto/asymmetric_keys/rsa.c | 5 +- crypto/authenc.c | 6 +- crypto/authencesn.c | 8 +- crypto/ccm.c | 4 +- crypto/gcm.c | 2 +- crypto/memneq.c | 138 +++++++++++++++++++++++++++++++++++ include/crypto/algapi.h | 18 ++++- 8 files changed, 174 insertions(+), 14 deletions(-) create mode 100644 crypto/memneq.c diff --git a/crypto/Makefile b/crypto/Makefile index a8e9b0fefbe..b54916590d3 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -2,8 +2,13 @@ # Cryptographic API # +# memneq MUST be built with -Os or -O0 to prevent early-return optimizations +# that will defeat memneq's actual purpose to prevent timing attacks. +CFLAGS_REMOVE_memneq.o := -O1 -O2 -O3 +CFLAGS_memneq.o := -Os + obj-$(CONFIG_CRYPTO) += crypto.o -crypto-y := api.o cipher.o compress.o +crypto-y := api.o cipher.o compress.o memneq.o obj-$(CONFIG_CRYPTO_WORKQUEUE) += crypto_wq.o diff --git a/crypto/asymmetric_keys/rsa.c b/crypto/asymmetric_keys/rsa.c index 4a6a0696f8a..1912b9be504 100644 --- a/crypto/asymmetric_keys/rsa.c +++ b/crypto/asymmetric_keys/rsa.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "public_key.h" MODULE_LICENSE("GPL"); @@ -189,12 +190,12 @@ static int RSA_verify(const u8 *H, const u8 *EM, size_t k, size_t hash_size, } } - if (memcmp(asn1_template, EM + T_offset, asn1_size) != 0) { + if (crypto_memneq(asn1_template, EM + T_offset, asn1_size) != 0) { kleave(" = -EBADMSG [EM[T] ASN.1 mismatch]"); return -EBADMSG; } - if (memcmp(H, EM + T_offset + asn1_size, hash_size) != 0) { + if (crypto_memneq(H, EM + T_offset + asn1_size, hash_size) != 0) { kleave(" = -EKEYREJECTED [EM[T] hash mismatch]"); return -EKEYREJECTED; } diff --git a/crypto/authenc.c b/crypto/authenc.c index a2cfae251dd..65bcd076b18 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -188,7 +188,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq, scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); - err = memcmp(ihash, ahreq->result, authsize) ? -EBADMSG : 0; + err = crypto_memneq(ihash, ahreq->result, authsize) ? -EBADMSG : 0; if (err) goto out; @@ -227,7 +227,7 @@ static void authenc_verify_ahash_done(struct crypto_async_request *areq, scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); - err = memcmp(ihash, ahreq->result, authsize) ? -EBADMSG : 0; + err = crypto_memneq(ihash, ahreq->result, authsize) ? -EBADMSG : 0; if (err) goto out; @@ -463,7 +463,7 @@ static int crypto_authenc_verify(struct aead_request *req, ihash = ohash + authsize; scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); - return memcmp(ihash, ohash, authsize) ? -EBADMSG : 0; + return crypto_memneq(ihash, ohash, authsize) ? -EBADMSG : 0; } static int crypto_authenc_iverify(struct aead_request *req, u8 *iv, diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 16c225cb28c..a3ef98be206 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -247,7 +247,7 @@ static void authenc_esn_verify_ahash_update_done(struct crypto_async_request *ar scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); - err = memcmp(ihash, ahreq->result, authsize) ? -EBADMSG : 0; + err = crypto_memneq(ihash, ahreq->result, authsize) ? -EBADMSG : 0; if (err) goto out; @@ -296,7 +296,7 @@ static void authenc_esn_verify_ahash_update_done2(struct crypto_async_request *a scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); - err = memcmp(ihash, ahreq->result, authsize) ? -EBADMSG : 0; + err = crypto_memneq(ihash, ahreq->result, authsize) ? -EBADMSG : 0; if (err) goto out; @@ -336,7 +336,7 @@ static void authenc_esn_verify_ahash_done(struct crypto_async_request *areq, scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); - err = memcmp(ihash, ahreq->result, authsize) ? -EBADMSG : 0; + err = crypto_memneq(ihash, ahreq->result, authsize) ? -EBADMSG : 0; if (err) goto out; @@ -568,7 +568,7 @@ static int crypto_authenc_esn_verify(struct aead_request *req) ihash = ohash + authsize; scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); - return memcmp(ihash, ohash, authsize) ? -EBADMSG : 0; + return crypto_memneq(ihash, ohash, authsize) ? -EBADMSG : 0; } static int crypto_authenc_esn_iverify(struct aead_request *req, u8 *iv, diff --git a/crypto/ccm.c b/crypto/ccm.c index c569c9c6afe..003bbbd21a2 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -364,7 +364,7 @@ static void crypto_ccm_decrypt_done(struct crypto_async_request *areq, if (!err) { err = crypto_ccm_auth(req, req->dst, cryptlen); - if (!err && memcmp(pctx->auth_tag, pctx->odata, authsize)) + if (!err && crypto_memneq(pctx->auth_tag, pctx->odata, authsize)) err = -EBADMSG; } aead_request_complete(req, err); @@ -423,7 +423,7 @@ static int crypto_ccm_decrypt(struct aead_request *req) return err; /* verify */ - if (memcmp(authtag, odata, authsize)) + if (crypto_memneq(authtag, odata, authsize)) return -EBADMSG; return err; diff --git a/crypto/gcm.c b/crypto/gcm.c index a1ec756b843..49b6fb20cce 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -582,7 +582,7 @@ static int crypto_gcm_verify(struct aead_request *req, crypto_xor(auth_tag, iauth_tag, 16); scatterwalk_map_and_copy(iauth_tag, req->src, cryptlen, authsize, 0); - return memcmp(iauth_tag, auth_tag, authsize) ? -EBADMSG : 0; + return crypto_memneq(iauth_tag, auth_tag, authsize) ? -EBADMSG : 0; } static void gcm_decrypt_done(struct crypto_async_request *areq, int err) diff --git a/crypto/memneq.c b/crypto/memneq.c new file mode 100644 index 00000000000..cd0162221c1 --- /dev/null +++ b/crypto/memneq.c @@ -0,0 +1,138 @@ +/* + * Constant-time equality testing of memory regions. + * + * Authors: + * + * James Yonan + * Daniel Borkmann + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * BSD LICENSE + * + * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of OpenVPN Technologies nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ + +/* Generic path for arbitrary size */ +static inline unsigned long +__crypto_memneq_generic(const void *a, const void *b, size_t size) +{ + unsigned long neq = 0; + +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + while (size >= sizeof(unsigned long)) { + neq |= *(unsigned long *)a ^ *(unsigned long *)b; + a += sizeof(unsigned long); + b += sizeof(unsigned long); + size -= sizeof(unsigned long); + } +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + while (size > 0) { + neq |= *(unsigned char *)a ^ *(unsigned char *)b; + a += 1; + b += 1; + size -= 1; + } + return neq; +} + +/* Loop-free fast-path for frequently used 16-byte size */ +static inline unsigned long __crypto_memneq_16(const void *a, const void *b) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (sizeof(unsigned long) == 8) + return ((*(unsigned long *)(a) ^ *(unsigned long *)(b)) + | (*(unsigned long *)(a+8) ^ *(unsigned long *)(b+8))); + else if (sizeof(unsigned int) == 4) + return ((*(unsigned int *)(a) ^ *(unsigned int *)(b)) + | (*(unsigned int *)(a+4) ^ *(unsigned int *)(b+4)) + | (*(unsigned int *)(a+8) ^ *(unsigned int *)(b+8)) + | (*(unsigned int *)(a+12) ^ *(unsigned int *)(b+12))); + else +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + return ((*(unsigned char *)(a) ^ *(unsigned char *)(b)) + | (*(unsigned char *)(a+1) ^ *(unsigned char *)(b+1)) + | (*(unsigned char *)(a+2) ^ *(unsigned char *)(b+2)) + | (*(unsigned char *)(a+3) ^ *(unsigned char *)(b+3)) + | (*(unsigned char *)(a+4) ^ *(unsigned char *)(b+4)) + | (*(unsigned char *)(a+5) ^ *(unsigned char *)(b+5)) + | (*(unsigned char *)(a+6) ^ *(unsigned char *)(b+6)) + | (*(unsigned char *)(a+7) ^ *(unsigned char *)(b+7)) + | (*(unsigned char *)(a+8) ^ *(unsigned char *)(b+8)) + | (*(unsigned char *)(a+9) ^ *(unsigned char *)(b+9)) + | (*(unsigned char *)(a+10) ^ *(unsigned char *)(b+10)) + | (*(unsigned char *)(a+11) ^ *(unsigned char *)(b+11)) + | (*(unsigned char *)(a+12) ^ *(unsigned char *)(b+12)) + | (*(unsigned char *)(a+13) ^ *(unsigned char *)(b+13)) + | (*(unsigned char *)(a+14) ^ *(unsigned char *)(b+14)) + | (*(unsigned char *)(a+15) ^ *(unsigned char *)(b+15))); +} + +/* Compare two areas of memory without leaking timing information, + * and with special optimizations for common sizes. Users should + * not call this function directly, but should instead use + * crypto_memneq defined in crypto/algapi.h. + */ +noinline unsigned long __crypto_memneq(const void *a, const void *b, + size_t size) +{ + switch (size) { + case 16: + return __crypto_memneq_16(a, b); + default: + return __crypto_memneq_generic(a, b, size); + } +} +EXPORT_SYMBOL(__crypto_memneq); + +#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */ diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 418d270e180..e73c19e90e3 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -386,5 +386,21 @@ static inline int crypto_requires_sync(u32 type, u32 mask) return (type ^ CRYPTO_ALG_ASYNC) & mask & CRYPTO_ALG_ASYNC; } -#endif /* _CRYPTO_ALGAPI_H */ +noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size); +/** + * crypto_memneq - Compare two areas of memory without leaking + * timing information. + * + * @a: One area of memory + * @b: Another area of memory + * @size: The size of the area. + * + * Returns 0 when data is equal, 1 otherwise. + */ +static inline int crypto_memneq(const void *a, const void *b, size_t size) +{ + return __crypto_memneq(a, b, size) != 0UL ? 1 : 0; +} + +#endif /* _CRYPTO_ALGAPI_H */ From 37510515d6a758fea2e79d9b887c4d13efabe1e0 Mon Sep 17 00:00:00 2001 From: Ryan Ware Date: Thu, 11 Feb 2016 15:58:44 -0800 Subject: [PATCH 003/252] EVM: Use crypto_memneq() for digest comparisons commit 613317bd212c585c20796c10afe5daaa95d4b0a1 upstream. This patch fixes vulnerability CVE-2016-2085. The problem exists because the vm_verify_hmac() function includes a use of memcmp(). Unfortunately, this allows timing side channel attacks; specifically a MAC forgery complexity drop from 2^128 to 2^12. This patch changes the memcmp() to the cryptographically safe crypto_memneq(). Reported-by: Xiaofei Rex Guo Signed-off-by: Ryan Ware Cc: stable@vger.kernel.org Signed-off-by: Mimi Zohar Signed-off-by: James Morris Cc: Jason A. Donenfeld Signed-off-by: Willy Tarreau --- security/integrity/evm/evm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index b980a6ce5c7..3db2bf1f0a6 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "evm.h" int evm_initialized; @@ -128,7 +129,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, xattr_value_len, calc.digest); if (rc) break; - rc = memcmp(xattr_data->digest, calc.digest, + rc = crypto_memneq(xattr_data->digest, calc.digest, sizeof(calc.digest)); if (rc) rc = -EINVAL; From dd7d4be1c6a153eca651da013914e3fa54ef93d9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 1 Mar 2017 17:33:27 +0100 Subject: [PATCH 004/252] libceph: don't set weight to IN when OSD is destroyed commit b581a5854eee4b7851dedb0f8c2ceb54fb902c06 upstream. Since ceph.git commit 4e28f9e63644 ("osd/OSDMap: clear osd_info, osd_xinfo on osd deletion"), weight is set to IN when OSD is deleted. This changes the result of applying an incremental for clients, not just OSDs. Because CRUSH computations are obviously affected, pre-4e28f9e63644 servers disagree with post-4e28f9e63644 clients on object placement, resulting in misdirected requests. Mirrors ceph.git commit a6009d1039a55e2c77f431662b3d6cc5a8e8e63f. Fixes: 930c53286977 ("libceph: apply new_state before new_up_client on incrementals") Link: http://tracker.ceph.com/issues/19122 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Willy Tarreau --- net/ceph/osdmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index c1de8d404c4..26e2235356c 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -870,7 +870,6 @@ static int decode_new_up_state_weight(void **p, void *end, if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && (xorstate & CEPH_OSD_EXISTS)) { pr_info("osd%d does not exist\n", osd); - map->osd_weight[osd] = CEPH_OSD_IN; memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr)); map->osd_state[osd] = 0; } else { From 2dd7d7e46d791e8a6f85197510a95acae11dbc5b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Jan 2017 15:02:32 +0100 Subject: [PATCH 005/252] KVM: x86: fix emulation of "MOV SS, null selector" commit 33ab91103b3415e12457e3104f0e4517ce12d0f3 upstream. This is CVE-2017-2583. On Intel this causes a failed vmentry because SS's type is neither 3 nor 7 (even though the manual says this check is only done for usable SS, and the dmesg splat says that SS is unusable!). On AMD it's worse: svm.c is confused and sets CPL to 0 in the vmcb. The fix fabricates a data segment descriptor when SS is set to a null selector, so that CPL and SS.DPL are set correctly in the VMCS/vmcb. Furthermore, only allow setting SS to a NULL selector if SS.RPL < 3; this in turn ensures CPL < 3 because RPL must be equal to CPL. Thanks to Andy Lutomirski and Willy Tarreau for help in analyzing the bug and deciphering the manuals. [js] backport to 3.12 Reported-by: Xiaohan Zhang Fixes: 79d5b4c3cd809c770d4bf9812635647016c56011 Signed-off-by: Paolo Bonzini Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ddad189e596..364f020a5d6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1599,7 +1599,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, &ctxt->exception); } -/* Does not support long mode */ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg) { @@ -1612,6 +1611,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, int ret; u16 dummy; + + /* + * None of MOV, POP and LSS can load a NULL selector in CPL=3, but + * they can load it at CPL<3 (Intel's manual says only LSS can, + * but it's wrong). + * + * However, the Intel manual says that putting IST=1/DPL=3 in + * an interrupt gate will result in SS=3 (the AMD manual instead + * says it doesn't), so allow SS=3 in __load_segment_descriptor + * and only forbid it here. + */ + if (seg == VCPU_SREG_SS && selector == 3 && + ctxt->mode == X86EMUL_MODE_PROT64) + return emulate_exception(ctxt, GP_VECTOR, 0, true); + memset(&seg_desc, 0, sizeof seg_desc); if (ctxt->mode == X86EMUL_MODE_REAL) { @@ -1634,20 +1648,34 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; cpl = ctxt->ops->cpl(ctxt); - /* NULL selector is not valid for TR, CS and SS (except for long mode) */ - if ((seg == VCPU_SREG_CS - || (seg == VCPU_SREG_SS - && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)) - || seg == VCPU_SREG_TR) - && null_selector) - goto exception; - /* TR should be in GDT only */ if (seg == VCPU_SREG_TR && (selector & (1 << 2))) goto exception; - if (null_selector) /* for NULL selector skip all following checks */ + /* NULL selector is not valid for TR, CS and (except for long mode) SS */ + if (null_selector) { + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR) + goto exception; + + if (seg == VCPU_SREG_SS) { + if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl) + goto exception; + + /* + * ctxt->ops->set_segment expects the CPL to be in + * SS.DPL, so fake an expand-up 32-bit data segment. + */ + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = cpl; + seg_desc.d = 1; + seg_desc.g = 1; + } + + /* Skip all following checks */ goto load; + } ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr); if (ret != X86EMUL_CONTINUE) From 27bf4b6e27baa89d93ce9f2da7878e126748d721 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 11 Jan 2017 18:28:29 -0800 Subject: [PATCH 006/252] KVM: x86: Introduce segmented_write_std commit 129a72a0d3c8e139a04512325384fe5ac119e74d upstream. Introduces segemented_write_std. Switches from emulated reads/writes to standard read/writes in fxsave, fxrstor, sgdt, and sidt. This fixes CVE-2017-2584, a longstanding kernel memory leak. Since commit 283c95d0e389 ("KVM: x86: emulate FXSAVE and FXRSTOR", 2016-11-09), which is luckily not yet in any final release, this would also be an exploitable kernel memory *write*! Reported-by: Dmitry Vyukov Fixes: 96051572c819194c37a8367624b285be10297eca Fixes: 283c95d0e3891b64087706b344a4b545d04a6e62 Suggested-by: Paolo Bonzini Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini Signed-off-by: Willy Tarreau --- arch/x86/kvm/emulate.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 364f020a5d6..c96485054f6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -906,6 +906,20 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } +static int segmented_write_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned int size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); +} + /* * Fetch the next byte of the instruction being emulated which is pointed to * by ctxt->_eip, then increment ctxt->_eip. @@ -3361,8 +3375,8 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt, } /* Disable writeback. */ ctxt->dst.type = OP_NONE; - return segmented_write(ctxt, ctxt->dst.addr.mem, - &desc_ptr, 2 + ctxt->op_bytes); + return segmented_write_std(ctxt, ctxt->dst.addr.mem, + &desc_ptr, 2 + ctxt->op_bytes); } static int em_sgdt(struct x86_emulate_ctxt *ctxt) From dd2421b5edbadc5404e1cbc8fdc552f1558cc541 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Oct 2016 08:44:26 -0500 Subject: [PATCH 007/252] posix_acl: Clear SGID bit when setting file permissions commit 073931017b49d9458aa351605b43a7e34598caef upstream. When file permissions are modified via chmod(2) and the user is not in the owning group or capable of CAP_FSETID, the setgid bit is cleared in inode_change_ok(). Setting a POSIX ACL via setxattr(2) sets the file permissions as well as the new ACL, but doesn't clear the setgid bit in a similar way; this allows to bypass the check in chmod(2). Fix that. Reviewed-by: Christoph Hellwig Reviewed-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Andreas Gruenbacher [wt: dropped hfsplus changes : no xattr in 3.10] Signed-off-by: Willy Tarreau --- fs/9p/acl.c | 40 +++++++++++++++++---------------------- fs/btrfs/acl.c | 6 ++---- fs/ext2/acl.c | 12 ++++-------- fs/ext3/acl.c | 10 +++------- fs/ext4/acl.c | 12 ++++-------- fs/f2fs/acl.c | 6 ++---- fs/gfs2/acl.c | 14 ++++++-------- fs/jffs2/acl.c | 9 ++++----- fs/jfs/xattr.c | 5 +++-- fs/ocfs2/acl.c | 20 +++++++------------- fs/posix_acl.c | 31 ++++++++++++++++++++++++++++++ fs/reiserfs/xattr_acl.c | 8 ++------ fs/xfs/xfs_acl.c | 15 +++++++-------- include/linux/posix_acl.h | 1 + 14 files changed, 93 insertions(+), 96 deletions(-) diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 7af425f53be..9686c1f1765 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -320,32 +320,26 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - umode_t mode = inode->i_mode; - retval = posix_acl_equiv_mode(acl, &mode); - if (retval < 0) + struct iattr iattr; + + retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl); + if (retval) goto err_out; - else { - struct iattr iattr; - if (retval == 0) { - /* - * ACL can be represented - * by the mode bits. So don't - * update ACL. - */ - acl = NULL; - value = NULL; - size = 0; - } - /* Updte the mode bits */ - iattr.ia_mode = ((mode & S_IALLUGO) | - (inode->i_mode & ~S_IALLUGO)); - iattr.ia_valid = ATTR_MODE; - /* FIXME should we update ctime ? - * What is the following setxattr update the - * mode ? + if (!acl) { + /* + * ACL can be represented + * by the mode bits. So don't + * update ACL. */ - v9fs_vfs_setattr_dotl(dentry, &iattr); + value = NULL; + size = 0; } + iattr.ia_valid = ATTR_MODE; + /* FIXME should we update ctime ? + * What is the following setxattr update the + * mode ? + */ + v9fs_vfs_setattr_dotl(dentry, &iattr); } break; case ACL_TYPE_DEFAULT: diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 0890c83643e..d6d53e5e794 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -118,11 +118,9 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - ret = posix_acl_equiv_mode(acl, &inode->i_mode); - if (ret < 0) + ret = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (ret) return ret; - if (ret == 0) - acl = NULL; } ret = 0; break; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 110b6b371a4..48c3c2d7d26 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -206,15 +206,11 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) + error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (error) return error; - else { - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - if (error == 0) - acl = NULL; - } + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); } break; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index dbb5ad59a7f..2f994bbf73a 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -205,15 +205,11 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); + error = posix_acl_update_mode(inode, &inode->i_mode, &acl); if (error < 0) return error; - else { - inode->i_ctime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); - if (error == 0) - acl = NULL; - } + inode->i_ctime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); } break; diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 39a54a0e9fe..c844f1bfb45 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -211,15 +211,11 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) + error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (error) return error; - else { - inode->i_ctime = ext4_current_time(inode); - ext4_mark_inode_dirty(handle, inode); - if (error == 0) - acl = NULL; - } + inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); } break; diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 44abc2f286e..9c4f3c732bc 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -223,12 +223,10 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) + error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (error) return error; set_acl_inode(fi, inode->i_mode); - if (error == 0) - acl = NULL; } break; diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index f69ac0af549..a61b0c2b57a 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -268,15 +268,13 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, if (type == ACL_TYPE_ACCESS) { umode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + struct posix_acl *old_acl = acl; - if (error <= 0) { - posix_acl_release(acl); - acl = NULL; - - if (error < 0) - return error; - } + error = posix_acl_update_mode(inode, &mode, &acl); + if (error < 0) + goto out_release; + if (!acl) + posix_acl_release(old_acl); error = gfs2_set_mode(inode, mode); if (error) diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 223283c3011..9335b8d3cf5 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -243,9 +243,10 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_ACCESS: xprefix = JFFS2_XPREFIX_ACL_ACCESS; if (acl) { - umode_t mode = inode->i_mode; - rc = posix_acl_equiv_mode(acl, &mode); - if (rc < 0) + umode_t mode; + + rc = posix_acl_update_mode(inode, &mode, &acl); + if (rc) return rc; if (inode->i_mode != mode) { struct iattr attr; @@ -257,8 +258,6 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) if (rc < 0) return rc; } - if (rc == 0) - acl = NULL; } break; case ACL_TYPE_DEFAULT: diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 42d67f9757b..29a28601cb9 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -693,8 +693,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return rc; } if (acl) { - rc = posix_acl_equiv_mode(acl, &inode->i_mode); - posix_acl_release(acl); + struct posix_acl *old_acl = acl; + rc = posix_acl_update_mode(inode, &inode->i_mode, &acl); + posix_acl_release(old_acl); if (rc < 0) { printk(KERN_ERR "posix_acl_equiv_mode returned %d\n", diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 8a404576fb2..51ff9506cb0 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -274,20 +274,14 @@ static int ocfs2_set_acl(handle_t *handle, case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - umode_t mode = inode->i_mode; - ret = posix_acl_equiv_mode(acl, &mode); - if (ret < 0) + umode_t mode; + ret = posix_acl_update_mode(inode, &mode, &acl); + if (ret) + return ret; + ret = ocfs2_acl_set_mode(inode, di_bh, + handle, mode); + if (ret) return ret; - else { - if (ret == 0) - acl = NULL; - - ret = ocfs2_acl_set_mode(inode, di_bh, - handle, mode); - if (ret) - return ret; - - } } break; case ACL_TYPE_DEFAULT: diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 3542f1f814e..1da000aabb0 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -407,6 +407,37 @@ posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) } EXPORT_SYMBOL(posix_acl_create); +/** + * posix_acl_update_mode - update mode in set_acl + * + * Update the file mode when setting an ACL: compute the new file permission + * bits based on the ACL. In addition, if the ACL is equivalent to the new + * file mode, set *acl to NULL to indicate that no ACL should be set. + * + * As with chmod, clear the setgit bit if the caller is not in the owning group + * or capable of CAP_FSETID (see inode_change_ok). + * + * Called from set_acl inode operations. + */ +int posix_acl_update_mode(struct inode *inode, umode_t *mode_p, + struct posix_acl **acl) +{ + umode_t mode = inode->i_mode; + int error; + + error = posix_acl_equiv_mode(*acl, &mode); + if (error < 0) + return error; + if (error == 0) + *acl = NULL; + if (!in_group_p(inode->i_gid) && + !capable_wrt_inode_uidgid(inode, CAP_FSETID)) + mode &= ~S_ISGID; + *mode_p = mode; + return 0; +} +EXPORT_SYMBOL(posix_acl_update_mode); + int posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode) { diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 6c8767fdfc6..2d73589f37d 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -286,13 +286,9 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) + error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (error) return error; - else { - if (error == 0) - acl = NULL; - } } break; case ACL_TYPE_DEFAULT: diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 306d883d89b..5e9a9a62a45 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -388,16 +388,15 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, goto out_release; if (type == ACL_TYPE_ACCESS) { - umode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + umode_t mode; + struct posix_acl *old_acl = acl; - if (error <= 0) { - posix_acl_release(acl); - acl = NULL; + error = posix_acl_update_mode(inode, &mode, &acl); - if (error < 0) - return error; - } + if (error) + goto out_release; + if (!acl) + posix_acl_release(old_acl); error = xfs_set_mode(inode, mode); if (error) diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 7931efe7117..43cb8d59d0a 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -89,6 +89,7 @@ extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); +extern int posix_acl_update_mode(struct inode *, umode_t *, struct posix_acl **); extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); From 1026a8ce24e2598dbde2882b8b56fcfa48a2c43c Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 9 Jan 2017 09:34:48 +0800 Subject: [PATCH 008/252] tmpfs: clear S_ISGID when setting posix ACLs commit 497de07d89c1410d76a15bec2bb41f24a2a89f31 upstream. This change was missed the tmpfs modification in In CVE-2016-7097 commit 073931017b49 ("posix_acl: Clear SGID bit when setting file permissions") It can test by xfstest generic/375, which failed to clear setgid bit in the following test case on tmpfs: touch $testfile chown 100:100 $testfile chmod 2755 $testfile _runas -u 100 -g 101 -- setfacl -m u::rwx,g::rwx,o::rwx $testfile Signed-off-by: Gu Zheng Signed-off-by: Al Viro Signed-off-by: Jan Kara Signed-off-by: Willy Tarreau --- fs/generic_acl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/generic_acl.c b/fs/generic_acl.c index b3f3676796d..7855cfb938f 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -82,19 +82,21 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, return PTR_ERR(acl); } if (acl) { + struct posix_acl *old_acl; + error = posix_acl_valid(acl); if (error) goto failed; switch (type) { case ACL_TYPE_ACCESS: - error = posix_acl_equiv_mode(acl, &inode->i_mode); + old_acl = acl; + error = posix_acl_update_mode(inode, &inode->i_mode, + &acl); if (error < 0) goto failed; + if (!acl) + posix_acl_release(old_acl); inode->i_ctime = CURRENT_TIME; - if (error == 0) { - posix_acl_release(acl); - acl = NULL; - } break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) { From e39ab83635b7fc35ee97dfec0f782c67d4054531 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2017 15:18:24 -0800 Subject: [PATCH 009/252] fbdev: color map copying bounds checking commit 2dc705a9930b4806250fbf5a76e55266e59389f2 upstream. Copying color maps to userspace doesn't check the value of to->start, which will cause kernel heap buffer OOB read due to signedness wraps. CVE-2016-8405 Link: http://lkml.kernel.org/r/20170105224249.GA50925@beast Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kees Cook Reported-by: Peter Pi (@heisecode) of Trend Micro Cc: Min Chong Cc: Dan Carpenter Cc: Tomi Valkeinen Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- drivers/video/fbcmap.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c index 5c3960da755..71666c02dea 100644 --- a/drivers/video/fbcmap.c +++ b/drivers/video/fbcmap.c @@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap) int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); @@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); From a71b4196a72f09ed223d8140de7fd47ccdaf6e2b Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 31 Jan 2017 11:54:04 -0500 Subject: [PATCH 010/252] selinux: fix off-by-one in setprocattr commit 0c461cb727d146c9ef2d3e86214f498b78b7d125 upstream. SELinux tries to support setting/clearing of /proc/pid/attr attributes from the shell by ignoring terminating newlines and treating an attribute value that begins with a NUL or newline as an attempt to clear the attribute. However, the test for clearing attributes has always been wrong; it has an off-by-one error, and this could further lead to reading past the end of the allocated buffer since commit bb646cdb12e75d82258c2f2e7746d5952d3e321a ("proc_pid_attr_write(): switch to memdup_user()"). Fix the off-by-one error. Even with this fix, setting and clearing /proc/pid/attr attributes from the shell is not straightforward since the interface does not support multiple write() calls (so shells that write the value and newline separately will set and then immediately clear the attribute, requiring use of echo -n to set the attribute), whereas trying to use echo -n "" to clear the attribute causes the shell to skip the write() call altogether since POSIX says that a zero-length write causes no side effects. Thus, one must use echo -n to set and echo without -n to clear, as in the following example: $ echo -n unconfined_u:object_r:user_home_t:s0 > /proc/$$/attr/fscreate $ cat /proc/$$/attr/fscreate unconfined_u:object_r:user_home_t:s0 $ echo "" > /proc/$$/attr/fscreate $ cat /proc/$$/attr/fscreate Note the use of /proc/$$ rather than /proc/self, as otherwise the cat command will read its own attribute value, not that of the shell. There are no users of this facility to my knowledge; possibly we should just get rid of it. UPDATE: Upon further investigation it appears that a local process with the process:setfscreate permission can cause a kernel panic as a result of this bug. This patch fixes CVE-2017-2618. Signed-off-by: Stephen Smalley [PM: added the update about CVE-2017-2618 to the commit description] Signed-off-by: Paul Moore Signed-off-by: Jiri Slaby Signed-off-by: James Morris Signed-off-by: Willy Tarreau --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index fdd6e4f8be3..c08d4a10b07 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5442,7 +5442,7 @@ static int selinux_setprocattr(struct task_struct *p, return error; /* Obtain a SID for the context, if one was specified. */ - if (size && str[1] && str[1] != '\n') { + if (size && str[0] && str[0] != '\n') { if (str[size-1] == '\n') { str[size-1] = 0; size--; From 452655e76305e6c55ddfce16041d85ffed5a61c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Feb 2017 14:59:38 -0800 Subject: [PATCH 011/252] tcp: avoid infinite loop in tcp_splice_read() commit ccf7abb93af09ad0868ae9033d1ca8108bdaec82 upstream. Splicing from TCP socket is vulnerable when a packet with URG flag is received and stored into receive queue. __tcp_splice_read() returns 0, and sk_wait_data() immediately returns since there is the problematic skb in queue. This is a nice way to burn cpu (aka infinite loop) and trigger soft lockups. Again, this gem was found by syzkaller tool. Fixes: 9c55e01c0cc8 ("[TCP]: Splice receive support.") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Willy Tarreau Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/tcp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5d4bd6ca3ab..d1e04221c27 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -723,6 +723,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } + /* if __tcp_splice_read() got nothing while we have + * an skb in receive queue, we do not want to loop. + * This might happen with URG data. + */ + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; sk_wait_data(sk, &timeo); if (signal_pending(current)) { ret = sock_intr_errno(timeo); From ccf85441e11acf8db050d9dcd5dce61f893c454e Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 22 Mar 2017 07:29:31 +0000 Subject: [PATCH 012/252] xfrm_user: validate XFRM_MSG_NEWAE XFRMA_REPLAY_ESN_VAL replay_window commit 677e806da4d916052585301785d847c3b3e6186a upstream. When a new xfrm state is created during an XFRM_MSG_NEWSA call we validate the user supplied replay_esn to ensure that the size is valid and to ensure that the replay_window size is within the allocated buffer. However later it is possible to update this replay_esn via a XFRM_MSG_NEWAE call. There we again validate the size of the supplied buffer matches the existing state and if so inject the contents. We do not at this point check that the replay_window is within the allocated memory. This leads to out-of-bounds reads and writes triggered by netlink packets. This leads to memory corruption and the potential for priviledge escalation. We already attempt to validate the incoming replay information in xfrm_new_ae() via xfrm_replay_verify_len(). This confirms that the user is not trying to change the size of the replay state buffer which includes the replay_esn. It however does not check the replay_window remains within that buffer. Add validation of the contained replay_window. CVE-2017-7184 Signed-off-by: Andy Whitcroft Acked-by: Steffen Klassert Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7a70a5a5671..8b55d42076d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -393,6 +393,9 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) + return -EINVAL; + return 0; } From cbd95d10ef8248a02864a45f26089553294b155d Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 23 Mar 2017 07:45:44 +0000 Subject: [PATCH 013/252] xfrm_user: validate XFRM_MSG_NEWAE incoming ESN size harder commit f843ee6dd019bcece3e74e76ad9df0155655d0df upstream. Kees Cook has pointed out that xfrm_replay_state_esn_len() is subject to wrapping issues. To ensure we are correctly ensuring that the two ESN structures are the same size compare both the overall size as reported by xfrm_replay_state_esn_len() and the internal length are the same. CVE-2017-7184 Signed-off-by: Andy Whitcroft Acked-by: Steffen Klassert Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- net/xfrm/xfrm_user.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8b55d42076d..91a6a2903e8 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -390,7 +390,11 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) return -EINVAL; if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) From 8e728d261335059eeb96e8d6278f5f7ae4e456f5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Apr 2017 15:31:07 +0100 Subject: [PATCH 014/252] KEYS: Disallow keyrings beginning with '.' to be joined as session keyrings commit ee8f844e3c5a73b999edf733df1c529d6503ec2f upstream. This fixes CVE-2016-9604. Keyrings whose name begin with a '.' are special internal keyrings and so userspace isn't allowed to create keyrings by this name to prevent shadowing. However, the patch that added the guard didn't fix KEYCTL_JOIN_SESSION_KEYRING. Not only can that create dot-named keyrings, it can also subscribe to them as a session keyring if they grant SEARCH permission to the user. This, for example, allows a root process to set .builtin_trusted_keys as its session keyring, at which point it has full access because now the possessor permissions are added. This permits root to add extra public keys, thereby bypassing module verification. This also affects kexec and IMA. This can be tested by (as root): keyctl session .builtin_trusted_keys keyctl add user a a @s keyctl list @s which on my test box gives me: 2 keys in keyring: 180010936: ---lswrv 0 0 asymmetric: Build time autogenerated kernel key: ae3d4a31b82daa8e1a75b49dc2bba949fd992a05 801382539: --alswrv 0 0 user: a Fix this by rejecting names beginning with a '.' in the keyctl. Signed-off-by: David Howells Acked-by: Mimi Zohar cc: linux-ima-devel@lists.sourceforge.net Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- security/keys/keyctl.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3242195bfa9..1324b2e1028 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -271,7 +271,8 @@ error: * Create and join an anonymous session keyring or join a named session * keyring, creating it if necessary. A named session keyring must have Search * permission for it to be joined. Session keyrings without this permit will - * be skipped over. + * be skipped over. It is not permitted for userspace to create or join + * keyrings whose name begin with a dot. * * If successful, the ID of the joined session keyring will be returned. */ @@ -288,12 +289,16 @@ long keyctl_join_session_keyring(const char __user *_name) ret = PTR_ERR(name); goto error; } + + ret = -EPERM; + if (name[0] == '.') + goto error_name; } /* join the session */ ret = join_session_keyring(name); +error_name: kfree(name); - error: return ret; } From 5b22a8a5ba91e92b554d4799eea3bbfef3bc0af4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Apr 2017 15:31:08 +0100 Subject: [PATCH 015/252] KEYS: Change the name of the dead type to ".dead" to prevent user access commit c1644fe041ebaf6519f6809146a77c3ead9193af upstream. This fixes CVE-2017-6951. Userspace should not be able to do things with the "dead" key type as it doesn't have some of the helper functions set upon it that the kernel needs. Attempting to use it may cause the kernel to crash. Fix this by changing the name of the type to ".dead" so that it's rejected up front on userspace syscalls by key_get_type_from_user(). Though this doesn't seem to affect recent kernels, it does affect older ones, certainly those prior to: commit c06cfb08b88dfbe13be44a69ae2fdc3a7c902d81 Author: David Howells Date: Tue Sep 16 17:36:06 2014 +0100 KEYS: Remove key_type::match in favour of overriding default by match_preparse which went in before 3.18-rc1. Signed-off-by: David Howells Signed-off-by: Willy Tarreau --- security/keys/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/gc.c b/security/keys/gc.c index de34c290bd6..2e01e23295a 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -46,7 +46,7 @@ static unsigned long key_gc_flags; * immediately unlinked. */ struct key_type key_type_dead = { - .name = "dead", + .name = ".dead", }; /* From d19182e28cfd7ade11a71f1e8190fb7243ed4bf5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 18 Apr 2017 15:31:09 +0100 Subject: [PATCH 016/252] KEYS: fix keyctl_set_reqkey_keyring() to not leak thread keyrings commit c9f838d104fed6f2f61d68164712e3204bf5271b upstream. This fixes CVE-2017-7472. Running the following program as an unprivileged user exhausts kernel memory by leaking thread keyrings: #include int main() { for (;;) keyctl_set_reqkey_keyring(KEY_REQKEY_DEFL_THREAD_KEYRING); } Fix it by only creating a new thread keyring if there wasn't one before. To make things more consistent, make install_thread_keyring_to_cred() and install_process_keyring_to_cred() both return 0 if the corresponding keyring is already present. Fixes: d84f4f992cbd ("CRED: Inaugurate COW credentials") Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Willy Tarreau --- security/keys/keyctl.c | 11 ++++----- security/keys/process_keys.c | 44 ++++++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 1324b2e1028..066baa1926b 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1245,8 +1245,8 @@ error: * Read or set the default keyring in which request_key() will cache keys and * return the old setting. * - * If a process keyring is specified then this will be created if it doesn't - * yet exist. The old setting will be returned if successful. + * If a thread or process keyring is specified then it will be created if it + * doesn't yet exist. The old setting will be returned if successful. */ long keyctl_set_reqkey_keyring(int reqkey_defl) { @@ -1271,11 +1271,8 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) case KEY_REQKEY_DEFL_PROCESS_KEYRING: ret = install_process_keyring_to_cred(new); - if (ret < 0) { - if (ret != -EEXIST) - goto error; - ret = 0; - } + if (ret < 0) + goto error; goto set; case KEY_REQKEY_DEFL_DEFAULT: diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index cd871dc8b7c..33384662fc8 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -125,13 +125,18 @@ error: } /* - * Install a fresh thread keyring directly to new credentials. This keyring is - * allowed to overrun the quota. + * Install a thread keyring to the given credentials struct if it didn't have + * one already. This is allowed to overrun the quota. + * + * Return: 0 if a thread keyring is now present; -errno on failure. */ int install_thread_keyring_to_cred(struct cred *new) { struct key *keyring; + if (new->thread_keyring) + return 0; + keyring = keyring_alloc("_tid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, NULL); @@ -143,7 +148,9 @@ int install_thread_keyring_to_cred(struct cred *new) } /* - * Install a fresh thread keyring, discarding the old one. + * Install a thread keyring to the current task if it didn't have one already. + * + * Return: 0 if a thread keyring is now present; -errno on failure. */ static int install_thread_keyring(void) { @@ -154,8 +161,6 @@ static int install_thread_keyring(void) if (!new) return -ENOMEM; - BUG_ON(new->thread_keyring); - ret = install_thread_keyring_to_cred(new); if (ret < 0) { abort_creds(new); @@ -166,17 +171,17 @@ static int install_thread_keyring(void) } /* - * Install a process keyring directly to a credentials struct. + * Install a process keyring to the given credentials struct if it didn't have + * one already. This is allowed to overrun the quota. * - * Returns -EEXIST if there was already a process keyring, 0 if one installed, - * and other value on any other error + * Return: 0 if a process keyring is now present; -errno on failure. */ int install_process_keyring_to_cred(struct cred *new) { struct key *keyring; if (new->process_keyring) - return -EEXIST; + return 0; keyring = keyring_alloc("_pid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, @@ -189,11 +194,9 @@ int install_process_keyring_to_cred(struct cred *new) } /* - * Make sure a process keyring is installed for the current process. The - * existing process keyring is not replaced. + * Install a process keyring to the current task if it didn't have one already. * - * Returns 0 if there is a process keyring by the end of this function, some - * error otherwise. + * Return: 0 if a process keyring is now present; -errno on failure. */ static int install_process_keyring(void) { @@ -207,14 +210,18 @@ static int install_process_keyring(void) ret = install_process_keyring_to_cred(new); if (ret < 0) { abort_creds(new); - return ret != -EEXIST ? ret : 0; + return ret; } return commit_creds(new); } /* - * Install a session keyring directly to a credentials struct. + * Install the given keyring as the session keyring of the given credentials + * struct, replacing the existing one if any. If the given keyring is NULL, + * then install a new anonymous session keyring. + * + * Return: 0 on success; -errno on failure. */ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) { @@ -249,8 +256,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) } /* - * Install a session keyring, discarding the old one. If a keyring is not - * supplied, an empty one is invented. + * Install the given keyring as the session keyring of the current task, + * replacing the existing one if any. If the given keyring is NULL, then + * install a new anonymous session keyring. + * + * Return: 0 on success; -errno on failure. */ static int install_session_keyring(struct key *keyring) { From 9488a4776180db568e51d54d02250eb6f3ee724e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 24 Apr 2016 00:56:03 -0400 Subject: [PATCH 017/252] ext4: fix data exposure after a crash commit 06bd3c36a733ac27962fea7d6f47168841376824 upstream. Huang has reported that in his powerfail testing he is seeing stale block contents in some of recently allocated blocks although he mounts ext4 in data=ordered mode. After some investigation I have found out that indeed when delayed allocation is used, we don't add inode to transaction's list of inodes needing flushing before commit. Originally we were doing that but commit f3b59291a69d removed the logic with a flawed argument that it is not needed. The problem is that although for delayed allocated blocks we write their contents immediately after allocating them, there is no guarantee that the IO scheduler or device doesn't reorder things and thus transaction allocating blocks and attaching them to inode can reach stable storage before actual block contents. Actually whenever we attach freshly allocated blocks to inode using a written extent, we should add inode to transaction's ordered inode list to make sure we properly wait for block contents to be written before committing the transaction. So that is what we do in this patch. This also handles other cases where stale data exposure was possible - like filling hole via mmap in data=ordered,nodelalloc mode. The only exception to the above rule are extending direct IO writes where blkdev_direct_IO() waits for IO to complete before increasing i_size and thus stale data exposure is not possible. For now we don't complicate the code with optimizing this special case since the overhead is pretty low. In case this is observed to be a performance problem we can always handle it using a special flag to ext4_map_blocks(). Fixes: f3b59291a69d0b734be1fc8be489fef2dd846d3d Reported-by: "HUANG Weller (CM/ESW12-CN)" Tested-by: "HUANG Weller (CM/ESW12-CN)" Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/inode.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 31179ba2072..36df03833bf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -759,6 +759,20 @@ has_zeroout: int ret = check_block_validity(inode, map); if (ret != 0) return ret; + + /* + * Inodes with freshly allocated blocks where contents will be + * visible after transaction commit must be on transaction's + * ordered data list. + */ + if (map->m_flags & EXT4_MAP_NEW && + !(map->m_flags & EXT4_MAP_UNWRITTEN) && + !IS_NOQUOTA(inode) && + ext4_should_order_data(inode)) { + ret = ext4_jbd2_file_inode(handle, inode); + if (ret) + return ret; + } } return retval; } @@ -1119,15 +1133,6 @@ static int ext4_write_end(struct file *file, int i_size_changed = 0; trace_ext4_write_end(inode, pos, len, copied); - if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { - ret = ext4_jbd2_file_inode(handle, inode); - if (ret) { - unlock_page(page); - page_cache_release(page); - goto errout; - } - } - if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); From 07f03860aae42d0b2c38723bb91845f91422d199 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Nov 2016 21:04:41 +0000 Subject: [PATCH 018/252] locking/rtmutex: Prevent dequeue vs. unlock race commit dbb26055defd03d59f678cb5f2c992abe05b064a upstream. David reported a futex/rtmutex state corruption. It's caused by the following problem: CPU0 CPU1 CPU2 l->owner=T1 rt_mutex_lock(l) lock(l->wait_lock) l->owner = T1 | HAS_WAITERS; enqueue(T2) boost() unlock(l->wait_lock) schedule() rt_mutex_lock(l) lock(l->wait_lock) l->owner = T1 | HAS_WAITERS; enqueue(T3) boost() unlock(l->wait_lock) schedule() signal(->T2) signal(->T3) lock(l->wait_lock) dequeue(T2) deboost() unlock(l->wait_lock) lock(l->wait_lock) dequeue(T3) ===> wait list is now empty deboost() unlock(l->wait_lock) lock(l->wait_lock) fixup_rt_mutex_waiters() if (wait_list_empty(l)) { owner = l->owner & ~HAS_WAITERS; l->owner = owner ==> l->owner = T1 } lock(l->wait_lock) rt_mutex_unlock(l) fixup_rt_mutex_waiters() if (wait_list_empty(l)) { owner = l->owner & ~HAS_WAITERS; cmpxchg(l->owner, T1, NULL) ===> Success (l->owner = NULL) l->owner = owner ==> l->owner = T1 } That means the problem is caused by fixup_rt_mutex_waiters() which does the RMW to clear the waiters bit unconditionally when there are no waiters in the rtmutexes rbtree. This can be fatal: A concurrent unlock can release the rtmutex in the fastpath because the waiters bit is not set. If the cmpxchg() gets in the middle of the RMW operation then the previous owner, which just unlocked the rtmutex is set as the owner again when the write takes place after the successfull cmpxchg(). The solution is rather trivial: verify that the owner member of the rtmutex has the waiters bit set before clearing it. This does not require a cmpxchg() or other atomic operations because the waiters bit can only be set and cleared with the rtmutex wait_lock held. It's also safe against the fast path unlock attempt. The unlock attempt via cmpxchg() will either see the bit set and take the slowpath or see the bit cleared and release it atomically in the fastpath. It's remarkable that the test program provided by David triggers on ARM64 and MIPS64 really quick, but it refuses to reproduce on x86-64, while the problem exists there as well. That refusal might explain that this got not discovered earlier despite the bug existing from day one of the rtmutex implementation more than 10 years ago. Thanks to David for meticulously instrumenting the code and providing the information which allowed to decode this subtle problem. Reported-by: David Daney Tested-by: David Daney Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Will Deacon Fixes: 23f78d4a03c5 ("[PATCH] pi-futex: rt mutex core") Link: http://lkml.kernel.org/r/20161130210030.351136722@linutronix.de Signed-off-by: Ingo Molnar [wt: s/{READ,WRITE}_ONCE/ACCESS_ONCE/] Signed-off-by: Willy Tarreau --- kernel/rtmutex.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index d9ca207cec0..286c92f5573 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -64,8 +64,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) static void fixup_rt_mutex_waiters(struct rt_mutex *lock) { - if (!rt_mutex_has_waiters(lock)) - clear_rt_mutex_waiters(lock); + unsigned long owner, *p = (unsigned long *) &lock->owner; + + if (rt_mutex_has_waiters(lock)) + return; + + /* + * The rbtree has no waiters enqueued, now make sure that the + * lock->owner still has the waiters bit set, otherwise the + * following can happen: + * + * CPU 0 CPU 1 CPU2 + * l->owner=T1 + * rt_mutex_lock(l) + * lock(l->lock) + * l->owner = T1 | HAS_WAITERS; + * enqueue(T2) + * boost() + * unlock(l->lock) + * block() + * + * rt_mutex_lock(l) + * lock(l->lock) + * l->owner = T1 | HAS_WAITERS; + * enqueue(T3) + * boost() + * unlock(l->lock) + * block() + * signal(->T2) signal(->T3) + * lock(l->lock) + * dequeue(T2) + * deboost() + * unlock(l->lock) + * lock(l->lock) + * dequeue(T3) + * ==> wait list is empty + * deboost() + * unlock(l->lock) + * lock(l->lock) + * fixup_rt_mutex_waiters() + * if (wait_list_empty(l) { + * l->owner = owner + * owner = l->owner & ~HAS_WAITERS; + * ==> l->owner = T1 + * } + * lock(l->lock) + * rt_mutex_unlock(l) fixup_rt_mutex_waiters() + * if (wait_list_empty(l) { + * owner = l->owner & ~HAS_WAITERS; + * cmpxchg(l->owner, T1, NULL) + * ===> Success (l->owner = NULL) + * + * l->owner = owner + * ==> l->owner = T1 + * } + * + * With the check for the waiter bit in place T3 on CPU2 will not + * overwrite. All tasks fiddling with the waiters bit are + * serialized by l->lock, so nothing else can modify the waiters + * bit. If the bit is set then nothing can change l->owner either + * so the simple RMW is safe. The cmpxchg() will simply fail if it + * happens in the middle of the RMW because the waiters bit is + * still set. + */ + owner = ACCESS_ONCE(*p); + if (owner & RT_MUTEX_HAS_WAITERS) + ACCESS_ONCE(*p) = owner & ~RT_MUTEX_HAS_WAITERS; } /* From 3d1b965c770966423128e587ee8bdbcbd816ea8d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Oct 2016 17:12:28 +0200 Subject: [PATCH 019/252] m68k: Fix ndelay() macro commit 7e251bb21ae08ca2e4fb28cc0981fac2685a8efa upstream. The current ndelay() macro definition has an extra semi-colon at the end of the line thus leading to a compilation error when ndelay is used in a conditional block without curly braces like this one: if (cond) ndelay(t); else ... which, after the preprocessor pass gives: if (cond) m68k_ndelay(t);; else ... thus leading to the following gcc error: error: 'else' without a previous 'if' Remove this extra semi-colon. Signed-off-by: Boris Brezillon Fixes: c8ee038bd1488 ("m68k: Implement ndelay() based on the existing udelay() logic") Signed-off-by: Geert Uytterhoeven Signed-off-by: Willy Tarreau --- arch/m68k/include/asm/delay.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/delay.h b/arch/m68k/include/asm/delay.h index d28fa8fe26f..c598d847d56 100644 --- a/arch/m68k/include/asm/delay.h +++ b/arch/m68k/include/asm/delay.h @@ -114,6 +114,6 @@ static inline void __udelay(unsigned long usecs) */ #define HZSCALE (268435456 / (1000000 / HZ)) -#define ndelay(n) __delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000)); +#define ndelay(n) __delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000)) #endif /* defined(_M68K_DELAY_H) */ From 57bf12f43e0151395114768d343b64429ac7650f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 7 Dec 2016 14:54:38 +0100 Subject: [PATCH 020/252] hotplug: Make register and unregister notifier API symmetric commit 777c6e0daebb3fcefbbd6f620410a946b07ef6d0 upstream. Yu Zhao has noticed that __unregister_cpu_notifier only unregisters its notifiers when HOTPLUG_CPU=y while the registration might succeed even when HOTPLUG_CPU=n if MODULE is enabled. This means that e.g. zswap might keep a stale notifier on the list on the manual clean up during the pool tear down and thus corrupt the list. Resulting in the following [ 144.964346] BUG: unable to handle kernel paging request at ffff880658a2be78 [ 144.971337] IP: [] raw_notifier_chain_register+0x1b/0x40 [ 145.122628] Call Trace: [ 145.125086] [] __register_cpu_notifier+0x18/0x20 [ 145.131350] [] zswap_pool_create+0x273/0x400 [ 145.137268] [] __zswap_param_set+0x1fc/0x300 [ 145.143188] [] ? trace_hardirqs_on+0xd/0x10 [ 145.149018] [] ? kernel_param_lock+0x28/0x30 [ 145.154940] [] ? __might_fault+0x4f/0xa0 [ 145.160511] [] zswap_compressor_param_set+0x17/0x20 [ 145.167035] [] param_attr_store+0x5c/0xb0 [ 145.172694] [] module_attr_store+0x1d/0x30 [ 145.178443] [] sysfs_kf_write+0x4f/0x70 [ 145.183925] [] kernfs_fop_write+0x149/0x180 [ 145.189761] [] __vfs_write+0x18/0x40 [ 145.194982] [] vfs_write+0xb2/0x1a0 [ 145.200122] [] SyS_write+0x52/0xa0 [ 145.205177] [] entry_SYSCALL_64_fastpath+0x12/0x17 This can be even triggered manually by changing /sys/module/zswap/parameters/compressor multiple times. Fix this issue by making unregister APIs symmetric to the register so there are no surprises. [js] backport to 3.12 Fixes: 47e627bc8c9a ("[PATCH] hotplug: Allow modules to use the cpu hotplug notifiers even if !CONFIG_HOTPLUG_CPU") Reported-and-tested-by: Yu Zhao Signed-off-by: Michal Hocko Cc: linux-mm@kvack.org Cc: Andrew Morton Cc: Dan Streetman Link: http://lkml.kernel.org/r/20161207135438.4310-1-mhocko@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- include/linux/cpu.h | 12 +++--------- kernel/cpu.c | 3 +-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 9f3c7e81270..d0d5946b6ee 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -119,22 +119,16 @@ enum { { .notifier_call = fn, .priority = pri }; \ register_cpu_notifier(&fn##_nb); \ } -#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); -#else -#ifndef MODULE -extern int register_cpu_notifier(struct notifier_block *nb); -#else +#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) + static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } -#endif static inline void unregister_cpu_notifier(struct notifier_block *nb) { diff --git a/kernel/cpu.c b/kernel/cpu.c index bc255e25d5d..a6c24248986 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -185,8 +185,6 @@ static int cpu_notify(unsigned long val, void *v) return __cpu_notify(val, v, -1, NULL); } -#ifdef CONFIG_HOTPLUG_CPU - static void cpu_notify_nofail(unsigned long val, void *v) { BUG_ON(cpu_notify(val, v)); @@ -201,6 +199,7 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_cpu_notifier); +#ifdef CONFIG_HOTPLUG_CPU /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id From 6e97d31a5f2ff4b0f47e348df7e705b660296078 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 7 Oct 2016 17:30:47 +0800 Subject: [PATCH 021/252] Btrfs: fix tree search logic when replaying directory entry deletes commit 2a7bf53f577e49c43de4ffa7776056de26db65d9 upstream. If a log tree has a layout like the following: leaf N: ... item 240 key (282 DIR_LOG_ITEM 0) itemoff 8189 itemsize 8 dir log end 1275809046 leaf N + 1: item 0 key (282 DIR_LOG_ITEM 3936149215) itemoff 16275 itemsize 8 dir log end 18446744073709551615 ... When we pass the value 1275809046 + 1 as the parameter start_ret to the function tree-log.c:find_dir_range() (done by replay_dir_deletes()), we end up with path->slots[0] having the value 239 (points to the last item of leaf N, item 240). Because the dir log item in that position has an offset value smaller than *start_ret (1275809046 + 1) we need to move on to the next leaf, however the logic for that is wrong since it compares the current slot to the number of items in the leaf, which is smaller and therefore we don't lookup for the next leaf but instead we set the slot to point to an item that does not exist, at slot 240, and we later operate on that slot which has unexpected content or in the worst case can result in an invalid memory access (accessing beyond the last page of leaf N's extent buffer). So fix the logic that checks when we need to lookup at the next leaf by first incrementing the slot and only after to check if that slot is beyond the last item of the current leaf. Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana Fixes: e02119d5a7b4 (Btrfs: Add a write ahead tree log to optimize synchronous operations) Signed-off-by: Filipe Manana [Modified changelog for clarity and correctness] Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- fs/btrfs/tree-log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7d3331cbccb..681782d00b1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1691,12 +1691,11 @@ static noinline int find_dir_range(struct btrfs_root *root, next: /* check the next slot in the tree to see if it is a valid item */ nritems = btrfs_header_nritems(path->nodes[0]); + path->slots[0]++; if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) goto out; - } else { - path->slots[0]++; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); From 6017ea9df35665149c7fbf5f82e6799f8606a792 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2016 16:55:01 +0100 Subject: [PATCH 022/252] USB: serial: kl5kusb105: fix open error path commit 6774d5f53271d5f60464f824748995b71da401ab upstream. Kill urbs and disable read before returning from open on failure to retrieve the line state. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/kl5kusb105.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 1b4054fe52a..70e163d21e9 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -304,7 +304,7 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) rc = usb_serial_generic_open(tty, port); if (rc) { retval = rc; - goto exit; + goto err_free_cfg; } rc = usb_control_msg(port->serial->dev, @@ -323,17 +323,32 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) dev_dbg(&port->dev, "%s - enabled reading\n", __func__); rc = klsi_105_get_line_state(port, &line_state); - if (rc >= 0) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_state = line_state; - spin_unlock_irqrestore(&priv->lock, flags); - dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, line_state); - retval = 0; - } else + if (rc < 0) { retval = rc; + goto err_disable_read; + } -exit: + spin_lock_irqsave(&priv->lock, flags); + priv->line_state = line_state; + spin_unlock_irqrestore(&priv->lock, flags); + dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, + line_state); + + return 0; + +err_disable_read: + usb_control_msg(port->serial->dev, + usb_sndctrlpipe(port->serial->dev, 0), + KL5KUSB105A_SIO_CONFIGURE, + USB_TYPE_VENDOR | USB_DIR_OUT, + KL5KUSB105A_SIO_CONFIGURE_READ_OFF, + 0, /* index */ + NULL, 0, + KLSI_TIMEOUT); + usb_serial_generic_close(port); +err_free_cfg: kfree(cfg); + return retval; } From 725da55c94027ae547807c948c7026f0a622f7fe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 Dec 2016 08:21:51 -0700 Subject: [PATCH 023/252] block_dev: don't test bdev->bd_contains when it is not stable commit bcc7f5b4bee8e327689a4d994022765855c807ff upstream. bdev->bd_contains is not stable before calling __blkdev_get(). When __blkdev_get() is called on a parition with ->bd_openers == 0 it sets bdev->bd_contains = bdev; which is not correct for a partition. After a call to __blkdev_get() succeeds, ->bd_openers will be > 0 and then ->bd_contains is stable. When FMODE_EXCL is used, blkdev_get() calls bd_start_claiming() -> bd_prepare_to_claim() -> bd_may_claim() This call happens before __blkdev_get() is called, so ->bd_contains is not stable. So bd_may_claim() cannot safely use ->bd_contains. It currently tries to use it, and this can lead to a BUG_ON(). This happens when a whole device is already open with a bd_holder (in use by dm in my particular example) and two threads race to open a partition of that device for the first time, one opening with O_EXCL and one without. The thread that doesn't use O_EXCL gets through blkdev_get() to __blkdev_get(), gains the ->bd_mutex, and sets bdev->bd_contains = bdev; Immediately thereafter the other thread, using FMODE_EXCL, calls bd_start_claiming() from blkdev_get(). This should fail because the whole device has a holder, but because bdev->bd_contains == bdev bd_may_claim() incorrectly reports success. This thread continues and blocks on bd_mutex. The first thread then sets bdev->bd_contains correctly and drops the mutex. The thread using FMODE_EXCL then continues and when it calls bd_may_claim() again in: BUG_ON(!bd_may_claim(bdev, whole, holder)); The BUG_ON fires. Fix this by removing the dependency on ->bd_contains in bd_may_claim(). As bd_may_claim() has direct access to the whole device, it can simply test if the target bdev is the whole device. Fixes: 6b4517a7913a ("block: implement bd_claiming and claiming block") Signed-off-by: NeilBrown Signed-off-by: Jens Axboe Signed-off-by: Willy Tarreau --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 85f5c85ec91..3a85a038c33 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -655,7 +655,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, return true; /* already a holder */ else if (bdev->bd_holder != NULL) return false; /* held by someone else */ - else if (bdev->bd_contains == bdev) + else if (whole == bdev) return true; /* is a whole device which isn't held */ else if (whole->bd_holder == bd_may_claim) From 625208a03ce1372785da22fb444ae2ca2a745c68 Mon Sep 17 00:00:00 2001 From: Alex Porosanu Date: Wed, 9 Nov 2016 10:46:11 +0200 Subject: [PATCH 024/252] crypto: caam - fix AEAD givenc descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d128af17876d79b87edf048303f98b35f6a53dbc upstream. The AEAD givenc descriptor relies on moving the IV through the output FIFO and then back to the CTX2 for authentication. The SEQ FIFO STORE could be scheduled before the data can be read from OFIFO, especially since the SEQ FIFO LOAD needs to wait for the SEQ FIFO LOAD SKIP to finish first. The SKIP takes more time when the input is SG than when it's a contiguous buffer. If the SEQ FIFO LOAD is not scheduled before the STORE, the DECO will hang waiting for data to be available in the OFIFO so it can be transferred to C2. In order to overcome this, first force transfer of IV to C2 by starting the "cryptlen" transfer first and then starting to store data from OFIFO to the output buffer. Fixes: 1acebad3d8db8 ("crypto: caam - faster aead implementation") Signed-off-by: Alex Porosanu Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Willy Tarreau --- drivers/crypto/caam/caamalg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index bf416a8391a..0cba9273e6c 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -422,7 +422,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* Will read cryptlen */ append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF | + FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); /* Write ICV */ append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | From cafd2159d1fd4e43f50229476b2d9a02a6ab47c3 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Mon, 14 Nov 2016 21:04:37 -0500 Subject: [PATCH 025/252] ext4: fix mballoc breakage with 64k block size commit 69e43e8cc971a79dd1ee5d4343d8e63f82725123 upstream. 'border' variable is set to a value of 2 times the block size of the underlying filesystem. With 64k block size, the resulting value won't fit into a 16-bit variable. Hence this commit changes the data type of 'border' to 'unsigned int'. Fixes: c9de560ded61f Signed-off-by: Chandan Rajendra Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cb9eec025ba..a7a1b676b93 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -668,7 +668,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, ext4_grpblk_t min; ext4_grpblk_t max; ext4_grpblk_t chunk; - unsigned short border; + unsigned int border; BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); From ac3d8fba0618c9f68832ea9fc6fe6615bda4de8d Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Mon, 14 Nov 2016 21:26:26 -0500 Subject: [PATCH 026/252] ext4: fix stack memory corruption with 64k block size commit 30a9d7afe70ed6bd9191d3000e2ef1a34fb58493 upstream. The number of 'counters' elements needed in 'struct sg' is super_block->s_blocksize_bits + 2. Presently we have 16 'counters' elements in the array. This is insufficient for block sizes >= 32k. In such cases the memcpy operation performed in ext4_mb_seq_groups_show() would cause stack memory corruption. Fixes: c9de560ded61f Signed-off-by: Chandan Rajendra Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a7a1b676b93..83ed61a6cfc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2222,7 +2222,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) struct ext4_group_info *grinfo; struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[16]; + ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; } sg; group--; From b70876f0d70f5d8e571893a84e59a0ef92098fb1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 10 Dec 2016 09:55:01 -0500 Subject: [PATCH 027/252] ext4: reject inodes with negative size commit 7e6e1ef48fc02f3ac5d0edecbb0c6087cd758d58 upstream. Don't load an inode with a negative size; this causes integer overflow problems in the VFS. [ Added EXT4_ERROR_INODE() to mark file system as corrupted. -TYT] js: use EIO for 3.12 instead of EFSCORRUPTED. Fixes: a48380f769df (ext4: rename i_dir_acl to i_size_high) Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- fs/ext4/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 36df03833bf..5fb975495c2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4183,6 +4183,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct inode *inode; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; + loff_t size; int block; uid_t i_uid; gid_t i_gid; @@ -4275,6 +4276,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); + if ((size = i_size_read(inode)) < 0) { + EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); + ret = -EIO; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; From e74af998c0ce9e6c4e0da13660d322a9bab139da Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 10 Dec 2016 09:56:01 -0500 Subject: [PATCH 028/252] ext4: return -ENOMEM instead of success commit 578620f451f836389424833f1454eeeb2ffc9e9f upstream. We should set the error code if kzalloc() fails. Fixes: 67cf5b09a46f ("ext4: add the basic function for inline data support") Signed-off-by: Dan Carpenter Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/inline.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index e350be6c7ac..b390de05828 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -339,8 +339,10 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, len -= EXT4_MIN_INLINE_DATA_SIZE; value = kzalloc(len, GFP_NOFS); - if (!value) + if (!value) { + error = -ENOMEM; goto out; + } error = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, len); From 49c82256a18d742b3a2b97875adccfca6e8db2c9 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 20 Nov 2016 19:57:23 +0100 Subject: [PATCH 029/252] f2fs: set ->owner for debugfs status file's file_operations commit 05e6ea2685c964db1e675a24a4f4e2adc22d2388 upstream. The struct file_operations instance serving the f2fs/status debugfs file lacks an initialization of its ->owner. This means that although that file might have been opened, the f2fs module can still get removed. Any further operation on that opened file, releasing included, will cause accesses to unmapped memory. Indeed, Mike Marshall reported the following: BUG: unable to handle kernel paging request at ffffffffa0307430 IP: [] full_proxy_release+0x24/0x90 <...> Call Trace: [] __fput+0xdf/0x1d0 [] ____fput+0xe/0x10 [] task_work_run+0x8e/0xc0 [] do_exit+0x2ae/0xae0 [] ? __audit_syscall_entry+0xae/0x100 [] ? syscall_trace_enter+0x1ca/0x310 [] do_group_exit+0x44/0xc0 [] SyS_exit_group+0x14/0x20 [] do_syscall_64+0x61/0x150 [] entry_SYSCALL64_slow_path+0x25/0x25 <...> ---[ end trace f22ae883fa3ea6b8 ]--- Fixing recursive fault but reboot is needed! Fix this by initializing the f2fs/status file_operations' ->owner with THIS_MODULE. This will allow debugfs to grab a reference to the f2fs module upon any open on that file, thus preventing it from getting removed. Fixes: 902829aa0b72 ("f2fs: move proc files to debugfs") Reported-by: Mike Marshall Reported-by: Martin Brandenburg Signed-off-by: Nicolai Stange Signed-off-by: Jaegeuk Kim Signed-off-by: Willy Tarreau --- fs/f2fs/debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8d9943786c3..a73dddedc80 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -294,6 +294,7 @@ static int stat_open(struct inode *inode, struct file *file) } static const struct file_operations stat_fops = { + .owner = THIS_MODULE, .open = stat_open, .read = seq_read, .llseek = seq_lseek, From 97c6c8552bfa368f8fea4215200bd3a2549182d0 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 1 Dec 2016 09:18:28 +0100 Subject: [PATCH 030/252] block: protect iterate_bdevs() against concurrent close commit af309226db916e2c6e08d3eba3fa5c34225200c4 upstream. If a block device is closed while iterate_bdevs() is handling it, the following NULL pointer dereference occurs because bdev->b_disk is NULL in bdev_get_queue(), which is called from blk_get_backing_dev_info() (in turn called by the mapping_cap_writeback_dirty() call in __filemap_fdatawrite_range()): BUG: unable to handle kernel NULL pointer dereference at 0000000000000508 IP: [] blk_get_backing_dev_info+0x10/0x20 PGD 9e62067 PUD 9ee8067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 1 PID: 2422 Comm: sync Not tainted 4.5.0-rc7+ #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) task: ffff880009f4d700 ti: ffff880009f5c000 task.ti: ffff880009f5c000 RIP: 0010:[] [] blk_get_backing_dev_info+0x10/0x20 RSP: 0018:ffff880009f5fe68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88000ec17a38 RCX: ffffffff81a4e940 RDX: 7fffffffffffffff RSI: 0000000000000000 RDI: ffff88000ec176c0 RBP: ffff880009f5fe68 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88000ec17860 R13: ffffffff811b25c0 R14: ffff88000ec178e0 R15: ffff88000ec17a38 FS: 00007faee505d700(0000) GS:ffff88000fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000508 CR3: 0000000009e8a000 CR4: 00000000000006e0 Stack: ffff880009f5feb8 ffffffff8112e7f5 0000000000000000 7fffffffffffffff 0000000000000000 0000000000000000 7fffffffffffffff 0000000000000001 ffff88000ec178e0 ffff88000ec17860 ffff880009f5fec8 ffffffff8112e81f Call Trace: [] __filemap_fdatawrite_range+0x85/0x90 [] filemap_fdatawrite+0x1f/0x30 [] fdatawrite_one_bdev+0x16/0x20 [] iterate_bdevs+0xf2/0x130 [] sys_sync+0x63/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x76 Code: 0f 1f 44 00 00 48 8b 87 f0 00 00 00 55 48 89 e5 <48> 8b 80 08 05 00 00 5d RIP [] blk_get_backing_dev_info+0x10/0x20 RSP CR2: 0000000000000508 ---[ end trace 2487336ceb3de62d ]--- The crash is easily reproducible by running the following command, if an msleep(100) is inserted before the call to func() in iterate_devs(): while :; do head -c1 /dev/nullb0; done > /dev/null & while :; do sync; done Fix it by holding the bd_mutex across the func() call and only calling func() if the bdev is opened. Fixes: 5c0d6b60a0ba ("vfs: Create function for iterating over block devices") Reported-and-tested-by: Wei Fang Signed-off-by: Rabin Vincent Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Willy Tarreau --- fs/block_dev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 3a85a038c33..8f0267e81e8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1692,6 +1692,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) spin_lock(&inode_sb_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { struct address_space *mapping = inode->i_mapping; + struct block_device *bdev; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || @@ -1712,8 +1713,12 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) */ iput(old_inode); old_inode = inode; + bdev = I_BDEV(inode); - func(I_BDEV(inode), arg); + mutex_lock(&bdev->bd_mutex); + if (bdev->bd_openers) + func(bdev, arg); + mutex_unlock(&bdev->bd_mutex); spin_lock(&inode_sb_list_lock); } From 54539504f0c49b2fb550e61dd211922b103a0f40 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 9 Dec 2016 17:16:31 +0100 Subject: [PATCH 031/252] scsi: zfcp: fix use-after-"free" in FC ingress path after TMF commit dac37e15b7d511e026a9313c8c46794c144103cd upstream. When SCSI EH invokes zFCP's callbacks for eh_device_reset_handler() and eh_target_reset_handler(), it expects us to relent the ownership over the given scsi_cmnd and all other scsi_cmnds within the same scope - LUN or target - when returning with SUCCESS from the callback ('release' them). SCSI EH can then reuse those commands. We did not follow this rule to release commands upon SUCCESS; and if later a reply arrived for one of those supposed to be released commands, we would still make use of the scsi_cmnd in our ingress tasklet. This will at least result in undefined behavior or a kernel panic because of a wrong kernel pointer dereference. To fix this, we NULLify all pointers to scsi_cmnds (struct zfcp_fsf_req *)->data in the matching scope if a TMF was successful. This is done under the locks (struct zfcp_adapter *)->abort_lock and (struct zfcp_reqlist *)->lock to prevent the requests from being removed from the request-hashtable, and the ingress tasklet from making use of the scsi_cmnd-pointer in zfcp_fsf_fcp_cmnd_handler(). For cases where a reply arrives during SCSI EH, but before we get a chance to NULLify the pointer - but before we return from the callback -, we assume that the code is protected from races via the CAS operation in blk_complete_request() that is called in scsi_done(). The following stacktrace shows an example for a crash resulting from the previous behavior: Unable to handle kernel pointer dereference at virtual kernel address fffffee17a672000 Oops: 0038 [#1] SMP CPU: 2 PID: 0 Comm: swapper/2 Not tainted task: 00000003f7ff5be0 ti: 00000003f3d38000 task.ti: 00000003f3d38000 Krnl PSW : 0404d00180000000 00000000001156b0 (smp_vcpu_scheduled+0x18/0x40) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 EA:3 Krnl GPRS: 000000200000007e 0000000000000000 fffffee17a671fd8 0000000300000015 ffffffff80000000 00000000005dfde8 07000003f7f80e00 000000004fa4e800 000000036ce8d8f8 000000036ce8d9c0 00000003ece8fe00 ffffffff969c9e93 00000003fffffffd 000000036ce8da10 00000000003bf134 00000003f3b07918 Krnl Code: 00000000001156a2: a7190000 lghi %r1,0 00000000001156a6: a7380015 lhi %r3,21 #00000000001156aa: e32050000008 ag %r2,0(%r5) >00000000001156b0: 482022b0 lh %r2,688(%r2) 00000000001156b4: ae123000 sigp %r1,%r2,0(%r3) 00000000001156b8: b2220020 ipm %r2 00000000001156bc: 8820001c srl %r2,28 00000000001156c0: c02700000001 xilf %r2,1 Call Trace: ([<0000000000000000>] 0x0) [<000003ff807bdb8e>] zfcp_fsf_fcp_cmnd_handler+0x3de/0x490 [zfcp] [<000003ff807be30a>] zfcp_fsf_req_complete+0x252/0x800 [zfcp] [<000003ff807c0a48>] zfcp_fsf_reqid_check+0xe8/0x190 [zfcp] [<000003ff807c194e>] zfcp_qdio_int_resp+0x66/0x188 [zfcp] [<000003ff80440c64>] qdio_kick_handler+0xdc/0x310 [qdio] [<000003ff804463d0>] __tiqdio_inbound_processing+0xf8/0xcd8 [qdio] [<0000000000141fd4>] tasklet_action+0x9c/0x170 [<0000000000141550>] __do_softirq+0xe8/0x258 [<000000000010ce0a>] do_softirq+0xba/0xc0 [<000000000014187c>] irq_exit+0xc4/0xe8 [<000000000046b526>] do_IRQ+0x146/0x1d8 [<00000000005d6a3c>] io_return+0x0/0x8 [<00000000005d6422>] vtime_stop_cpu+0x4a/0xa0 ([<0000000000000000>] 0x0) [<0000000000103d8a>] arch_cpu_idle+0xa2/0xb0 [<0000000000197f94>] cpu_startup_entry+0x13c/0x1f8 [<0000000000114782>] smp_start_secondary+0xda/0xe8 [<00000000005d6efe>] restart_int_handler+0x56/0x6c [<0000000000000000>] 0x0 Last Breaking-Event-Address: [<00000000003bf12e>] arch_spin_lock_wait+0x56/0xb0 Suggested-by: Steffen Maier Signed-off-by: Benjamin Block Fixes: ea127f9754 ("[PATCH] s390 (7/7): zfcp host adapter.") (tglx/history.git) Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_dbf.h | 11 ++++++ drivers/s390/scsi/zfcp_reqlist.h | 30 ++++++++++++++++- drivers/s390/scsi/zfcp_scsi.c | 57 ++++++++++++++++++++++++++++++-- 3 files changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 440aa619da1..e7839ecaf33 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -388,4 +388,15 @@ void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) _zfcp_dbf_scsi(tmp_tag, 1, scmnd, NULL); } +/** + * zfcp_dbf_scsi_nullcmnd() - trace NULLify of SCSI command in dev/tgt-reset. + * @scmnd: SCSI command that was NULLified. + * @fsf_req: request that owned @scmnd. + */ +static inline void zfcp_dbf_scsi_nullcmnd(struct scsi_cmnd *scmnd, + struct zfcp_fsf_req *fsf_req) +{ + _zfcp_dbf_scsi("scfc__1", 3, scmnd, fsf_req); +} + #endif /* ZFCP_DBF_H */ diff --git a/drivers/s390/scsi/zfcp_reqlist.h b/drivers/s390/scsi/zfcp_reqlist.h index 7c2c6194dfc..703fce59bef 100644 --- a/drivers/s390/scsi/zfcp_reqlist.h +++ b/drivers/s390/scsi/zfcp_reqlist.h @@ -4,7 +4,7 @@ * Data structure and helper functions for tracking pending FSF * requests. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009, 2016 */ #ifndef ZFCP_REQLIST_H @@ -180,4 +180,32 @@ static inline void zfcp_reqlist_move(struct zfcp_reqlist *rl, spin_unlock_irqrestore(&rl->lock, flags); } +/** + * zfcp_reqlist_apply_for_all() - apply a function to every request. + * @rl: the requestlist that contains the target requests. + * @f: the function to apply to each request; the first parameter of the + * function will be the target-request; the second parameter is the same + * pointer as given with the argument @data. + * @data: freely chosen argument; passed through to @f as second parameter. + * + * Uses :c:macro:`list_for_each_entry` to iterate over the lists in the hash- + * table (not a 'safe' variant, so don't modify the list). + * + * Holds @rl->lock over the entire request-iteration. + */ +static inline void +zfcp_reqlist_apply_for_all(struct zfcp_reqlist *rl, + void (*f)(struct zfcp_fsf_req *, void *), void *data) +{ + struct zfcp_fsf_req *req; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&rl->lock, flags); + for (i = 0; i < ZFCP_REQ_LIST_BUCKETS; i++) + list_for_each_entry(req, &rl->buckets[i], list) + f(req, data); + spin_unlock_irqrestore(&rl->lock, flags); +} + #endif /* ZFCP_REQLIST_H */ diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 38ee0df633a..0bbc2a9a95c 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -3,7 +3,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #define KMSG_COMPONENT "zfcp" @@ -230,6 +230,57 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) return retval; } +struct zfcp_scsi_req_filter { + u8 tmf_scope; + u32 lun_handle; + u32 port_handle; +}; + +static void zfcp_scsi_forget_cmnd(struct zfcp_fsf_req *old_req, void *data) +{ + struct zfcp_scsi_req_filter *filter = + (struct zfcp_scsi_req_filter *)data; + + /* already aborted - prevent side-effects - or not a SCSI command */ + if (old_req->data == NULL || old_req->fsf_command != FSF_QTCB_FCP_CMND) + return; + + /* (tmf_scope == FCP_TMF_TGT_RESET || tmf_scope == FCP_TMF_LUN_RESET) */ + if (old_req->qtcb->header.port_handle != filter->port_handle) + return; + + if (filter->tmf_scope == FCP_TMF_LUN_RESET && + old_req->qtcb->header.lun_handle != filter->lun_handle) + return; + + zfcp_dbf_scsi_nullcmnd((struct scsi_cmnd *)old_req->data, old_req); + old_req->data = NULL; +} + +static void zfcp_scsi_forget_cmnds(struct zfcp_scsi_dev *zsdev, u8 tm_flags) +{ + struct zfcp_adapter *adapter = zsdev->port->adapter; + struct zfcp_scsi_req_filter filter = { + .tmf_scope = FCP_TMF_TGT_RESET, + .port_handle = zsdev->port->handle, + }; + unsigned long flags; + + if (tm_flags == FCP_TMF_LUN_RESET) { + filter.tmf_scope = FCP_TMF_LUN_RESET; + filter.lun_handle = zsdev->lun_handle; + } + + /* + * abort_lock secures against other processings - in the abort-function + * and normal cmnd-handler - of (struct zfcp_fsf_req *)->data + */ + write_lock_irqsave(&adapter->abort_lock, flags); + zfcp_reqlist_apply_for_all(adapter->req_list, zfcp_scsi_forget_cmnd, + &filter); + write_unlock_irqrestore(&adapter->abort_lock, flags); +} + static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) { struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(scpnt->device); @@ -262,8 +313,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags); retval = FAILED; - } else + } else { zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags); + zfcp_scsi_forget_cmnds(zfcp_sdev, tm_flags); + } zfcp_fsf_req_free(fsf_req); return retval; From 5d35a963117251365fb9fd6a95beb3f91a9f72cf Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 9 Dec 2016 17:16:32 +0100 Subject: [PATCH 032/252] scsi: zfcp: do not trace pure benign residual HBA responses at default level commit 56d23ed7adf3974f10e91b643bd230e9c65b5f79 upstream. Since quite a while, Linux issues enough SCSI commands per scsi_device which successfully return with FCP_RESID_UNDER, FSF_FCP_RSP_AVAILABLE, and SAM_STAT_GOOD. This floods the HBA trace area and we cannot see other and important HBA trace records long enough. Therefore, do not trace HBA response errors for pure benign residual under counts at the default trace level. This excludes benign residual under count combined with other validity bits set in FCP_RSP_IU, such as FCP_SNS_LEN_VAL. For all those other cases, we still do want to see both the HBA record and the corresponding SCSI record by default. Signed-off-by: Steffen Maier Fixes: a54ca0f62f95 ("[SCSI] zfcp: Redesign of the debug tracing for HBA records.") Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_dbf.h | 30 ++++++++++++++++++++++++++++-- drivers/s390/scsi/zfcp_fsf.h | 3 ++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index e7839ecaf33..a8165f14255 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -2,7 +2,7 @@ * zfcp device driver * debug feature declarations * - * Copyright IBM Corp. 2008, 2015 + * Copyright IBM Corp. 2008, 2016 */ #ifndef ZFCP_DBF_H @@ -283,6 +283,30 @@ struct zfcp_dbf { struct zfcp_dbf_scsi scsi_buf; }; +/** + * zfcp_dbf_hba_fsf_resp_suppress - true if we should not trace by default + * @req: request that has been completed + * + * Returns true if FCP response with only benign residual under count. + */ +static inline +bool zfcp_dbf_hba_fsf_resp_suppress(struct zfcp_fsf_req *req) +{ + struct fsf_qtcb *qtcb = req->qtcb; + u32 fsf_stat = qtcb->header.fsf_status; + struct fcp_resp *fcp_rsp; + u8 rsp_flags, fr_status; + + if (qtcb->prefix.qtcb_type != FSF_IO_COMMAND) + return false; /* not an FCP response */ + fcp_rsp = (struct fcp_resp *)&qtcb->bottom.io.fcp_rsp; + rsp_flags = fcp_rsp->fr_flags; + fr_status = fcp_rsp->fr_status; + return (fsf_stat == FSF_FCP_RSP_AVAILABLE) && + (rsp_flags == FCP_RESID_UNDER) && + (fr_status == SAM_STAT_GOOD); +} + static inline void zfcp_dbf_hba_fsf_resp(char *tag, int level, struct zfcp_fsf_req *req) { @@ -304,7 +328,9 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req) zfcp_dbf_hba_fsf_resp("fs_perr", 1, req); } else if (qtcb->header.fsf_status != FSF_GOOD) { - zfcp_dbf_hba_fsf_resp("fs_ferr", 1, req); + zfcp_dbf_hba_fsf_resp("fs_ferr", + zfcp_dbf_hba_fsf_resp_suppress(req) + ? 5 : 1, req); } else if ((req->fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) || (req->fsf_command == FSF_QTCB_OPEN_LUN)) { diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index 8cad41ffb6b..358b92ece8d 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -3,7 +3,7 @@ * * Interface to the FSF support functions. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #ifndef FSF_H @@ -86,6 +86,7 @@ #define FSF_APP_TAG_CHECK_FAILURE 0x00000082 #define FSF_REF_TAG_CHECK_FAILURE 0x00000083 #define FSF_ADAPTER_STATUS_AVAILABLE 0x000000AD +#define FSF_FCP_RSP_AVAILABLE 0x000000AF #define FSF_UNKNOWN_COMMAND 0x000000E2 #define FSF_UNKNOWN_OP_SUBTYPE 0x000000E3 #define FSF_INVALID_COMMAND_OPTION 0x000000E5 From 1c2287b99f5c97cb692c7e30b856e9166d713e1f Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 9 Dec 2016 17:16:33 +0100 Subject: [PATCH 033/252] scsi: zfcp: fix rport unblock race with LUN recovery commit 6f2ce1c6af37191640ee3ff6e8fc39ea10352f4c upstream. It is unavoidable that zfcp_scsi_queuecommand() has to finish requests with DID_IMM_RETRY (like fc_remote_port_chkready()) during the time window when zfcp detected an unavailable rport but fc_remote_port_delete(), which is asynchronous via zfcp_scsi_schedule_rport_block(), has not yet blocked the rport. However, for the case when the rport becomes available again, we should prevent unblocking the rport too early. In contrast to other FCP LLDDs, zfcp has to open each LUN with the FCP channel hardware before it can send I/O to a LUN. So if a port already has LUNs attached and we unblock the rport just after port recovery, recoveries of LUNs behind this port can still be pending which in turn force zfcp_scsi_queuecommand() to unnecessarily finish requests with DID_IMM_RETRY. This also opens a time window with unblocked rport (until the followup LUN reopen recovery has finished). If a scsi_cmnd timeout occurs during this time window fc_timed_out() cannot work as desired and such command would indeed time out and trigger scsi_eh. This prevents a clean and timely path failover. This should not happen if the path issue can be recovered on FC transport layer such as path issues involving RSCNs. Fix this by only calling zfcp_scsi_schedule_rport_register(), to asynchronously trigger fc_remote_port_add(), after all LUN recoveries as children of the rport have finished and no new recoveries of equal or higher order were triggered meanwhile. Finished intentionally includes any recovery result no matter if successful or failed (still unblock rport so other successful LUNs work). For simplicity, we check after each finished LUN recovery if there is another LUN recovery pending on the same port and then do nothing. We handle the special case of a successful recovery of a port without LUN children the same way without changing this case's semantics. For debugging we introduce 2 new trace records written if the rport unblock attempt was aborted due to still unfinished or freshly triggered recovery. The records are only written above the default trace level. Benjamin noticed the important special case of new recovery that can be triggered between having given up the erp_lock and before calling zfcp_erp_action_cleanup() within zfcp_erp_strategy(). We must avoid the following sequence: ERP thread rport_work other context ------------------------- -------------- -------------------------------- port is unblocked, rport still blocked, due to pending/running ERP action, so ((port->status & ...UNBLOCK) != 0) and (port->rport == NULL) unlock ERP zfcp_erp_action_cleanup() case ZFCP_ERP_ACTION_REOPEN_LUN: zfcp_erp_try_rport_unblock() ((status & ...UNBLOCK) != 0) [OLD!] zfcp_erp_port_reopen() lock ERP zfcp_erp_port_block() port->status clear ...UNBLOCK unlock ERP zfcp_scsi_schedule_rport_block() port->rport_task = RPORT_DEL queue_work(rport_work) zfcp_scsi_rport_work() (port->rport_task != RPORT_ADD) port->rport_task = RPORT_NONE zfcp_scsi_rport_block() if (!port->rport) return zfcp_scsi_schedule_rport_register() port->rport_task = RPORT_ADD queue_work(rport_work) zfcp_scsi_rport_work() (port->rport_task == RPORT_ADD) port->rport_task = RPORT_NONE zfcp_scsi_rport_register() (port->rport == NULL) rport = fc_remote_port_add() port->rport = rport; Now the rport was erroneously unblocked while the zfcp_port is blocked. This is another situation we want to avoid due to scsi_eh potential. This state would at least remain until the new recovery from the other context finished successfully, or potentially forever if it failed. In order to close this race, we take the erp_lock inside zfcp_erp_try_rport_unblock() when checking the status of zfcp_port or LUN. With that, the possible corresponding rport state sequences would be: (unblock[ERP thread],block[other context]) if the ERP thread gets erp_lock first and still sees ((port->status & ...UNBLOCK) != 0), (block[other context],NOP[ERP thread]) if the ERP thread gets erp_lock after the other context has already cleard ...UNBLOCK from port->status. Since checking fields of struct erp_action is unsafe because they could have been overwritten (re-used for new recovery) meanwhile, we only check status of zfcp_port and LUN since these are only changed under erp_lock elsewhere. Regarding the check of the proper status flags (port or port_forced are similar to the shown adapter recovery): [zfcp_erp_adapter_shutdown()] zfcp_erp_adapter_reopen() zfcp_erp_adapter_block() * clear UNBLOCK ---------------------------------------+ zfcp_scsi_schedule_rports_block() | write_lock_irqsave(&adapter->erp_lock, flags);-------+ | zfcp_erp_action_enqueue() | | zfcp_erp_setup_act() | | * set ERP_INUSE -----------------------------------|--|--+ write_unlock_irqrestore(&adapter->erp_lock, flags);--+ | | .context-switch. | | zfcp_erp_thread() | | zfcp_erp_strategy() | | write_lock_irqsave(&adapter->erp_lock, flags);------+ | | ... | | | zfcp_erp_strategy_check_target() | | | zfcp_erp_strategy_check_adapter() | | | zfcp_erp_adapter_unblock() | | | * set UNBLOCK -----------------------------------|--+ | zfcp_erp_action_dequeue() | | * clear ERP_INUSE ---------------------------------|-----+ ... | write_unlock_irqrestore(&adapter->erp_lock, flags);-+ Hence, we should check for both UNBLOCK and ERP_INUSE because they are interleaved. Also we need to explicitly check ERP_FAILED for the link down case which currently does not clear the UNBLOCK flag in zfcp_fsf_link_down_info_eval(). Signed-off-by: Steffen Maier Fixes: 8830271c4819 ("[SCSI] zfcp: Dont fail SCSI commands when transitioning to blocked fc_rport") Fixes: a2fa0aede07c ("[SCSI] zfcp: Block FC transport rports early on errors") Fixes: 5f852be9e11d ("[SCSI] zfcp: Fix deadlock between zfcp ERP and SCSI") Fixes: 338151e06608 ("[SCSI] zfcp: make use of fc_remote_port_delete when target port is unavailable") Fixes: 3859f6a248cb ("[PATCH] zfcp: add rports to enable scsi_add_device to work again") Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_dbf.c | 17 ++++++++-- drivers/s390/scsi/zfcp_erp.c | 61 +++++++++++++++++++++++++++++++++-- drivers/s390/scsi/zfcp_ext.h | 4 ++- drivers/s390/scsi/zfcp_scsi.c | 4 +-- 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index c846a63ea67..bf13e73ecab 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -282,11 +282,12 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter, /** - * zfcp_dbf_rec_run - trace event related to running recovery + * zfcp_dbf_rec_run_lvl - trace event related to running recovery + * @level: trace level to be used for event * @tag: identifier for event * @erp: erp_action running */ -void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) +void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp) { struct zfcp_dbf *dbf = erp->adapter->dbf; struct zfcp_dbf_rec *rec = &dbf->rec_buf; @@ -312,10 +313,20 @@ void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) else rec->u.run.rec_count = atomic_read(&erp->adapter->erp_counter); - debug_event(dbf->rec, 1, rec, sizeof(*rec)); + debug_event(dbf->rec, level, rec, sizeof(*rec)); spin_unlock_irqrestore(&dbf->rec_lock, flags); } +/** + * zfcp_dbf_rec_run - trace event related to running recovery + * @tag: identifier for event + * @erp: erp_action running + */ +void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) +{ + zfcp_dbf_rec_run_lvl(1, tag, erp); +} + /** * zfcp_dbf_rec_run_wka - trace wka port event with info like running recovery * @tag: identifier for event diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index b4cd26d2415..f7e720e1109 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -3,7 +3,7 @@ * * Error Recovery Procedures (ERP). * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #define KMSG_COMPONENT "zfcp" @@ -1212,6 +1212,62 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action) } } +/** + * zfcp_erp_try_rport_unblock - unblock rport if no more/new recovery + * @port: zfcp_port whose fc_rport we should try to unblock + */ +static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) +{ + unsigned long flags; + struct zfcp_adapter *adapter = port->adapter; + int port_status; + struct Scsi_Host *shost = adapter->scsi_host; + struct scsi_device *sdev; + + write_lock_irqsave(&adapter->erp_lock, flags); + port_status = atomic_read(&port->status); + if ((port_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 || + (port_status & (ZFCP_STATUS_COMMON_ERP_INUSE | + ZFCP_STATUS_COMMON_ERP_FAILED)) != 0) { + /* new ERP of severity >= port triggered elsewhere meanwhile or + * local link down (adapter erp_failed but not clear unblock) + */ + zfcp_dbf_rec_run_lvl(4, "ertru_p", &port->erp_action); + write_unlock_irqrestore(&adapter->erp_lock, flags); + return; + } + spin_lock(shost->host_lock); + __shost_for_each_device(sdev, shost) { + struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); + int lun_status; + + if (zsdev->port != port) + continue; + /* LUN under port of interest */ + lun_status = atomic_read(&zsdev->status); + if ((lun_status & ZFCP_STATUS_COMMON_ERP_FAILED) != 0) + continue; /* unblock rport despite failed LUNs */ + /* LUN recovery not given up yet [maybe follow-up pending] */ + if ((lun_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 || + (lun_status & ZFCP_STATUS_COMMON_ERP_INUSE) != 0) { + /* LUN blocked: + * not yet unblocked [LUN recovery pending] + * or meanwhile blocked [new LUN recovery triggered] + */ + zfcp_dbf_rec_run_lvl(4, "ertru_l", &zsdev->erp_action); + spin_unlock(shost->host_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); + return; + } + } + /* now port has no child or all children have completed recovery, + * and no ERP of severity >= port was meanwhile triggered elsewhere + */ + zfcp_scsi_schedule_rport_register(port); + spin_unlock(shost->host_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) { struct zfcp_adapter *adapter = act->adapter; @@ -1222,6 +1278,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) case ZFCP_ERP_ACTION_REOPEN_LUN: if (!(act->status & ZFCP_STATUS_ERP_NO_REF)) scsi_device_put(sdev); + zfcp_erp_try_rport_unblock(port); break; case ZFCP_ERP_ACTION_REOPEN_PORT: @@ -1232,7 +1289,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) */ if (act->step != ZFCP_ERP_STEP_UNINITIALIZED) if (result == ZFCP_ERP_SUCCEEDED) - zfcp_scsi_schedule_rport_register(port); + zfcp_erp_try_rport_unblock(port); /* fall through */ case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: put_device(&port->dev); diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 01527c31d1d..fdef6a6fe06 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -3,7 +3,7 @@ * * External function declarations. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #ifndef ZFCP_EXT_H @@ -49,6 +49,8 @@ extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *); extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *, struct zfcp_port *, struct scsi_device *, u8, u8); extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *); +extern void zfcp_dbf_rec_run_lvl(int level, char *tag, + struct zfcp_erp_action *erp); extern void zfcp_dbf_rec_run_wka(char *, struct zfcp_fc_wka_port *, u64); extern void zfcp_dbf_hba_fsf_uss(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_hba_fsf_res(char *, int, struct zfcp_fsf_req *); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 0bbc2a9a95c..66c37e77ac7 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -109,9 +109,7 @@ int zfcp_scsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scpnt) } if (unlikely(!(status & ZFCP_STATUS_COMMON_UNBLOCKED))) { - /* This could be either - * open LUN pending: this is temporary, will result in - * open LUN or ERP_FAILED, so retry command + /* This could be * call to rport_delete pending: mimic retry from * fc_remote_port_chkready until rport is BLOCKED */ From 882f8f11c325c3873f41fe187bbf904f401dca76 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 8 Dec 2016 12:48:26 -0500 Subject: [PATCH 034/252] ftrace/x86_32: Set ftrace_stub to weak to prevent gcc from using short jumps to it commit 847fa1a6d3d00f3bdf68ef5fa4a786f644a0dd67 upstream. With new binutils, gcc may get smart with its optimization and change a jmp from a 5 byte jump to a 2 byte one even though it was jumping to a global function. But that global function existed within a 2 byte radius, and gcc was able to optimize it. Unfortunately, that jump was also being modified when function graph tracing begins. Since ftrace expected that jump to be 5 bytes, but it was only two, it overwrote code after the jump, causing a crash. This was fixed for x86_64 with commit 8329e818f149, with the same subject as this commit, but nothing was done for x86_32. Fixes: d61f82d06672 ("ftrace: use dynamic patching for updating mcount calls") Reported-by: Colin Ian King Tested-by: Colin Ian King Signed-off-by: Steven Rostedt Signed-off-by: Willy Tarreau --- arch/x86/kernel/entry_32.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5c38e2b298c..c502340ef27 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1103,8 +1103,8 @@ ftrace_graph_call: jmp ftrace_stub #endif -.globl ftrace_stub -ftrace_stub: +/* This is weak to keep gas from relaxing the jumps */ +WEAK(ftrace_stub) ret END(ftrace_caller) From 67ae25b4d158d01576b5e545643c8ad4ff84c0fd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 Nov 2016 10:21:17 -0800 Subject: [PATCH 035/252] IB/mad: Fix an array index check commit 2fe2f378dd45847d2643638c07a7658822087836 upstream. The array ib_mad_mgmt_class_table.method_table has MAX_MGMT_CLASS (80) elements. Hence compare the array index with that value instead of with IB_MGMT_MAX_METHODS (128). This patch avoids that Coverity reports the following: Overrunning array class->method_table of 80 8-byte elements at element index 127 (byte offset 1016) using index convert_mgmt_class(mad_hdr->mgmt_class) (which evaluates to 127). Fixes: commit b7ab0b19a85f ("IB/mad: Verify mgmt class in received MADs") Signed-off-by: Bart Van Assche Cc: Sean Hefty Reviewed-by: Hal Rosenstock Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index dc3fd1e8af0..200f6c10eee 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1598,7 +1598,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, if (!class) goto out; if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= - IB_MGMT_MAX_METHODS) + ARRAY_SIZE(class->method_table)) goto out; method = class->method_table[convert_mgmt_class( mad->mad_hdr.mgmt_class)]; From 029555a52a2bcc709ed6ad6b1c8b208f53011f9f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 Nov 2016 10:22:17 -0800 Subject: [PATCH 036/252] IB/multicast: Check ib_find_pkey() return value commit d3a2418ee36a59bc02e9d454723f3175dcf4bfd9 upstream. This patch avoids that Coverity complains about not checking the ib_find_pkey() return value. Fixes: commit 547af76521b3 ("IB/multicast: Report errors on multicast groups if P_key changes") Signed-off-by: Bart Van Assche Cc: Sean Hefty Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/core/multicast.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 180d7f436ed..2f861b59cbc 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -516,8 +516,11 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, if (status) process_join_error(group, status); else { - ib_find_pkey(group->port->dev->device, group->port->port_num, - be16_to_cpu(rec->pkey), &pkey_index); + + if (ib_find_pkey(group->port->dev->device, + group->port->port_num, be16_to_cpu(rec->pkey), + &pkey_index)) + pkey_index = MCAST_INVALID_PKEY_INDEX; spin_lock_irq(&group->port->lock); group->rec = *rec; From ecf33babd51448a7390dff6adb17cbf8e196383c Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Thu, 6 Oct 2016 13:42:19 +0000 Subject: [PATCH 037/252] powerpc: Convert cmp to cmpd in idle enter sequence commit 80f23935cadb1c654e81951f5a8b7ceae0acc1b4 upstream. PowerPC's "cmp" instruction has four operands. Normally people write "cmpw" or "cmpd" for the second cmp operand 0 or 1. But, frequently people forget, and write "cmp" with just three operands. With older binutils this is silently accepted as if this was "cmpw", while often "cmpd" is wanted. With newer binutils GAS will complain about this for 64-bit code. For 32-bit code it still silently assumes "cmpw" is what is meant. In this instance the code comes directly from ISA v2.07, including the cmp, but cmpd is correct. Backport to stable so that new toolchains can build old kernels. Fixes: 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode") Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Segher Boessenkool Signed-off-by: Michael Ellerman Signed-off-by: Joel Stanley Signed-off-by: Willy Tarreau --- arch/powerpc/kernel/idle_power7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index e11863f4e59..ccef1728a4c 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -94,7 +94,7 @@ _GLOBAL(power7_nap) std r0,0(r1) ptesync ld r0,0(r1) -1: cmp cr0,r0,r0 +1: cmpd cr0,r0,r0 bne 1b PPC_NAP b . From 7e3f800640d31cdaf12397ab30c6ba8e954f149e Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Tue, 20 Dec 2016 19:52:16 +0100 Subject: [PATCH 038/252] usb: gadget: composite: Test get_alt() presence instead of set_alt() commit 7e4da3fcf7c9fe042f2f7cb7bf23861a899b4a8f upstream. By convention (according to doc) if function does not provide get_alt() callback composite framework should assume that it has only altsetting 0 and should respond with error if host tries to set other one. After commit dd4dff8b035f ("USB: composite: Fix bug: should test set_alt function pointer before use it") we started checking set_alt() callback instead of get_alt(). This check is useless as we check if set_alt() is set inside usb_add_function() and fail if it's NULL. Let's fix this check and move comment about why we check the get method instead of set a little bit closer to prevent future false fixes. Fixes: dd4dff8b035f ("USB: composite: Fix bug: should test set_alt function pointer before use it") Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/gadget/composite.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index a660716f933..584e43c9748 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1320,9 +1320,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) value = min(w_length, (u16) 1); break; - /* function drivers must handle get/set altsetting; if there's - * no get() method, we know only altsetting zero works. - */ + /* function drivers must handle get/set altsetting */ case USB_REQ_SET_INTERFACE: if (ctrl->bRequestType != USB_RECIP_INTERFACE) goto unknown; @@ -1331,7 +1329,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) f = cdev->config->interface[intf]; if (!f) break; - if (w_value && !f->set_alt) + + /* + * If there's no get_alt() method, we know only altsetting zero + * works. There is no need to check if set_alt() is not NULL + * as we check this in usb_add_function(). + */ + if (w_value && !f->get_alt) break; value = f->set_alt(f, w_index, w_value); if (value == USB_GADGET_DELAYED_STATUS) { From c15d09cfa3c4f3c087addc5c6c1e0856e7219264 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:58 +0100 Subject: [PATCH 039/252] USB: serial: omninet: fix NULL-derefs at open and disconnect commit a5bc01949e3b19d8a23b5eabc6fc71bb50dc820e upstream. Fix NULL-pointer dereferences at open() and disconnect() should the device lack the expected bulk-out endpoints: Unable to handle kernel NULL pointer dereference at virtual address 000000b4 ... [c0170ff0>] (__lock_acquire) from [] (lock_acquire+0x108/0x264) [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x58/0x6c) [] (_raw_spin_lock_irqsave) from [] (tty_port_tty_set+0x28/0xa4) [] (tty_port_tty_set) from [] (omninet_open+0x30/0x40 [omninet]) [] (omninet_open [omninet]) from [] (serial_port_activate+0x68/0x98 [usbserial]) Unable to handle kernel NULL pointer dereference at virtual address 00000234 ... [] (omninet_disconnect [omninet]) from [] (usb_serial_disconnect+0xe4/0x100 [usbserial]) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/omninet.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index 5739bf6f720..24720f65638 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -39,6 +39,7 @@ static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int omninet_write_room(struct tty_struct *tty); static void omninet_disconnect(struct usb_serial *serial); +static int omninet_attach(struct usb_serial *serial); static int omninet_port_probe(struct usb_serial_port *port); static int omninet_port_remove(struct usb_serial_port *port); @@ -57,6 +58,7 @@ static struct usb_serial_driver zyxel_omninet_device = { .description = "ZyXEL - omni.net lcd plus usb", .id_table = id_table, .num_ports = 1, + .attach = omninet_attach, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, .open = omninet_open, @@ -105,6 +107,17 @@ struct omninet_data { __u8 od_outseq; /* Sequence number for bulk_out URBs */ }; +static int omninet_attach(struct usb_serial *serial) +{ + /* The second bulk-out endpoint is used for writing. */ + if (serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int omninet_port_probe(struct usb_serial_port *port) { struct omninet_data *od; From 4f2b2d85f1f6418e5f408d8e011ebd7394963be4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:40:01 +0100 Subject: [PATCH 040/252] USB: serial: quatech2: fix sleep-while-atomic in close commit f09d1886a41e9063b43da493ef0e845ac8afd2fa upstream. The write URB was being killed using the synchronous interface while holding a spin lock in close(). Simply drop the lock and busy-flag update, something which would have been taken care of by the completion handler if the URB was in flight. Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/quatech2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 13824b5ca34..ecd0a84ffc0 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -408,16 +408,12 @@ static void qt2_close(struct usb_serial_port *port) { struct usb_serial *serial; struct qt2_port_private *port_priv; - unsigned long flags; int i; serial = port->serial; port_priv = usb_get_serial_port_data(port); - spin_lock_irqsave(&port_priv->urb_lock, flags); usb_kill_urb(port_priv->write_urb); - port_priv->urb_in_use = false; - spin_unlock_irqrestore(&port_priv->urb_lock, flags); /* flush the port transmit buffer */ i = usb_control_msg(serial->dev, From 18b787dc3785f8ddf8f9702bbcf4a28111301af0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:40:00 +0100 Subject: [PATCH 041/252] USB: serial: pl2303: fix NULL-deref at open commit 76ab439ed1b68778e9059c79ecc5d14de76c89a8 upstream. Fix NULL-pointer dereference in open() should a type-0 or type-1 device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at pl2303_open+0x38/0xec [pl2303] Note that a missing interrupt-in endpoint would have caused open() to fail. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/pl2303.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 33313caed50..f496c38d539 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -175,9 +175,17 @@ static int pl2303_vendor_write(__u16 value, __u16 index, static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; + unsigned char num_ports = serial->num_ports; enum pl2303_type type = type_0; unsigned char *buf; + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); if (!spriv) return -ENOMEM; From 1df7c6638a001bbb608e25deb7ce816bd6ffea8a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:48 +0100 Subject: [PATCH 042/252] USB: serial: keyspan_pda: verify endpoints at probe commit 5d9b0f859babe96175cd33d7162a9463a875ffde upstream. Check for the expected endpoints in attach() and fail loudly if not present. Note that failing to do this appears to be benign since da280e348866 ("USB: keyspan_pda: clean up write-urb busy handling") which prevents a NULL-pointer dereference in write() by never marking a non-existent write-urb as free. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/keyspan_pda.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index 5f1d382e55c..05c567bf5cf 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -697,6 +697,19 @@ MODULE_FIRMWARE("keyspan_pda/keyspan_pda.fw"); MODULE_FIRMWARE("keyspan_pda/xircom_pgs.fw"); #endif +static int keyspan_pda_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int keyspan_pda_port_probe(struct usb_serial_port *port) { @@ -774,6 +787,7 @@ static struct usb_serial_driver keyspan_pda_device = { .break_ctl = keyspan_pda_break_ctl, .tiocmget = keyspan_pda_tiocmget, .tiocmset = keyspan_pda_tiocmset, + .attach = keyspan_pda_attach, .port_probe = keyspan_pda_port_probe, .port_remove = keyspan_pda_port_remove, }; From 724449745310fd8e2d14798a3bb9829945050bfd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:40:02 +0100 Subject: [PATCH 043/252] USB: serial: spcp8x5: fix NULL-deref at open commit cc0909248258f679c4bb4cd315565d40abaf6bc6 upstream. Fix NULL-pointer dereference in open() should the device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at spcp8x5_open+0x30/0xd0 [spcp8x5] Fixes: 619a6f1d1423 ("USB: add usb-serial spcp8x5 driver") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/spcp8x5.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 1694d4ff163..20f00bc1518 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -155,6 +155,19 @@ static int spcp8x5_probe(struct usb_serial *serial, return 0; } +static int spcp8x5_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int spcp8x5_port_probe(struct usb_serial_port *port) { const struct usb_device_id *id = usb_get_serial_data(port->serial); @@ -479,6 +492,7 @@ static struct usb_serial_driver spcp8x5_device = { .tiocmget = spcp8x5_tiocmget, .tiocmset = spcp8x5_tiocmset, .probe = spcp8x5_probe, + .attach = spcp8x5_attach, .port_probe = spcp8x5_port_probe, .port_remove = spcp8x5_port_remove, }; From 0d90b3d7a236f857fb7f0f98765dba0f49f0a63e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:43 +0100 Subject: [PATCH 044/252] USB: serial: io_ti: fix NULL-deref at open commit a323fefc6f5079844dc62ffeb54f491d0242ca35 upstream. Fix NULL-pointer dereference when clearing halt at open should a malicious device lack the expected endpoints when in download mode. Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... [] (edge_open [io_ti]) from [] (serial_port_activate+0x68/0x98 [usbserial]) [] (serial_port_activate [usbserial]) from [] (tty_port_open+0x9c/0xe8) [] (tty_port_open) from [] (serial_open+0x48/0x6c [usbserial]) [] (serial_open [usbserial]) from [] (tty_open+0xcc/0x5cc) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/io_ti.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 8cd6479a8b4..fa0ca9ed07c 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2433,6 +2433,13 @@ static int edge_startup(struct usb_serial *serial) struct edgeport_serial *edge_serial; int status; + /* Make sure we have the required endpoints when in download mode. */ + if (serial->interface->cur_altsetting->desc.bNumEndpoints > 1) { + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) + return -ENODEV; + } + /* create our private serial structure */ edge_serial = kzalloc(sizeof(struct edgeport_serial), GFP_KERNEL); if (edge_serial == NULL) { From 7d04b5c68a8777d5d8ad013ded167cecdb4a57bf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:44 +0100 Subject: [PATCH 045/252] USB: serial: io_ti: fix another NULL-deref at open commit 4f9785cc99feeb3673993b471f646b4dbaec2cc1 upstream. In case a device is left in "boot-mode" we must not register any port devices in order to avoid a NULL-pointer dereference on open due to missing endpoints. This could be used by a malicious device to trigger an OOPS: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... [] (edge_open [io_ti]) from [] (serial_port_activate+0x68/0x98 [usbserial]) [] (serial_port_activate [usbserial]) from [] (tty_port_open+0x9c/0xe8) [] (tty_port_open) from [] (serial_open+0x48/0x6c [usbserial]) [] (serial_open [usbserial]) from [] (tty_open+0xcc/0x5cc) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/io_ti.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index fa0ca9ed07c..e1b3e79b707 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1402,7 +1402,7 @@ stayinbootmode: dev_dbg(dev, "%s - STAYING IN BOOT MODE\n", __func__); serial->product_info.TiMode = TI_MODE_BOOT; - return 0; + return 1; } @@ -2451,11 +2451,14 @@ static int edge_startup(struct usb_serial *serial) usb_set_serial_data(serial, edge_serial); status = download_fw(edge_serial); - if (status) { + if (status < 0) { kfree(edge_serial); return status; } + if (status > 0) + return 1; /* bind but do not register any ports */ + return 0; } From e3c79a80513f5797daca87fa1415c73366b6378a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:47 +0100 Subject: [PATCH 046/252] USB: serial: iuu_phoenix: fix NULL-deref at open commit 90507d54f712d81b74815ef3a4bbb555cd9fab2f upstream. Fix NULL-pointer dereference at open should the device lack a bulk-in or bulk-out endpoint: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at iuu_open+0x78/0x59c [iuu_phoenix] Fixes: 07c3b1a10016 ("USB: remove broken usb-serial num_endpoints check") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/iuu_phoenix.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 790673e5faa..eadab621361 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -69,6 +69,16 @@ struct iuu_private { u32 clk; }; +static int iuu_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || serial->num_bulk_out < num_ports) + return -ENODEV; + + return 0; +} + static int iuu_port_probe(struct usb_serial_port *port) { struct iuu_private *priv; @@ -1199,6 +1209,7 @@ static struct usb_serial_driver iuu_device = { .tiocmset = iuu_tiocmset, .set_termios = iuu_set_termios, .init_termios = iuu_init_termios, + .attach = iuu_attach, .port_probe = iuu_port_probe, .port_remove = iuu_port_remove, }; From a5d48a129a29778692437f1eaf31e34ed89a6838 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:41 +0100 Subject: [PATCH 047/252] USB: serial: garmin_gps: fix memory leak on failed URB submit commit c4ac4496e835b78a45dfbf74f6173932217e4116 upstream. Make sure to free the URB transfer buffer in case submission fails (e.g. due to a disconnect). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/garmin_gps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index b110c573ea8..ea9c4f4aea3 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -1049,6 +1049,7 @@ static int garmin_write_bulk(struct usb_serial_port *port, "%s - usb_submit_urb(write bulk) failed with status = %d\n", __func__, status); count = status; + kfree(buffer); } /* we are done with this urb, so let the host driver From 52bc1e3bc277d064880d8b2e0e1d51d92598658c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:40:03 +0100 Subject: [PATCH 048/252] USB: serial: ti_usb_3410_5052: fix NULL-deref at open commit ef079936d3cd09e63612834fe2698eeada0d8e3f upstream. Fix NULL-pointer dereference in open() should a malicious device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 .. [] (ti_open [ti_usb_3410_5052]) from [] (serial_port_activate+0x68/0x98 [usbserial]) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ti_usb_3410_5052.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 0a7c68fa5e5..1ccf221d842 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -401,6 +401,13 @@ static int ti_startup(struct usb_serial *serial) goto free_tdev; } + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + status = -ENODEV; + goto free_tdev; + } + return 0; free_tdev: From be7c03191e44efd3c4325abb43991a1f3c1c5cbb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:42 +0100 Subject: [PATCH 049/252] USB: serial: io_edgeport: fix NULL-deref at open commit 0dd408425eb21ddf26a692b3c8044c9e7d1a7948 upstream. Fix NULL-pointer dereference when initialising URBs at open should a non-EPIC device lack a bulk-in or interrupt-in endpoint. Unable to handle kernel NULL pointer dereference at virtual address 00000028 ... PC is at edge_open+0x24c/0x3e8 [io_edgeport] Note that the EPIC-device probe path has the required sanity checks so this makes those checks partially redundant. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/io_edgeport.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index c574d312f1f..9f24fd776ec 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2795,6 +2795,11 @@ static int edge_startup(struct usb_serial *serial) EDGE_COMPATIBILITY_MASK1, EDGE_COMPATIBILITY_MASK2 }; + if (serial->num_bulk_in < 1 || serial->num_interrupt_in < 1) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + dev = serial->dev; /* create our private serial structure */ From 7449b9878c119883849b80c70ea6ffbcf7d125d0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:59 +0100 Subject: [PATCH 050/252] USB: serial: oti6858: fix NULL-deref at open commit 5afeef2366db14587b65558bbfd5a067542e07fb upstream. Fix NULL-pointer dereference in open() should the device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at oti6858_open+0x30/0x1d0 [oti6858] Note that a missing interrupt-in endpoint would have caused open() to fail. Fixes: 49cdee0ed0fc ("USB: oti6858 usb-serial driver (in Nokia CA-42 cable)") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/oti6858.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index 7e3e0782e51..ff83d87ed92 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -135,6 +135,7 @@ static int oti6858_tiocmget(struct tty_struct *tty); static int oti6858_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); static int oti6858_tiocmiwait(struct tty_struct *tty, unsigned long arg); +static int oti6858_attach(struct usb_serial *serial); static int oti6858_port_probe(struct usb_serial_port *port); static int oti6858_port_remove(struct usb_serial_port *port); @@ -159,6 +160,7 @@ static struct usb_serial_driver oti6858_device = { .write_bulk_callback = oti6858_write_bulk_callback, .write_room = oti6858_write_room, .chars_in_buffer = oti6858_chars_in_buffer, + .attach = oti6858_attach, .port_probe = oti6858_port_probe, .port_remove = oti6858_port_remove, }; @@ -328,6 +330,20 @@ static void send_data(struct work_struct *work) usb_serial_port_softint(port); } +static int oti6858_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int oti6858_port_probe(struct usb_serial_port *port) { struct oti6858_private *priv; From ba35ae7a8c9da2d9b964fd75825ceb136e4db0a0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:40 +0100 Subject: [PATCH 051/252] USB: serial: cyberjack: fix NULL-deref at open commit 3dca01114dcecb1cf324534cd8d75fd1306a516b upstream. Fix NULL-pointer dereference when clearing halt at open should the device lack a bulk-out endpoint. Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at cyberjack_open+0x40/0x9c [cyberjack] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/cyberjack.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index 781426230d6..bb3c7f09f05 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -51,6 +51,7 @@ #define CYBERJACK_PRODUCT_ID 0x0100 /* Function prototypes */ +static int cyberjack_attach(struct usb_serial *serial); static int cyberjack_port_probe(struct usb_serial_port *port); static int cyberjack_port_remove(struct usb_serial_port *port); static int cyberjack_open(struct tty_struct *tty, @@ -78,6 +79,7 @@ static struct usb_serial_driver cyberjack_device = { .description = "Reiner SCT Cyberjack USB card reader", .id_table = id_table, .num_ports = 1, + .attach = cyberjack_attach, .port_probe = cyberjack_port_probe, .port_remove = cyberjack_port_remove, .open = cyberjack_open, @@ -101,6 +103,14 @@ struct cyberjack_private { short wrsent; /* Data already sent */ }; +static int cyberjack_attach(struct usb_serial *serial) +{ + if (serial->num_bulk_out < serial->num_ports) + return -ENODEV; + + return 0; +} + static int cyberjack_port_probe(struct usb_serial_port *port) { struct cyberjack_private *priv; From 810fb7b7bf8f3ccc4df6ef9a7c05d0a52a9d05ef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:49 +0100 Subject: [PATCH 052/252] USB: serial: kobil_sct: fix NULL-deref in write commit 21ce57840243c7b70fbc1ebd3dceeb70bb6e9e09 upstream. Fix NULL-pointer dereference in write() should the device lack the expected interrupt-out endpoint: Unable to handle kernel NULL pointer dereference at virtual address 00000054 ... PC is at kobil_write+0x144/0x2a0 [kobil_sct] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/kobil_sct.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index efa75b4e51f..63fa400a822 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -52,6 +52,7 @@ /* Function prototypes */ +static int kobil_attach(struct usb_serial *serial); static int kobil_port_probe(struct usb_serial_port *probe); static int kobil_port_remove(struct usb_serial_port *probe); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port); @@ -87,6 +88,7 @@ static struct usb_serial_driver kobil_device = { .description = "KOBIL USB smart card terminal", .id_table = id_table, .num_ports = 1, + .attach = kobil_attach, .port_probe = kobil_port_probe, .port_remove = kobil_port_remove, .ioctl = kobil_ioctl, @@ -114,6 +116,16 @@ struct kobil_private { }; +static int kobil_attach(struct usb_serial *serial) +{ + if (serial->num_interrupt_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing interrupt-out endpoint\n"); + return -ENODEV; + } + + return 0; +} + static int kobil_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; From e514b7137a922e048b67bd5918526b25be5707b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:55 +0100 Subject: [PATCH 053/252] USB: serial: mos7840: fix NULL-deref at open commit 5c75633ef751dd4cd8f443dc35152c1ae563162e upstream. Fix NULL-pointer dereference in open() should the device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at mos7840_open+0x88/0x8dc [mos7840] Note that we continue to treat the interrupt-in endpoint as optional for now. Fixes: 3f5429746d91 ("USB: Moschip 7840 USB-Serial Driver") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/mos7840.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 7df7df62e17..5041b6c5b2d 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -2255,6 +2255,17 @@ static int mos7840_calc_num_ports(struct usb_serial *serial) return mos7840_num_ports; } +static int mos7840_attach(struct usb_serial *serial) +{ + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int mos7840_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; @@ -2537,6 +2548,7 @@ static struct usb_serial_driver moschip7840_4port_device = { .tiocmset = mos7840_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, + .attach = mos7840_attach, .port_probe = mos7840_port_probe, .port_remove = mos7840_port_remove, .read_bulk_callback = mos7840_bulk_in_callback, From 5a896b11930c1c2ce638f73b8ca1af0342cb1e8c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:50 +0100 Subject: [PATCH 054/252] USB: serial: mos7720: fix NULL-deref at open commit b05aebc25fdc5aeeac3ee29f0dc9f58dd07c13cc upstream. Fix NULL-pointer dereference at port open if a device lacks the expected bulk in and out endpoints. Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... [] (mos7720_open [mos7720]) from [] (serial_port_activate+0x68/0x98 [usbserial]) [] (serial_port_activate [usbserial]) from [] (tty_port_open+0x9c/0xe8) [] (tty_port_open) from [] (serial_open+0x48/0x6c [usbserial]) [] (serial_open [usbserial]) from [] (tty_open+0xcc/0x5cc) Fixes: 0f64478cbc7a ("USB: add USB serial mos7720 driver") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/mos7720.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index ddc71d706ac..44936c6a998 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1917,6 +1917,11 @@ static int mos7720_startup(struct usb_serial *serial) u16 product; int ret_val; + if (serial->num_bulk_in < 2 || serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing bulk endpoints\n"); + return -ENODEV; + } + product = le16_to_cpu(serial->dev->descriptor.idProduct); dev = serial->dev; From 54cd03afa89fd41a8afcf2622520b5ae4b3a21c9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:51 +0100 Subject: [PATCH 055/252] USB: serial: mos7720: fix use-after-free on probe errors commit 91a1ff4d53c5184d383d0baeeaeab6f9736f2ff3 upstream. The interrupt URB was submitted on probe but never stopped on probe errors. This can lead to use-after-free issues in the completion handler when accessing the freed usb-serial struct: Unable to handle kernel paging request at virtual address 6b6b6be7 ... [] (mos7715_interrupt_callback [mos7720]) from [] (__usb_hcd_giveback_urb+0x80/0x140) [] (__usb_hcd_giveback_urb) from [] (usb_hcd_giveback_urb+0x50/0x138) [] (usb_hcd_giveback_urb) from [] (musb_giveback+0xc8/0x1cc) Fixes: b69578df7e98 ("USB: usbserial: mos7720: add support for parallel port on moschip 7715") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/mos7720.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 44936c6a998..6c19dbb2926 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1962,8 +1962,10 @@ static int mos7720_startup(struct usb_serial *serial) #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT if (product == MOSCHIP_DEVICE_ID_7715) { ret_val = mos7715_parport_init(serial); - if (ret_val < 0) + if (ret_val < 0) { + usb_kill_urb(serial->port[0]->interrupt_in_urb); return ret_val; + } } #endif /* LSR For Port 1 */ @@ -1975,6 +1977,8 @@ static int mos7720_startup(struct usb_serial *serial) static void mos7720_release(struct usb_serial *serial) { + usb_kill_urb(serial->port[0]->interrupt_in_urb); + #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT /* close the parallel port */ From 756f54d85f37d808d4f5d36f5dce559d11d2b06c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:52 +0100 Subject: [PATCH 056/252] USB: serial: mos7720: fix parport use-after-free on probe errors commit 75dd211e773afcbc264677b0749d1cf7d937ab2d upstream. Do not submit the interrupt URB until after the parport has been successfully registered to avoid another use-after-free in the completion handler when accessing the freed parport private data in case of a racing completion. Fixes: b69578df7e98 ("USB: usbserial: mos7720: add support for parallel port on moschip 7715") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/mos7720.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 6c19dbb2926..b35726c1f33 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1952,22 +1952,20 @@ static int mos7720_startup(struct usb_serial *serial) usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), (__u8)0x03, 0x00, 0x01, 0x00, NULL, 0x00, 5000); - /* start the interrupt urb */ - ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); - if (ret_val) - dev_err(&dev->dev, - "%s - Error %d submitting control urb\n", - __func__, ret_val); - #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT if (product == MOSCHIP_DEVICE_ID_7715) { ret_val = mos7715_parport_init(serial); - if (ret_val < 0) { - usb_kill_urb(serial->port[0]->interrupt_in_urb); + if (ret_val < 0) return ret_val; - } } #endif + /* start the interrupt urb */ + ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); + if (ret_val) { + dev_err(&dev->dev, "failed to submit interrupt urb: %d\n", + ret_val); + } + /* LSR For Port 1 */ read_mos_reg(serial, 0, LSR, &data); dev_dbg(&dev->dev, "LSR:%x\n", data); From 56a4835e8e26673f33217b99efa0b8f0dea15d07 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:53 +0100 Subject: [PATCH 057/252] USB: serial: mos7720: fix parallel probe commit fde1faf872ed86d88e245191bc15a8e57368cd1c upstream. A static usb-serial-driver structure that is used to initialise the interrupt URB was modified during probe depending on the currently probed device type, something which could break a parallel probe of a device of a different type. Fix this up by overriding the default completion callback for MCS7715 devices in attach() instead. We may want to use two usb-serial driver instances for the two types later. Fixes: fb088e335d78 ("USB: serial: add support for serial port on the moschip 7715") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/mos7720.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index b35726c1f33..2d1ad823b1a 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -66,8 +66,6 @@ struct moschip_port { struct urb *write_urb_pool[NUM_URBS]; }; -static struct usb_serial_driver moschip7720_2port_driver; - #define USB_VENDOR_ID_MOSCHIP 0x9710 #define MOSCHIP_DEVICE_ID_7720 0x7720 #define MOSCHIP_DEVICE_ID_7715 0x7715 @@ -966,25 +964,6 @@ static void mos7720_bulk_out_data_callback(struct urb *urb) tty_port_tty_wakeup(&mos7720_port->port->port); } -/* - * mos77xx_probe - * this function installs the appropriate read interrupt endpoint callback - * depending on whether the device is a 7720 or 7715, thus avoiding costly - * run-time checks in the high-frequency callback routine itself. - */ -static int mos77xx_probe(struct usb_serial *serial, - const struct usb_device_id *id) -{ - if (id->idProduct == MOSCHIP_DEVICE_ID_7715) - moschip7720_2port_driver.read_int_callback = - mos7715_interrupt_callback; - else - moschip7720_2port_driver.read_int_callback = - mos7720_interrupt_callback; - - return 0; -} - static int mos77xx_calc_num_ports(struct usb_serial *serial) { u16 product = le16_to_cpu(serial->dev->descriptor.idProduct); @@ -1946,6 +1925,12 @@ static int mos7720_startup(struct usb_serial *serial) tmp->interrupt_in_endpointAddress; serial->port[1]->interrupt_in_urb = NULL; serial->port[1]->interrupt_in_buffer = NULL; + + if (serial->port[0]->interrupt_in_urb) { + struct urb *urb = serial->port[0]->interrupt_in_urb; + + urb->complete = mos7715_interrupt_callback; + } } /* setting configuration feature to one */ @@ -2059,7 +2044,6 @@ static struct usb_serial_driver moschip7720_2port_driver = { .close = mos7720_close, .throttle = mos7720_throttle, .unthrottle = mos7720_unthrottle, - .probe = mos77xx_probe, .attach = mos7720_startup, .release = mos7720_release, .port_probe = mos7720_port_probe, @@ -2073,7 +2057,7 @@ static struct usb_serial_driver moschip7720_2port_driver = { .chars_in_buffer = mos7720_chars_in_buffer, .break_ctl = mos7720_break, .read_bulk_callback = mos7720_bulk_in_callback, - .read_int_callback = NULL /* dynamically assigned in probe() */ + .read_int_callback = mos7720_interrupt_callback, }; static struct usb_serial_driver * const serial_drivers[] = { From 5a45bdd35b88a5ab56c990176f461cd4c529ce9b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 10 Nov 2016 22:33:17 +0300 Subject: [PATCH 058/252] usb: xhci-mem: use passed in GFP flags instead of GFP_KERNEL commit c95a9f83711bf53faeb4ed9bbb63a3f065613dfb upstream. We normally use the passed in gfp flags for allocations, it's just these two which were missed. Fixes: 22d45f01a836 ("usb/xhci: replace pci_*_consistent() with dma_*_coherent()") Cc: Mathias Nyman Signed-off-by: Dan Carpenter Acked-by: Sebastian Andrzej Siewior Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/usb/host/xhci-mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index af9e4e8c906..b07e0754d78 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2306,7 +2306,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) * "physically contiguous and 64-byte (cache line) aligned". */ xhci->dcbaa = dma_alloc_coherent(dev, sizeof(*xhci->dcbaa), &dma, - GFP_KERNEL); + flags); if (!xhci->dcbaa) goto fail; memset(xhci->dcbaa, 0, sizeof *(xhci->dcbaa)); @@ -2397,7 +2397,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) xhci->erst.entries = dma_alloc_coherent(dev, sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS, &dma, - GFP_KERNEL); + flags); if (!xhci->erst.entries) goto fail; xhci_dbg(xhci, "// Allocated event ring segment table at 0x%llx\n", From 1a2f2dfba983795f043dc6b4831c84166c2de9d3 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 3 Jan 2017 18:13:48 -0600 Subject: [PATCH 059/252] usb: musb: Fix trying to free already-free IRQ 4 commit 8c300fe282fa254ea730c92cb0983e2642dc1fff upstream. When unloading omap2430, we can get the following splat: WARNING: CPU: 1 PID: 295 at kernel/irq/manage.c:1478 __free_irq+0xa8/0x2c8 Trying to free already-free IRQ 4 ... [] (free_irq) from [] (musbhs_dma_controller_destroy+0x28/0xb0 [musb_hdrc]) [] (musbhs_dma_controller_destroy [musb_hdrc]) from [] (musb_remove+0xf0/0x12c [musb_hdrc]) [] (musb_remove [musb_hdrc]) from [] (platform_drv_remove+0x24/0x3c) ... This is because the irq number in use is 260 nowadays, and the dma controller is using u8 instead of int. Fixes: 6995eb68aab7 ("USB: musb: enable low level DMA operation for Blackfin") Signed-off-by: Tony Lindgren [b-liu@ti.com: added Fixes tag] Signed-off-by: Bin Liu Signed-off-by: Willy Tarreau --- drivers/usb/musb/musbhsdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musbhsdma.h b/drivers/usb/musb/musbhsdma.h index f7b13fd2525..a3dcbd55e43 100644 --- a/drivers/usb/musb/musbhsdma.h +++ b/drivers/usb/musb/musbhsdma.h @@ -157,5 +157,5 @@ struct musb_dma_controller { void __iomem *base; u8 channel_count; u8 used_channels; - u8 irq; + int irq; }; From b1d245157dcfdd7ad15ee44690b5e85403eceb4c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Nov 2016 22:28:40 +0100 Subject: [PATCH 060/252] ALSA: usb-audio: Fix bogus error return in snd_usb_create_stream() commit 4763601a56f155ddf94ef35fc2c41504a2de15f5 upstream. The function returns -EINVAL even if it builds the stream properly. The bogus error code sneaked in during the code refactoring, but it wasn't noticed until now since the returned error code itself is ignored in anyway. Kill it here, but there is no behavior change by this patch, obviously. Fixes: e5779998bf8b ('ALSA: usb-audio: refactor code') Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/usb/card.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 64952e2d3ed..7344ac08326 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -205,7 +205,6 @@ static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int int if (! snd_usb_parse_audio_interface(chip, interface)) { usb_set_interface(dev, interface, 0); /* reset the current interface */ usb_driver_claim_interface(&usb_audio_driver, iface, (void *)-1L); - return -EINVAL; } return 0; From 7a30277390fad228a752e945a0465317acd502c5 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 29 Nov 2016 16:55:02 +0100 Subject: [PATCH 061/252] USB: serial: kl5kusb105: abort on open exception path commit 3c3dd1e058cb01e835dcade4b54a6f13ffaeaf7c upstream. Function klsi_105_open() calls usb_control_msg() (to "enable read") and checks its return value. When the return value is unexpected, it only assigns the error code to the return variable retval, but does not terminate the exception path. This patch fixes the bug by inserting "goto err_generic_close;" when the call to usb_control_msg() fails. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Pan Bian [johan: rebase on prerequisite fix and amend commit message] Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/kl5kusb105.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 70e163d21e9..69eb056dd6e 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -319,6 +319,7 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) if (rc < 0) { dev_err(&port->dev, "Enabling read failed (error = %d)\n", rc); retval = rc; + goto err_generic_close; } else dev_dbg(&port->dev, "%s - enabled reading\n", __func__); @@ -345,6 +346,7 @@ err_disable_read: 0, /* index */ NULL, 0, KLSI_TIMEOUT); +err_generic_close: usb_serial_generic_close(port); err_free_cfg: kfree(cfg); From 57eb4ca31fd227c5301e77a4f450bc583506d74a Mon Sep 17 00:00:00 2001 From: Eva Rachel Retuya Date: Sun, 9 Oct 2016 00:05:39 +0800 Subject: [PATCH 062/252] staging: iio: ad7606: fix improper setting of oversampling pins commit b321a38d2407c7e425c54bc09be909a34e49f740 upstream. The oversampling ratio is controlled using the oversampling pins, OS [2:0] with OS2 being the MSB control bit, and OS0 the LSB control bit. The gpio connected to the OS2 pin is not being set correctly, only OS0 and OS1 pins are being set. Fix the typo to allow proper control of the oversampling pins. Signed-off-by: Eva Rachel Retuya Fixes: b9618c0 ("staging: IIO: ADC: New driver for AD7606/AD7606-6/AD7606-4") Acked-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Willy Tarreau --- drivers/staging/iio/adc/ad7606_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/adc/ad7606_core.c b/drivers/staging/iio/adc/ad7606_core.c index d104b437842..fa763dd9a4b 100644 --- a/drivers/staging/iio/adc/ad7606_core.c +++ b/drivers/staging/iio/adc/ad7606_core.c @@ -185,7 +185,7 @@ static ssize_t ad7606_store_oversampling_ratio(struct device *dev, mutex_lock(&indio_dev->mlock); gpio_set_value(st->pdata->gpio_os0, (ret >> 0) & 1); gpio_set_value(st->pdata->gpio_os1, (ret >> 1) & 1); - gpio_set_value(st->pdata->gpio_os1, (ret >> 2) & 1); + gpio_set_value(st->pdata->gpio_os2, (ret >> 2) & 1); st->oversampling = lval; mutex_unlock(&indio_dev->mlock); From 34c83571b4fde39ec8d1256362def303e03ea724 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 20 Dec 2016 14:14:40 +0200 Subject: [PATCH 063/252] usb: dwc3: gadget: always unmap EP0 requests commit d62145929992f331fdde924d5963ab49588ccc7d upstream. commit 0416e494ce7d ("usb: dwc3: ep0: correct cache sync issue in case of ep0_bounced") introduced a bug where we would leak DMA resources which would cause us to starve the system of them resulting in failing DMA transfers. Fix the bug by making sure that we always unmap EP0 requests since those are *always* mapped. Fixes: 0416e494ce7d ("usb: dwc3: ep0: correct cache sync issue in case of ep0_bounced") Tested-by: Tomasz Medrek Reported-by: Janusz Dziedzic Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/dwc3/gadget.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 0dfee61f787..5a2eaf401b0 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -265,11 +265,11 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, if (req->request.status == -EINPROGRESS) req->request.status = status; - if (dwc->ep0_bounced && dep->number == 0) + if (dwc->ep0_bounced && dep->number <= 1) dwc->ep0_bounced = false; - else - usb_gadget_unmap_request(&dwc->gadget, &req->request, - req->direction); + + usb_gadget_unmap_request(&dwc->gadget, &req->request, + req->direction); dev_dbg(dwc->dev, "request %p from %s completed %d/%d ===> %d\n", req, dep->name, req->request.actual, From 5c9b542c02a8d096daba16c92bcbd60715058d02 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 30 Oct 2016 09:09:18 -0700 Subject: [PATCH 064/252] cris: Only build flash rescue image if CONFIG_ETRAX_AXISFLASHMAP is selected commit 328cf6927bb72cadefddebbc9a23c793108147a2 upstream. If CONFIG_ETRAX_AXISFLASHMAP is not configured, the flash rescue image object file is empty. With recent versions of binutils, this results in the following build error. cris-linux-objcopy: error: the input file 'arch/cris/boot/rescue/rescue.o' has no sections This is seen, for example, when trying to build cris:allnoconfig with recently generated toolchains. Since it does not make sense to build a flash rescue image if there is no flash, only build it if CONFIG_ETRAX_AXISFLASHMAP is enabled. Reported-by: kbuild test robot Fixes: 66ab3a74c5ce ("CRIS: Merge machine dependent boot/compressed ..") Signed-off-by: Guenter Roeck Signed-off-by: Jesper Nilsson Signed-off-by: Willy Tarreau --- arch/cris/boot/rescue/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/cris/boot/rescue/Makefile b/arch/cris/boot/rescue/Makefile index 52bd0bd1dd2..d98edbb30a1 100644 --- a/arch/cris/boot/rescue/Makefile +++ b/arch/cris/boot/rescue/Makefile @@ -10,6 +10,9 @@ asflags-y += $(LINUXINCLUDE) ccflags-y += -O2 $(LINUXINCLUDE) + +ifdef CONFIG_ETRAX_AXISFLASHMAP + arch-$(CONFIG_ETRAX_ARCH_V10) = v10 arch-$(CONFIG_ETRAX_ARCH_V32) = v32 @@ -28,6 +31,11 @@ $(obj)/rescue.bin: $(obj)/rescue.o FORCE $(call if_changed,objcopy) cp -p $(obj)/rescue.bin $(objtree) +else +$(obj)/rescue.bin: + +endif + $(obj)/testrescue.bin: $(obj)/testrescue.o $(OBJCOPY) $(OBJCOPYFLAGS) $(obj)/testrescue.o tr.bin # Pad it to 784 bytes From 232e0c1bb3bd6e549bb26c5e09f355ed88b470a7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 20 Nov 2016 10:37:39 -0800 Subject: [PATCH 065/252] hwmon: (ds620) Fix overflows seen when writing temperature limits commit e36ce99ee0815d7919a7b589bfb66f3de50b6bc7 upstream. Module test reports: temp1_max: Suspected overflow: [160000 vs. 0] temp1_min: Suspected overflow: [160000 vs. 0] This is seen because the values passed when writing temperature limits are unbound. Reviewed-by: Jean Delvare Fixes: 6099469805c2 ("hwmon: Support for Dallas Semiconductor DS620") Signed-off-by: Guenter Roeck Signed-off-by: Willy Tarreau --- drivers/hwmon/ds620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ds620.c b/drivers/hwmon/ds620.c index f1d6b422cf0..c25700f7db9 100644 --- a/drivers/hwmon/ds620.c +++ b/drivers/hwmon/ds620.c @@ -166,7 +166,7 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *da, if (res) return res; - val = (val * 10 / 625) * 8; + val = (clamp_val(val, -128000, 128000) * 10 / 625) * 8; mutex_lock(&data->update_lock); data->temp[attr->index] = val; From b09fd959af0fae9be22688e17160d736b083c536 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 1 Dec 2016 14:25:44 +0800 Subject: [PATCH 066/252] clk: clk-wm831x: fix a logic error commit 20979202ee6e4c68dab7bcf408787225a656d18e upstream. Fix bug https://bugzilla.kernel.org/show_bug.cgi?id=188561. Function wm831x_clkout_is_prepared() returns "true" when it fails to read CLOCK_CONTROL_1. "true" means the device is already prepared. So return "true" on the read failure seems improper. Signed-off-by: Pan Bian Acked-by: Charles Keepax Fixes: f05259a6ffa4 ("clk: wm831x: Add initial WM831x clock driver") Signed-off-by: Stephen Boyd Signed-off-by: Willy Tarreau --- drivers/clk/clk-wm831x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-wm831x.c b/drivers/clk/clk-wm831x.c index 917a3ab482f..e2e5e76e980 100644 --- a/drivers/clk/clk-wm831x.c +++ b/drivers/clk/clk-wm831x.c @@ -248,7 +248,7 @@ static int wm831x_clkout_is_enabled(struct clk_hw *hw) if (ret < 0) { dev_err(wm831x->dev, "Unable to read CLOCK_CONTROL_1: %d\n", ret); - return true; + return false; } return (ret & WM831X_CLKOUT_ENA) != 0; From 7712b94d5326ae165dcb8b5350ee3c2d72ceab73 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 12 Dec 2016 07:28:26 -0500 Subject: [PATCH 067/252] iommu/amd: Fix the left value check of cmd buffer commit 432abf68a79332282329286d190e21fe3ac02a31 upstream. The generic command buffer entry is 128 bits (16 bytes), so the offset of tail and head pointer should be 16 bytes aligned and increased with 0x10 per command. When cmd buf is full, head = (tail + 0x10) % CMD_BUFFER_SIZE. So when left space of cmd buf should be able to store only two command, we should be issued one COMPLETE_WAIT additionally to wait all older commands completed. Then the left space should be increased after IOMMU fetching from cmd buf. So left check value should be left <= 0x20 (two commands). Signed-off-by: Huang Rui Fixes: ac0ea6e92b222 ('x86/amd-iommu: Improve handling of full command buffer') Signed-off-by: Joerg Roedel Signed-off-by: Willy Tarreau --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1c62c248da6..0e7cd14bf7b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1029,7 +1029,7 @@ again: next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; left = (head - next_tail) % iommu->cmd_buf_size; - if (left <= 2) { + if (left <= 0x20) { struct iommu_cmd sync_cmd; volatile u64 sem = 0; int ret; From 4d091cfb5a1c0e142b3c719cc8393012180f15e1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Nov 2016 16:08:34 +0100 Subject: [PATCH 068/252] scsi: mvsas: fix command_active typo commit af15769ffab13d777e55fdef09d0762bf0c249c4 upstream. gcc-7 notices that the condition in mvs_94xx_command_active looks suspicious: drivers/scsi/mvsas/mv_94xx.c: In function 'mvs_94xx_command_active': drivers/scsi/mvsas/mv_94xx.c:671:15: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context] This was introduced when the mv_printk() statement got added, and leads to the condition being ignored. This is probably harmless. Changing '&&' to '&' makes the code look reasonable, as we check the command bit before setting and printing it. Fixes: a4632aae8b66 ("[SCSI] mvsas: Add new macros and functions") Signed-off-by: Arnd Bergmann Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/mvsas/mv_94xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mvsas/mv_94xx.c b/drivers/scsi/mvsas/mv_94xx.c index 1e4479f3331..55716c5184f 100644 --- a/drivers/scsi/mvsas/mv_94xx.c +++ b/drivers/scsi/mvsas/mv_94xx.c @@ -621,7 +621,7 @@ static void mvs_94xx_command_active(struct mvs_info *mvi, u32 slot_idx) { u32 tmp; tmp = mvs_cr32(mvi, MVS_COMMAND_ACTIVE+(slot_idx >> 3)); - if (tmp && 1 << (slot_idx % 32)) { + if (tmp & 1 << (slot_idx % 32)) { mv_printk("command active %08X, slot [%x].\n", tmp, slot_idx); mvs_cw32(mvi, MVS_COMMAND_ACTIVE + (slot_idx >> 3), 1 << (slot_idx % 32)); From 6075e944fe561a1b2460129ef09d2f644bde75eb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 13 Dec 2016 15:27:04 +0300 Subject: [PATCH 069/252] target/iscsi: Fix double free in lio_target_tiqn_addtpg() commit a91918cd3ea11f91c68e08e1e8ce1b560447a80e upstream. This iscsit_tpg_add_portal_group() function is only called from lio_target_tiqn_addtpg(). Both functions free the "tpg" pointer on error so it's a double free bug. The memory is allocated in the caller so it should be freed in the caller and not here. Fixes: e48354ce078c ("iscsi-target: Add iSCSI fabric support for target v4.1") Signed-off-by: Dan Carpenter Reviewed-by: David Disseldorp [ bvanassche: Added "Fix" at start of patch title ] Signed-off-by: Bart Van Assche Signed-off-by: Willy Tarreau --- drivers/target/iscsi/iscsi_target_tpg.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 75a4e83842c..a6801e8a811 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -256,7 +256,6 @@ err_out: iscsi_release_param_list(tpg->param_list); tpg->param_list = NULL; } - kfree(tpg); return -ENOMEM; } From 985d68bfb1024fffd855eaba5d1a36fb8d96cd7c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Nov 2016 14:31:34 +0300 Subject: [PATCH 070/252] mmc: mmc_test: Uninitialized return value commit 16652a936e96f5dae53c3fbd38a570497baadaa8 upstream. We never set "ret" to RESULT_OK. Fixes: 9f9c4180f88d ("mmc: mmc_test: add test for non-blocking transfers") Signed-off-by: Dan Carpenter Signed-off-by: Ulf Hansson Signed-off-by: Willy Tarreau --- drivers/mmc/card/mmc_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index 759714ed6be..abad0b49bd7 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -795,7 +795,7 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test, struct mmc_async_req *cur_areq = &test_areq[0].areq; struct mmc_async_req *other_areq = &test_areq[1].areq; int i; - int ret; + int ret = RESULT_OK; test_areq[0].test = test; test_areq[1].test = test; From b734c02b4ccd6307232278eb4cc908aa962be4a4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 16:26:03 +0100 Subject: [PATCH 071/252] powerpc/pci/rpadlpar: Fix device reference leaks commit 99e5cde5eae78bef95bfe7c16ccda87fb070149b upstream. Make sure to drop any device reference taken by vio_find_node() when adding and removing virtual I/O slots. Fixes: 5eeb8c63a38f ("[PATCH] PCI Hotplug: rpaphp: Move VIO registration") Signed-off-by: Johan Hovold Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- drivers/pci/hotplug/rpadlpar_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index b29e20b7862..ffb2de5c4d5 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -259,8 +259,13 @@ static int dlpar_add_phb(char *drc_name, struct device_node *dn) static int dlpar_add_vio_slot(char *drc_name, struct device_node *dn) { - if (vio_find_node(dn)) + struct vio_dev *vio_dev; + + vio_dev = vio_find_node(dn); + if (vio_dev) { + put_device(&vio_dev->dev); return -EINVAL; + } if (!vio_register_device_node(dn)) { printk(KERN_ERR @@ -336,6 +341,9 @@ static int dlpar_remove_vio_slot(char *drc_name, struct device_node *dn) return -EINVAL; vio_unregister_device(vio_dev); + + put_device(&vio_dev->dev); + return 0; } From 3dc64c463c77d799e28205775b707186da5fc6b4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Dec 2016 14:22:03 +0300 Subject: [PATCH 072/252] ser_gigaset: return -ENOMEM on error instead of success commit 93a97c50cbf1c007caf12db5cc23e0d5b9c8473c upstream. If we can't allocate the resources in gigaset_initdriver() then we should return -ENOMEM instead of zero. Fixes: 2869b23e4b95 ("[PATCH] drivers/isdn/gigaset: new M101 driver (v2)") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/isdn/gigaset/ser-gigaset.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 3ac9c419481..53dfe1693e5 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -787,8 +787,10 @@ static int __init ser_gigaset_init(void) driver = gigaset_initdriver(GIGASET_MINOR, GIGASET_MINORS, GIGASET_MODULENAME, GIGASET_DEVNAME, &ops, THIS_MODULE); - if (!driver) + if (!driver) { + rc = -ENOMEM; goto error; + } rc = tty_register_ldisc(N_GIGASET_M101, &gigaset_ldisc); if (rc != 0) { From 4949c9e2e32b47b03071c2bfdcdf6c49a90739ed Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Dec 2016 18:04:11 +0100 Subject: [PATCH 073/252] net, sched: fix soft lockup in tc_classify commit 628185cfddf1dfb701c4efe2cfd72cf5b09f5702 upstream. Shahar reported a soft lockup in tc_classify(), where we run into an endless loop when walking the classifier chain due to tp->next == tp which is a state we should never run into. The issue only seems to trigger under load in the tc control path. What happens is that in tc_ctl_tfilter(), thread A allocates a new tp, initializes it, sets tp_created to 1, and calls into tp->ops->change() with it. In that classifier callback we had to unlock/lock the rtnl mutex and returned with -EAGAIN. One reason why we need to drop there is, for example, that we need to request an action module to be loaded. This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning after we loaded and found the requested action, we need to redo the whole request so we don't race against others. While we had to unlock rtnl in that time, thread B's request was processed next on that CPU. Thread B added a new tp instance successfully to the classifier chain. When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN and destroying its tp instance which never got linked, we goto replay and redo A's request. This time when walking the classifier chain in tc_ctl_tfilter() for checking for existing tp instances we had a priority match and found the tp instance that was created and linked by thread B. Now calling again into tp->ops->change() with that tp was successful and returned without error. tp_created was never cleared in the second round, thus kernel thinks that we need to link it into the classifier chain (once again). tp and *back point to the same object due to the match we had earlier on. Thus for thread B's already public tp, we reset tp->next to tp itself and link it into the chain, which eventually causes the mentioned endless loop in tc_classify() once a packet hits the data path. Fix is to clear tp_created at the beginning of each request, also when we replay it. On the paths that can cause -EAGAIN we already destroy the original tp instance we had and on replay we really need to start from scratch. It seems that this issue was first introduced in commit 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup"). Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup") Reported-by: Shahar Klein Signed-off-by: Daniel Borkmann Cc: Cong Wang Acked-by: Eric Dumazet Tested-by: Shahar Klein Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/sched/cls_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ea40d1877a..042e5d83962 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -136,12 +136,14 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) unsigned long cl; unsigned long fh; int err; - int tp_created = 0; + int tp_created; if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; replay: + tp_created = 0; + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); if (err < 0) return err; From 8d82183768d716ee902269e7c3713bd0f5421484 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 27 Dec 2016 18:23:06 -0800 Subject: [PATCH 074/252] net: stmmac: Fix race between stmmac_drv_probe and stmmac_open commit 5701659004d68085182d2fd4199c79172165fa65 upstream. There is currently a small window during which the network device registered by stmmac can be made visible, yet all resources, including and clock and MDIO bus have not had a chance to be set up, this can lead to the following error to occur: [ 473.919358] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): stmmac_dvr_probe: warning: cannot get CSR clock [ 473.919382] stmmaceth 0000:01:00.0: no reset control found [ 473.919412] stmmac - user ID: 0x10, Synopsys ID: 0x42 [ 473.919429] stmmaceth 0000:01:00.0: DMA HW capability register supported [ 473.919436] stmmaceth 0000:01:00.0: RX Checksum Offload Engine supported [ 473.919443] stmmaceth 0000:01:00.0: TX Checksum insertion supported [ 473.919451] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): Enable RX Mitigation via HW Watchdog Timer [ 473.921395] libphy: PHY stmmac-1:00 not found [ 473.921417] stmmaceth 0000:01:00.0 eth0: Could not attach to PHY [ 473.921427] stmmaceth 0000:01:00.0 eth0: stmmac_open: Cannot attach to PHY (error: -19) [ 473.959710] libphy: stmmac: probed [ 473.959724] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 0 IRQ POLL (stmmac-1:00) active [ 473.959728] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 1 IRQ POLL (stmmac-1:01) [ 473.959731] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 2 IRQ POLL (stmmac-1:02) [ 473.959734] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 3 IRQ POLL (stmmac-1:03) Fix this by making sure that register_netdev() is the last thing being done, which guarantees that the clock and the MDIO bus are available. Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove") Reported-by: Kweh, Hock Leong Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e9eab29db7b..5cb09ecfd75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2697,12 +2697,6 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); - ret = register_netdev(ndev); - if (ret) { - pr_err("%s: ERROR %i registering the device\n", __func__, ret); - goto error_netdev_register; - } - priv->stmmac_clk = clk_get(priv->device, STMMAC_RESOURCE_NAME); if (IS_ERR(priv->stmmac_clk)) { pr_warn("%s: warning: cannot get CSR clock\n", __func__); @@ -2733,13 +2727,23 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, } } + ret = register_netdev(ndev); + if (ret) { + netdev_err(priv->dev, "%s: ERROR %i registering the device\n", + __func__, ret); + goto error_netdev_register; + } + return priv; +error_netdev_register: + if (priv->pcs != STMMAC_PCS_RGMII && + priv->pcs != STMMAC_PCS_TBI && + priv->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); error_mdio_register: clk_put(priv->stmmac_clk); error_clk_get: - unregister_netdev(ndev); -error_netdev_register: netif_napi_del(&priv->napi); error_free_netdev: free_netdev(ndev); From 9872698ce0426c5daa9f5152e61ee4d4c6611e66 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2017 12:24:01 -0800 Subject: [PATCH 075/252] gro: Enter slow-path if there is no tailroom commit 1272ce87fa017ca4cf32920764d879656b7a005a upstream. The GRO path has a fast-path where we avoid calling pskb_may_pull and pskb_expand by directly accessing frag0. However, this should only be done if we have enough tailroom in the skb as otherwise we'll have to expand it later anyway. This patch adds the check by capping frag0_len with the skb tailroom. Fixes: cb18978cbf45 ("gro: Open-code final pskb_may_pull") Reported-by: Slava Shwartsman Signed-off-by: Herbert Xu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6494918b3ea..95394a2f365 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3891,7 +3891,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb) pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0), + skb->end - skb->tail); } } From f2c23ae58013e9b6454a2e0a7d2d8cf39f609037 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Jan 2017 19:52:43 -0800 Subject: [PATCH 076/252] gro: use min_t() in skb_gro_reset_offset() commit 7cfd5fd5a9813f1430290d20c0fead9b4582a307 upstream. On 32bit arches, (skb->end - skb->data) is not 'unsigned int', so we shall use min_t() instead of min() to avoid a compiler error. Fixes: 1272ce87fa01 ("gro: Enter slow-path if there is no tailroom") Reported-by: kernel test robot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 95394a2f365..6900ff08008 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3891,8 +3891,9 @@ static void skb_gro_reset_offset(struct sk_buff *skb) pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0), - skb->end - skb->tail); + NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, + skb_frag_size(frag0), + skb->end - skb->tail); } } From 8ec6068428a33e224bf9309c6130d9ec9535bf6a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2017 12:24:15 -0800 Subject: [PATCH 077/252] gro: Disable frag0 optimization on IPv6 ext headers commit 57ea52a865144aedbcd619ee0081155e658b6f7d upstream. The GRO fast path caches the frag0 address. This address becomes invalid if frag0 is modified by pskb_may_pull or its variants. So whenever that happens we must disable the frag0 optimization. This is usually done through the combination of gro_header_hard and gro_header_slow, however, the IPv6 extension header path did the pulling directly and would continue to use the GRO fast path incorrectly. This patch fixes it by disabling the fast path when we enter the IPv6 extension header path. Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address") Reported-by: Slava Shwartsman Signed-off-by: Herbert Xu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- include/linux/netdevice.h | 9 +++++++-- net/ipv6/ip6_offload.c | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45a618b5886..157a47c998e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1729,14 +1729,19 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) return NAPI_GRO_CB(skb)->frag0_len < hlen; } +static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +} + static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { if (!pskb_may_pull(skb, hlen)) return NULL; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + skb_gro_frag0_invalidate(skb); return skb->data + offset; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 71b766ee821..88a2e8827ef 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -174,6 +174,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); + skb_gro_frag0_invalidate(skb); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); skb_reset_transport_header(skb); From 8b372d39064d9a27b428d261fc7dc97cf150bd17 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 22 Dec 2016 21:06:53 -0600 Subject: [PATCH 078/252] powerpc: Fix build warning on 32-bit PPC commit 8ae679c4bc2ea2d16d92620da8e3e9332fa4039f upstream. I am getting the following warning when I build kernel 4.9-git on my PowerBook G4 with a 32-bit PPC processor: AS arch/powerpc/kernel/misc_32.o arch/powerpc/kernel/misc_32.S:299:7: warning: "CONFIG_FSL_BOOKE" is not defined [-Wundef] This problem is evident after commit 989cea5c14be ("kbuild: prevent lib-ksyms.o rebuilds"); however, this change in kbuild only exposes an error that has been in the code since 2005 when this source file was created. That was with commit 9994a33865f4 ("powerpc: Introduce entry_{32,64}.S, misc_{32,64}.S, systbl.S"). The offending line does not make a lot of sense. This error does not seem to cause any errors in the executable, thus I am not recommending that it be applied to any stable versions. Thanks to Nicholas Piggin for suggesting this solution. Fixes: 9994a33865f4 ("powerpc: Introduce entry_{32,64}.S, misc_{32,64}.S, systbl.S") Signed-off-by: Larry Finger Cc: Nicholas Piggin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- arch/powerpc/kernel/misc_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e469f30e6ee..ad8573f053d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -295,7 +295,7 @@ _GLOBAL(flush_instruction_cache) lis r3, KERNELBASE@h iccci 0,r3 #endif -#elif CONFIG_FSL_BOOKE +#elif defined(CONFIG_FSL_BOOKE) BEGIN_FTR_SECTION mfspr r3,SPRN_L1CSR0 ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC From 4323a76ebd6f5b939a168289b0862fbfdc036de7 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Sun, 18 Dec 2016 15:26:12 -0800 Subject: [PATCH 079/252] Input: i8042 - add Pegatron touchpad to noloop table commit 41c567a5d7d1a986763e58c3394782813c3bcb03 upstream. Avoid AUX loopback in Pegatron C15B touchpad, so input subsystem is able to recognize a Synaptics touchpad in the AUX port. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=93791 (Touchpad is not detected on DNS 0801480 notebook (PEGATRON C15B)) Suggested-by: Dmitry Torokhov Signed-off-by: Marcos Paulo de Souza Signed-off-by: Dmitry Torokhov Signed-off-by: Willy Tarreau --- drivers/input/serio/i8042-x86ia64io.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 5102b4f68f1..875e680e90c 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -211,6 +211,12 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Rev 1"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"), + DMI_MATCH(DMI_PRODUCT_NAME, "C15B"), + }, + }, { } }; From 10e96e6b6d9f2f1eaac163a3fc340fafd7047eab Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 10 Jan 2017 16:58:27 -0800 Subject: [PATCH 080/252] mm/hugetlb.c: fix reservation race when freeing surplus pages commit e5bbc8a6c992901058bc09e2ce01d16c111ff047 upstream. return_unused_surplus_pages() decrements the global reservation count, and frees any unused surplus pages that were backing the reservation. Commit 7848a4bf51b3 ("mm/hugetlb.c: add cond_resched_lock() in return_unused_surplus_pages()") added a call to cond_resched_lock in the loop freeing the pages. As a result, the hugetlb_lock could be dropped, and someone else could use the pages that will be freed in subsequent iterations of the loop. This could result in inconsistent global hugetlb page state, application api failures (such as mmap) failures or application crashes. When dropping the lock in return_unused_surplus_pages, make sure that the global reservation count (resv_huge_pages) remains sufficiently large to prevent someone else from claiming pages about to be freed. Analyzed by Paul Cassella. Fixes: 7848a4bf51b3 ("mm/hugetlb.c: add cond_resched_lock() in return_unused_surplus_pages()") Link: http://lkml.kernel.org/r/1483991767-6879-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Paul Cassella Suggested-by: Michal Hocko Cc: Masayoshi Mizuma Cc: Naoya Horiguchi Cc: Aneesh Kumar Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- mm/hugetlb.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e9fd382bf25..69832290015 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1070,23 +1070,32 @@ free: } /* - * When releasing a hugetlb pool reservation, any surplus pages that were - * allocated to satisfy the reservation must be explicitly freed if they were - * never used. - * Called with hugetlb_lock held. + * This routine has two main purposes: + * 1) Decrement the reservation count (resv_huge_pages) by the value passed + * in unused_resv_pages. This corresponds to the prior adjustments made + * to the associated reservation map. + * 2) Free any unused surplus pages that may have been allocated to satisfy + * the reservation. As many as unused_resv_pages may be freed. + * + * Called with hugetlb_lock held. However, the lock could be dropped (and + * reacquired) during calls to cond_resched_lock. Whenever dropping the lock, + * we must make sure nobody else can claim pages we are in the process of + * freeing. Do this by ensuring resv_huge_page always is greater than the + * number of huge pages we plan to free when dropping the lock. */ static void return_unused_surplus_pages(struct hstate *h, unsigned long unused_resv_pages) { unsigned long nr_pages; - /* Uncommit the reservation */ - h->resv_huge_pages -= unused_resv_pages; - /* Cannot return gigantic pages currently */ if (h->order >= MAX_ORDER) - return; + goto out; + /* + * Part (or even all) of the reservation could have been backed + * by pre-allocated pages. Only free surplus pages. + */ nr_pages = min(unused_resv_pages, h->surplus_huge_pages); /* @@ -1096,12 +1105,22 @@ static void return_unused_surplus_pages(struct hstate *h, * when the nodes with surplus pages have no free pages. * free_pool_huge_page() will balance the the freed pages across the * on-line nodes with memory and will handle the hstate accounting. + * + * Note that we decrement resv_huge_pages as we free the pages. If + * we drop the lock, resv_huge_pages will still be sufficiently large + * to cover subsequent pages we may free. */ while (nr_pages--) { + h->resv_huge_pages--; + unused_resv_pages--; if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1)) - break; + goto out; cond_resched_lock(&hugetlb_lock); } + +out: + /* Fully uncommit the reservation */ + h->resv_huge_pages -= unused_resv_pages; } /* From b031f56c69b9a8379bdff54ad2467f4cce43f3d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Jan 2017 12:05:37 +0100 Subject: [PATCH 081/252] USB: serial: kl5kusb105: fix line-state error handling commit 146cc8a17a3b4996f6805ee5c080e7101277c410 upstream. The current implementation failed to detect short transfers when attempting to read the line state, and also, to make things worse, logged the content of the uninitialised heap transfer buffer. Fixes: abf492e7b3ae ("USB: kl5kusb105: fix DMA buffers on stack") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/kl5kusb105.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 69eb056dd6e..b6794baf0a3 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -198,10 +198,11 @@ static int klsi_105_get_line_state(struct usb_serial_port *port, status_buf, KLSI_STATUSBUF_LEN, 10000 ); - if (rc < 0) - dev_err(&port->dev, "Reading line status failed (error = %d)\n", - rc); - else { + if (rc != KLSI_STATUSBUF_LEN) { + dev_err(&port->dev, "reading line status failed: %d\n", rc); + if (rc >= 0) + rc = -EIO; + } else { status = get_unaligned_le16(status_buf); dev_info(&port->serial->dev->dev, "read status %x %x", From 86e0a6b4db5a8de70a9315a3c2369120b7f20f1d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:10 +0100 Subject: [PATCH 082/252] USB: serial: ch341: fix initial modem-control state commit 4e2da44691cffbfffb1535f478d19bc2dca3e62b upstream. DTR and RTS will be asserted by the tty-layer when the port is opened and deasserted on close (if HUPCL is set). Make sure the initial state is not-asserted before the port is first opened as well. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ch341.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index c2a4171ab9c..2272f4f8e4a 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -252,7 +252,6 @@ static int ch341_port_probe(struct usb_serial_port *port) spin_lock_init(&priv->lock); priv->baud_rate = DEFAULT_BAUD_RATE; - priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; r = ch341_configure(port->serial->dev, priv); if (r < 0) From 6a9f8d82721ae27d492af512426d1c36c40a82a1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:13 +0100 Subject: [PATCH 083/252] USB: serial: ch341: fix open error handling commit f2950b78547ffb8475297ada6b92bc2d774d5461 upstream. Make sure to stop the interrupt URB before returning on errors during open. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ch341.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 2272f4f8e4a..52ac5fd0581 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -315,15 +315,15 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) r = ch341_configure(serial->dev, priv); if (r) - goto out; + return r; r = ch341_set_handshake(serial->dev, priv->line_control); if (r) - goto out; + return r; r = ch341_set_baudrate(serial->dev, priv); if (r) - goto out; + return r; dev_dbg(&port->dev, "%s - submitting interrupt urb", __func__); r = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); @@ -331,12 +331,19 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, r); ch341_close(port); - goto out; + return r; } r = usb_serial_generic_open(tty, port); + if (r) + goto err_kill_interrupt_urb; -out: return r; + return 0; + +err_kill_interrupt_urb: + usb_kill_urb(port->interrupt_in_urb); + + return r; } /* Old_termios contains the original termios settings and From 352ba51dac0bcfe02df1076715a4dc9248e448fc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:18 +0100 Subject: [PATCH 084/252] USB: serial: ch341: fix control-message error handling commit 2d5a9c72d0c4ac73cf97f4b7814ed6c44b1e49ae upstream. A short control transfer would currently fail to be detected, something which could lead to stale buffer data being used as valid input. Check for short transfers, and make sure to log any transfer errors. Note that this also avoids leaking heap data to user space (TIOCMGET) and the remote device (break control). Fixes: 6ce76104781a ("USB: Driver for CH341 USB-serial adaptor") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ch341.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 52ac5fd0581..db37b16a275 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -97,6 +97,8 @@ static int ch341_control_out(struct usb_device *dev, u8 request, r = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, value, index, NULL, 0, DEFAULT_TIMEOUT); + if (r < 0) + dev_err(&dev->dev, "failed to send control message: %d\n", r); return r; } @@ -114,7 +116,20 @@ static int ch341_control_in(struct usb_device *dev, r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, buf, bufsize, DEFAULT_TIMEOUT); - return r; + if (r < bufsize) { + if (r >= 0) { + dev_err(&dev->dev, + "short control message received (%d < %u)\n", + r, bufsize); + r = -EIO; + } + + dev_err(&dev->dev, "failed to receive control message: %d\n", + r); + return r; + } + + return 0; } static int ch341_set_baudrate(struct usb_device *dev, @@ -156,9 +171,9 @@ static int ch341_set_handshake(struct usb_device *dev, u8 control) static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) { + const unsigned int size = 2; char *buffer; int r; - const unsigned size = 8; unsigned long flags; buffer = kmalloc(size, GFP_KERNEL); @@ -169,15 +184,10 @@ static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) if (r < 0) goto out; - /* setup the private status if available */ - if (r == 2) { - r = 0; - spin_lock_irqsave(&priv->lock, flags); - priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT; - priv->multi_status_change = 0; - spin_unlock_irqrestore(&priv->lock, flags); - } else - r = -EPROTO; + spin_lock_irqsave(&priv->lock, flags); + priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT; + priv->multi_status_change = 0; + spin_unlock_irqrestore(&priv->lock, flags); out: kfree(buffer); return r; @@ -187,9 +197,9 @@ out: kfree(buffer); static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) { + const unsigned int size = 2; char *buffer; int r; - const unsigned size = 8; buffer = kmalloc(size, GFP_KERNEL); if (!buffer) From 8024b1a0ed99490844462baebd3feabd272ca4a6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:11 +0100 Subject: [PATCH 085/252] USB: serial: ch341: fix open and resume after B0 commit a20047f36e2f6a1eea4f1fd261aaa55882369868 upstream. The private baud_rate variable is used to configure the port at open and reset-resume and must never be set to (and left at) zero or reset-resume and all further open attempts will fail. Fixes: aa91def41a7b ("USB: ch341: set tty baud speed according to tty struct") Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ch341.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index db37b16a275..e9cfd40e9e4 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -368,12 +368,11 @@ static void ch341_set_termios(struct tty_struct *tty, baud_rate = tty_get_baud_rate(tty); - priv->baud_rate = baud_rate; - if (baud_rate) { spin_lock_irqsave(&priv->lock, flags); priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); spin_unlock_irqrestore(&priv->lock, flags); + priv->baud_rate = baud_rate; ch341_set_baudrate(port->serial->dev, priv); } else { spin_lock_irqsave(&priv->lock, flags); From 2563aee7a933557afbf9a40f7a6a82c6ae5ecca7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:14 +0100 Subject: [PATCH 086/252] USB: serial: ch341: fix resume after reset commit ce5e292828117d1b71cbd3edf9e9137cf31acd30 upstream. Fix reset-resume handling which failed to resubmit the read and interrupt URBs, thereby leaving a port that was open before suspend in a broken state until closed and reopened. Fixes: 1ded7ea47b88 ("USB: ch341 serial: fix port number changed after resume") Fixes: 2bfd1c96a9fb ("USB: serial: ch341: remove reset_resume callback") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ch341.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index e9cfd40e9e4..be51cd98311 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -585,14 +585,23 @@ static int ch341_tiocmget(struct tty_struct *tty) static int ch341_reset_resume(struct usb_serial *serial) { - struct ch341_private *priv; - - priv = usb_get_serial_port_data(serial->port[0]); + struct usb_serial_port *port = serial->port[0]; + struct ch341_private *priv = usb_get_serial_port_data(port); + int ret; /* reconfigure ch341 serial port after bus-reset */ ch341_configure(serial->dev, priv); - return 0; + if (test_bit(ASYNCB_INITIALIZED, &port->port.flags)) { + ret = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); + if (ret) { + dev_err(&port->dev, "failed to submit interrupt urb: %d\n", + ret); + return ret; + } + } + + return usb_serial_generic_resume(serial); } static struct usb_serial_driver ch341_device = { From 2c65c73aac7481085b0bcf51bd9327d4ea64a72e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:12 +0100 Subject: [PATCH 087/252] USB: serial: ch341: fix modem-control and B0 handling commit 030ee7ae52a46a2be52ccc8242c4a330aba8d38e upstream. The modem-control signals are managed by the tty-layer during open and should not be asserted prematurely when set_termios is called from driver open. Also make sure that the signals are asserted only when changing speed from B0. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ch341.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index be51cd98311..a4e5be5aea4 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -369,24 +369,24 @@ static void ch341_set_termios(struct tty_struct *tty, baud_rate = tty_get_baud_rate(tty); if (baud_rate) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); - spin_unlock_irqrestore(&priv->lock, flags); priv->baud_rate = baud_rate; ch341_set_baudrate(port->serial->dev, priv); - } else { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS); - spin_unlock_irqrestore(&priv->lock, flags); } - ch341_set_handshake(port->serial->dev, priv->line_control); - /* Unimplemented: * (cflag & CSIZE) : data bits [5, 8] * (cflag & PARENB) : parity {NONE, EVEN, ODD} * (cflag & CSTOPB) : stop bits [1, 2] */ + + spin_lock_irqsave(&priv->lock, flags); + if (C_BAUD(tty) == B0) + priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS); + else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) + priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); + spin_unlock_irqrestore(&priv->lock, flags); + + ch341_set_handshake(port->serial->dev, priv->line_control); } static void ch341_break_ctl(struct tty_struct *tty, int break_state) From 0954c87c16bab5579b8c111887aafb6a75a17c0b Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Wed, 28 Dec 2016 14:55:40 +0100 Subject: [PATCH 088/252] x86/cpu: Fix bootup crashes by sanitizing the argument of the 'clearcpuid=' command-line option commit dd853fd216d1485ed3045ff772079cc8689a9a4a upstream. A negative number can be specified in the cmdline which will be used as setup_clear_cpu_cap() argument. With that we can clear/set some bit in memory predceeding boot_cpu_data/cpu_caps_cleared which may cause kernel to misbehave. This patch adds lower bound check to setup_disablecpuid(). Boris Petkov reproduced a crash: [ 1.234575] BUG: unable to handle kernel paging request at ffffffff858bd540 [ 1.236535] IP: memcpy_erms+0x6/0x10 Signed-off-by: Lukasz Odzioba Acked-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andi.kleen@intel.com Cc: bp@alien8.de Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: slaoub@gmail.com Fixes: ac72e7888a61 ("x86: add generic clearcpuid=... option") Link: http://lkml.kernel.org/r/1482933340-11857-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Willy Tarreau --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 81e0fe48b9b..7e09789d2cf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1066,7 +1066,7 @@ static __init int setup_disablecpuid(char *arg) { int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) + if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) setup_clear_cpu_cap(bit); else return 0; From c242797afd5ce0c22ee6c3d16851d64635a04419 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Dec 2016 11:19:31 +1100 Subject: [PATCH 089/252] NFSv4.1: nfs4_fl_prepare_ds must be careful about reporting success. commit cfd278c280f997cf2fe4662e0acab0fe465f637b upstream. Various places assume that if nfs4_fl_prepare_ds() turns a non-NULL 'ds', then ds->ds_clp will also be non-NULL. This is not necessasrily true in the case when the process received a fatal signal while nfs4_pnfs_ds_connect is waiting in nfs4_wait_ds_connect(). In that case ->ds_clp may not be set, and the devid may not recently have been marked unavailable. So add a test for ds_clp == NULL and return NULL in that case. Fixes: c23266d532b4 ("NFS4.1 Fix data server connection race") Signed-off-by: NeilBrown Acked-by: Olga Kornievskaia Acked-by: Adamson, Andy Signed-off-by: Trond Myklebust Signed-off-by: Willy Tarreau --- fs/nfs/nfs4filelayoutdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 678cb896453..b906d8e55d8 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -821,7 +821,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) nfs4_wait_ds_connect(ds); } out_test_devid: - if (filelayout_test_devid_unavailable(devid)) + if (ret->ds_clp == NULL || + filelayout_test_devid_unavailable(devid)) ret = NULL; out: return ret; From 34c23b28dbb2169ab1c1500632e76e8625ea707c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 16:26:01 +0100 Subject: [PATCH 090/252] powerpc/ibmebus: Fix further device reference leaks commit 815a7141c4d1b11610dccb7fcbb38633759824f2 upstream. Make sure to drop any reference taken by bus_find_device() when creating devices during init and driver registration. Fixes: 55347cc9962f ("[POWERPC] ibmebus: Add device creation and bus probing based on of_device") Signed-off-by: Johan Hovold Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/kernel/ibmebus.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 8220baa46fa..cce1a4430ec 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -180,6 +180,7 @@ static int ibmebus_create_device(struct device_node *dn) static int ibmebus_create_devices(const struct of_device_id *matches) { struct device_node *root, *child; + struct device *dev; int ret = 0; root = of_find_node_by_path("/"); @@ -188,9 +189,12 @@ static int ibmebus_create_devices(const struct of_device_id *matches) if (!of_match_node(matches, child)) continue; - if (bus_find_device(&ibmebus_bus_type, NULL, child, - ibmebus_match_node)) + dev = bus_find_device(&ibmebus_bus_type, NULL, child, + ibmebus_match_node); + if (dev) { + put_device(dev); continue; + } ret = ibmebus_create_device(child); if (ret) { From 2292bdab91dc7b58671cd328d788ee41196ccbd0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 16:26:00 +0100 Subject: [PATCH 091/252] powerpc/ibmebus: Fix device reference leaks in sysfs interface commit fe0f3168169f7c34c29b0cf0c489f126a7f29643 upstream. Make sure to drop any reference taken by bus_find_device() in the sysfs callbacks that are used to create and destroy devices based on device-tree entries. Fixes: 6bccf755ff53 ("[POWERPC] ibmebus: dynamic addition/removal of adapters, some code cleanup") Signed-off-by: Johan Hovold Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/kernel/ibmebus.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index cce1a4430ec..a1812fbc264 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -266,6 +266,7 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, const char *buf, size_t count) { struct device_node *dn = NULL; + struct device *dev; char *path; ssize_t rc = 0; @@ -273,8 +274,10 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, if (!path) return -ENOMEM; - if (bus_find_device(&ibmebus_bus_type, NULL, path, - ibmebus_match_path)) { + dev = bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path); + if (dev) { + put_device(dev); printk(KERN_WARNING "%s: %s has already been probed\n", __func__, path); rc = -EEXIST; @@ -310,6 +313,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, ibmebus_match_path))) { of_device_unregister(to_platform_device(dev)); + put_device(dev); kfree(path); return count; From dcb7311dc3a8abe166c8c8a154b98d8e70d1fa9b Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 10 Nov 2016 11:30:53 +0200 Subject: [PATCH 092/252] IB/mlx4: Set traffic class in AH commit af4295c117b82a521b05d0daf39ce879d26e6cb1 upstream. Set traffic class within sl_tclass_flowlabel when create iboe AH. Without this the TOS value will be empty when running VLAN tagged traffic, because the TOS value is taken from the traffic class in the address handle attributes. Fixes: 9106c4106974 ('IB/mlx4: Fix SL to 802.1Q priority-bits mapping for IBoE') Signed-off-by: Maor Gottlieb Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/hw/mlx4/ah.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index f55d69500a5..3a85e766906 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -118,7 +118,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) --ah->av.eth.stat_rate; } - + ah->av.eth.sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); /* * HW requires multicast LID so we just choose one. */ @@ -126,7 +128,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr ah->av.ib.dlid = cpu_to_be16(0xc000); memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); - ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); + ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29); return &ah->ibah; } From 7d26287eee2e0f91137a6910204c8ebacca48d4b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 10 Nov 2016 11:30:59 +0200 Subject: [PATCH 093/252] IB/mlx4: Fix port query for 56Gb Ethernet links commit 6fa26208206c406fa529cd73f7ae6bf4181e270b upstream. Report the correct speed in the port attributes when using a 56Gbps ethernet link. Without this change the field is incorrectly set to 10. Fixes: a9c766bb75ee ('IB/mlx4: Fix info returned when querying IBoE ports') Fixes: 2e96691c31ec ('IB: Use central enum for speed instead of hard-coded values') Signed-off-by: Saeed Mahameed Signed-off-by: Yishai Hadas Signed-off-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/hw/mlx4/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 23d734349d8..6b810b12433 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -312,9 +312,11 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, if (err) goto out; - props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? - IB_WIDTH_4X : IB_WIDTH_1X; - props->active_speed = IB_SPEED_QDR; + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) || + (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_SPEED_FDR : IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; From 016f80305e3f299583d1fcedea7c1d045a6a8062 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2016 17:20:47 -0300 Subject: [PATCH 094/252] perf scripting: Avoid leaking the scripting_context variable commit cf346d5bd4b9d61656df2f72565c9b354ef3ca0d upstream. Both register_perl_scripting() and register_python_scripting() allocate this variable, fix it by checking if it already was. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tom Zanussi Cc: Wang Nan Fixes: 7e4b21b84c43 ("perf/scripts: Add Python scripting engine") Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Willy Tarreau --- tools/perf/util/trace-event-scripting.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 8715a1006d0..ae061a45fa0 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -90,7 +90,8 @@ static void register_python_scripting(struct scripting_ops *scripting_ops) if (err) die("error registering py script extension"); - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); } #ifdef NO_LIBPYTHON @@ -153,7 +154,8 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops) if (err) die("error registering pl script extension"); - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); } #ifdef NO_LIBPERL From 2024f68a981fdf9301e01a43fbdec193a9e5181e Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 26 Sep 2016 03:03:40 +0300 Subject: [PATCH 095/252] ARM: dts: imx31: fix clock control module interrupts description commit 2e575cbc930901718cc18e084566ecbb9a4b5ebb upstream. The type of AVIC interrupt controller found on i.MX31 is one-cell, namely 31 for CCM DVFS and 53 for CCM, however for clock control module its interrupts are specified as 3-cells, fix it. Fixes: ef0e4a606fb6 ("ARM: mx31: Replace clk_register_clkdev with clock DT lookup") Acked-by: Rob Herring Signed-off-by: Vladimir Zapolskiy Signed-off-by: Shawn Guo Signed-off-by: Willy Tarreau --- Documentation/devicetree/bindings/clock/imx31-clock.txt | 2 +- arch/arm/boot/dts/imx31.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/imx31-clock.txt b/Documentation/devicetree/bindings/clock/imx31-clock.txt index 19df842c694..8163d565f69 100644 --- a/Documentation/devicetree/bindings/clock/imx31-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx31-clock.txt @@ -77,7 +77,7 @@ Examples: clks: ccm@53f80000{ compatible = "fsl,imx31-ccm"; reg = <0x53f80000 0x4000>; - interrupts = <0 31 0x04 0 53 0x04>; + interrupts = <31>, <53>; #clock-cells = <1>; }; diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index c5449257ad9..3085ac20423 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -97,7 +97,7 @@ clks: ccm@53f80000{ compatible = "fsl,imx31-ccm"; reg = <0x53f80000 0x4000>; - interrupts = <0 31 0x04 0 53 0x04>; + interrupts = <31>, <53>; #clock-cells = <1>; }; }; From 6a4a5fd4c7bc6a06ca26ad7327d046d8d3c0932a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 9 Jan 2017 17:15:18 -0500 Subject: [PATCH 096/252] svcrpc: don't leak contexts on PROC_DESTROY commit 78794d1890708cf94e3961261e52dcec2cc34722 upstream. Context expiry times are in units of seconds since boot, not unix time. The use of get_seconds() here therefore sets the expiry time decades in the future. This prevents timely freeing of contexts destroyed by client RPC_GSS_PROC_DESTROY requests. We'd still free them eventually (when the module is unloaded or the container shut down), but a lot of contexts could pile up before then. Fixes: c5b29f885afe "sunrpc: use seconds since boot in expiry cache" Reported-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Willy Tarreau --- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 62663a08ffb..e625efe0e03 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1518,7 +1518,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; - rsci->h.expiry_time = get_seconds(); + rsci->h.expiry_time = seconds_since_boot(); set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; From 8bc899e505b6b346acbd6b116abb9a183af64bfc Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 5 Jan 2017 19:24:04 +0000 Subject: [PATCH 097/252] mmc: mxs-mmc: Fix additional cycles after transmission stop commit 01167c7b9cbf099c69fe411a228e4e9c7104e123 upstream. According to the code the intention is to append 8 SCK cycles instead of 4 at end of a MMC_STOP_TRANSMISSION command. But this will never happened because it's an AC command not an ADTC command. So fix this by moving the statement into the right function. Signed-off-by: Stefan Wahren Fixes: e4243f13d10e (mmc: mxs-mmc: add mmc host driver for i.MX23/28) Signed-off-by: Ulf Hansson Signed-off-by: Willy Tarreau --- drivers/mmc/host/mxs-mmc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index f3a42321310..01951cd6599 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -312,6 +312,9 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host) cmd0 = BF_SSP(cmd->opcode, CMD0_CMD); cmd1 = cmd->arg; + if (cmd->opcode == MMC_STOP_TRANSMISSION) + cmd0 |= BM_SSP_CMD0_APPEND_8CYC; + if (host->sdio_irq_en) { ctrl0 |= BM_SSP_CTRL0_SDIO_IRQ_CHECK; cmd0 |= BM_SSP_CMD0_CONT_CLKING_EN | BM_SSP_CMD0_SLOW_CLKING_EN; @@ -420,8 +423,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host) ssp->base + HW_SSP_BLOCK_SIZE); } - if ((cmd->opcode == MMC_STOP_TRANSMISSION) || - (cmd->opcode == SD_IO_RW_EXTENDED)) + if (cmd->opcode == SD_IO_RW_EXTENDED) cmd0 |= BM_SSP_CMD0_APPEND_8CYC; cmd1 = cmd->arg; From 490960c19b181238af805e3b027162b721aed933 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 5 Dec 2016 22:14:36 +0100 Subject: [PATCH 098/252] mtd: nand: xway: disable module support commit 73529c872a189c747bdb528ce9b85b67b0e28dec upstream. The xway_nand driver accesses the ltq_ebu_membase symbol which is not exported. This also should not get exported and we should handle the EBU interface in a better way later. This quick fix just deactivated support for building as module. Fixes: 99f2b107924c ("mtd: lantiq: Add NAND support on Lantiq XWAY SoC.") Signed-off-by: Hauke Mehrtens Signed-off-by: Boris Brezillon Signed-off-by: Willy Tarreau --- drivers/mtd/nand/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 50543f16621..faab8cdc1c9 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -537,7 +537,7 @@ config MTD_NAND_FSMC Flexible Static Memory Controller (FSMC) config MTD_NAND_XWAY - tristate "Support for NAND on Lantiq XWAY SoC" + bool "Support for NAND on Lantiq XWAY SoC" depends on LANTIQ && SOC_TYPE_XWAY select MTD_NAND_PLATFORM help From 13c485b26572b0993ad6d26ae73a1ec0b8a38bcb Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 10 Jan 2017 11:49:40 +0100 Subject: [PATCH 099/252] ubifs: Fix journal replay wrt. xattr nodes commit 1cb51a15b576ee325d527726afff40947218fd5e upstream. When replaying the journal it can happen that a journal entry points to a garbage collected node. This is the case when a power-cut occurred between a garbage collect run and a commit. In such a case nodes have to be read using the failable read functions to detect whether the found node matches what we expect. One corner case was forgotten, when the journal contains an entry to remove an inode all xattrs have to be removed too. UBIFS models xattr like directory entries, so the TNC code iterates over all xattrs of the inode and removes them too. This code re-uses the functions for walking directories and calls ubifs_tnc_next_ent(). ubifs_tnc_next_ent() expects to be used only after the journal and aborts when a node does not match the expected result. This behavior can render an UBIFS volume unmountable after a power-cut when xattrs are used. Fix this issue by using failable read functions in ubifs_tnc_next_ent() too when replaying the journal. Fixes: 1e51764a3c2ac05a ("UBIFS: add new flash file system") Reported-by: Rock Lee Reviewed-by: David Gstir Signed-off-by: Richard Weinberger Signed-off-by: Willy Tarreau --- fs/ubifs/tnc.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 349f31a30f4..fdf2ca1dd77 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -34,6 +34,11 @@ #include #include "ubifs.h" +static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs); +static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_zbranch *zbr, void *node); + /* * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. * @NAME_LESS: name corresponding to the first argument is less than second @@ -419,7 +424,19 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, return 0; } - err = ubifs_tnc_read_node(c, zbr, node); + if (c->replaying) { + err = fallible_read_node(c, &zbr->key, zbr, node); + /* + * When the node was not found, return -ENOENT, 0 otherwise. + * Negative return codes stay as-is. + */ + if (err == 0) + err = -ENOENT; + else if (err == 1) + err = 0; + } else { + err = ubifs_tnc_read_node(c, zbr, node); + } if (err) return err; @@ -2783,7 +2800,11 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, if (nm->name) { if (err) { /* Handle collisions */ - err = resolve_collision(c, key, &znode, &n, nm); + if (c->replaying) + err = fallible_resolve_collision(c, key, &znode, &n, + nm, 0); + else + err = resolve_collision(c, key, &znode, &n, nm); dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); if (unlikely(err < 0)) From f4c0fd32f5889f09021f39200b0b4576abf1f0e1 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 16:25:20 +0000 Subject: [PATCH 100/252] arm64/ptrace: Preserve previous registers for short regset write commit 9a17b876b573441bfb3387ad55d98bf7184daf9d upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Fixes: 478fcb2cdb23 ("arm64: Debugging support") Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Willy Tarreau --- arch/arm64/kernel/ptrace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index dfad98fda4f..777763d701d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -509,7 +509,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - struct user_pt_regs newregs; + struct user_pt_regs newregs = task_pt_regs(target)->user_regs; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1); if (ret) @@ -539,7 +539,8 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - struct user_fpsimd_state newstate; + struct user_fpsimd_state newstate = + target->thread.fpsimd_state.user_fpsimd; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) @@ -562,7 +563,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - unsigned long tls; + unsigned long tls = target->thread.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) From b36ad645442832e7a6c1c2ea663f6b7dfb410c4c Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 16:25:23 +0000 Subject: [PATCH 101/252] arm64/ptrace: Avoid uninitialised struct padding in fpr_set() commit aeb1f39d814b2e21e5e5706a48834bfd553d0059 upstream. This patch adds an explicit __reserved[] field to user_fpsimd_state to replace what was previously unnamed padding. This ensures that data in this region are propagated across assignment rather than being left possibly uninitialised at the destination. Fixes: 60ffc30d5652 ("arm64: Exception handling") Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Willy Tarreau --- arch/arm64/include/uapi/asm/ptrace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 6913643bbe5..c136fd53c84 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -75,6 +75,7 @@ struct user_fpsimd_state { __uint128_t vregs[32]; __u32 fpsr; __u32 fpcr; + __u32 __reserved[2]; }; struct user_hwdebug_state { From 518a744a69892a3afa0ae12689e9b34130a2eb5b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 16:25:24 +0000 Subject: [PATCH 102/252] arm64/ptrace: Reject attempts to set incomplete hardware breakpoint fields commit ad9e202aa1ce571b1d7fed969d06f66067f8a086 upstream. We cannot preserve partial fields for hardware breakpoints, because the values written by userspace to the hardware breakpoint registers can't subsequently be recovered intact from the hardware. So, just reject attempts to write incomplete fields with -EINVAL. Fixes: 478fcb2cdb23 ("arm64: Debugging support") Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Willy Tarreau --- arch/arm64/kernel/ptrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 777763d701d..015775ad760 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -464,6 +464,8 @@ static int hw_break_set(struct task_struct *target, /* (address, ctrl) registers */ limit = regset->n * regset->size; while (count && offset < limit) { + if (count < PTRACE_HBP_ADDR_SZ) + return -EINVAL; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr, offset, offset + PTRACE_HBP_ADDR_SZ); if (ret) @@ -473,6 +475,8 @@ static int hw_break_set(struct task_struct *target, return ret; offset += PTRACE_HBP_ADDR_SZ; + if (!count) + break; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, offset, offset + PTRACE_HBP_CTRL_SZ); if (ret) From ef60264cbd5d5a9d183799c2284f543ac69f321a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Nov 2016 16:20:37 +0100 Subject: [PATCH 103/252] ARM: ux500: fix prcmu_is_cpu_in_wfi() calculation commit f0e8faa7a5e894b0fc99d24be1b18685a92ea466 upstream. This function clearly never worked and always returns true, as pointed out by gcc-7: arch/arm/mach-ux500/pm.c: In function 'prcmu_is_cpu_in_wfi': arch/arm/mach-ux500/pm.c:137:212: error: ?: using integer constants in boolean context, the expression will always evaluate to 'true' [-Werror=int-in-bool-context] With the added braces, the condition actually makes sense. Fixes: 34fe6f107eab ("mfd : Check if the other db8500 core is in WFI") Signed-off-by: Arnd Bergmann Acked-by: Daniel Lezcano Signed-off-by: Linus Walleij Signed-off-by: Willy Tarreau --- arch/arm/mach-ux500/pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-ux500/pm.c b/arch/arm/mach-ux500/pm.c index 1a468f0fd22..9d532568b8b 100644 --- a/arch/arm/mach-ux500/pm.c +++ b/arch/arm/mach-ux500/pm.c @@ -128,8 +128,8 @@ bool prcmu_pending_irq(void) */ bool prcmu_is_cpu_in_wfi(int cpu) { - return readl(PRCM_ARM_WFI_STANDBY) & cpu ? PRCM_ARM_WFI_STANDBY_WFI1 : - PRCM_ARM_WFI_STANDBY_WFI0; + return readl(PRCM_ARM_WFI_STANDBY) & + (cpu ? PRCM_ARM_WFI_STANDBY_WFI1 : PRCM_ARM_WFI_STANDBY_WFI0); } /* From abae7dd2bf7af0a6dab4f04f7f8641bac15489ed Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sat, 10 Sep 2016 13:59:49 -0300 Subject: [PATCH 104/252] ite-cir: initialize use_demodulator before using it commit 7ec03e60ef81c19b5d3a46dd070ee966774b860f upstream. Function ite_set_carrier_params() uses variable use_demodulator after having initialized it to false in some if branches, but this variable is never set to true otherwise. This bug has been found using clang -Wsometimes-uninitialized warning flag. Fixes: 620a32bba4a2 ("[media] rc: New rc-based ite-cir driver for several ITE CIRs") Signed-off-by: Nicolas Iooss Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/rc/ite-cir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index 63b42252166..7a754ec826a 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -263,6 +263,8 @@ static void ite_set_carrier_params(struct ite_dev *dev) if (allowance > ITE_RXDCR_MAX) allowance = ITE_RXDCR_MAX; + + use_demodulator = true; } } From d3005b09a6bebb2d259441a7e8c88b98b1f0669d Mon Sep 17 00:00:00 2001 From: Robert Doebbelin Date: Mon, 7 Mar 2016 09:50:56 +0100 Subject: [PATCH 105/252] fuse: do not use iocb after it may have been freed commit 7cabc61e01a0a8b663bd2b4c982aa53048218734 upstream. There's a race in fuse_direct_IO(), whereby is_sync_kiocb() is called on an iocb that could have been freed if async io has already completed. The fix in this case is simple and obvious: cache the result before starting io. It was discovered by KASan: Kernel: ================================================================== Kernel: BUG: KASan: use after free in fuse_direct_IO+0xb1a/0xcc0 at addr ffff88036c414390 Signed-off-by: Robert Doebbelin Signed-off-by: Miklos Szeredi Fixes: bcba24ccdc82 ("fuse: enable asynchronous processing direct IO") Signed-off-by: Jan Kara Signed-off-by: Willy Tarreau --- fs/fuse/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 35f604b5f40..7ada0f04e31 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2398,6 +2398,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t i_size; size_t count = iov_length(iov, nr_segs); struct fuse_io_priv *io; + bool is_sync = is_sync_kiocb(iocb); pos = offset; inode = file->f_mapping->host; @@ -2433,7 +2434,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) + if (!is_sync && (offset + count > i_size) && rw == WRITE) io->async = false; if (rw == WRITE) @@ -2445,7 +2446,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (!is_sync_kiocb(iocb)) + if (!is_sync) return -EIOCBQUEUED; ret = wait_on_sync_kiocb(iocb); From 7c2e0b982abec2fc0c429b32b7b705ea1d3dd3b1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 9 Aug 2016 08:27:17 +0100 Subject: [PATCH 106/252] crypto: caam - fix non-hmac hashes commit a0118c8b2be9297aed8e915c60b4013326b256d4 upstream. Since 6de62f15b581 ("crypto: algif_hash - Require setkey before accept(2)"), the AF_ALG interface requires userspace to provide a key to any algorithm that has a setkey method. However, the non-HMAC algorithms are not keyed, so setting a key is unnecessary. Fix this by removing the setkey method from the non-keyed hash algorithms. Fixes: 6de62f15b581 ("crypto: algif_hash - Require setkey before accept(2)") Signed-off-by: Russell King Signed-off-by: Herbert Xu Signed-off-by: Willy Tarreau --- drivers/crypto/caam/caamhash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index dda43cc4b6c..e9d8b235f68 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1793,6 +1793,7 @@ caam_hash_alloc(struct device *ctrldev, struct caam_hash_template *template, template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); + t_alg->ahash_alg.setkey = NULL; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; From 784e4e9f517cca64eb3262058562a7d39911b8ae Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:42 +0200 Subject: [PATCH 107/252] drm/i915: Don't leak edid in intel_crt_detect_ddc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c34f078675f505c4437919bb1897b1351f16a050 upstream. In the path where intel_crt_detect_ddc() detects a CRT, if would return true without freeing the edid. Fixes: a2bd1f541f19 ("drm/i915: check whether we actually received an edid in detect_ddc") Cc: Chris Wilson Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-1-git-send-email-ander.conselvan.de.oliveira@intel.com Signed-off-by: Jani Nikula Signed-off-by: Willy Tarreau --- drivers/gpu/drm/i915/intel_crt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 53435a9d847..93c80d7143e 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -428,6 +428,7 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) struct drm_i915_private *dev_priv = crt->base.base.dev->dev_private; struct edid *edid; struct i2c_adapter *i2c; + bool ret = false; BUG_ON(crt->base.type != INTEL_OUTPUT_ANALOG); @@ -444,17 +445,17 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) */ if (!is_digital) { DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n"); - return true; + ret = true; + } else { + DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } - - DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } else { DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [no valid EDID found]\n"); } kfree(edid); - return false; + return ret; } static enum drm_connector_status From a632ba439fae8a8e4740d9c56eace0809fb2ddbe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Dec 2016 09:41:29 -0200 Subject: [PATCH 108/252] s5k4ecgx: select CRC32 helper commit c739c0a7c3c2472d7562b8f802cdce44d2597c8b upstream. A rare randconfig build failure shows up in this driver when the CRC32 helper is not there: drivers/media/built-in.o: In function `s5k4ecgx_s_power': s5k4ecgx.c:(.text+0x9eb4): undefined reference to `crc32_le' This adds the 'select' that all other users of this function have. Fixes: 8b99312b7214 ("[media] Add v4l2 subdev driver for S5K4ECGX sensor") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/i2c/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index f981d50a2a8..936ef2f881f 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -549,6 +549,7 @@ config VIDEO_S5K6AA config VIDEO_S5K4ECGX tristate "Samsung S5K4ECGX sensor support" depends on I2C && VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API + select CRC32 ---help--- This is a V4L2 sensor-level driver for Samsung S5K4ECGX 5M camera sensor with an embedded SoC image signal processor. From b2c6a97c8b80b26916de7059db214dd131b550fd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Jan 2017 18:39:40 +0200 Subject: [PATCH 109/252] platform/x86: intel_mid_powerbtn: Set IRQ_ONESHOT commit 5a00b6c2438460b870a451f14593fc40d3c7edf6 upstream. The commit 1c6c69525b40 ("genirq: Reject bogus threaded irq requests") starts refusing misconfigured interrupt handlers. This makes intel_mid_powerbtn not working anymore. Add a mandatory flag to a threaded IRQ request in the driver. Fixes: 1c6c69525b40 ("genirq: Reject bogus threaded irq requests") Signed-off-by: Andy Shevchenko Signed-off-by: Willy Tarreau --- drivers/platform/x86/intel_mid_powerbtn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index f59683aa13d..fc6d84e202e 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -78,8 +78,8 @@ static int mfld_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - error = request_threaded_irq(irq, NULL, mfld_pb_isr, IRQF_NO_SUSPEND, - DRIVER_NAME, input); + error = request_threaded_irq(irq, NULL, mfld_pb_isr, IRQF_NO_SUSPEND | + IRQF_ONESHOT, DRIVER_NAME, input); if (error) { dev_err(&pdev->dev, "Unable to request irq %d for mfld power" "button\n", irq); From 48decd9af36a3d922449cc35f7a262527e0a1023 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jan 2017 12:12:17 -0800 Subject: [PATCH 110/252] net: fix harmonize_features() vs NETIF_F_HIGHDMA commit 7be2c82cfd5d28d7adb66821a992604eb6dd112e upstream. Ashizuka reported a highmem oddity and sent a patch for freescale fec driver. But the problem root cause is that core networking stack must ensure no skb with highmem fragment is ever sent through a device that does not assert NETIF_F_HIGHDMA in its features. We need to call illegal_highdma() from harmonize_features() regardless of CSUM checks. Fixes: ec5f06156423 ("net: Kill link between CSUM and SG features.") Signed-off-by: Eric Dumazet Cc: Pravin Shelar Reported-by: "Ashizuka, Yuusuke" Signed-off-by: David S. Miller Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6900ff08008..4dbc7af9673 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2461,9 +2461,9 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(dev, skb)) { - features &= ~NETIF_F_SG; } + if (illegal_highdma(dev, skb)) + features &= ~NETIF_F_SG; return features; } From 5bcc3facb5489f6974393528b3278ab819495e54 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 19 Jan 2017 16:36:39 +0300 Subject: [PATCH 111/252] tcp: initialize max window for a new fastopen socket commit 0dbd7ff3ac5017a46033a9d0a87a8267d69119d9 upstream. Found that if we run LTP netstress test with large MSS (65K), the first attempt from server to send data comparable to this MSS on fastopen connection will be delayed by the probe timer. Here is an example: < S seq 0:0 win 43690 options [mss 65495 wscale 7 tfo cookie] length 32 > S. seq 0:0 ack 1 win 43690 options [mss 65495 wscale 7] length 0 < . ack 1 win 342 length 0 Inside tcp_sendmsg(), tcp_send_mss() returns max MSS in 'mss_now', as well as in 'size_goal'. This results the segment not queued for transmition until all the data copied from user buffer. Then, inside __tcp_push_pending_frames(), it breaks on send window test and continues with the check probe timer. Fragmentation occurs in tcp_write_wakeup()... +0.2 > P. seq 1:43777 ack 1 win 342 length 43776 < . ack 43777, win 1365 length 0 > P. seq 43777:65001 ack 1 win 342 options [...] length 21224 ... This also contradicts with the fact that we should bound to the half of the window if it is large. Fix this flaw by correctly initializing max_window. Before that, it could have large values that affect further calculations of 'size_goal'. [js] the code is in tcp_ipv4.c in 3.12 yet Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Signed-off-by: Alexey Kodanev Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- net/ipv4/tcp_ipv4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6504a085ca6..1e24e5a426c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1422,6 +1422,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + tp->max_window = tp->snd_wnd; /* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the SYN table of the parent From dd105cf0be30462440ae0b7ca398ca5c05cd011a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 Jan 2017 11:37:50 -0500 Subject: [PATCH 112/252] svcrpc: fix oops in absence of krb5 module commit 034dd34ff4916ec1f8f74e39ca3efb04eab2f791 upstream. Olga Kornievskaia says: "I ran into this oops in the nfsd (below) (4.10-rc3 kernel). To trigger this I had a client (unsuccessfully) try to mount the server with krb5 where the server doesn't have the rpcsec_gss_krb5 module built." The problem is that rsci.cred is copied from a svc_cred structure that gss_proxy didn't properly initialize. Fix that. [120408.542387] general protection fault: 0000 [#1] SMP ... [120408.565724] CPU: 0 PID: 3601 Comm: nfsd Not tainted 4.10.0-rc3+ #16 [120408.567037] Hardware name: VMware, Inc. VMware Virtual = Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [120408.569225] task: ffff8800776f95c0 task.stack: ffffc90003d58000 [120408.570483] RIP: 0010:gss_mech_put+0xb/0x20 [auth_rpcgss] ... [120408.584946] ? rsc_free+0x55/0x90 [auth_rpcgss] [120408.585901] gss_proxy_save_rsc+0xb2/0x2a0 [auth_rpcgss] [120408.587017] svcauth_gss_proxy_init+0x3cc/0x520 [auth_rpcgss] [120408.588257] ? __enqueue_entity+0x6c/0x70 [120408.589101] svcauth_gss_accept+0x391/0xb90 [auth_rpcgss] [120408.590212] ? try_to_wake_up+0x4a/0x360 [120408.591036] ? wake_up_process+0x15/0x20 [120408.592093] ? svc_xprt_do_enqueue+0x12e/0x2d0 [sunrpc] [120408.593177] svc_authenticate+0xe1/0x100 [sunrpc] [120408.594168] svc_process_common+0x203/0x710 [sunrpc] [120408.595220] svc_process+0x105/0x1c0 [sunrpc] [120408.596278] nfsd+0xe9/0x160 [nfsd] [120408.597060] kthread+0x101/0x140 [120408.597734] ? nfsd_destroy+0x60/0x60 [nfsd] [120408.598626] ? kthread_park+0x90/0x90 [120408.599448] ret_from_fork+0x22/0x30 Fixes: 1d658336b05f "SUNRPC: Add RPC based upcall mechanism for RPCGSS auth" Cc: Simo Sorce Reported-by: Olga Kornievskaia Tested-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Willy Tarreau --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index e0062c544ac..a9ca70579eb 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -260,7 +260,7 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, if (!oa->data) return -ENOMEM; - creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL); + creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { kfree(oa->data); return -ENOMEM; From fcd0b4411d7b0ec70560bf4797e7ebc5b5be6aa8 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 17:11:56 +0100 Subject: [PATCH 113/252] ARM: 8643/3: arm/ptrace: Preserve previous registers for short regset write commit 228dbbfb5d77f8e047b2a1d78da14b7158433027 upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Fixes: 5be6f62b0059 ("ARM: 6883/1: ptrace: Migrate to regsets framework") Signed-off-by: Dave Martin Acked-by: Russell King Signed-off-by: Russell King Signed-off-by: Willy Tarreau --- arch/arm/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 4e2110d48c4..dfdd683d373 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -600,7 +600,7 @@ static int gpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - struct pt_regs newregs; + struct pt_regs newregs = *task_pt_regs(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, From 22ae324e22230fe9d7b50f32355f89eb9acf1fa9 Mon Sep 17 00:00:00 2001 From: Thorsten Horstmann Date: Fri, 3 Feb 2017 14:38:29 +0100 Subject: [PATCH 114/252] mac80211: Fix adding of mesh vendor IEs commit da7061c82e4a1bc6a5e134ef362c86261906c860 upstream. The function ieee80211_ie_split_vendor doesn't return 0 on errors. Instead it returns any offset < ielen when WLAN_EID_VENDOR_SPECIFIC is found. The return value in mesh_add_vendor_ies must therefore be checked against ifmsh->ie_len and not 0. Otherwise all ifmsh->ie starting with WLAN_EID_VENDOR_SPECIFIC will be rejected. Fixes: 082ebb0c258d ("mac80211: fix mesh beacon format") Signed-off-by: Thorsten Horstmann Signed-off-by: Mathias Kretschmer Signed-off-by: Simon Wunderlich [sven@narfation.org: Add commit message] Signed-off-by: Sven Eckelmann Signed-off-by: Johannes Berg Signed-off-by: Willy Tarreau --- net/mac80211/mesh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index f8765cc84e4..ddc63f92fa2 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -345,7 +345,7 @@ int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, /* fast-forward to vendor IEs */ offset = ieee80211_ie_split_vendor(ifmsh->ie, ifmsh->ie_len, 0); - if (offset) { + if (offset < ifmsh->ie_len) { len = ifmsh->ie_len - offset; data = ifmsh->ie + offset; if (skb_tailroom(skb) < len) From 01e26ca64b78e069b522a18c151bba37c2715144 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Wed, 8 Feb 2017 15:34:22 +0100 Subject: [PATCH 115/252] scsi: zfcp: fix use-after-free by not tracing WKA port open/close on failed send commit 2dfa6688aafdc3f74efeb1cf05fb871465d67f79 upstream. Dan Carpenter kindly reported: The patch d27a7cb91960: "zfcp: trace on request for open and close of WKA port" from Aug 10, 2016, leads to the following static checker warning: drivers/s390/scsi/zfcp_fsf.c:1615 zfcp_fsf_open_wka_port() warn: 'req' was already freed. drivers/s390/scsi/zfcp_fsf.c 1609 zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); 1610 retval = zfcp_fsf_req_send(req); 1611 if (retval) 1612 zfcp_fsf_req_free(req); ^^^ Freed. 1613 out: 1614 spin_unlock_irq(&qdio->req_q_lock); 1615 if (req && !IS_ERR(req)) 1616 zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id); ^^^^^^^^^^^ Use after free. 1617 return retval; 1618 } Same thing for zfcp_fsf_close_wka_port() as well. Rather than relying on req being NULL (or ERR_PTR) for all cases where we don't want to trace or should not trace, simply check retval which is unconditionally initialized with -EIO != 0 and it can only become 0 on successful retval = zfcp_fsf_req_send(req). With that we can also remove the then again unnecessary unconditional initialization of req which was introduced with that earlier commit. Reported-by: Dan Carpenter Suggested-by: Benjamin Block Signed-off-by: Steffen Maier Fixes: d27a7cb91960 ("zfcp: trace on request for open and close of WKA port") Reviewed-by: Benjamin Block Reviewed-by: Jens Remus Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/s390/scsi/zfcp_fsf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index f246097b7c6..ad5718401ea 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1607,7 +1607,7 @@ out: int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; - struct zfcp_fsf_req *req = NULL; + struct zfcp_fsf_req *req; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1636,7 +1636,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) zfcp_fsf_req_free(req); out: spin_unlock_irq(&qdio->req_q_lock); - if (req && !IS_ERR(req)) + if (!retval) zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id); return retval; } @@ -1662,7 +1662,7 @@ static void zfcp_fsf_close_wka_port_handler(struct zfcp_fsf_req *req) int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; - struct zfcp_fsf_req *req = NULL; + struct zfcp_fsf_req *req; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1691,7 +1691,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) zfcp_fsf_req_free(req); out: spin_unlock_irq(&qdio->req_q_lock); - if (req && !IS_ERR(req)) + if (!retval) zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req->req_id); return retval; } From 5c9d55e1764549ac7735c7fba6d2f26907e31b00 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 26 Jan 2017 17:32:11 +0300 Subject: [PATCH 116/252] drm/i915: fix use-after-free in page_flip_completed() commit 5351fbb1bf1413f6024892093528280769ca852f upstream. page_flip_completed() dereferences 'work' variable after executing queue_work(). This is not safe as the 'work' item might be already freed by queued work: BUG: KASAN: use-after-free in page_flip_completed+0x3ff/0x490 at addr ffff8803dc010f90 Call Trace: __asan_report_load8_noabort+0x59/0x80 page_flip_completed+0x3ff/0x490 intel_finish_page_flip_mmio+0xe3/0x130 intel_pipe_handle_vblank+0x2d/0x40 gen8_irq_handler+0x4a7/0xed0 __handle_irq_event_percpu+0xf6/0x860 handle_irq_event_percpu+0x6b/0x160 handle_irq_event+0xc7/0x1b0 handle_edge_irq+0x1f4/0xa50 handle_irq+0x41/0x70 do_IRQ+0x9a/0x200 common_interrupt+0x89/0x89 Freed: kfree+0x113/0x4d0 intel_unpin_work_fn+0x29a/0x3b0 process_one_work+0x79e/0x1b70 worker_thread+0x611/0x1460 kthread+0x241/0x3a0 ret_from_fork+0x27/0x40 Move queue_work() after trace_i915_flip_complete() to fix this. Fixes: e5510fac98a7 ("drm/i915: add tracepoints for flip requests & completions") Signed-off-by: Andrey Ryabinin Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170126143211.24013-1-aryabinin@virtuozzo.com Signed-off-by: Jani Nikula Signed-off-by: Andrey Ryabinin Signed-off-by: Willy Tarreau --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8814b0dbfc4..a7dbdec6899 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7052,9 +7052,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev, wake_up_all(&dev_priv->pending_flip_queue); - queue_work(dev_priv->wq, &work->work); - trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj); + + queue_work(dev_priv->wq, &work->work); } void intel_finish_page_flip(struct drm_device *dev, int pipe) From efca6f526324ac56ff6611d616727dbb4858e9fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Feb 2017 10:31:35 -0800 Subject: [PATCH 117/252] net: use a work queue to defer net_disable_timestamp() work commit 5fa8bbda38c668e56b0c6cdecced2eac2fe36dec upstream. Dmitry reported a warning [1] showing that we were calling net_disable_timestamp() -> static_key_slow_dec() from a non process context. Grabbing a mutex while holding a spinlock or rcu_read_lock() is not allowed. As Cong suggested, we now use a work queue. It is possible netstamp_clear() exits while netstamp_needed_deferred is not zero, but it is probably not worth trying to do better than that. netstamp_needed_deferred atomic tracks the exact number of deferred decrements. [1] [ INFO: suspicious RCU usage. ] 4.10.0-rc5+ #192 Not tainted ------------------------------- ./include/linux/rcupdate.h:561 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 2 locks held by syz-executor14/23111: #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock include/net/sock.h:1454 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [] rawv6_sendmsg+0x1e65/0x3ec0 net/ipv6/raw.c:919 #1: (rcu_read_lock){......}, at: [] nf_hook include/linux/netfilter.h:201 [inline] #1: (rcu_read_lock){......}, at: [] __ip6_local_out+0x258/0x840 net/ipv6/output_core.c:160 stack backtrace: CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4452 rcu_preempt_sleep_check include/linux/rcupdate.h:560 [inline] ___might_sleep+0x560/0x650 kernel/sched/core.c:7748 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sock_wfree+0xae/0x120 net/core/sock.c:1645 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put include/net/inet_frag.h:133 [inline] nf_ct_frag6_gather+0x1106/0x3840 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook include/linux/netfilter.h:212 [inline] __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x600 net/socket.c:848 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 vfs_writev+0x87/0xc0 fs/read_write.c:911 do_writev+0x110/0x2c0 fs/read_write.c:944 SYSC_writev fs/read_write.c:1017 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1014 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x445559 RSP: 002b:00007f6f46fceb58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000445559 RDX: 0000000000000001 RSI: 0000000020f1eff0 RDI: 0000000000000005 RBP: 00000000006e19c0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000700000 R13: 0000000020f59000 R14: 0000000000000015 R15: 0000000000020400 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:752 in_atomic(): 1, irqs_disabled(): 0, pid: 23111, name: syz-executor14 INFO: lockdep is turned off. CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 ___might_sleep+0x47e/0x650 kernel/sched/core.c:7780 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sock_wfree+0xae/0x120 net/core/sock.c:1645 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put include/net/inet_frag.h:133 [inline] nf_ct_frag6_gather+0x1106/0x3840 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook include/linux/netfilter.h:212 [inline] __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x600 net/socket.c:848 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 vfs_writev+0x87/0xc0 fs/read_write.c:911 do_writev+0x110/0x2c0 fs/read_write.c:944 SYSC_writev fs/read_write.c:1017 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1014 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x445559 Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") Suggested-by: Cong Wang Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/core/dev.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4dbc7af9673..11535a94604 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1559,24 +1559,19 @@ EXPORT_SYMBOL(call_netdevice_notifiers); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL -/* We are not allowed to call static_key_slow_dec() from irq context - * If net_disable_timestamp() is called from irq context, defer the - * static_key_slow_dec() calls. - */ static atomic_t netstamp_needed_deferred; +static void netstamp_clear(struct work_struct *work) +{ + int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + + while (deferred--) + static_key_slow_dec(&netstamp_needed); +} +static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { -#ifdef HAVE_JUMP_LABEL - int deferred = atomic_xchg(&netstamp_needed_deferred, 0); - - if (deferred) { - while (--deferred) - static_key_slow_dec(&netstamp_needed); - return; - } -#endif static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1584,12 +1579,12 @@ EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - if (in_interrupt()) { - atomic_inc(&netstamp_needed_deferred); - return; - } -#endif + /* net_disable_timestamp() can be called from non process context */ + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_dec(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_disable_timestamp); From 050c309794e9e68319843d452196994bcbde245f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Feb 2017 11:16:52 -0800 Subject: [PATCH 118/252] ipv4: keep skb->dst around in presence of IP options commit 34b2cef20f19c87999fff3da4071e66937db9644 upstream. Andrey Konovalov got crashes in __ip_options_echo() when a NULL skb->dst is accessed. ipv4_pktinfo_prepare() should not drop the dst if (evil) IP options are present. We could refine the test to the presence of ts_needtime or srr, but IP options are not often used, so let's be conservative. Thanks to syzkaller team for finding this bug. Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/ip_sockglue.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f6603142cb3..3d009e17416 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1042,7 +1042,14 @@ void ipv4_pktinfo_prepare(struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + /* We need to keep the dst for __ip_options_echo() + * We could restrict the test to opt.ts_needtime || opt.srr, + * but the following is good enough as IP options are not often used. + */ + if (unlikely(IPCB(skb)->opt.optlen)) + skb_dst_force(skb); + else + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, From 96296cddf9d174bd1a925e2ffa4463f2eb3259c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Feb 2017 00:03:26 -0800 Subject: [PATCH 119/252] netlabel: out of bound access in cipso_v4_validate() commit d71b7896886345c53ef1d84bda2bc758554f5d61 upstream. syzkaller found another out of bound access in ip_options_compile(), or more exactly in cipso_v4_validate() Fixes: 20e2a8648596 ("cipso: handle CIPSO options correctly when NetLabel is disabled") Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Paul Moore Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- include/net/cipso_ipv4.h | 4 ++++ net/ipv4/cipso_ipv4.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a8c2ef6d3b9..9078b31d336 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -303,6 +303,10 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, } for (opt_iter = 6; opt_iter < opt_len;) { + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto out; + } tag_len = opt[opt_iter + 1]; if ((tag_len == 0) || (opt[opt_iter + 1] > (opt_len - opt_iter))) { err_offset = opt_iter + 1; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 667c1d4ca98..4322372dddb 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1649,6 +1649,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) goto validate_return_locked; } + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto validate_return_locked; + } tag_len = tag[1]; if (tag_len > (opt_len - opt_iter)) { err_offset = opt_iter + 1; From a05c8e396426c36837798bb253140774b047600a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Feb 2017 23:18:55 -0800 Subject: [PATCH 120/252] ip6_gre: fix ip6gre_err() invalid reads commit 7892032cfe67f4bde6fc2ee967e45a8fbaf33756 upstream. Andrey Konovalov reported out of bound accesses in ip6gre_err() If GRE flags contains GRE_KEY, the following expression *(((__be32 *)p) + (grehlen / 4) - 1) accesses data ~40 bytes after the expected point, since grehlen includes the size of IPv6 headers. Let's use a "struct gre_base_hdr *greh" pointer to make this code more readable. p[1] becomes greh->protocol. grhlen is the GRE header length. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/ip6_gre.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 603f251b6ca..ae88e17f5c7 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -55,6 +55,7 @@ #include #include #include +#include static bool log_ecn_error = true; @@ -365,35 +366,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; - __be16 *p = (__be16 *)(skb->data + offset); - int grehlen = offset + 4; + const struct gre_base_hdr *greh; + const struct ipv6hdr *ipv6h; + int grehlen = sizeof(*greh); struct ip6_tnl *t; + int key_off = 0; __be16 flags; + __be32 key; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } + if (!pskb_may_pull(skb, offset + grehlen)) + return; + greh = (const struct gre_base_hdr *)(skb->data + offset); + flags = greh->flags; + if (flags & (GRE_VERSION | GRE_ROUTING)) + return; + if (flags & GRE_CSUM) + grehlen += 4; + if (flags & GRE_KEY) { + key_off = grehlen + offset; + grehlen += 4; } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (!pskb_may_pull(skb, grehlen)) + if (!pskb_may_pull(skb, offset + grehlen)) return; ipv6h = (const struct ipv6hdr *)skb->data; - p = (__be16 *)(skb->data + offset); + greh = (const struct gre_base_hdr *)(skb->data + offset); + key = key_off ? *(__be32 *)(skb->data + key_off) : 0; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, - flags & GRE_KEY ? - *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); + key, greh->protocol); if (t == NULL) return; From c6dfb877c41855552173987d6869a2f2da287527 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 7 Feb 2017 12:59:46 -0800 Subject: [PATCH 121/252] ping: fix a null pointer dereference commit 73d2c6678e6c3af7e7a42b1e78cd0211782ade32 upstream. Andrey reported a kernel crash: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 2 PID: 3880 Comm: syz-executor1 Not tainted 4.10.0-rc6+ #124 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff880060048040 task.stack: ffff880069be8000 RIP: 0010:ping_v4_push_pending_frames net/ipv4/ping.c:647 [inline] RIP: 0010:ping_v4_sendmsg+0x1acd/0x23f0 net/ipv4/ping.c:837 RSP: 0018:ffff880069bef8b8 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: ffff880069befb90 RCX: 0000000000000000 RDX: 0000000000000018 RSI: ffff880069befa30 RDI: 00000000000000c2 RBP: ffff880069befbb8 R08: 0000000000000008 R09: 0000000000000000 R10: 0000000000000002 R11: 0000000000000000 R12: ffff880069befab0 R13: ffff88006c624a80 R14: ffff880069befa70 R15: 0000000000000000 FS: 00007f6f7c716700(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004a6f28 CR3: 000000003a134000 CR4: 00000000000006e0 Call Trace: inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 SYSC_sendto+0x660/0x810 net/socket.c:1687 SyS_sendto+0x40/0x50 net/socket.c:1655 entry_SYSCALL_64_fastpath+0x1f/0xc2 This is because we miss a check for NULL pointer for skb_peek() when the queue is empty. Other places already have the same check. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/ping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 459b957104a..431d597515d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -450,6 +450,8 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); + if (!skb) + return 0; pfh->wcheck = csum_partial((char *)&pfh->icmph, sizeof(struct icmphdr), pfh->wcheck); pfh->icmph.checksum = csum_fold(pfh->wcheck); From 3d753377d2f56c59cae37f9e46db4b8fee643f73 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Feb 2017 16:15:52 -0800 Subject: [PATCH 122/252] l2tp: do not use udp_ioctl() commit 72fb96e7bdbbdd4421b0726992496531060f3636 upstream. udp_ioctl(), as its name suggests, is used by UDP protocols, but is also used by L2TP :( L2TP should use its own handler, because it really does not look the same. SIOCINQ for instance should not assume UDP checksum or headers. Thanks to Andrey and syzkaller team for providing the report and a nice reproducer. While crashes only happen on recent kernels (after commit 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue")), this probably needs to be backported to older kernels. Fixes: 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue") Fixes: 85584672012e ("udp: Fix udp_poll() and ioctl()") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Acked-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/l2tp/l2tp_core.h | 1 + net/l2tp/l2tp_ip.c | 27 ++++++++++++++++++++++++++- net/l2tp/l2tp_ip6.c | 2 +- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 2f89d43877d..a98c854c252 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -256,6 +256,7 @@ extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); /* Session reference counts. Incremented when code obtains a reference * to a session. diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 07f8b97f9ae..f4d30b509cd 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -554,6 +555,30 @@ out: return err ? err : copied; } +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int amount; + + switch (cmd) { + case SIOCOUTQ: + amount = sk_wmem_alloc_get(sk); + break; + case SIOCINQ: + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + amount = skb ? skb->len : 0; + spin_unlock_bh(&sk->sk_receive_queue.lock); + break; + + default: + return -ENOIOCTLCMD; + } + + return put_user(amount, (int __user *)arg); +} +EXPORT_SYMBOL(l2tp_ioctl); + static struct proto l2tp_ip_prot = { .name = "L2TP/IP", .owner = THIS_MODULE, @@ -562,7 +587,7 @@ static struct proto l2tp_ip_prot = { .bind = l2tp_ip_bind, .connect = l2tp_ip_connect, .disconnect = l2tp_ip_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip_destroy_sock, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index db96af978da..15367918413 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -716,7 +716,7 @@ static struct proto l2tp_ip6_prot = { .bind = l2tp_ip6_bind, .connect = l2tp_ip6_connect, .disconnect = l2tp_ip6_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip6_destroy_sock, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, From 2a272abc4e543f488b3a73292ee75a06f20d077a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Feb 2017 09:03:51 -0800 Subject: [PATCH 123/252] packet: fix races in fanout_add() commit d199fab63c11998a602205f7ee7ff7c05c97164b upstream. Multiple threads can call fanout_add() at the same time. We need to grab fanout_mutex earlier to avoid races that could lead to one thread freeing po->rollover that was set by another thread. Do the same in fanout_release(), for peace of mind, and to help us finding lockdep issues earlier. [js] no rollover in 3.12 Fixes: dc99f600698d ("packet: Add fanout support.") Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- net/packet/af_packet.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e38c69969c3..25ef495506a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1304,13 +1304,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) return -EINVAL; } - if (!po->running) - return -EINVAL; - - if (po->fanout) - return -EALREADY; - mutex_lock(&fanout_mutex); + + err = -EINVAL; + if (!po->running) + goto out; + + err = -EALREADY; + if (po->fanout) + goto out; + match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && @@ -1366,17 +1369,16 @@ static void fanout_release(struct sock *sk) struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; - f = po->fanout; - if (!f) - return; - mutex_lock(&fanout_mutex); - po->fanout = NULL; + f = po->fanout; + if (f) { + po->fanout = NULL; - if (atomic_dec_and_test(&f->sk_ref)) { - list_del(&f->list); - dev_remove_pack(&f->prot_hook); - kfree(f); + if (atomic_dec_and_test(&f->sk_ref)) { + list_del(&f->list); + dev_remove_pack(&f->prot_hook); + kfree(f); + } } mutex_unlock(&fanout_mutex); } From 9aab35929faed156ca7ade4bd412621f527756f2 Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Wed, 15 Feb 2017 20:25:39 +0000 Subject: [PATCH 124/252] packet: Do not call fanout_release from atomic contexts commit 2bd624b4611ffee36422782d16e1c944d1351e98 upstream. Commit 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev"), unfortunately, introduced the following issues. 1. calling mutex_lock(&fanout_mutex) (fanout_release()) from inside rcu_read-side critical section. rcu_read_lock disables preemption, most often, which prohibits calling sleeping functions. [ ] include/linux/rcupdate.h:560 Illegal context switch in RCU read-side critical section! [ ] [ ] rcu_scheduler_active = 1, debug_locks = 0 [ ] 4 locks held by ovs-vswitchd/1969: [ ] #0: (cb_lock){++++++}, at: [] genl_rcv+0x19/0x40 [ ] #1: (ovs_mutex){+.+.+.}, at: [] ovs_vport_cmd_del+0x4a/0x100 [openvswitch] [ ] #2: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ ] #3: (rcu_read_lock){......}, at: [] packet_notifier+0x5/0x3f0 [ ] [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] lockdep_rcu_suspicious+0x107/0x110 [ ] [] ___might_sleep+0x57/0x210 [ ] [] __might_sleep+0x70/0x90 [ ] [] mutex_lock_nested+0x3c/0x3a0 [ ] [] ? vprintk_default+0x1f/0x30 [ ] [] ? printk+0x4d/0x4f [ ] [] fanout_release+0x1d/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 2. calling mutex_lock(&fanout_mutex) inside spin_lock(&po->bind_lock). "sleeping function called from invalid context" [ ] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 [ ] in_atomic(): 1, irqs_disabled(): 0, pid: 1969, name: ovs-vswitchd [ ] INFO: lockdep is turned off. [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] ___might_sleep+0x202/0x210 [ ] [] __might_sleep+0x70/0x90 [ ] [] mutex_lock_nested+0x3c/0x3a0 [ ] [] fanout_release+0x1d/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 3. calling dev_remove_pack(&fanout->prot_hook), from inside spin_lock(&po->bind_lock) or rcu_read-side critical-section. dev_remove_pack() -> synchronize_net(), which might sleep. [ ] BUG: scheduling while atomic: ovs-vswitchd/1969/0x00000002 [ ] INFO: lockdep is turned off. [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] __schedule_bug+0x64/0x73 [ ] [] __schedule+0x6b/0xd10 [ ] [] schedule+0x6b/0x80 [ ] [] schedule_timeout+0x38d/0x410 [ ] [] synchronize_sched_expedited+0x53d/0x810 [ ] [] synchronize_rcu_expedited+0xe/0x10 [ ] [] synchronize_net+0x35/0x50 [ ] [] dev_remove_pack+0x13/0x20 [ ] [] fanout_release+0xbe/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 4. fanout_release() races with calls from different CPU. To fix the above problems, remove the call to fanout_release() under rcu_read_lock(). Instead, call __dev_remove_pack(&fanout->prot_hook) and netdev_run_todo will be happy that &dev->ptype_specific list is empty. In order to achieve this, I moved dev_{add,remove}_pack() out of fanout_{add,release} to __fanout_{link,unlink}. So, call to {,__}unregister_prot_hook() will make sure fanout->prot_hook is removed as well. [js] no rollover in 3.12 Fixes: 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev") Reported-by: Eric Dumazet Signed-off-by: Anoob Soman Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- net/packet/af_packet.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 25ef495506a..4b1734a14ff 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1257,6 +1257,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po) f->arr[f->num_members] = sk; smp_wmb(); f->num_members++; + if (f->num_members == 1) + dev_add_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1273,6 +1275,8 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) BUG_ON(i >= f->num_members); f->arr[i] = f->arr[f->num_members - 1]; f->num_members--; + if (f->num_members == 0) + __dev_remove_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1343,7 +1347,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; match->prot_hook.id_match = match_fanout_group; - dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } err = -EINVAL; @@ -1364,7 +1367,12 @@ out: return err; } -static void fanout_release(struct sock *sk) +/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes + * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. + * It is the responsibility of the caller to call fanout_release_data() and + * free the returned packet_fanout (after synchronize_net()) + */ +static struct packet_fanout *fanout_release(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; @@ -1374,13 +1382,14 @@ static void fanout_release(struct sock *sk) if (f) { po->fanout = NULL; - if (atomic_dec_and_test(&f->sk_ref)) { + if (atomic_dec_and_test(&f->sk_ref)) list_del(&f->list); - dev_remove_pack(&f->prot_hook); - kfree(f); - } + else + f = NULL; } mutex_unlock(&fanout_mutex); + + return f; } static const struct proto_ops packet_ops; @@ -2430,6 +2439,7 @@ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; + struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; @@ -2469,9 +2479,13 @@ static int packet_release(struct socket *sock) packet_set_ring(sk, &req_u, 1, 1); } - fanout_release(sk); + f = fanout_release(sk); synchronize_net(); + + if (f) { + kfree(f); + } /* * Now the socket is dead. No more input will appear. */ @@ -3392,7 +3406,6 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); From 0b5240c45e2029986526b1405ab24906c708f770 Mon Sep 17 00:00:00 2001 From: Maxime Jayat Date: Tue, 21 Feb 2017 18:35:51 +0100 Subject: [PATCH 125/252] net: socket: fix recvmmsg not returning error from sock_error commit e623a9e9dec29ae811d11f83d0074ba254aba374 upstream. Commit 34b88a68f26a ("net: Fix use after free in the recvmmsg exit path"), changed the exit path of recvmmsg to always return the datagrams variable and modified the error paths to set the variable to the error code returned by recvmsg if necessary. However in the case sock_error returned an error, the error code was then ignored, and recvmmsg returned 0. Change the error path of recvmmsg to correctly return the error code of sock_error. The bug was triggered by using recvmmsg on a CAN interface which was not up. Linux 4.6 and later return 0 in this case while earlier releases returned -ENETDOWN. Fixes: 34b88a68f26a ("net: Fix use after free in the recvmmsg exit path") Signed-off-by: Maxime Jayat Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index e91e8ed1b8d..773ba3abb10 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2326,8 +2326,10 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, return err; err = sock_error(sock->sk); - if (err) + if (err) { + datagrams = err; goto out_put; + } entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; From c8c6bd83bdb540466bef6a8ab89898e8e9ce2fd6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 9 Feb 2017 12:11:41 +0100 Subject: [PATCH 126/252] USB: serial: mos7840: fix another NULL-deref at open commit 5182c2cf2a9bfb7f066ef0bdd2bb6330b94dd74e upstream. Fix another NULL-pointer dereference at open should a malicious device lack an interrupt-in endpoint. Note that the driver has a broken check for an interrupt-in endpoint which means that an interrupt URB has never even been submitted. Fixes: 3f5429746d91 ("USB: Moschip 7840 USB-Serial Driver") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/mos7840.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 5041b6c5b2d..3308c43d231 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1041,6 +1041,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) * (can't set it up in mos7840_startup as the structures * * were not set up at that time.) */ if (port0->open_ports == 1) { + /* FIXME: Buffer never NULL, so URB is not submitted. */ if (serial->port[0]->interrupt_in_buffer == NULL) { /* set up interrupt urb */ usb_fill_int_urb(serial->port[0]->interrupt_in_urb, @@ -2258,7 +2259,8 @@ static int mos7840_calc_num_ports(struct usb_serial *serial) static int mos7840_attach(struct usb_serial *serial) { if (serial->num_bulk_in < serial->num_ports || - serial->num_bulk_out < serial->num_ports) { + serial->num_bulk_out < serial->num_ports || + serial->num_interrupt_in < 1) { dev_err(&serial->interface->dev, "missing endpoints\n"); return -ENODEV; } From aa814edbd31280c5ff3c465c8fdca1ced52dc34d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Jan 2017 14:56:11 +0100 Subject: [PATCH 127/252] USB: serial: ftdi_sio: fix modem-status error handling commit 427c3a95e3e29e65f59d99aaf320d7506f3eed57 upstream. Make sure to detect short responses when fetching the modem status in order to avoid parsing uninitialised buffer data and having bits of it leak to user space. Note that we still allow for short 1-byte responses. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ftdi_sio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 4e865664699..ba043088582 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2452,8 +2452,12 @@ static int ftdi_get_modem_status(struct usb_serial_port *port, FTDI_SIO_GET_MODEM_STATUS_REQUEST_TYPE, 0, priv->interface, buf, len, WDR_TIMEOUT); - if (ret < 0) { + + /* NOTE: We allow short responses and handle that below. */ + if (ret < 1) { dev_err(&port->dev, "failed to get modem status: %d\n", ret); + if (ret >= 0) + ret = -EIO; ret = usb_translate_errors(ret); goto out; } From af1f7fb6d94f55f435c68c94176d8b0a52cc80d9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 25 Jan 2017 15:35:20 +0100 Subject: [PATCH 128/252] USB: serial: ftdi_sio: fix extreme low-latency setting commit c6dce2626606ef16434802989466636bc28c1419 upstream. Since commit 557aaa7ffab6 ("ft232: support the ASYNC_LOW_LATENCY flag") the FTDI driver has been using a receive latency-timer value of 1 ms instead of the device default of 16 ms. The latency timer is used to periodically empty a non-full receive buffer, but a status header is always sent when the timer expires including when the buffer is empty. This means that a two-byte bulk message is received every millisecond also for an otherwise idle port as long as it is open. Let's restore the pre-2009 behaviour which reduces the rate of the status messages to 1/16th (e.g. interrupt frequency drops from 1 kHz to 62.5 Hz) by not setting ASYNC_LOW_LATENCY by default. Anyone willing to pay the price for the minimum-latency behaviour should set the flag explicitly instead using the TIOCSSERIAL ioctl or a tool such as setserial (e.g. setserial /dev/ttyUSB0 low_latency). Note that since commit 0cbd81a9f6ba ("USB: ftdi_sio: remove tty->low_latency") the ASYNC_LOW_LATENCY flag has no other effects but to set a minimal latency timer. Reported-by: Antoine Aubert Fixes: 557aaa7ffab6 ("ft232: support the ASYNC_LOW_LATENCY flag") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ftdi_sio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index ba043088582..960d0e402f0 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1813,8 +1813,6 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port) mutex_init(&priv->cfg_lock); - priv->flags = ASYNC_LOW_LATENCY; - if (quirk && quirk->port_probe) quirk->port_probe(priv); From c1514183aa0f7b688288b59d325be4628b145ad4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 2 Feb 2017 17:38:35 +0100 Subject: [PATCH 129/252] USB: serial: ftdi_sio: fix line-status over-reporting commit a6bb1e17a39818b01b55d8e6238b4b5f06d55038 upstream. FTDI devices use a receive latency timer to periodically empty the receive buffer and report modem and line status (also when the buffer is empty). When a break or error condition is detected the corresponding status flags will be set on a packet with nonzero data payload and the flags are not updated until the break is over or further characters are received. In order to avoid over-reporting break and error conditions, these flags must therefore only be processed for packets with payload. This specifically fixes the case where after an overrun, the error condition is continuously reported and NULL-characters inserted until further data is received. Reported-by: Michael Walle Fixes: 72fda3ca6fc1 ("USB: serial: ftd_sio: implement sysrq handling on break") Fixes: 166ceb690750 ("USB: ftdi_sio: clean up line-status handling") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ftdi_sio.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 960d0e402f0..ce884f7434b 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2089,6 +2089,20 @@ static int ftdi_process_packet(struct usb_serial_port *port, priv->prev_status = status; } + /* save if the transmitter is empty or not */ + if (packet[1] & FTDI_RS_TEMT) + priv->transmit_empty = 1; + else + priv->transmit_empty = 0; + + len -= 2; + if (!len) + return 0; /* status only */ + + /* + * Break and error status must only be processed for packets with + * data payload to avoid over-reporting. + */ flag = TTY_NORMAL; if (packet[1] & FTDI_RS_ERR_MASK) { /* Break takes precedence over parity, which takes precedence @@ -2111,15 +2125,6 @@ static int ftdi_process_packet(struct usb_serial_port *port, } } - /* save if the transmitter is empty or not */ - if (packet[1] & FTDI_RS_TEMT) - priv->transmit_empty = 1; - else - priv->transmit_empty = 0; - - len -= 2; - if (!len) - return 0; /* status only */ port->icount.rx += len; ch = packet + 2; From 3e642b832fb63cdb6d89a415942d7a2d97fcce83 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Jan 2017 14:56:21 +0100 Subject: [PATCH 130/252] USB: serial: spcp8x5: fix modem-status handling commit 5ed8d41023751bdd3546f2fe4118304357efe8d2 upstream. Make sure to detect short control transfers and return zero on success when retrieving the modem status. This fixes the TIOCMGET implementation which since e1ed212d8593 ("USB: spcp8x5: add proper modem-status support") has returned TIOCM_LE on successful retrieval, and avoids leaking bits from the stack on short transfers. This also fixes the carrier-detect implementation which since the above mentioned commit unconditionally has returned true. Fixes: e1ed212d8593 ("USB: spcp8x5: add proper modem-status support") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/spcp8x5.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 20f00bc1518..595a3f0b021 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -231,11 +231,17 @@ static int spcp8x5_get_msr(struct usb_serial_port *port, u8 *status) ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), GET_UART_STATUS, GET_UART_STATUS_TYPE, 0, GET_UART_STATUS_MSR, buf, 1, 100); - if (ret < 0) + if (ret < 1) { dev_err(&port->dev, "failed to get modem status: %d", ret); + if (ret >= 0) + ret = -EIO; + goto out; + } dev_dbg(&port->dev, "0xc0:0x22:0:6 %d - 0x02%x", ret, *buf); *status = *buf; + ret = 0; +out: kfree(buf); return ret; From 1110205cc573bd6db1f0894767ccb5cb6a035ee1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 13 Jan 2017 13:21:08 +0100 Subject: [PATCH 131/252] USB: serial: opticon: fix CTS retrieval at open commit 2eee05020a0e7ee7c04422cbacdb07859e45dce6 upstream. The opticon driver used a control request at open to trigger a CTS status notification to be sent over the bulk-in pipe. When the driver was converted to using the generic read implementation, an inverted test prevented this request from being sent, something which could lead to TIOCMGET reporting an incorrect CTS state. Reported-by: Dan Carpenter Fixes: 7a6ee2b02751 ("USB: opticon: switch to generic read implementation") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/opticon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index b0eb1dfc601..b93ab96573e 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -143,7 +143,7 @@ static int opticon_open(struct tty_struct *tty, struct usb_serial_port *port) usb_clear_halt(port->serial->dev, port->read_urb->pipe); res = usb_serial_generic_open(tty, port); - if (!res) + if (res) return res; /* Request CTS line state, sometimes during opening the current From 9ce78b1e29b53073979587042edebf49ac512958 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Jan 2017 14:56:09 +0100 Subject: [PATCH 132/252] USB: serial: ark3116: fix register-accessor error handling commit 9fef37d7cf170522fb354d6d0ea6de09b9b16678 upstream. The current implementation failed to detect short transfers, something which could lead to bits of the uninitialised heap transfer buffer leaking to user space. Fixes: 149fc791a452 ("USB: ark3116: Setup some basic infrastructure for new ark3116 driver.") Fixes: f4c1e8d597d1 ("USB: ark3116: Make existing functions 16450-aware and add close and release functions.") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/ark3116.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c index 40e7fd94646..62fb2553c77 100644 --- a/drivers/usb/serial/ark3116.c +++ b/drivers/usb/serial/ark3116.c @@ -100,10 +100,17 @@ static int ark3116_read_reg(struct usb_serial *serial, usb_rcvctrlpipe(serial->dev, 0), 0xfe, 0xc0, 0, reg, buf, 1, ARK_TIMEOUT); - if (result < 0) + if (result < 1) { + dev_err(&serial->interface->dev, + "failed to read register %u: %d\n", + reg, result); + if (result >= 0) + result = -EIO; + return result; - else - return buf[0]; + } + + return buf[0]; } static inline int calc_divisor(int bps) From d3926e196305a4075aaf3da82e5fbd5bc192a58d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Feb 2017 11:11:50 +0100 Subject: [PATCH 133/252] x86/platform/goldfish: Prevent unconditional loading commit 47512cfd0d7a8bd6ab71d01cd89fca19eb2093eb upstream. The goldfish platform code registers the platform device unconditionally which causes havoc in several ways if the goldfish_pdev_bus driver is enabled: - Access to the hardcoded physical memory region, which is either not available or contains stuff which is completely unrelated. - Prevents that the interrupt of the serial port can be requested - In case of a spurious interrupt it goes into a infinite loop in the interrupt handler of the pdev_bus driver (which needs to be fixed seperately). Add a 'goldfish' command line option to make the registration opt-in when the platform is compiled in. I'm seriously grumpy about this engineering trainwreck, which has seven SOBs from Intel developers for 50 lines of code. And none of them figured out that this is broken. Impressive fail! Fixes: ddd70cf93d78 ("goldfish: platform device for x86") Reported-by: Gabriel C Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/platform/goldfish/goldfish.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 98da831a14b..daf83824fda 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -955,6 +955,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. When zero, profiling data is discarded and associated debugfs files are removed at module unload time. + goldfish [X86] Enable the goldfish android emulator platform. + Don't use this when you are not running on the + android emulator + gpt [EFI] Forces disk with valid GPT signature but invalid Protective MBR to be treated as GPT. diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c index 1693107a518..0d17c0aafeb 100644 --- a/arch/x86/platform/goldfish/goldfish.c +++ b/arch/x86/platform/goldfish/goldfish.c @@ -42,10 +42,22 @@ static struct resource goldfish_pdev_bus_resources[] = { } }; +static bool goldfish_enable __initdata; + +static int __init goldfish_setup(char *str) +{ + goldfish_enable = true; + return 0; +} +__setup("goldfish", goldfish_setup); + static int __init goldfish_init(void) { + if (!goldfish_enable) + return -ENODEV; + platform_device_register_simple("goldfish_pdev_bus", -1, - goldfish_pdev_bus_resources, 2); + goldfish_pdev_bus_resources, 2); return 0; } device_initcall(goldfish_init); From 7fb0b4473100f95f4e99802b8f11d529624185fd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Feb 2017 11:11:51 +0100 Subject: [PATCH 134/252] goldfish: Sanitize the broken interrupt handler commit 6cf18e6927c0b224f972e3042fb85770d63cb9f8 upstream. This interrupt handler is broken in several ways: - It loops forever when the op code is not decodeable - It never returns IRQ_HANDLED because the only way to exit the loop returns IRQ_NONE unconditionally. The whole concept of this is broken. Creating devices in an interrupt handler is beyond any point of sanity. Make it at least behave halfways sane so accidental users do not have to deal with a hard to debug lockup. Fixes: e809c22b8fb028 ("goldfish: add the goldfish virtual bus") Reported-by: Gabriel C Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/platform/goldfish/pdev_bus.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/platform/goldfish/pdev_bus.c b/drivers/platform/goldfish/pdev_bus.c index 92cc4cfafde..6bcd57cb2f7 100644 --- a/drivers/platform/goldfish/pdev_bus.c +++ b/drivers/platform/goldfish/pdev_bus.c @@ -153,23 +153,26 @@ static int goldfish_new_pdev(void) static irqreturn_t goldfish_pdev_bus_interrupt(int irq, void *dev_id) { irqreturn_t ret = IRQ_NONE; + while (1) { u32 op = readl(pdev_bus_base + PDEV_BUS_OP); - switch (op) { - case PDEV_BUS_OP_DONE: - return IRQ_NONE; + switch (op) { case PDEV_BUS_OP_REMOVE_DEV: goldfish_pdev_remove(); + ret = IRQ_HANDLED; break; case PDEV_BUS_OP_ADD_DEV: goldfish_new_pdev(); + ret = IRQ_HANDLED; break; + + case PDEV_BUS_OP_DONE: + default: + return ret; } - ret = IRQ_HANDLED; } - return ret; } static int goldfish_pdev_bus_probe(struct platform_device *pdev) From 511ae3b96a0f36719924dac16b5e98fbd9320d25 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 29 Aug 2014 15:18:58 -0700 Subject: [PATCH 135/252] ocfs2: do not write error flag to user structure we cannot copy from/to commit 2b462638e41ea62230297c21c4da9955937b7a3c upstream. If we failed to copy from the structure, writing back the flags leaks 31 bits of kernel memory (the rest of the ir_flags field). In any case, if we cannot copy from/to the structure, why should we expect putting just the flags to work? Also make sure ocfs2_info_handle_freeinode() returns the right error code if the copy_to_user() fails. Fixes: ddee5cdb70e6 ('Ocfs2: Add new OCFS2_IOC_INFO ioctl for ocfs2 v8.') Signed-off-by: Ben Hutchings Cc: Joel Becker Acked-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- fs/ocfs2/ioctl.c | 129 ++++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 86 deletions(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 0c60ef2d805..b9d16098ede 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -34,9 +34,8 @@ copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) /* - * This call is void because we are already reporting an error that may - * be -EFAULT. The error will be returned from the ioctl(2) call. It's - * just a best-effort to tell userspace that this request caused the error. + * This is just a best-effort to tell userspace that this request + * caused the error. */ static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, struct ocfs2_info_request __user *req) @@ -145,136 +144,105 @@ bail: int ocfs2_info_handle_blocksize(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_blocksize oib; if (o2info_from_user(oib, req)) - goto bail; + return -EFAULT; oib.ib_blocksize = inode->i_sb->s_blocksize; o2info_set_request_filled(&oib.ib_req); if (o2info_to_user(oib, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oib.ib_req, req); - - return status; + return 0; } int ocfs2_info_handle_clustersize(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_clustersize oic; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oic, req)) - goto bail; + return -EFAULT; oic.ic_clustersize = osb->s_clustersize; o2info_set_request_filled(&oic.ic_req); if (o2info_to_user(oic, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oic.ic_req, req); - - return status; + return 0; } int ocfs2_info_handle_maxslots(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_maxslots oim; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oim, req)) - goto bail; + return -EFAULT; oim.im_max_slots = osb->max_slots; o2info_set_request_filled(&oim.im_req); if (o2info_to_user(oim, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oim.im_req, req); - - return status; + return 0; } int ocfs2_info_handle_label(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_label oil; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oil, req)) - goto bail; + return -EFAULT; memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); o2info_set_request_filled(&oil.il_req); if (o2info_to_user(oil, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oil.il_req, req); - - return status; + return 0; } int ocfs2_info_handle_uuid(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_uuid oiu; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oiu, req)) - goto bail; + return -EFAULT; memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); o2info_set_request_filled(&oiu.iu_req); if (o2info_to_user(oiu, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oiu.iu_req, req); - - return status; + return 0; } int ocfs2_info_handle_fs_features(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_fs_features oif; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oif, req)) - goto bail; + return -EFAULT; oif.if_compat_features = osb->s_feature_compat; oif.if_incompat_features = osb->s_feature_incompat; @@ -283,39 +251,28 @@ int ocfs2_info_handle_fs_features(struct inode *inode, o2info_set_request_filled(&oif.if_req); if (o2info_to_user(oif, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oif.if_req, req); - - return status; + return 0; } int ocfs2_info_handle_journal_size(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_journal_size oij; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oij, req)) - goto bail; + return -EFAULT; oij.ij_journal_size = osb->journal->j_inode->i_size; o2info_set_request_filled(&oij.ij_req); if (o2info_to_user(oij, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oij.ij_req, req); - - return status; + return 0; } int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, @@ -371,7 +328,7 @@ int ocfs2_info_handle_freeinode(struct inode *inode, u32 i; u64 blkno = -1; char namebuf[40]; - int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; + int status, type = INODE_ALLOC_SYSTEM_INODE; struct ocfs2_info_freeinode *oifi = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *inode_alloc = NULL; @@ -383,8 +340,10 @@ int ocfs2_info_handle_freeinode(struct inode *inode, goto out_err; } - if (o2info_from_user(*oifi, req)) - goto bail; + if (o2info_from_user(*oifi, req)) { + status = -EFAULT; + goto out_free; + } oifi->ifi_slotnum = osb->max_slots; @@ -421,14 +380,16 @@ int ocfs2_info_handle_freeinode(struct inode *inode, o2info_set_request_filled(&oifi->ifi_req); - if (o2info_to_user(*oifi, req)) - goto bail; + if (o2info_to_user(*oifi, req)) { + status = -EFAULT; + goto out_free; + } status = 0; bail: if (status) o2info_set_request_error(&oifi->ifi_req, req); - +out_free: kfree(oifi); out_err: return status; @@ -655,7 +616,7 @@ int ocfs2_info_handle_freefrag(struct inode *inode, { u64 blkno = -1; char namebuf[40]; - int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; + int status, type = GLOBAL_BITMAP_SYSTEM_INODE; struct ocfs2_info_freefrag *oiff; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -668,8 +629,10 @@ int ocfs2_info_handle_freefrag(struct inode *inode, goto out_err; } - if (o2info_from_user(*oiff, req)) - goto bail; + if (o2info_from_user(*oiff, req)) { + status = -EFAULT; + goto out_free; + } /* * chunksize from userspace should be power of 2. */ @@ -708,14 +671,14 @@ int ocfs2_info_handle_freefrag(struct inode *inode, if (o2info_to_user(*oiff, req)) { status = -EFAULT; - goto bail; + goto out_free; } status = 0; bail: if (status) o2info_set_request_error(&oiff->iff_req, req); - +out_free: kfree(oiff); out_err: return status; @@ -724,23 +687,17 @@ out_err: int ocfs2_info_handle_unknown(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_request oir; if (o2info_from_user(oir, req)) - goto bail; + return -EFAULT; o2info_clear_request_filled(&oir); if (o2info_to_user(oir, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oir, req); - - return status; + return 0; } /* From dad5635c15055b22304f11423442522dd8c086c4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Nov 2013 11:04:20 +0300 Subject: [PATCH 136/252] mfd: pm8921: Potential NULL dereference in pm8921_remove() commit d6daef95127e41233ac8e2d8472d8c0cd8687d38 upstream. We assume that "pmic" could be NULL and then dereference it two lines later. I fix this by moving the dereference inside the NULL check. Fixes: c013f0a56c56 ('mfd: Add pm8xxx irq support') Signed-off-by: Dan Carpenter Signed-off-by: Lee Jones Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/mfd/pm8921-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c index ecc137ffa8c..a28f434147f 100644 --- a/drivers/mfd/pm8921-core.c +++ b/drivers/mfd/pm8921-core.c @@ -173,11 +173,12 @@ static int pm8921_remove(struct platform_device *pdev) drvdata = platform_get_drvdata(pdev); if (drvdata) pmic = drvdata->pm_chip_data; - if (pmic) + if (pmic) { mfd_remove_devices(pmic->dev); - if (pmic->irq_chip) { - pm8xxx_irq_exit(pmic->irq_chip); - pmic->irq_chip = NULL; + if (pmic->irq_chip) { + pm8xxx_irq_exit(pmic->irq_chip); + pmic->irq_chip = NULL; + } } platform_set_drvdata(pdev, NULL); kfree(pmic); From c4c175dbd7c40e14393a83065edc14f147c5a087 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Nov 2013 01:18:47 +0300 Subject: [PATCH 137/252] drm/nv50/disp: min/max are reversed in nv50_crtc_gamma_set() commit bdefc8cbdfc71ea73e0573dbd2d24c0a68232218 upstream. We should be taking the minimum here instead of the max. It could lead to a buffer overflow. Fixes: 438d99e3b175 ('drm/nvd0/disp: initial crtc object implementation') Signed-off-by: Dan Carpenter a/drm/nv50_display.c b/drm/nv50_display.c index f8e66c08b11a..4e384a2f99c3 100644 Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/gpu/drm/nouveau/nv50_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index dd5e01f89f2..969acd36c40 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -1253,7 +1253,7 @@ nv50_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, uint32_t start, uint32_t size) { struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - u32 end = max(start + size, (u32)256); + u32 end = min_t(u32, start + size, 256); u32 i; for (i = start; i < end; i++) { From 3ac993d473612d4830646fc5dfa25fb542aa40f1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 6 Jan 2014 01:45:50 +0100 Subject: [PATCH 138/252] net: 6lowpan: fix lowpan_header_create non-compression memcpy call commit 965801e1eb624154fe5e9dc5d2ff0b7f1951a11c upstream. In function lowpan_header_create(), we invoke the following code construct: struct ipv6hdr *hdr; ... hdr = ipv6_hdr(skb); ... if (...) memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); else memcpy(hc06_ptr, &hdr, 4); Where the else path of the condition, that is, non-compression path, calls memcpy() with a pointer to struct ipv6hdr *hdr as source, thus two levels of indirection. This cannot be correct, and likely only one level of pointer was intended as source buffer for memcpy() here. Fixes: 44331fe2aa0d ("IEEE802.15.4: 6LoWPAN basic support") Signed-off-by: Daniel Borkmann Cc: Alexander Smirnov Cc: Dmitry Eremin-Solenikov Cc: Werner Almesberger Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ieee802154/6lowpan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index ca118e8cb14..9d06b37acc4 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -459,7 +459,7 @@ static int lowpan_header_create(struct sk_buff *skb, hc06_ptr += 3; } else { /* compress nothing */ - memcpy(hc06_ptr, &hdr, 4); + memcpy(hc06_ptr, hdr, 4); /* replace the top byte with new ECN | DSCP format */ *hc06_ptr = tmp; hc06_ptr += 4; From 37fdac589c27ee92e3634b2269e810b39d1ebd58 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Apr 2014 09:01:03 +0200 Subject: [PATCH 139/252] vti4: Don't count header length twice. commit a32452366b7250c42e96a18ffc3ad8db9e0ca3c2 upstream. We currently count the size of LL_MAX_HEADER and struct iphdr twice for vti4 devices, this leads to a wrong device mtu. The size of LL_MAX_HEADER and struct iphdr is already counted in ip_tunnel_bind_dev(), so don't do it again in vti_tunnel_init(). Fixes: b9959fd3 ("vti: switch to new ip tunnel code") Signed-off-by: Steffen Klassert Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- net/ipv4/ip_vti.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 4ec34275160..eadafac6f46 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -582,7 +582,6 @@ static void vti_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_TUNNEL; dev->destructor = vti_dev_free; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN; dev->flags = IFF_NOARP; dev->iflink = 0; From beb27bf1bcb7e29caed520d5f7737f45eaeaf25e Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Fri, 21 Oct 2016 00:18:08 +0300 Subject: [PATCH 140/252] net/sched: em_meta: Fix 'meta vlan' to correctly recognize zero VID frames commit d65f2fa680d6f91438461df54c83a331b3a631c9 upstream. META_COLLECTOR int_vlan_tag() assumes that if the accel tag (vlan_tci) is zero, then no vlan accel tag is present. This is incorrect for zero VID vlan accel packets, making the following match fail: tc filter add ... basic match 'meta(vlan mask 0xfff eq 0)' ... Apparently 'int_vlan_tag' was implemented prior VLAN_TAG_PRESENT was introduced in 05423b2 "vlan: allow null VLAN ID to be used" (and at time introduced, the 'vlan_tx_tag_get' call in em_meta was not adapted). Fix, testing skb_vlan_tag_present instead of testing skb_vlan_tag_get's value. Fixes: 05423b2413 ("vlan: allow null VLAN ID to be used") Fixes: 1a31f2042e ("netsched: Allow meta match on vlan tag on receive") Signed-off-by: Shmulik Ladkani Cc: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/sched/em_meta.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 7c3de6ffa51..eba9d1e49fa 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,11 +176,12 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = vlan_tx_tag_get(skb); - if (!tag && __vlan_get_tag(skb, &tag)) - *err = -1; - else + if (vlan_tx_tag_present(skb)) + dst->value = vlan_tx_tag_get(skb); + else if (!__vlan_get_tag(skb, &tag)) dst->value = tag; + else + *err = -1; } From e51712a87d2f73f8cc45366728ee1dbd9c531804 Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Mon, 9 Jan 2017 16:52:28 +0000 Subject: [PATCH 141/252] MIPS: OCTEON: Fix copy_from_user fault handling for large buffers commit 884b426917e4b3c85f33b382c792a94305dfdd62 upstream. If copy_from_user is called with a large buffer (>= 128 bytes) and the userspace buffer refers partially to unreadable memory, then it is possible for Octeon's copy_from_user to report the wrong number of bytes have been copied. In the case where the buffer size is an exact multiple of 128 and the fault occurs in the last 64 bytes, copy_from_user will report that all the bytes were copied successfully but leave some garbage in the destination buffer. The bug is in the main __copy_user_common loop in octeon-memcpy.S where in the middle of the loop, src and dst are incremented by 128 bytes. The l_exc_copy fault handler is used after this but that assumes that "src < THREAD_BUADDR($28)". This is not the case if src has already been incremented. Fix by adding an extra fault handler which rewinds the src and dst pointers 128 bytes before falling though to l_exc_copy. Thanks to the pwritev test from the strace test suite for originally highlighting this bug! Fixes: 5b3b16880f40 ("MIPS: Add Cavium OCTEON processor support ...") Signed-off-by: James Cowgill Acked-by: David Daney Reviewed-by: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14978/ Signed-off-by: James Hogan Signed-off-by: Willy Tarreau --- arch/mips/cavium-octeon/octeon-memcpy.S | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 64e08df51d6..8b700413249 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -208,18 +208,18 @@ EXC( STORE t2, UNIT(6)(dst), s_exc_p10u) ADD src, src, 16*NBYTES EXC( STORE t3, UNIT(7)(dst), s_exc_p9u) ADD dst, dst, 16*NBYTES -EXC( LOAD t0, UNIT(-8)(src), l_exc_copy) -EXC( LOAD t1, UNIT(-7)(src), l_exc_copy) -EXC( LOAD t2, UNIT(-6)(src), l_exc_copy) -EXC( LOAD t3, UNIT(-5)(src), l_exc_copy) +EXC( LOAD t0, UNIT(-8)(src), l_exc_copy_rewind16) +EXC( LOAD t1, UNIT(-7)(src), l_exc_copy_rewind16) +EXC( LOAD t2, UNIT(-6)(src), l_exc_copy_rewind16) +EXC( LOAD t3, UNIT(-5)(src), l_exc_copy_rewind16) EXC( STORE t0, UNIT(-8)(dst), s_exc_p8u) EXC( STORE t1, UNIT(-7)(dst), s_exc_p7u) EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u) EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u) -EXC( LOAD t0, UNIT(-4)(src), l_exc_copy) -EXC( LOAD t1, UNIT(-3)(src), l_exc_copy) -EXC( LOAD t2, UNIT(-2)(src), l_exc_copy) -EXC( LOAD t3, UNIT(-1)(src), l_exc_copy) +EXC( LOAD t0, UNIT(-4)(src), l_exc_copy_rewind16) +EXC( LOAD t1, UNIT(-3)(src), l_exc_copy_rewind16) +EXC( LOAD t2, UNIT(-2)(src), l_exc_copy_rewind16) +EXC( LOAD t3, UNIT(-1)(src), l_exc_copy_rewind16) EXC( STORE t0, UNIT(-4)(dst), s_exc_p4u) EXC( STORE t1, UNIT(-3)(dst), s_exc_p3u) EXC( STORE t2, UNIT(-2)(dst), s_exc_p2u) @@ -383,6 +383,10 @@ done: nop END(memcpy) +l_exc_copy_rewind16: + /* Rewind src and dst by 16*NBYTES for l_exc_copy */ + SUB src, src, 16*NBYTES + SUB dst, dst, 16*NBYTES l_exc_copy: /* * Copy bytes from src until faulting load address (or until a From 1809a7833dbce00e9b644a3625407dea43dd73ee Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:02 +0000 Subject: [PATCH 142/252] MIPS: Clear ISA bit correctly in get_frame_info() commit ccaf7caf2c73c6db920772bf08bf1d47b2170634 upstream. get_frame_info() can be called in microMIPS kernels with the ISA bit already clear. For example this happens when unwind_stack_by_address() is called because we begin with a PC that has the ISA bit set & subtract the (odd) offset from the preceding symbol (which does not have the ISA bit set). Since get_frame_info() unconditionally subtracts 1 from the PC in microMIPS kernels it incorrectly misaligns the address it then attempts to access code at, leading to an address error exception. Fix this by using msk_isa16_mode() to clear the ISA bit, which allows get_frame_info() to function regardless of whether it is provided with a PC that has the ISA bit set or not. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14528/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/process.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c6a041d9d05..11468a02907 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -322,17 +322,14 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { -#ifdef CONFIG_CPU_MICROMIPS - union mips_instruction *ip = (void *) (((char *) info->func) - 1); -#else - union mips_instruction *ip = info->func; -#endif + union mips_instruction *ip; unsigned max_insns = info->func_size / sizeof(union mips_instruction); unsigned i; info->pc_offset = -1; info->frame_size = 0; + ip = (void *)msk_isa16_mode((ulong)info->func); if (!ip) goto err; From 15d2aa74c4f34768af117b4ad38262450075aed2 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:03 +0000 Subject: [PATCH 143/252] MIPS: Prevent unaligned accesses during stack unwinding commit a3552dace7d1d0cabf573e88fc3025cb90c4a601 upstream. During stack unwinding we call a number of functions to determine what type of instruction we're looking at. The union mips_instruction pointer provided to them may be pointing at a 2 byte, but not 4 byte, aligned address & we thus cannot directly access the 4 byte wide members of the union mips_instruction. To avoid this is_ra_save_ins() copies the required half-words of the microMIPS instruction to a correctly aligned union mips_instruction on the stack, which it can then access safely. The is_jump_ins() & is_sp_move_ins() functions do not correctly perform this temporary copy, and instead attempt to directly dereference 4 byte fields which may be misaligned and lead to an address exception. Fix this by copying the instruction halfwords to a temporary union mips_instruction in get_frame_info() such that we can provide a 4 byte aligned union mips_instruction to the is_*_ins() functions and they do not need to deal with misalignment themselves. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14529/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/process.c | 70 +++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 11468a02907..71110b9debe 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -217,8 +217,6 @@ struct mips_frame_info { static inline int is_ra_save_ins(union mips_instruction *ip) { #ifdef CONFIG_CPU_MICROMIPS - union mips_instruction mmi; - /* * swsp ra,offset * swm16 reglist,offset(sp) @@ -228,23 +226,20 @@ static inline int is_ra_save_ins(union mips_instruction *ip) * * microMIPS is way more fun... */ - if (mm_insn_16bit(ip->halfword[0])) { - mmi.word = (ip->halfword[0] << 16); - return ((mmi.mm16_r5_format.opcode == mm_swsp16_op && - mmi.mm16_r5_format.rt == 31) || - (mmi.mm16_m_format.opcode == mm_pool16c_op && - mmi.mm16_m_format.func == mm_swm16_op)); + if (mm_insn_16bit(ip->halfword[1])) { + return (ip->mm16_r5_format.opcode == mm_swsp16_op && + ip->mm16_r5_format.rt == 31) || + (ip->mm16_m_format.opcode == mm_pool16c_op && + ip->mm16_m_format.func == mm_swm16_op); } else { - mmi.halfword[0] = ip->halfword[1]; - mmi.halfword[1] = ip->halfword[0]; - return ((mmi.mm_m_format.opcode == mm_pool32b_op && - mmi.mm_m_format.rd > 9 && - mmi.mm_m_format.base == 29 && - mmi.mm_m_format.func == mm_swm32_func) || - (mmi.i_format.opcode == mm_sw32_op && - mmi.i_format.rs == 29 && - mmi.i_format.rt == 31)); + return (ip->mm_m_format.opcode == mm_pool32b_op && + ip->mm_m_format.rd > 9 && + ip->mm_m_format.base == 29 && + ip->mm_m_format.func == mm_swm32_func) || + (ip->i_format.opcode == mm_sw32_op && + ip->i_format.rs == 29 && + ip->i_format.rt == 31); } #else /* sw / sd $ra, offset($sp) */ @@ -265,12 +260,8 @@ static inline int is_jump_ins(union mips_instruction *ip) * * microMIPS is kind of more fun... */ - union mips_instruction mmi; - - mmi.word = (ip->halfword[0] << 16); - - if ((mmi.mm16_r5_format.opcode == mm_pool16c_op && - (mmi.mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || + if ((ip->mm16_r5_format.opcode == mm_pool16c_op && + (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || @@ -299,15 +290,13 @@ static inline int is_sp_move_ins(union mips_instruction *ip) * * microMIPS is not more fun... */ - if (mm_insn_16bit(ip->halfword[0])) { - union mips_instruction mmi; - - mmi.word = (ip->halfword[0] << 16); - return ((mmi.mm16_r3_format.opcode == mm_pool16d_op && - mmi.mm16_r3_format.simmediate && mm_addiusp_func) || - (mmi.mm16_r5_format.opcode == mm_pool16d_op && - mmi.mm16_r5_format.rt == 29)); + if (mm_insn_16bit(ip->halfword[1])) { + return (ip->mm16_r3_format.opcode == mm_pool16d_op && + ip->mm16_r3_format.simmediate && mm_addiusp_func) || + (ip->mm16_r5_format.opcode == mm_pool16d_op && + ip->mm16_r5_format.rt == 29); } + return (ip->mm_i_format.opcode == mm_addiu32_op && ip->mm_i_format.rt == 29 && ip->mm_i_format.rs == 29); #else @@ -322,7 +311,8 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { - union mips_instruction *ip; + bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); + union mips_instruction insn, *ip; unsigned max_insns = info->func_size / sizeof(union mips_instruction); unsigned i; @@ -338,11 +328,21 @@ static int get_frame_info(struct mips_frame_info *info) max_insns = min(128U, max_insns); for (i = 0; i < max_insns; i++, ip++) { + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { + insn.halfword[0] = 0; + insn.halfword[1] = ip->halfword[0]; + } else if (is_mmips) { + insn.halfword[0] = ip->halfword[1]; + insn.halfword[1] = ip->halfword[0]; + } else { + insn.word = ip->word; + } - if (is_jump_ins(ip)) + if (is_jump_ins(&insn)) break; + if (!info->frame_size) { - if (is_sp_move_ins(ip)) + if (is_sp_move_ins(&insn)) { #ifdef CONFIG_CPU_MICROMIPS if (mm_insn_16bit(ip->halfword[0])) @@ -365,7 +365,7 @@ static int get_frame_info(struct mips_frame_info *info) } continue; } - if (info->pc_offset == -1 && is_ra_save_ins(ip)) { + if (info->pc_offset == -1 && is_ra_save_ins(&insn)) { info->pc_offset = ip->i_format.simmediate / sizeof(long); break; From 9c01ee595e614083c19432f4c444d7556ede3538 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:04 +0000 Subject: [PATCH 144/252] MIPS: Fix get_frame_info() handling of microMIPS function size commit b6c7a324df37bf05ef7a2c1580683cf10d082d97 upstream. get_frame_info() is meant to iterate over up to the first 128 instructions within a function, but for microMIPS kernels it will not reach that many instructions unless the function is 512 bytes long since we calculate the maximum number of instructions to check by dividing the function length by the 4 byte size of a union mips_instruction. In microMIPS kernels this won't do since instructions are variable length. Fix this by instead checking whether the pointer to the current instruction has reached the end of the function, and use max_insns as a simple constant to check the number of iterations against. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14530/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/process.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 71110b9debe..e67e17aec9e 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -312,9 +312,9 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip; - unsigned max_insns = info->func_size / sizeof(union mips_instruction); - unsigned i; + union mips_instruction insn, *ip, *ip_end; + const unsigned int max_insns = 128; + unsigned int i; info->pc_offset = -1; info->frame_size = 0; @@ -323,11 +323,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err; - if (max_insns == 0) - max_insns = 128U; /* unknown function size */ - max_insns = min(128U, max_insns); + ip_end = (void *)ip + info->func_size; - for (i = 0; i < max_insns; i++, ip++) { + for (i = 0; i < max_insns && ip < ip_end; i++, ip++) { if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.halfword[0] = 0; insn.halfword[1] = ip->halfword[0]; From a3e70c33ba493cd59a1d170d9e626e13d17201fb Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:05 +0000 Subject: [PATCH 145/252] MIPS: Fix is_jump_ins() handling of 16b microMIPS instructions commit 67c75057709a6d85c681c78b9b2f9b71191f01a2 upstream. is_jump_ins() checks 16b instruction fields without verifying that the instruction is indeed 16b, as is done by is_ra_save_ins() & is_sp_move_ins(). Add the appropriate check. Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14531/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/process.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index e67e17aec9e..427187b1807 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -260,9 +260,14 @@ static inline int is_jump_ins(union mips_instruction *ip) * * microMIPS is kind of more fun... */ - if ((ip->mm16_r5_format.opcode == mm_pool16c_op && - (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op) || - ip->j_format.opcode == mm_jal32_op) + if (mm_insn_16bit(ip->halfword[1])) { + if ((ip->mm16_r5_format.opcode == mm_pool16c_op && + (ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op)) + return 1; + return 0; + } + + if (ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || ip->r_format.func != mm_pool32axf_op) From 8577fb6b337ce1ecd07fcbe0e41ffd3497838861 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:06 +0000 Subject: [PATCH 146/252] MIPS: Calculate microMIPS ra properly when unwinding the stack commit bb9bc4689b9c635714fbcd5d335bad9934a7ebfc upstream. get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/process.c | 83 +++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 427187b1807..5a933699770 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -214,7 +214,7 @@ struct mips_frame_info { #define J_TARGET(pc,target) \ (((unsigned long)(pc) & 0xf0000000) | ((target) << 2)) -static inline int is_ra_save_ins(union mips_instruction *ip) +static inline int is_ra_save_ins(union mips_instruction *ip, int *poff) { #ifdef CONFIG_CPU_MICROMIPS /* @@ -227,25 +227,70 @@ static inline int is_ra_save_ins(union mips_instruction *ip) * microMIPS is way more fun... */ if (mm_insn_16bit(ip->halfword[1])) { - return (ip->mm16_r5_format.opcode == mm_swsp16_op && - ip->mm16_r5_format.rt == 31) || - (ip->mm16_m_format.opcode == mm_pool16c_op && - ip->mm16_m_format.func == mm_swm16_op); + switch (ip->mm16_r5_format.opcode) { + case mm_swsp16_op: + if (ip->mm16_r5_format.rt != 31) + return 0; + + *poff = ip->mm16_r5_format.simmediate; + *poff = (*poff << 2) / sizeof(ulong); + return 1; + + case mm_pool16c_op: + switch (ip->mm16_m_format.func) { + case mm_swm16_op: + *poff = ip->mm16_m_format.imm; + *poff += 1 + ip->mm16_m_format.rlist; + *poff = (*poff << 2) / sizeof(ulong); + return 1; + + default: + return 0; + } + + default: + return 0; + } } - else { - return (ip->mm_m_format.opcode == mm_pool32b_op && - ip->mm_m_format.rd > 9 && - ip->mm_m_format.base == 29 && - ip->mm_m_format.func == mm_swm32_func) || - (ip->i_format.opcode == mm_sw32_op && - ip->i_format.rs == 29 && - ip->i_format.rt == 31); + + switch (ip->i_format.opcode) { + case mm_sw32_op: + if (ip->i_format.rs != 29) + return 0; + if (ip->i_format.rt != 31) + return 0; + + *poff = ip->i_format.simmediate / sizeof(ulong); + return 1; + + case mm_pool32b_op: + switch (ip->mm_m_format.func) { + case mm_swm32_func: + if (ip->mm_m_format.rd < 0x10) + return 0; + if (ip->mm_m_format.base != 29) + return 0; + + *poff = ip->mm_m_format.simmediate; + *poff += (ip->mm_m_format.rd & 0xf) * sizeof(u32); + *poff /= sizeof(ulong); + return 1; + default: + return 0; + } + + default: + return 0; } #else /* sw / sd $ra, offset($sp) */ - return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) && - ip->i_format.rs == 29 && - ip->i_format.rt == 31; + if ((ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) && + ip->i_format.rs == 29 && ip->i_format.rt == 31) { + *poff = ip->i_format.simmediate / sizeof(ulong); + return 1; + } + + return 0; #endif } @@ -368,11 +413,9 @@ static int get_frame_info(struct mips_frame_info *info) } continue; } - if (info->pc_offset == -1 && is_ra_save_ins(&insn)) { - info->pc_offset = - ip->i_format.simmediate / sizeof(long); + if (info->pc_offset == -1 && + is_ra_save_ins(&insn, &info->pc_offset)) break; - } } if (info->frame_size && info->pc_offset >= 0) /* nested */ return 0; From 9dc2420f7d76f0ee002e22c7135bfcb1066e492c Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Nov 2016 15:07:07 +0000 Subject: [PATCH 147/252] MIPS: Handle microMIPS jumps in the same way as MIPS32/MIPS64 jumps commit 096a0de427ea333f56f0ee00328cff2a2731bcf1 upstream. is_jump_ins() checks for plain jump ("j") instructions since commit e7438c4b893e ("MIPS: Fix sibling call handling in get_frame_info") but that commit didn't make the same change to the microMIPS code, leaving it inconsistent with the MIPS32/MIPS64 code. Handle the microMIPS encoding of the jump instruction too such that it behaves consistently. Signed-off-by: Paul Burton Fixes: e7438c4b893e ("MIPS: Fix sibling call handling in get_frame_info") Cc: Tony Wu Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14533/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 5a933699770..3cfa3bc288f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -312,6 +312,8 @@ static inline int is_jump_ins(union mips_instruction *ip) return 0; } + if (ip->j_format.opcode == mm_j32_op) + return 1; if (ip->j_format.opcode == mm_jal32_op) return 1; if (ip->r_format.opcode != mm_pool32a_op || From 0259f8b800e641bf760164f340796019bdc8a6bb Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 12 Dec 2016 09:16:51 -0200 Subject: [PATCH 148/252] uvcvideo: Fix a wrong macro commit 17c341ec0115837a610b2da15e32546e26068234 upstream. Don't mix up UVC_BUF_STATE_* and VB2_BUF_STATE_* codes. Fixes: 6998b6fb4b1c ("[media] uvcvideo: Use videobuf2-vmalloc") Signed-off-by: Guennadi Liakhovetski Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/usb/uvc/uvc_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/uvc/uvc_queue.c b/drivers/media/usb/uvc/uvc_queue.c index cd962be860c..7e743958dbc 100644 --- a/drivers/media/usb/uvc/uvc_queue.c +++ b/drivers/media/usb/uvc/uvc_queue.c @@ -375,7 +375,7 @@ struct uvc_buffer *uvc_queue_next_buffer(struct uvc_video_queue *queue, nextbuf = NULL; spin_unlock_irqrestore(&queue->irqlock, flags); - buf->state = buf->error ? VB2_BUF_STATE_ERROR : UVC_BUF_STATE_DONE; + buf->state = buf->error ? UVC_BUF_STATE_ERROR : UVC_BUF_STATE_DONE; vb2_set_plane_payload(&buf->buf, 0, buf->bytesused); vb2_buffer_done(&buf->buf, VB2_BUF_STATE_DONE); From 639011409a82e9d6e65cb7a5f4a3d54b04e15dd5 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:21 -0800 Subject: [PATCH 149/252] scsi: aacraid: Reorder Adapter status check commit c421530bf848604e97d0785a03b3fe2c62775083 upstream. The driver currently checks the SELF_TEST_FAILED first and then KERNEL_PANIC next. Under error conditions(boot code failure) both SELF_TEST_FAILED and KERNEL_PANIC can be set at the same time. The driver has the capability to reset the controller on an KERNEL_PANIC, but not on SELF_TEST_FAILED. Fixed by first checking KERNEL_PANIC and then the others. Fixes: e8b12f0fb835223752 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC base controller family) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/aacraid/src.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 7e17107643d..05c999429ff 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -358,17 +358,24 @@ static int aac_src_check_health(struct aac_dev *dev) { u32 status = src_readl(dev, MUnit.OMR); - /* - * Check to see if the board failed any self tests. - */ - if (unlikely(status & SELF_TEST_FAILED)) - return -1; - /* * Check to see if the board panic'd. */ if (unlikely(status & KERNEL_PANIC)) - return (status >> 16) & 0xFF; + goto err_blink; + + /* + * Check to see if the board failed any self tests. + */ + if (unlikely(status & SELF_TEST_FAILED)) + goto err_out; + + /* + * Check to see if the board failed any self tests. + */ + if (unlikely(status & MONITOR_PANIC)) + goto err_out; + /* * Wait for the adapter to be up and running. */ @@ -378,6 +385,12 @@ static int aac_src_check_health(struct aac_dev *dev) * Everything is OK */ return 0; + +err_out: + return -1; + +err_blink: + return (status > 16) & 0xFF; } /** From f90660e18ac55e73a1ed641350995a3a6e5d72ff Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 14 Feb 2017 20:10:30 +0100 Subject: [PATCH 150/252] ath9k: use correct OTP register offsets for the AR9340 and AR9550 commit c9f1e32600816d695f817477d56490bfc2ba43c6 upstream. This patch fixes the OTP register definitions for the AR934x and AR9550 WMAC SoC. Previously, the ath9k driver was unable to initialize the integrated WMAC on an Aerohive AP121: | ath: phy0: timeout (1000 us) on reg 0x30018: 0xbadc0ffe & 0x00000007 != 0x00000004 | ath: phy0: timeout (1000 us) on reg 0x30018: 0xbadc0ffe & 0x00000007 != 0x00000004 | ath: phy0: Unable to initialize hardware; initialization status: -5 | ath9k ar934x_wmac: failed to initialize device | ath9k: probe of ar934x_wmac failed with error -5 It turns out that the AR9300_OTP_STATUS and AR9300_OTP_DATA definitions contain a typo. Cc: Gabor Juhos Fixes: add295a4afbdf5852d0 "ath9k: use correct OTP register offsets for AR9550" Signed-off-by: Christian Lamparter Signed-off-by: Chris Blake Signed-off-by: Kalle Valo Signed-off-by: Willy Tarreau --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h index 874f6570bd1..d83ad9df660 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.h @@ -71,13 +71,13 @@ #define AR9300_OTP_BASE \ ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x30000 : 0x14000) #define AR9300_OTP_STATUS \ - ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x30018 : 0x15f18) + ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x31018 : 0x15f18) #define AR9300_OTP_STATUS_TYPE 0x7 #define AR9300_OTP_STATUS_VALID 0x4 #define AR9300_OTP_STATUS_ACCESS_BUSY 0x2 #define AR9300_OTP_STATUS_SM_BUSY 0x1 #define AR9300_OTP_READ_DATA \ - ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x3001c : 0x15f1c) + ((AR_SREV_9340(ah) || AR_SREV_9550(ah)) ? 0x3101c : 0x15f1c) enum targetPowerHTRates { HT_TARGET_RATE_0_8_16, From 5beea85761cade8086b1420d788257eb3c5aac50 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 151/252] fuse: add missing FR_FORCE commit 2e38bea99a80eab408adee27f873a188d57b76cb upstream. fuse_file_put() was missing the "force" flag for the RELEASE request when sending synchronously (fuseblk). If this flag is not set, then a sync request may be interrupted before it is dequeued by the userspace filesystem. In this case the OPEN won't be balanced with a RELEASE. [js] force is a variable, not a bit Signed-off-by: Miklos Szeredi Fixes: 5a18ec176c93 ("fuse: fix hang of single threaded fuseblk filesystem") Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7ada0f04e31..1dce9304101 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -128,6 +128,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) struct fuse_req *req = ff->reserved_req; if (sync) { + req->force = 1; req->background = 0; fuse_request_send(ff->fc, req); path_put(&req->misc.release.path); From e9a1e1cc1cd1a7f684a938eff41caf2cdff615d2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Dec 2016 18:07:52 -0700 Subject: [PATCH 152/252] RDMA/core: Fix incorrect structure packing for booleans commit 55efcfcd7776165b294f8b5cd6e05ca00ec89b7c upstream. The RDMA core uses ib_pack() to convert from unpacked CPU structs to on-the-wire bitpacked structs. This process requires that 1 bit fields are declared as u8 in the unpacked struct, otherwise the packing process does not read the value properly and the packed result is wired to 0. Several places wrongly used int. Crucially this means the kernel has never, set reversible correctly in the path record request. It has always asked for irreversible paths even if the ULP requests otherwise. When the kernel is used with a SM that supports this feature, it completely breaks communication management if reversible paths are not properly requested. The only reason this ever worked is because opensm ignores the reversible bit. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- include/rdma/ib_sa.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 8275e539bac..969aff6f657 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -137,12 +137,12 @@ struct ib_sa_path_rec { union ib_gid sgid; __be16 dlid; __be16 slid; - int raw_traffic; + u8 raw_traffic; /* reserved */ __be32 flow_label; u8 hop_limit; u8 traffic_class; - int reversible; + u8 reversible; u8 numb_path; __be16 pkey; __be16 qos_class; @@ -193,7 +193,7 @@ struct ib_sa_mcmember_rec { u8 hop_limit; u8 scope; u8 join_state; - int proxy_join; + u8 proxy_join; }; /* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ From 58ccba85f42f938eb0a73e277f6d54cc39658456 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 23 Feb 2017 14:53:39 -0500 Subject: [PATCH 153/252] NFSv4: fix getacl head length estimation commit 6682c14bbe505a8b912c57faf544f866777ee48d upstream. Bitmap and attrlen follow immediately after the op reply header. This was an oversight from commit bf118a342f. Consequences of this are just minor efficiency (extra calls to xdr_shrink_bufhead). Fixes: bf118a342f10 "NFSv4: include bitmap in nfsv4 get acl data" Reviewed-by: Kinglong Mee Signed-off-by: J. Bruce Fields Signed-off-by: Anna Schumaker Signed-off-by: Willy Tarreau --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 988efb4caac..f5d27ca1014 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2435,7 +2435,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, From 8e68a4d13a726af98c64d17f669f0972af75e97c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 21 Nov 2016 13:37:48 +0100 Subject: [PATCH 154/252] s390/qdio: clear DSCI prior to scanning multiple input queues commit 1e4a382fdc0ba8d1a85b758c0811de3a3631085e upstream. For devices with multiple input queues, tiqdio_call_inq_handlers() iterates over all input queues and clears the device's DSCI during each iteration. If the DSCI is re-armed during one of the later iterations, we therefore do not scan the previous queues again. The re-arming also raises a new adapter interrupt. But its handler does not trigger a rescan for the device, as the DSCI has already been erroneously cleared. This can result in queue stalls on devices with multiple input queues. Fix it by clearing the DSCI just once, prior to scanning the queues. As the code is moved in front of the loop, we also need to access the DSCI directly (ie irq->dsci) instead of going via each queue's parent pointer to the same irq. This is not a functional change, and a follow-up patch will clean up the other users. In practice, this bug only affects CQ-enabled HiperSockets devices, ie. devices with sysfs-attribute "hsuid" set. Setting a hsuid is needed for AF_IUCV socket applications that use HiperSockets communication. Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks") Reviewed-by: Ursula Braun Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky Signed-off-by: Willy Tarreau --- drivers/s390/cio/qdio_thinint.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index bde5255200d..1d1e585bd03 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -142,11 +142,11 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) struct qdio_q *q; int i; - for_each_input_queue(irq, q, i) { - if (!references_shared_dsci(irq) && - has_multiple_inq_on_dsci(irq)) - xchg(q->irq_ptr->dsci, 0); + if (!references_shared_dsci(irq) && + has_multiple_inq_on_dsci(irq)) + xchg(irq->dsci, 0); + for_each_input_queue(irq, q, i) { if (q->u.in.queue_start_poll) { /* skip if polling is enabled or already in work */ if (test_and_set_bit(QDIO_QUEUE_IRQS_DISABLED, From 2c318737ecf7239eeae448d8333ee2643d42a85d Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Wed, 28 Dec 2016 14:47:23 +0200 Subject: [PATCH 155/252] IB/ipoib: Fix deadlock between rmmod and set_mode commit 0a0007f28304cb9fc87809c86abb80ec71317f20 upstream. When calling set_mode from sys/fs, the call flow locks the sys/fs lock first and then tries to lock rtnl_lock (when calling ipoib_set_mod). On the other hand, the rmmod call flow takes the rtnl_lock first (when calling unregister_netdev) and then tries to take the sys/fs lock. Deadlock a->b, b->a. The problem starts when ipoib_set_mod frees it's rtnl_lck and tries to get it after that. set_mod: [] ? check_preempt_curr+0x6d/0x90 [] __mutex_lock_slowpath+0x13e/0x180 [] ? __rtnl_unlock+0x15/0x20 [] mutex_lock+0x2b/0x50 [] rtnl_lock+0x15/0x20 [] ipoib_set_mode+0x97/0x160 [ib_ipoib] [] set_mode+0x3b/0x80 [ib_ipoib] [] dev_attr_store+0x20/0x30 [] sysfs_write_file+0xe5/0x170 [] vfs_write+0xb8/0x1a0 [] sys_write+0x51/0x90 [] system_call_fastpath+0x16/0x1b rmmod: [] ? put_dec+0x10c/0x110 [] ? number+0x2ee/0x320 [] schedule_timeout+0x215/0x2e0 [] ? vsnprintf+0x484/0x5f0 [] ? string+0x40/0x100 [] wait_for_common+0x123/0x180 [] ? default_wake_function+0x0/0x20 [] ? ifind_fast+0x5e/0xb0 [] wait_for_completion+0x1d/0x20 [] sysfs_addrm_finish+0x228/0x270 [] sysfs_remove_dir+0xa3/0xf0 [] kobject_del+0x16/0x40 [] device_del+0x184/0x1e0 [] netdev_unregister_kobject+0xab/0xc0 [] rollback_registered+0xae/0x130 [] unregister_netdevice+0x22/0x70 [] unregister_netdev+0x1e/0x30 [] ipoib_remove_one+0xe0/0x120 [ib_ipoib] [] ib_unregister_device+0x4f/0x100 [ib_core] [] mlx4_ib_remove+0x41/0x180 [mlx4_ib] [] mlx4_remove_device+0x71/0x90 [mlx4_core] Fixes: 862096a8bbf8 ("IB/ipoib: Add more rtnl_link_ops callbacks") Cc: Or Gerlitz Signed-off-by: Feras Daoud Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 12 +++++++----- drivers/infiniband/ulp/ipoib/ipoib_main.c | 6 ++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index aa9ad2d70dd..c781c7c633f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1482,12 +1482,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ret = ipoib_set_mode(dev, buf); - rtnl_unlock(); + /* The assumption is that the function ipoib_set_mode returned + * with the rtnl held by it, if not the value -EBUSY returned, + * then no need to rtnl_unlock + */ + if (ret != -EBUSY) + rtnl_unlock(); - if (!ret) - return count; - - return ret; + return (!ret || ret == -EBUSY) ? count : ret; } static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 375f9edd402..b022d710810 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -234,8 +234,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); - rtnl_lock(); - return 0; + return (!rtnl_trylock()) ? -EBUSY : 0; } if (!strcmp(buf, "datagram\n")) { @@ -244,8 +243,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); rtnl_unlock(); ipoib_flush_paths(dev); - rtnl_lock(); - return 0; + return (!rtnl_trylock()) ? -EBUSY : 0; } return -EINVAL; From 5bb7a6c43800704b75696d375ecc88a39ce231d2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 7 Feb 2017 12:05:25 -0500 Subject: [PATCH 156/252] ktest: Fix child exit code processing commit 32677207dcc5e594254b7fb4fb2352b1755b1d5b upstream. The child_exit errno needs to be shifted by 8 bits to compare against the return values for the bisect variables. Fixes: c5dacb88f0a64 ("ktest: Allow overriding bisect test results") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Willy Tarreau --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 0d7fd8b5154..a0a8314df4b 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2375,7 +2375,7 @@ sub do_run_test { } waitpid $child_pid, 0; - $child_exit = $?; + $child_exit = $? >> 8; if (!$bug && $in_bisect) { if (defined($bisect_ret_good)) { From ebd9572e502c014e942bc5376fb99d4b5c94f6bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 11 Feb 2017 10:37:38 -0500 Subject: [PATCH 157/252] nlm: Ensure callback code also checks that the files match commit 251af29c320d86071664f02c76f0d063a19fefdf upstream. It is not sufficient to just check that the lock pids match when granting a callback, we also need to ensure that we're granting the callback on the right file. Reported-by: Pankaj Singh Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Willy Tarreau --- include/linux/lockd/lockd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0adf073f13b..669af5eaa89 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -355,7 +355,8 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return fl1->fl_pid == fl2->fl_pid + return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) + && fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end From cc3d0c266f10d4d685eac8818989187fa42b61f1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 15 Mar 2017 16:28:51 -0400 Subject: [PATCH 158/252] dm: flush queued bios when process blocks to avoid deadlock commit d67a5f4b5947aba4bfe9a80a2b86079c215ca755 upstream. Commit df2cb6daa4 ("block: Avoid deadlocks with bio allocation by stacking drivers") created a workqueue for every bio set and code in bio_alloc_bioset() that tries to resolve some low-memory deadlocks by redirecting bios queued on current->bio_list to the workqueue if the system is low on memory. However other deadlocks (see below **) may happen, without any low memory condition, because generic_make_request is queuing bios to current->bio_list (rather than submitting them). ** the related dm-snapshot deadlock is detailed here: https://www.redhat.com/archives/dm-devel/2016-July/msg00065.html Fix this deadlock by redirecting any bios on current->bio_list to the bio_set's rescue workqueue on every schedule() call. Consequently, when the process blocks on a mutex, the bios queued on current->bio_list are dispatched to independent workqueus and they can complete without waiting for the mutex to be available. The structure blk_plug contains an entry cb_list and this list can contain arbitrary callback functions that are called when the process blocks. To implement this fix DM (ab)uses the onstack plug's cb_list interface to get its flush_current_bio_list() called at schedule() time. This fixes the snapshot deadlock - if the map method blocks, flush_current_bio_list() will be called and it redirects bios waiting on current->bio_list to appropriate workqueues. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1267650 Depends-on: df2cb6daa4 ("block: Avoid deadlocks with bio allocation by stacking drivers") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Willy Tarreau --- drivers/md/dm.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a77ef6cac62..975bb316a55 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -976,11 +976,62 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); +/* + * Flush current->bio_list when the target map method blocks. + * This fixes deadlocks in snapshot and possibly in other targets. + */ +struct dm_offload { + struct blk_plug plug; + struct blk_plug_cb cb; +}; + +static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) +{ + struct dm_offload *o = container_of(cb, struct dm_offload, cb); + struct bio_list list; + struct bio *bio; + + INIT_LIST_HEAD(&o->cb.list); + + if (unlikely(!current->bio_list)) + return; + + list = *current->bio_list; + bio_list_init(current->bio_list); + + while ((bio = bio_list_pop(&list))) { + struct bio_set *bs = bio->bi_pool; + if (unlikely(!bs) || bs == fs_bio_set) { + bio_list_add(current->bio_list, bio); + continue; + } + + spin_lock(&bs->rescue_lock); + bio_list_add(&bs->rescue_list, bio); + queue_work(bs->rescue_workqueue, &bs->rescue_work); + spin_unlock(&bs->rescue_lock); + } +} + +static void dm_offload_start(struct dm_offload *o) +{ + blk_start_plug(&o->plug); + o->cb.callback = flush_current_bio_list; + list_add(&o->cb.list, ¤t->plug->cb_list); +} + +static void dm_offload_end(struct dm_offload *o) +{ + list_del(&o->cb.list); + blk_finish_plug(&o->plug); +} + static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; struct mapped_device *md; + struct dm_offload o; struct bio *clone = &tio->clone; struct dm_target *ti = tio->ti; @@ -994,7 +1045,11 @@ static void __map_bio(struct dm_target_io *tio) */ atomic_inc(&tio->io->io_count); sector = clone->bi_sector; + + dm_offload_start(&o); r = ti->type->map(ti, clone); + dm_offload_end(&o); + if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ From 7c759899db3550db0f2cfb7995fde0cbb64b3904 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 31 Jan 2017 17:17:27 +0100 Subject: [PATCH 159/252] USB: serial: digi_acceleport: fix OOB data sanity check commit 2d380889215fe20b8523345649dee0579821800c upstream. Make sure to check for short transfers to avoid underflow in a loop condition when parsing the receive buffer. Also fix an off-by-one error in the incomplete sanity check which could lead to invalid data being parsed. Fixes: 8c209e6782ca ("USB: make actual_length in struct urb field u32") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/digi_acceleport.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 8c34d9cfb22..15b9cb30bb5 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1489,16 +1489,20 @@ static int digi_read_oob_callback(struct urb *urb) struct usb_serial *serial = port->serial; struct tty_struct *tty; struct digi_port *priv = usb_get_serial_port_data(port); + unsigned char *buf = urb->transfer_buffer; int opcode, line, status, val; int i; unsigned int rts; + if (urb->actual_length < 4) + return -1; + /* handle each oob command */ - for (i = 0; i < urb->actual_length - 3;) { - opcode = ((unsigned char *)urb->transfer_buffer)[i++]; - line = ((unsigned char *)urb->transfer_buffer)[i++]; - status = ((unsigned char *)urb->transfer_buffer)[i++]; - val = ((unsigned char *)urb->transfer_buffer)[i++]; + for (i = 0; i < urb->actual_length - 4; i += 4) { + opcode = buf[i]; + line = buf[i + 1]; + status = buf[i + 2]; + val = buf[i + 3]; dev_dbg(&port->dev, "digi_read_oob_callback: opcode=%d, line=%d, status=%d, val=%d\n", opcode, line, status, val); From 5e44fdd1718003fe5660c912d433057e82b0e4e0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2017 19:11:28 +0100 Subject: [PATCH 160/252] USB: serial: digi_acceleport: fix OOB-event processing commit 2e46565cf622dd0534a9d8bffe152a577b48d7aa upstream. A recent change claimed to fix an off-by-one error in the OOB-port completion handler, but instead introduced such an error. This could specifically led to modem-status changes going unnoticed, effectively breaking TIOCMGET. Note that the offending commit fixes a loop-condition underflow and is marked for stable, but should not be backported without this fix. Reported-by: Ben Hutchings Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity check") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/digi_acceleport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 15b9cb30bb5..e8d7c1beae8 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1498,7 +1498,7 @@ static int digi_read_oob_callback(struct urb *urb) return -1; /* handle each oob command */ - for (i = 0; i < urb->actual_length - 4; i += 4) { + for (i = 0; i < urb->actual_length - 3; i += 4) { opcode = buf[i]; line = buf[i + 1]; status = buf[i + 2]; From 06a5c0b6e9ff8841019759b819aa801eef787439 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 17:43:50 +0100 Subject: [PATCH 161/252] MIPS: ip27: Disable qlge driver in defconfig commit b617649468390713db1515ea79fc772d2eb897a8 upstream. One of the last remaining failures in kernelci.org is for a gcc bug: drivers/net/ethernet/qlogic/qlge/qlge_main.c:4819:1: error: insn does not satisfy its constraints: drivers/net/ethernet/qlogic/qlge/qlge_main.c:4819:1: internal compiler error: in extract_constrain_insn, at recog.c:2190 This is apparently broken in gcc-6 but fixed in gcc-7, and I cannot reproduce the problem here. However, it is clear that ip27_defconfig does not actually need this driver as the platform has only PCI-X but not PCIe, and the qlge adapter in turn is PCIe-only. The driver was originally enabled in 2010 along with lots of other drivers. Fixes: 59d302b342e5 ("MIPS: IP27: Make defconfig useful again.") Signed-off-by: Arnd Bergmann Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15197/ Signed-off-by: James Hogan Signed-off-by: Willy Tarreau --- arch/mips/configs/ip27_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 0e36abcd39c..7446284dd7b 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -206,7 +206,6 @@ CONFIG_MLX4_EN=m # CONFIG_MLX4_DEBUG is not set CONFIG_TEHUTI=m CONFIG_BNX2X=m -CONFIG_QLGE=m CONFIG_SFC=m CONFIG_BE2NET=m CONFIG_LIBERTAS_THINFIRM=m From 2b6aa6271ad4dd92c397829171ba6c64223f4b55 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 28 Sep 2016 22:55:54 -0400 Subject: [PATCH 162/252] tracing: Add #undef to fix compile error commit bf7165cfa23695c51998231c4efa080fe1d3548d upstream. There are several trace include files that define TRACE_INCLUDE_FILE. Include several of them in the same .c file (as I currently have in some code I am working on), and the compile will blow up with a "warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls" Every other include file in include/trace/events/ avoids that issue by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h should have one, too. Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer") Signed-off-by: Rik van Riel Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Willy Tarreau --- include/trace/events/syscalls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 5a4c04a75b3..55c9b99ff9a 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM raw_syscalls +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) From 61ab4e5c98a47b11ba1bf50bbab5673c40868fde Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:41 +0100 Subject: [PATCH 163/252] USB: serial: safe_serial: fix information leak in completion handler commit 8c76d7cd520ebffc1ea9ea0850d87a224a50c7f2 upstream. Add missing sanity check to the bulk-in completion handler to avoid an integer underflow that could be triggered by a malicious device. This avoids leaking up to 56 bytes from after the URB transfer buffer to user space. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/safe_serial.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index 21cd7bf2a8c..8e24f8ff2fc 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -215,6 +215,11 @@ static void safe_process_read_urb(struct urb *urb) if (!safe) goto out; + if (length < 2) { + dev_err(&port->dev, "malformed packet\n"); + return; + } + fcs = fcs_compute10(data, length, CRC10_INITFCS); if (fcs) { dev_err(&port->dev, "%s - bad CRC %x\n", __func__, fcs); From 50b607408150dc047a5c5b81beb8b2ec2c025247 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:38 +0100 Subject: [PATCH 164/252] USB: serial: omninet: fix reference leaks at open commit 30572418b445d85fcfe6c8fe84c947d2606767d8 upstream. This driver needlessly took another reference to the tty on open, a reference which was then never released on close. This lead to not just a leak of the tty, but also a driver reference leak that prevented the driver from being unloaded after a port had once been opened. Fixes: 4a90f09b20f4 ("tty: usb-serial krefs") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/omninet.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index 24720f65638..8028e5ffe80 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -143,12 +143,6 @@ static int omninet_port_remove(struct usb_serial_port *port) static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port) { - struct usb_serial *serial = port->serial; - struct usb_serial_port *wport; - - wport = serial->port[1]; - tty_port_tty_set(&wport->port, tty); - return usb_serial_generic_open(tty, port); } From 88ee6312bc679bca45bb88a81cb133bcc1c39883 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 16:11:03 +0100 Subject: [PATCH 165/252] USB: iowarrior: fix NULL-deref at probe commit b7321e81fc369abe353cf094d4f0dc2fe11ab95f upstream. Make sure to check for the required interrupt-in endpoint to avoid dereferencing a NULL-pointer should a malicious device lack such an endpoint. Note that a fairly recent change purported to fix this issue, but added an insufficient test on the number of endpoints only, a test which can now be removed. Fixes: 4ec0ef3a8212 ("USB: iowarrior: fix oops with malicious USB descriptors") Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/misc/iowarrior.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 4c24ba0a657..6dda72ef6cc 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -792,12 +792,6 @@ static int iowarrior_probe(struct usb_interface *interface, iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); - if (iface_desc->desc.bNumEndpoints < 1) { - dev_err(&interface->dev, "Invalid number of endpoints\n"); - retval = -EINVAL; - goto error; - } - /* set up the endpoint information */ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; @@ -808,6 +802,13 @@ static int iowarrior_probe(struct usb_interface *interface, /* this one will match for the IOWarrior56 only */ dev->int_out_endpoint = endpoint; } + + if (!dev->int_in_endpoint) { + dev_err(&interface->dev, "no interrupt-in endpoint found\n"); + retval = -ENODEV; + goto error; + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && From 04992d78a39c4938f4d464e55e3aff6bb7ee0d43 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 16:11:04 +0100 Subject: [PATCH 166/252] USB: iowarrior: fix NULL-deref in write commit de46e56653de7b3b54baa625bd582635008b8d05 upstream. Make sure to verify that we have the required interrupt-out endpoint for IOWarrior56 devices to avoid dereferencing a NULL-pointer in write should a malicious device lack such an endpoint. Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/misc/iowarrior.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 6dda72ef6cc..05aa716cf6b 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -809,6 +809,14 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } + if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if (!dev->int_out_endpoint) { + dev_err(&interface->dev, "no interrupt-out endpoint found\n"); + retval = -ENODEV; + goto error; + } + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && From 58a600a77df15f0f00b9f55ef0259fd3500bc6ae Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:37 +0100 Subject: [PATCH 167/252] USB: serial: io_ti: fix NULL-deref in interrupt callback commit 0b1d250afb8eb9d65afb568bac9b9f9253a82b49 upstream. Fix a NULL-pointer dereference in the interrupt callback should a malicious device send data containing a bad port number by adding the missing sanity check. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/io_ti.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index e1b3e79b707..e2dc182150f 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1575,6 +1575,12 @@ static void edge_interrupt_callback(struct urb *urb) function = TIUMP_GET_FUNC_FROM_CODE(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, info 0x%x\n", __func__, port_number, function, data[1]); + + if (port_number >= edge_serial->serial->num_ports) { + dev_err(dev, "bad port number %d\n", port_number); + goto exit; + } + port = edge_serial->serial->port[port_number]; edge_port = usb_get_serial_port_data(port); if (!edge_port) { From cb32438b77c3cf330c9e830b45a2ceddc6d0eb14 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:40 +0100 Subject: [PATCH 168/252] USB: serial: io_ti: fix information leak in completion handler commit 654b404f2a222f918af9b0cd18ad469d0c941a8e upstream. Add missing sanity check to the bulk-in completion handler to avoid an integer underflow that can be triggered by a malicious device. This avoids leaking 128 kB of memory content from after the URB transfer buffer to user space. Fixes: 8c209e6782ca ("USB: make actual_length in struct urb field u32") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/usb/serial/io_ti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index e2dc182150f..20814d528c1 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1661,7 +1661,7 @@ static void edge_bulk_in_callback(struct urb *urb) port_number = edge_port->port->number - edge_port->port->serial->minor; - if (edge_port->lsr_event) { + if (urb->actual_length > 0 && edge_port->lsr_event) { edge_port->lsr_event = 0; dev_dbg(dev, "%s ===== Port %u LSR Status = %02x, Data = %02x ======\n", __func__, port_number, edge_port->lsr_mask, *data); From e5c6b9c54e19b4fad9a699a1ae5506e01231eae9 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 23 Feb 2017 17:19:41 +0100 Subject: [PATCH 169/252] vxlan: correctly validate VXLAN ID against VXLAN_N_VID commit 4e37d6911f36545b286d15073f6f2222f840e81c upstream. The incorrect check caused an off-by-one error: the maximum VID 0xffffff was unusable. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Signed-off-by: Matthias Schiffer Acked-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a1dc186c6f6..8912ba83fd7 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1386,7 +1386,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (data[IFLA_VXLAN_ID]) { __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); - if (id >= VXLAN_VID_MASK) + if (id >= VXLAN_N_VID) return -ERANGE; } From 7c72e8511794214929517c3742c63d8193a59905 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 26 Feb 2017 17:14:35 +0200 Subject: [PATCH 170/252] ipv4: mask tos for input route commit 6e28099d38c0e50d62c1afc054e37e573adf3d21 upstream. Restore the lost masking of TOS in input route code to allow ip rules to match it properly. Problem [1] noticed by Shmulik Ladkani [1] http://marc.info/?t=137331755300040&r=1&w=2 Fixes: 89aef8921bfb ("ipv4: Delete routing cache.") Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e59d6332458..d9c79134363 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1789,6 +1789,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, { int res; + tos &= IPTOS_RT_MASK; rcu_read_lock(); /* Multicast recognition logic is moved from route cache to here. From 12f1a0f987bec98b105f24225691ab1ea7c77477 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jul 2015 15:03:40 +0200 Subject: [PATCH 171/252] locking/static_keys: Add static_key_{en,dis}able() helpers commit e33886b38cc82a9fc3b2d655dfc7f50467594138 upstream. Add two helpers to make it easier to treat the refcount as boolean. [js] do not involve WARN_ON_ONCE as it causes build failures Suggested-by: Jason Baron Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Jiri Slaby [wt: only backported for use in next fix ; s/static_key_count(key)/atomic_read(&key->enabled)/] Signed-off-by: Willy Tarreau --- include/linux/jump_label.h | 16 ++++++++++++++++ kernel/sched/core.c | 6 ++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0976fc46d1e..7f831b28ee6 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -208,4 +208,20 @@ static inline bool static_key_enabled(struct static_key *key) return (atomic_read(&key->enabled) > 0); } +static inline void static_key_enable(struct static_key *key) +{ + int count = atomic_read(&key->enabled); + + if (!count) + static_key_slow_inc(key); +} + +static inline void static_key_disable(struct static_key *key) +{ + int count = atomic_read(&key->enabled); + + if (count) + static_key_slow_dec(key); +} + #endif /* _LINUX_JUMP_LABEL_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6a366f9d08d..506e56ec56a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -179,14 +179,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - if (static_key_enabled(&sched_feat_keys[i])) - static_key_slow_dec(&sched_feat_keys[i]); + static_key_disable(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - if (!static_key_enabled(&sched_feat_keys[i])) - static_key_slow_inc(&sched_feat_keys[i]); + static_key_enable(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; From 9cfe94233cfd74268b4a819c87748c3f3d08df33 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:28:39 -0800 Subject: [PATCH 172/252] net: net_enable_timestamp() can be called from irq contexts commit 13baa00ad01bb3a9f893e3a08cbc2d072fc0c15d upstream. It is now very clear that silly TCP listeners might play with enabling/disabling timestamping while new children are added to their accept queue. Meaning net_enable_timestamp() can be called from BH context while current state of the static key is not enabled. Lets play safe and allow all contexts. The work queue is scheduled only under the problematic cases, which are the static key enable/disable transition, to not slow down critical paths. This extends and improves what we did in commit 5fa8bbda38c6 ("net: use a work queue to defer net_disable_timestamp() work") Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 11535a94604..682bf5ad63a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1560,27 +1560,54 @@ EXPORT_SYMBOL(call_netdevice_notifiers); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL static atomic_t netstamp_needed_deferred; +static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) { int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + int wanted; - while (deferred--) - static_key_slow_dec(&netstamp_needed); + wanted = atomic_add_return(deferred, &netstamp_wanted); + if (wanted > 0) + static_key_enable(&netstamp_needed); + else + static_key_disable(&netstamp_needed); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { +#ifdef HAVE_JUMP_LABEL + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 0) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) + return; + } + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_inc(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - /* net_disable_timestamp() can be called from non process context */ - atomic_inc(&netstamp_needed_deferred); + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 1) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) + return; + } + atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_key_slow_dec(&netstamp_needed); From 29c4bf40ac3bbdf742397d29e7b4fee7f8c9f61a Mon Sep 17 00:00:00 2001 From: Jon Maxwell Date: Fri, 10 Mar 2017 16:40:33 +1100 Subject: [PATCH 173/252] dccp/tcp: fix routing redirect race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 45caeaa5ac0b4b11784ac6f932c0ad4c6b67cda0 upstream. As Eric Dumazet pointed out this also needs to be fixed in IPv6. v2: Contains the IPv6 tcp/Ipv6 dccp patches as well. We have seen a few incidents lately where a dst_enty has been freed with a dangling TCP socket reference (sk->sk_dst_cache) pointing to that dst_entry. If the conditions/timings are right a crash then ensues when the freed dst_entry is referenced later on. A Common crashing back trace is: #8 [] page_fault at ffffffff8163e648 [exception RIP: __tcp_ack_snd_check+74] . . #9 [] tcp_rcv_established at ffffffff81580b64 #10 [] tcp_v4_do_rcv at ffffffff8158b54a #11 [] tcp_v4_rcv at ffffffff8158cd02 #12 [] ip_local_deliver_finish at ffffffff815668f4 #13 [] ip_local_deliver at ffffffff81566bd9 #14 [] ip_rcv_finish at ffffffff8156656d #15 [] ip_rcv at ffffffff81566f06 #16 [] __netif_receive_skb_core at ffffffff8152b3a2 #17 [] __netif_receive_skb at ffffffff8152b608 #18 [] netif_receive_skb at ffffffff8152b690 #19 [] vmxnet3_rq_rx_complete at ffffffffa015eeaf [vmxnet3] #20 [] vmxnet3_poll_rx_only at ffffffffa015f32a [vmxnet3] #21 [] net_rx_action at ffffffff8152bac2 #22 [] __do_softirq at ffffffff81084b4f #23 [] call_softirq at ffffffff8164845c #24 [] do_softirq at ffffffff81016fc5 #25 [] irq_exit at ffffffff81084ee5 #26 [] do_IRQ at ffffffff81648ff8 Of course it may happen with other NIC drivers as well. It's found the freed dst_entry here: 224 static bool tcp_in_quickack_mode(struct sock *sk)↩ 225 {↩ 226 ▹ const struct inet_connection_sock *icsk = inet_csk(sk);↩ 227 ▹ const struct dst_entry *dst = __sk_dst_get(sk);↩ 228 ↩ 229 ▹ return (dst && dst_metric(dst, RTAX_QUICKACK)) ||↩ 230 ▹ ▹ (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);↩ 231 }↩ But there are other backtraces attributed to the same freed dst_entry in netfilter code as well. All the vmcores showed 2 significant clues: - Remote hosts behind the default gateway had always been redirected to a different gateway. A rtable/dst_entry will be added for that host. Making more dst_entrys with lower reference counts. Making this more probable. - All vmcores showed a postitive LockDroppedIcmps value, e.g: LockDroppedIcmps 267 A closer look at the tcp_v4_err() handler revealed that do_redirect() will run regardless of whether user space has the socket locked. This can result in a race condition where the same dst_entry cached in sk->sk_dst_entry can be decremented twice for the same socket via: do_redirect()->__sk_dst_check()-> dst_release(). Which leads to the dst_entry being prematurely freed with another socket pointing to it via sk->sk_dst_cache and a subsequent crash. To fix this skip do_redirect() if usespace has the socket locked. Instead let the redirect take place later when user space does not have the socket locked. The dccp/IPv6 code is very similar in this respect, so fixing it there too. As Eric Garver pointed out the following commit now invalidates routes. Which can set the dst->obsolete flag so that ipv4_dst_check() returns null and triggers the dst_release(). Fixes: ceb3320610d6 ("ipv4: Kill routes during PMTU/redirect updates.") Cc: Eric Garver Cc: Hannes Sowa Signed-off-by: Jon Maxwell Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/dccp/ipv4.c | 3 ++- net/dccp/ipv6.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 8 +++++--- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 294c642fbeb..3bb5ff9e14a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -263,7 +263,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) switch (type) { case ICMP_REDIRECT: - dccp_do_redirect(skb, sk); + if (!sock_owned_by_user(sk)) + dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 94f8224d543..9ad2416f8e3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -132,10 +132,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1e24e5a426c..195c618aba6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -389,7 +389,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 70b10ed169a..ecbdc4b29f2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -382,10 +382,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } From 20af6b403d9828a18a3119d12f58f1b723b8971f Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Fri, 24 Feb 2017 11:00:32 -0500 Subject: [PATCH 174/252] net sched actions: decrement module reference count after table flush. commit edb9d1bff4bbe19b8ae0e71b1f38732591a9eeb2 upstream. When tc actions are loaded as a module and no actions have been installed, flushing them would result in actions removed from the memory, but modules reference count not being decremented, so that the modules would not be unloaded. Following is example with GACT action: % sudo modprobe act_gact % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions ls action gact % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 1 % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 2 % sudo rmmod act_gact rmmod: ERROR: Module act_gact is in use .... After the fix: % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions add action pass index 1 % sudo tc actions add action pass index 2 % sudo tc actions add action pass index 3 % lsmod Module Size Used by act_gact 16384 3 % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 0 % sudo rmmod act_gact % lsmod Module Size Used by % Fixes: f97017cdefef ("net-sched: Fix actions flushing") Signed-off-by: Roman Mashak Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/sched/act_api.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 15d46b9166d..0a31f2c51e9 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -814,10 +814,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, goto out_module_put; err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); - if (err < 0) + if (err <= 0) goto out_module_put; - if (err == 0) - goto noflush_out; nla_nest_end(skb, nest); @@ -835,7 +833,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, out_module_put: module_put(a->ops->owner); err_out: -noflush_out: kfree_skb(skb); kfree(a); return err; From 1e0d06b281f97e575091540ab9a581c7dfe38a58 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:49 +0100 Subject: [PATCH 175/252] perf/core: Fix event inheritance on fork() commit e7cc4865f0f31698ef2f7aac01a50e78968985b7 upstream. While hunting for clues to a use-after-free, Oleg spotted that perf_event_init_context() can loose an error value with the result that fork() can succeed even though we did not fully inherit the perf event context. Spotted-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: oleg@redhat.com Fixes: 889ff0150661 ("perf/core: Split context's event group list into pinned and non-pinned lists") Link: http://lkml.kernel.org/r/20170316125823.190342547@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Willy Tarreau --- kernel/events/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 76e26b8e4e4..5a550f2e37f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7470,7 +7470,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } /* @@ -7486,7 +7486,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } raw_spin_lock_irqsave(&parent_ctx->lock, flags); @@ -7514,6 +7514,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) } raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +out_unlock: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); From 47bad9193ba11435d3869fc7aae9382a8def3eb5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:39:01 +0100 Subject: [PATCH 176/252] isdn/gigaset: fix NULL-deref at probe commit 68c32f9c2a36d410aa242e661506e5b2c2764179 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: cf7776dc05b8 ("[PATCH] isdn4linux: Siemens Gigaset drivers - direct USB connection") Cc: Hansjoerg Lipp Signed-off-by: Johan Hovold Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/isdn/gigaset/bas-gigaset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index c44950d3eb7..6d4d9c1c2cf 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface, return -ENODEV; } + if (hostif->desc.bNumEndpoints < 1) + return -ENODEV; + dev_info(&udev->dev, "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n", __func__, le16_to_cpu(udev->descriptor.idVendor), From d7a684288d11348df9bd11f4fb7bed918fe3294d Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 17 Mar 2017 00:48:18 +0000 Subject: [PATCH 177/252] xen: do not re-use pirq number cached in pci device msi msg data commit c74fd80f2f41d05f350bb478151021f88551afe8 upstream. Revert the main part of commit: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") That commit introduced reading the pci device's msi message data to see if a pirq was previously configured for the device's msi/msix, and re-use that pirq. At the time, that was the correct behavior. However, a later change to Qemu caused it to call into the Xen hypervisor to unmap all pirqs for a pci device, when the pci device disables its MSI/MSIX vectors; specifically the Qemu commit: c976437c7dba9c7444fb41df45468968aaa326ad ("qemu-xen: free all the pirqs for msi/msix when driver unload") Once Qemu added this pirq unmapping, it was no longer correct for the kernel to re-use the pirq number cached in the pci device msi message data. All Qemu releases since 2.1.0 contain the patch that unmaps the pirqs when the pci device disables its MSI/MSIX vectors. This bug is causing failures to initialize multiple NVMe controllers under Xen, because the NVMe driver sets up a single MSIX vector for each controller (concurrently), and then after using that to talk to the controller for some configuration data, it disables the single MSIX vector and re-configures all the MSIX vectors it needs. So the MSIX setup code tries to re-use the cached pirq from the first vector for each controller, but the hypervisor has already given away that pirq to another controller, and its initialization fails. This is discussed in more detail at: https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") Signed-off-by: Dan Streetman Reviewed-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Willy Tarreau --- arch/x86/pci/xen.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 48e8461057b..6e4580b8760 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -227,23 +227,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; list_for_each_entry(msidesc, &dev->msi_list, list) { - __read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi", From de9d09c345b73857bd4297e4eedb65a0582da106 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2017 13:21:28 -0700 Subject: [PATCH 178/252] net: properly release sk_frag.page commit 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 upstream. I mistakenly added the code to release sk->sk_frag in sk_common_release() instead of sk_destruct() TCP sockets using sk->sk_allocation == GFP_ATOMIC do no call sk_common_release() at close time, thus leaking one (order-3) page. iSCSI is using such sockets. Fixes: 5640f7685831 ("net: use a per task frag allocator") Signed-off-by: Eric Dumazet Signed-off-by: Willy Tarreau --- net/core/sock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index e3cb45411f3..96e12591932 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1403,6 +1403,11 @@ static void __sk_free(struct sock *sk) pr_debug("%s: optmem leakage (%d bytes) detected\n", __func__, atomic_read(&sk->sk_omem_alloc)); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); @@ -2556,11 +2561,6 @@ void sk_common_release(struct sock *sk) sk_refcnt_debug_release(sk); - if (sk->sk_frag.page) { - put_page(sk->sk_frag.page); - sk->sk_frag.page = NULL; - } - sock_put(sk); } EXPORT_SYMBOL(sk_common_release); From e24a53af1b24b6bfd22968cdff7871711fa24261 Mon Sep 17 00:00:00 2001 From: Andrey Ulanov Date: Tue, 14 Mar 2017 20:16:42 -0700 Subject: [PATCH 179/252] net: unix: properly re-increment inflight counter of GC discarded candidates commit 7df9c24625b9981779afb8fcdbe2bb4765e61147 upstream. Dmitry has reported that a BUG_ON() condition in unix_notinflight() may be triggered by a simple code that forwards unix socket in an SCM_RIGHTS message. That is caused by incorrect unix socket GC implementation in unix_gc(). The GC first collects list of candidates, then (a) decrements their "children's" inflight counter, (b) checks which inflight counters are now 0, and then (c) increments all inflight counters back. (a) and (c) are done by calling scan_children() with inc_inflight or dec_inflight as the second argument. Commit 6209344f5a37 ("net: unix: fix inflight counting bug in garbage collector") changed scan_children() such that it no longer considers sockets that do not have UNIX_GC_CANDIDATE flag. It also added a block of code that that unsets this flag _before_ invoking scan_children(, dec_iflight, ). This may lead to incorrect inflight counters for some sockets. This change fixes this bug by changing order of operations: UNIX_GC_CANDIDATE is now unset only after all inflight counters are restored to the original state. kernel BUG at net/unix/garbage.c:149! RIP: 0010:[] [] unix_notinflight+0x3b4/0x490 net/unix/garbage.c:149 Call Trace: [] unix_detach_fds.isra.19+0xff/0x170 net/unix/af_unix.c:1487 [] unix_destruct_scm+0xf9/0x210 net/unix/af_unix.c:1496 [] skb_release_head_state+0x101/0x200 net/core/skbuff.c:655 [] skb_release_all+0x1a/0x60 net/core/skbuff.c:668 [] __kfree_skb+0x1a/0x30 net/core/skbuff.c:684 [] kfree_skb+0x184/0x570 net/core/skbuff.c:705 [] unix_release_sock+0x5b5/0xbd0 net/unix/af_unix.c:559 [] unix_release+0x49/0x90 net/unix/af_unix.c:836 [] sock_release+0x92/0x1f0 net/socket.c:570 [] sock_close+0x1b/0x20 net/socket.c:1017 [] __fput+0x34e/0x910 fs/file_table.c:208 [] ____fput+0x1a/0x20 fs/file_table.c:244 [] task_work_run+0x1a0/0x280 kernel/task_work.c:116 [< inline >] exit_task_work include/linux/task_work.h:21 [] do_exit+0x183a/0x2640 kernel/exit.c:828 [] do_group_exit+0x14e/0x420 kernel/exit.c:931 [] get_signal+0x663/0x1880 kernel/signal.c:2307 [] do_signal+0xc5/0x2190 arch/x86/kernel/signal.c:807 [] exit_to_usermode_loop+0x1ea/0x2d0 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x4d3/0x570 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Link: https://lkml.org/lkml/2017/3/6/252 Signed-off-by: Andrey Ulanov Reported-by: Dmitry Vyukov Fixes: 6209344 ("net: unix: fix inflight counting bug in garbage collector") Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/unix/garbage.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a72182d6750..58ba0e5f147 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -152,6 +152,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); + BUG_ON(!atomic_long_read(&u->inflight)); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); @@ -358,6 +359,14 @@ void unix_gc(void) } list_del(&cursor); + /* Now gc_candidates contains only garbage. Restore original + * inflight counters for these as well, and remove the skbuffs + * which are creating the cycle(s). + */ + skb_queue_head_init(&hitlist); + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, inc_inflight, &hitlist); + /* * not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. @@ -368,15 +377,6 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* - * Now gc_candidates contains only garbage. Restore original - * inflight counters for these as well, and remove the skbuffs - * which are creating the cycle(s). - */ - skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) - scan_children(&u->sk, inc_inflight, &hitlist); - spin_unlock(&unix_gc_lock); /* Here we are. Hitlist is filled. Die. */ From ec5cc03d1d4e9670bb7d61bb482ebda1a449db25 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:36:13 -0700 Subject: [PATCH 180/252] Input: ims-pcu - validate number of endpoints before using them commit 1916d319271664241b7aa0cd2b05e32bdb310ce9 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack control-interface endpoints. Fixes: 628329d52474 ("Input: add IMS Passenger Control Unit driver") Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov Signed-off-by: Willy Tarreau --- drivers/input/misc/ims-pcu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index 77164dc1bed..8fb814ccfd7 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1437,6 +1437,10 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc return -EINVAL; alt = pcu->ctrl_intf->cur_altsetting; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + pcu->ep_ctrl = &alt->endpoint[0].desc; pcu->max_ctrl_size = usb_endpoint_maxp(pcu->ep_ctrl); From ab84b30a2c61fc82ab993fdbef6bfca00360e971 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:39:29 -0700 Subject: [PATCH 181/252] Input: hanwang - validate number of endpoints before using them commit ba340d7b83703768ce566f53f857543359aa1b98 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: bba5394ad3bd ("Input: add support for Hanwang tablets") Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov Signed-off-by: Willy Tarreau --- drivers/input/tablet/hanwang.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/tablet/hanwang.c b/drivers/input/tablet/hanwang.c index 5cc04124995..263c85e72e1 100644 --- a/drivers/input/tablet/hanwang.c +++ b/drivers/input/tablet/hanwang.c @@ -341,6 +341,9 @@ static int hanwang_probe(struct usb_interface *intf, const struct usb_device_id int error; int i; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + hanwang = kzalloc(sizeof(struct hanwang), GFP_KERNEL); input_dev = input_allocate_device(); if (!hanwang || !input_dev) { From e121cd408c49a88c16a898b1e0fc419555db826e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:37:01 -0700 Subject: [PATCH 182/252] Input: yealink - validate number of endpoints before using them commit 5cc4a1a9f5c179795c8a1f2b0f4361829d6a070e upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: aca951a22a1d ("[PATCH] input-driver-yealink-P1K-usb-phone") Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov Signed-off-by: Willy Tarreau --- drivers/input/misc/yealink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c index 285a5bd6cbc..3b6fdb389a2 100644 --- a/drivers/input/misc/yealink.c +++ b/drivers/input/misc/yealink.c @@ -876,6 +876,10 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id) int ret, pipe, i; interface = intf->cur_altsetting; + + if (interface->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -ENODEV; From 9a1526465e181f580a11c2a53fa809da2c11966a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:35:12 -0700 Subject: [PATCH 183/252] Input: cm109 - validate number of endpoints before using them commit ac2ee9ba953afe88f7a673e1c0c839227b1d7891 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: c04148f915e5 ("Input: add driver for USB VoIP phones with CM109...") Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov Signed-off-by: Willy Tarreau --- drivers/input/misc/cm109.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c index 082684e7f39..d6a35a71385 100644 --- a/drivers/input/misc/cm109.c +++ b/drivers/input/misc/cm109.c @@ -669,6 +669,10 @@ static int cm109_usb_probe(struct usb_interface *intf, int error = -ENOMEM; interface = intf->cur_altsetting; + + if (interface->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) From 79e603e9eed2e26d35082c52b88d0fe8f065c6c7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:50 +0100 Subject: [PATCH 184/252] USB: uss720: fix NULL-deref at probe commit f259ca3eed6e4b79ac3d5c5c9fb259fb46e86217 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. Note that the endpoint access that causes the NULL-deref is currently only used for debugging purposes during probe so the oops only happens when dynamic debugging is enabled. This means the driver could be rewritten to continue to accept device with only two endpoints, should such devices exist. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/misc/uss720.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index e129cf66122..20d7e5312f0 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -709,6 +709,11 @@ static int uss720_probe(struct usb_interface *intf, interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 3) { + usb_put_dev(usbdev); + return -ENODEV; + } + /* * Allocate parport interface */ From 712c7fb1aed1d1418dc65f2e4e2ed965271ba9e0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:48 +0100 Subject: [PATCH 185/252] USB: idmouse: fix NULL-deref at probe commit b0addd3fa6bcd119be9428996d5d4522479ab240 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/misc/idmouse.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index ce978384fda..3b885c61b73 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -347,6 +347,9 @@ static int idmouse_probe(struct usb_interface *interface, if (iface_desc->desc.bInterfaceClass != 0x0A) return -ENODEV; + if (iface_desc->desc.bNumEndpoints < 1) + return -ENODEV; + /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) From 626f2770d95bcc9242550505c2cf54bbbae27ce8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:51 +0100 Subject: [PATCH 186/252] USB: wusbcore: fix NULL-deref at probe commit 03ace948a4eb89d1cf51c06afdfc41ebca5fdb27 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. This specifically fixes the NULL-pointer dereference when probing HWA HC devices. Fixes: df3654236e31 ("wusb: add the Wire Adapter (WA) core") Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/wusbcore/wa-hc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/wusbcore/wa-hc.c b/drivers/usb/wusbcore/wa-hc.c index a09b65ebd9b..2bb0fd3f342 100644 --- a/drivers/usb/wusbcore/wa-hc.c +++ b/drivers/usb/wusbcore/wa-hc.c @@ -38,6 +38,9 @@ int wa_create(struct wahc *wa, struct usb_interface *iface) int result; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 3) + return -ENODEV; + result = wa_rpipes_create(wa); if (result < 0) goto error_rpipes_create; From b00b018b74b7589798d74a93ab581fd31aaac902 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:53 +0100 Subject: [PATCH 187/252] uwb: i1480-dfu: fix NULL-deref at probe commit 4ce362711d78a4999011add3115b8f4b0bc25e8c upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Note that the dereference happens in the cmd and wait_init_done callbacks which are called during probe. Fixes: 1ba47da52712 ("uwb: add the i1480 DFU driver") Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/uwb/i1480/dfu/usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index 2bfc846ac07..6345e85822a 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -362,6 +362,9 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) result); } + if (iface->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + result = -ENOMEM; i1480_usb = kzalloc(sizeof(*i1480_usb), GFP_KERNEL); if (i1480_usb == NULL) { From 735a19db3b3e2decc73f0496eae98595b54a14e4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:52 +0100 Subject: [PATCH 188/252] uwb: hwa-rc: fix NULL-deref at probe commit daf229b15907fbfdb6ee183aac8ca428cb57e361 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Note that the dereference happens in the start callback which is called during probe. Fixes: de520b8bd552 ("uwb: add HWA radio controller driver") Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/uwb/hwa-rc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 810c90ae2c5..cd8bf69aa69 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -811,6 +811,9 @@ static int hwarc_probe(struct usb_interface *iface, struct hwarc *hwarc; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + result = -ENOMEM; uwb_rc = uwb_rc_alloc(); if (uwb_rc == NULL) { From e32d76a4d5e7d4fe3bf50c248a16163a5d1847c4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:40:22 +0100 Subject: [PATCH 189/252] mmc: ushc: fix NULL-deref at probe commit 181302dc7239add8ab1449c23ecab193f52ee6ab upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: 53f3a9e26ed5 ("mmc: USB SD Host Controller (USHC) driver") Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Ulf Hansson Signed-off-by: Willy Tarreau --- drivers/mmc/host/ushc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c index c0105a2e269..d5493a5a7e7 100644 --- a/drivers/mmc/host/ushc.c +++ b/drivers/mmc/host/ushc.c @@ -426,6 +426,9 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id struct ushc_data *ushc; int ret; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) return -ENOMEM; From 3f102dc505e05b1b0944a94faab788c82d0aa01c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Mar 2017 14:52:02 -0400 Subject: [PATCH 190/252] ext4: mark inode dirty after converting inline directory commit b9cf625d6ecde0d372e23ae022feead72b4228a6 upstream. If ext4_convert_inline_data() was called on a directory with inline data, the filesystem was left in an inconsistent state (as considered by e2fsck) because the file size was not increased to cover the new block. This happened because the inode was not marked dirty after i_disksize was updated. Fix this by marking the inode dirty at the end of ext4_finish_convert_inline_dir(). This bug was probably not noticed before because most users mark the inode dirty afterwards for other reasons. But if userspace executed FS_IOC_SET_ENCRYPTION_POLICY with invalid parameters, as exercised by 'kvm-xfstests -c adv generic/396', then the inode was never marked dirty after updating i_disksize. Fixes: 3c47d54170b6a678875566b1b8d6dcf57904e49b Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/inline.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index b390de05828..55af0d98d96 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1147,10 +1147,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, set_buffer_uptodate(dir_block); err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); if (err) - goto out; + return err; set_buffer_verified(dir_block); -out: - return err; + return ext4_mark_inode_dirty(handle, inode); } static int ext4_convert_inline_data_nolock(handle_t *handle, From 21ec215c5345737b7288f4067364ebcff396a140 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Mar 2017 23:07:28 +0800 Subject: [PATCH 191/252] scsi: libsas: fix ata xfer length commit 9702c67c6066f583b629cf037d2056245bb7a8e6 upstream. The total ata xfer length may not be calculated properly, in that we do not use the proper method to get an sg element dma length. According to the code comment, sg_dma_len() should be used after dma_map_sg() is called. This issue was found by turning on the SMMUv3 in front of the hisi_sas controller in hip07. Multiple sg elements were being combined into a single element, but the original first element length was being use as the total xfer length. Fixes: ff2aeb1eb64c8a4770a6 ("libata: convert to chained sg") Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/libsas/sas_ata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index d2895836f9f..83e3ca703cd 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -219,7 +219,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) task->num_scatter = qc->n_elem; } else { for_each_sg(qc->sg, sg, qc->n_elem, si) - xfer += sg->length; + xfer += sg_dma_len(sg); task->total_xfer_len = xfer; task->num_scatter = si; From 4b0ca39a00e4367ce9c9941b84c248eb6c64abd9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 17:16:48 +0100 Subject: [PATCH 192/252] ALSA: ctxfi: Fallback DMA mask to 32bit commit 15c75b09f8d190f89ab4db463b87d411ca349dfe upstream. Currently ctxfi driver tries to set only the 64bit DMA mask on 64bit architectures, and bails out if it fails. This causes a problem on some platforms since the 64bit DMA isn't always guaranteed. We should fall back to the default 32bit DMA when 64bit DMA fails. Fixes: 6d74b86d3c0f ("ALSA: ctxfi - Allow 64bit DMA") Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/pci/ctxfi/cthw20k1.c | 19 ++++++------------- sound/pci/ctxfi/cthw20k2.c | 18 ++++++------------ 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index 6ac40beb49d..abd0a2d0856 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -27,12 +27,6 @@ #include "cthw20k1.h" #include "ct20k1reg.h" -#if BITS_PER_LONG == 32 -#define CT_XFI_DMA_MASK DMA_BIT_MASK(32) /* 32 bit PTE */ -#else -#define CT_XFI_DMA_MASK DMA_BIT_MASK(64) /* 64 bit PTE */ -#endif - struct hw20k1 { struct hw hw; spinlock_t reg_20k1_lock; @@ -1903,19 +1897,18 @@ static int hw_card_start(struct hw *hw) { int err; struct pci_dev *pci = hw->pci; + const unsigned int dma_bits = BITS_PER_LONG; err = pci_enable_device(pci); if (err < 0) return err; /* Set DMA transfer mask */ - if (pci_set_dma_mask(pci, CT_XFI_DMA_MASK) < 0 || - pci_set_consistent_dma_mask(pci, CT_XFI_DMA_MASK) < 0) { - printk(KERN_ERR "architecture does not support PCI " - "busmaster DMA with mask 0x%llx\n", - CT_XFI_DMA_MASK); - err = -ENXIO; - goto error1; + if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); + } else { + dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)); } if (!hw->io_base) { diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c index b1438861d38..5828a3ec58b 100644 --- a/sound/pci/ctxfi/cthw20k2.c +++ b/sound/pci/ctxfi/cthw20k2.c @@ -26,12 +26,6 @@ #include "cthw20k2.h" #include "ct20k2reg.h" -#if BITS_PER_LONG == 32 -#define CT_XFI_DMA_MASK DMA_BIT_MASK(32) /* 32 bit PTE */ -#else -#define CT_XFI_DMA_MASK DMA_BIT_MASK(64) /* 64 bit PTE */ -#endif - struct hw20k2 { struct hw hw; /* for i2c */ @@ -2026,18 +2020,18 @@ static int hw_card_start(struct hw *hw) int err = 0; struct pci_dev *pci = hw->pci; unsigned int gctl; + const unsigned int dma_bits = BITS_PER_LONG; err = pci_enable_device(pci); if (err < 0) return err; /* Set DMA transfer mask */ - if (pci_set_dma_mask(pci, CT_XFI_DMA_MASK) < 0 || - pci_set_consistent_dma_mask(pci, CT_XFI_DMA_MASK) < 0) { - printk(KERN_ERR "ctxfi: architecture does not support PCI " - "busmaster DMA with mask 0x%llx\n", CT_XFI_DMA_MASK); - err = -ENXIO; - goto error1; + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); + } else { + dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)); } if (!hw->io_base) { From a11b00f876d2698ccff00b65cbe87d20f9b14a57 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Mar 2017 10:08:19 +0100 Subject: [PATCH 193/252] ALSA: ctxfi: Fix the incorrect check of dma_set_mask() call commit f363a06642f28caaa78cb6446bbad90c73fe183c upstream. In the commit [15c75b09f8d1: ALSA: ctxfi: Fallback DMA mask to 32bit], I forgot to put "!" at dam_set_mask() call check in cthw20k1.c (while cthw20k2.c is OK). This patch fixes that obvious bug. (As a side note: although the original commit was completely wrong, it's still working for most of machines, as it sets to 32bit DMA mask in the end. So the bug severity is low.) Fixes: 15c75b09f8d1 ("ALSA: ctxfi: Fallback DMA mask to 32bit") Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/pci/ctxfi/cthw20k1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index abd0a2d0856..7f414b05644 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -1904,7 +1904,7 @@ static int hw_card_start(struct hw *hw) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); } else { dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); From 0f06de41a3786c7a104b305f9d37f6442a5fec9e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Jun 2015 18:32:02 +0200 Subject: [PATCH 194/252] ACPI / PNP: Avoid conflicting resource reservations commit 0f1b414d190724617eb1cdd615592fa8cd9d0b50 upstream. Commit b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" overlooked the fact that the memory and/or I/O regions reserved by acpi_reserve_resources() may conflict with those reserved by the PNP "system" driver. If that conflict actually takes place, it causes the reservations made by the "system" driver to fail while before commit b9a5e5e18fbf all reservations made by it and by acpi_reserve_resources() would be successful. In turn, that allows the resources that haven't been reserved by the "system" driver to be used by others (e.g. PCI) which sometimes leads to functional problems (up to and including boot failures). To fix that issue, introduce a common resource reservation routine, acpi_reserve_region(), to be used by both acpi_reserve_resources() and the "system" driver, that will track all resources reserved by it and avoid making conflicting requests. Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Signed-off-by: Rafael J. Wysocki Signed-off-by: Willy Tarreau --- drivers/acpi/osl.c | 6 +- drivers/acpi/resource.c | 160 ++++++++++++++++++++++++++++++++++++++++ drivers/pnp/system.c | 35 ++++++--- include/linux/acpi.h | 10 +++ 4 files changed, 197 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 11441ad69de..2edbb5b02a7 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -166,11 +166,7 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - /* Resources are never freed */ - if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) - request_region(addr, length, desc); - else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - request_mem_region(addr, length, desc); + acpi_reserve_region(addr, length, gas->space_id, 0, desc); } static void __init acpi_reserve_resources(void) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index b9cfaf1d94d..226aadb8ad7 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86 @@ -540,3 +541,162 @@ int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, return c.count; } EXPORT_SYMBOL_GPL(acpi_dev_get_resources); + +struct reserved_region { + struct list_head node; + u64 start; + u64 end; +}; + +static LIST_HEAD(reserved_io_regions); +static LIST_HEAD(reserved_mem_regions); + +static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, + char *desc) +{ + unsigned int length = end - start + 1; + struct resource *res; + + res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? + request_region(start, length, desc) : + request_mem_region(start, length, desc); + if (!res) + return -EIO; + + res->flags &= ~flags; + return 0; +} + +static int add_region_before(u64 start, u64 end, u8 space_id, + unsigned long flags, char *desc, + struct list_head *head) +{ + struct reserved_region *reg; + int error; + + reg = kmalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) + return -ENOMEM; + + error = request_range(start, end, space_id, flags, desc); + if (error) + return error; + + reg->start = start; + reg->end = end; + list_add_tail(®->node, head); + return 0; +} + +/** + * acpi_reserve_region - Reserve an I/O or memory region as a system resource. + * @start: Starting address of the region. + * @length: Length of the region. + * @space_id: Identifier of address space to reserve the region from. + * @flags: Resource flags to clear for the region after requesting it. + * @desc: Region description (for messages). + * + * Reserve an I/O or memory region as a system resource to prevent others from + * using it. If the new region overlaps with one of the regions (in the given + * address space) already reserved by this routine, only the non-overlapping + * parts of it will be reserved. + * + * Returned is either 0 (success) or a negative error code indicating a resource + * reservation problem. It is the code of the first encountered error, but the + * routine doesn't abort until it has attempted to request all of the parts of + * the new region that don't overlap with other regions reserved previously. + * + * The resources requested by this routine are never released. + */ +int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, + unsigned long flags, char *desc) +{ + struct list_head *regions; + struct reserved_region *reg; + u64 end = start + length - 1; + int ret = 0, error = 0; + + if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) + regions = &reserved_io_regions; + else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + regions = &reserved_mem_regions; + else + return -EINVAL; + + if (list_empty(regions)) + return add_region_before(start, end, space_id, flags, desc, regions); + + list_for_each_entry(reg, regions, node) + if (reg->start == end + 1) { + /* The new region can be prepended to this one. */ + ret = request_range(start, end, space_id, flags, desc); + if (!ret) + reg->start = start; + + return ret; + } else if (reg->start > end) { + /* No overlap. Add the new region here and get out. */ + return add_region_before(start, end, space_id, flags, + desc, ®->node); + } else if (reg->end == start - 1) { + goto combine; + } else if (reg->end >= start) { + goto overlap; + } + + /* The new region goes after the last existing one. */ + return add_region_before(start, end, space_id, flags, desc, regions); + + overlap: + /* + * The new region overlaps an existing one. + * + * The head part of the new region immediately preceding the existing + * overlapping one can be combined with it right away. + */ + if (reg->start > start) { + error = request_range(start, reg->start - 1, space_id, flags, desc); + if (error) + ret = error; + else + reg->start = start; + } + + combine: + /* + * The new region is adjacent to an existing one. If it extends beyond + * that region all the way to the next one, it is possible to combine + * all three of them. + */ + while (reg->end < end) { + struct reserved_region *next = NULL; + u64 a = reg->end + 1, b = end; + + if (!list_is_last(®->node, regions)) { + next = list_next_entry(reg, node); + if (next->start <= end) + b = next->start - 1; + } + error = request_range(a, b, space_id, flags, desc); + if (!error) { + if (next && next->start == b + 1) { + reg->end = next->end; + list_del(&next->node); + kfree(next); + } else { + reg->end = end; + break; + } + } else if (next) { + if (!ret) + ret = error; + + reg = next; + } else { + break; + } + } + + return ret ? ret : error; +} +EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 49c1720df59..515f33882ab 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,6 +7,7 @@ * Bjorn Helgaas */ +#include #include #include #include @@ -22,25 +23,41 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; +#ifdef CONFIG_ACPI +static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) +{ + u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; + return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); +} +#else +static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) +{ + struct resource *res; + + res = io ? request_region(start, length, desc) : + request_mem_region(start, length, desc); + if (res) { + res->flags &= ~IORESOURCE_BUSY; + return true; + } + return false; +} +#endif + static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - struct resource *res; + bool reserved; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - if (port) - res = request_region(start, end - start + 1, regionid); - else - res = request_mem_region(start, end - start + 1, regionid); - if (res) - res->flags &= ~IORESOURCE_BUSY; - else + reserved = __reserve_range(start, end - start + 1, !!port, regionid); + if (!reserved) kfree(regionid); /* @@ -49,7 +66,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - res ? "has been" : "could not be"); + reserved ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 17b5b596764..1c7ae01a160 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -283,6 +283,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); +int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, + unsigned long flags, char *desc); + #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -440,6 +443,13 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } +static inline int acpi_reserve_region(u64 start, unsigned int length, + u8 space_id, unsigned long flags, + char *desc) +{ + return -ENXIO; +} + struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) From 23b2b0e0bc393479a5b285b8f06b544a8abc1dd8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Jun 2015 17:30:15 +0300 Subject: [PATCH 195/252] ACPI / resources: free memory on error in add_region_before() commit 7bc10388ccdd79b3d20463151a1f8e7a590a775b upstream. There is a small memory leak on error. Fixes: 0f1b414d1907 (ACPI / PNP: Avoid conflicting resource reservations) Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki Signed-off-by: Willy Tarreau --- drivers/acpi/resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 226aadb8ad7..2a7711fb008 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -579,8 +579,10 @@ static int add_region_before(u64 start, u64 end, u8 space_id, return -ENOMEM; error = request_range(start, end, space_id, flags, desc); - if (error) + if (error) { + kfree(reg); return error; + } reg->start = start; reg->end = end; From 62bdbcf4f26d36de1ec9979a604b084635a50988 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Jul 2015 03:09:03 +0200 Subject: [PATCH 196/252] ACPI / PNP: Reserve ACPI resources at the fs_initcall_sync stage commit 0294112ee3135fbd15eaa70015af8283642dd970 upstream. This effectively reverts the following three commits: 7bc10388ccdd ACPI / resources: free memory on error in add_region_before() 0f1b414d1907 ACPI / PNP: Avoid conflicting resource reservations b9a5e5e18fbf ACPI / init: Fix the ordering of acpi_reserve_resources() (commit b9a5e5e18fbf introduced regressions some of which, but not all, were addressed by commit 0f1b414d1907 and commit 7bc10388ccdd was a fixup on top of the latter) and causes ACPI fixed hardware resources to be reserved at the fs_initcall_sync stage of system initialization. The story is as follows. First, a boot regression was reported due to an apparent resource reservation ordering change after a commit that shouldn't lead to such changes. Investigation led to the conclusion that the problem happened because acpi_reserve_resources() was executed at the device_initcall() stage of system initialization which wasn't strictly ordered with respect to driver initialization (and with respect to the initialization of the pcieport driver in particular), so a random change causing the device initcalls to be run in a different order might break things. The response to that was to attempt to run acpi_reserve_resources() as soon as we knew that ACPI would be in use (commit b9a5e5e18fbf). However, that turned out to be too early, because it caused resource reservations made by the PNP system driver to fail on at least one system and that failure was addressed by commit 0f1b414d1907. That fix still turned out to be insufficient, though, because calling acpi_reserve_resources() before the fs_initcall stage of system initialization caused a boot regression to happen on the eCAFE EC-800-H20G/S netbook. That meant that we only could call acpi_reserve_resources() at the fs_initcall initialization stage or later, but then we might just as well call it after the PNP initalization in which case commit 0f1b414d1907 wouldn't be necessary any more. For this reason, the changes made by commit 0f1b414d1907 are reverted (along with a memory leak fixup on top of that commit), the changes made by commit b9a5e5e18fbf that went too far are reverted too and acpi_reserve_resources() is changed into fs_initcall_sync, which will cause it to be executed after the PNP subsystem initialization (which is an fs_initcall) and before device initcalls (including the pcieport driver initialization) which should avoid the initial issue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=100581 Link: http://marc.info/?t=143092384600002&r=1&w=2 Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Signed-off-by: Rafael J. Wysocki Signed-off-by: Willy Tarreau --- drivers/acpi/osl.c | 12 ++- drivers/acpi/resource.c | 162 ---------------------------------------- drivers/pnp/system.c | 35 +++------ include/linux/acpi.h | 10 --- 4 files changed, 18 insertions(+), 201 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 2edbb5b02a7..276ea4727ad 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -166,10 +166,14 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - acpi_reserve_region(addr, length, gas->space_id, 0, desc); + /* Resources are never freed */ + if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) + request_region(addr, length, desc); + else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + request_mem_region(addr, length, desc); } -static void __init acpi_reserve_resources(void) +static int __init acpi_reserve_resources(void) { acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length, "ACPI PM1a_EVT_BLK"); @@ -198,7 +202,10 @@ static void __init acpi_reserve_resources(void) if (!(acpi_gbl_FADT.gpe1_block_length & 0x1)) acpi_request_region(&acpi_gbl_FADT.xgpe1_block, acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK"); + + return 0; } +fs_initcall_sync(acpi_reserve_resources); void acpi_os_printf(const char *fmt, ...) { @@ -1720,7 +1727,6 @@ acpi_status __init acpi_os_initialize(void) acpi_status __init acpi_os_initialize1(void) { - acpi_reserve_resources(); kacpid_wq = alloc_workqueue("kacpid", 0, 1); kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1); kacpi_hotplug_wq = alloc_workqueue("kacpi_hotplug", 0, 1); diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 2a7711fb008..b9cfaf1d94d 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #ifdef CONFIG_X86 @@ -541,164 +540,3 @@ int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, return c.count; } EXPORT_SYMBOL_GPL(acpi_dev_get_resources); - -struct reserved_region { - struct list_head node; - u64 start; - u64 end; -}; - -static LIST_HEAD(reserved_io_regions); -static LIST_HEAD(reserved_mem_regions); - -static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, - char *desc) -{ - unsigned int length = end - start + 1; - struct resource *res; - - res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? - request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (!res) - return -EIO; - - res->flags &= ~flags; - return 0; -} - -static int add_region_before(u64 start, u64 end, u8 space_id, - unsigned long flags, char *desc, - struct list_head *head) -{ - struct reserved_region *reg; - int error; - - reg = kmalloc(sizeof(*reg), GFP_KERNEL); - if (!reg) - return -ENOMEM; - - error = request_range(start, end, space_id, flags, desc); - if (error) { - kfree(reg); - return error; - } - - reg->start = start; - reg->end = end; - list_add_tail(®->node, head); - return 0; -} - -/** - * acpi_reserve_region - Reserve an I/O or memory region as a system resource. - * @start: Starting address of the region. - * @length: Length of the region. - * @space_id: Identifier of address space to reserve the region from. - * @flags: Resource flags to clear for the region after requesting it. - * @desc: Region description (for messages). - * - * Reserve an I/O or memory region as a system resource to prevent others from - * using it. If the new region overlaps with one of the regions (in the given - * address space) already reserved by this routine, only the non-overlapping - * parts of it will be reserved. - * - * Returned is either 0 (success) or a negative error code indicating a resource - * reservation problem. It is the code of the first encountered error, but the - * routine doesn't abort until it has attempted to request all of the parts of - * the new region that don't overlap with other regions reserved previously. - * - * The resources requested by this routine are never released. - */ -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc) -{ - struct list_head *regions; - struct reserved_region *reg; - u64 end = start + length - 1; - int ret = 0, error = 0; - - if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) - regions = &reserved_io_regions; - else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - regions = &reserved_mem_regions; - else - return -EINVAL; - - if (list_empty(regions)) - return add_region_before(start, end, space_id, flags, desc, regions); - - list_for_each_entry(reg, regions, node) - if (reg->start == end + 1) { - /* The new region can be prepended to this one. */ - ret = request_range(start, end, space_id, flags, desc); - if (!ret) - reg->start = start; - - return ret; - } else if (reg->start > end) { - /* No overlap. Add the new region here and get out. */ - return add_region_before(start, end, space_id, flags, - desc, ®->node); - } else if (reg->end == start - 1) { - goto combine; - } else if (reg->end >= start) { - goto overlap; - } - - /* The new region goes after the last existing one. */ - return add_region_before(start, end, space_id, flags, desc, regions); - - overlap: - /* - * The new region overlaps an existing one. - * - * The head part of the new region immediately preceding the existing - * overlapping one can be combined with it right away. - */ - if (reg->start > start) { - error = request_range(start, reg->start - 1, space_id, flags, desc); - if (error) - ret = error; - else - reg->start = start; - } - - combine: - /* - * The new region is adjacent to an existing one. If it extends beyond - * that region all the way to the next one, it is possible to combine - * all three of them. - */ - while (reg->end < end) { - struct reserved_region *next = NULL; - u64 a = reg->end + 1, b = end; - - if (!list_is_last(®->node, regions)) { - next = list_next_entry(reg, node); - if (next->start <= end) - b = next->start - 1; - } - error = request_range(a, b, space_id, flags, desc); - if (!error) { - if (next && next->start == b + 1) { - reg->end = next->end; - list_del(&next->node); - kfree(next); - } else { - reg->end = end; - break; - } - } else if (next) { - if (!ret) - ret = error; - - reg = next; - } else { - break; - } - } - - return ret ? ret : error; -} -EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 515f33882ab..49c1720df59 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,7 +7,6 @@ * Bjorn Helgaas */ -#include #include #include #include @@ -23,41 +22,25 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; -#ifdef CONFIG_ACPI -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; - return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); -} -#else -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - struct resource *res; - - res = io ? request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (res) { - res->flags &= ~IORESOURCE_BUSY; - return true; - } - return false; -} -#endif - static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - bool reserved; + struct resource *res; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - reserved = __reserve_range(start, end - start + 1, !!port, regionid); - if (!reserved) + if (port) + res = request_region(start, end - start + 1, regionid); + else + res = request_mem_region(start, end - start + 1, regionid); + if (res) + res->flags &= ~IORESOURCE_BUSY; + else kfree(regionid); /* @@ -66,7 +49,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - reserved ? "has been" : "could not be"); + res ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1c7ae01a160..17b5b596764 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -283,9 +283,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc); - #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -443,13 +440,6 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } -static inline int acpi_reserve_region(u64 start, unsigned int length, - u8 space_id, unsigned long flags, - char *desc) -{ - return -ENXIO; -} - struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) From d106ce33dc18753c8c2e56d92813e46b11c9e693 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 30 Jun 2015 11:25:54 -0400 Subject: [PATCH 197/252] USB: OHCI: Fix race between ED unlink and URB submission commit 7d8021c967648accd1b78e5e1ddaad655cd2c61f upstream. This patch fixes a bug introduced by commit 977dcfdc6031 ("USB: OHCI: don't lose track of EDs when a controller dies"). The commit changed ed_state from ED_UNLINK to ED_IDLE too early, before finish_urb() had been called. The user-visible consequence is that the driver occasionally crashes or locks up when an URB is submitted while another URB for the same endpoint is being unlinked. This patch moves the ED state change later, to the right place. The drawback is that now we may unnecessarily execute some instructions multiple times when a controller dies. Since controllers dying is an exceptional occurrence, a little wasted time won't matter. Signed-off-by: Alan Stern Reported-by: Heiko Przybyl Tested-by: Heiko Przybyl Fixes: 977dcfdc60311e7aa571cabf6f39c36dde13339e Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/usb/host/ohci-q.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index 1e1563da181..7f93dc26fb1 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -927,10 +927,6 @@ rescan_all: int completed, modified; __hc32 *prev; - /* Is this ED already invisible to the hardware? */ - if (ed->state == ED_IDLE) - goto ed_idle; - /* only take off EDs that the HC isn't using, accounting for * frame counter wraps and EDs with partially retired TDs */ @@ -961,14 +957,12 @@ skip_ed: } /* ED's now officially unlinked, hc doesn't see */ - ed->state = ED_IDLE; if (quirk_zfmicro(ohci) && ed->type == PIPE_INTERRUPT) ohci->eds_scheduled--; ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_H); ed->hwNextED = 0; wmb(); ed->hwINFO &= ~cpu_to_hc32(ohci, ED_SKIP | ED_DEQUEUE); -ed_idle: /* reentrancy: if we drop the schedule lock, someone might * have modified this list. normally it's just prepending @@ -1039,6 +1033,7 @@ rescan_this: if (list_empty(&ed->td_list)) { *last = ed->ed_next; ed->ed_next = NULL; + ed->state = ED_IDLE; } else if (ohci->rh_state == OHCI_RH_RUNNING) { *last = ed->ed_next; ed->ed_next = NULL; From 98dcb81b30f999ad4850969f51f1c2adcf1d0d63 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 26 Oct 2015 10:38:27 +0100 Subject: [PATCH 198/252] i2c: at91: manage unexpected RXRDY flag when starting a transfer commit a9bed6b10bd117a300cceb9062003f7a2761ef99 upstream. In some cases, we could start a new i2c transfer with the RXRDY flag set. It is not a clean state and it leads to print annoying error messages even if there no real issue. The cause is only having garbage data in the Receive Holding Register because of a weird behavior of the RXRDY flag. Reported-by: Peter Rosin Signed-off-by: Ludovic Desroches Tested-by: Peter Rosin Signed-off-by: Wolfram Sang Fixes: 93563a6a71bb ("i2c: at91: fix a race condition when using the DMA controller") Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/i2c/busses/i2c-at91.c | 36 +++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index c880d13f540..f079877bd4e 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -273,8 +273,14 @@ error: static void at91_twi_read_next_byte(struct at91_twi_dev *dev) { - if (dev->buf_len <= 0) + /* + * If we are in this case, it means there is garbage data in RHR, so + * delete them. + */ + if (!dev->buf_len) { + at91_twi_read(dev, AT91_TWI_RHR); return; + } *dev->buf = at91_twi_read(dev, AT91_TWI_RHR) & 0xff; --dev->buf_len; @@ -371,6 +377,24 @@ static irqreturn_t atmel_twi_interrupt(int irq, void *dev_id) if (!irqstatus) return IRQ_NONE; + /* + * In reception, the behavior of the twi device (before sama5d2) is + * weird. There is some magic about RXRDY flag! When a data has been + * almost received, the reception of a new one is anticipated if there + * is no stop command to send. That is the reason why ask for sending + * the stop command not on the last data but on the second last one. + * + * Unfortunately, we could still have the RXRDY flag set even if the + * transfer is done and we have read the last data. It might happen + * when the i2c slave device sends too quickly data after receiving the + * ack from the master. The data has been almost received before having + * the order to send stop. In this case, sending the stop command could + * cause a RXRDY interrupt with a TXCOMP one. It is better to manage + * the RXRDY interrupt first in order to not keep garbage data in the + * Receive Holding Register for the next transfer. + */ + if (irqstatus & AT91_TWI_RXRDY) + at91_twi_read_next_byte(dev); /* * When a NACK condition is detected, the I2C controller sets the NACK, @@ -413,8 +437,6 @@ static irqreturn_t atmel_twi_interrupt(int irq, void *dev_id) if (irqstatus & (AT91_TWI_TXCOMP | AT91_TWI_NACK)) { at91_disable_twi_interrupts(dev); complete(&dev->cmd_complete); - } else if (irqstatus & AT91_TWI_RXRDY) { - at91_twi_read_next_byte(dev); } else if (irqstatus & AT91_TWI_TXRDY) { at91_twi_write_next_byte(dev); } @@ -429,7 +451,6 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) { int ret; bool has_unre_flag = dev->pdata->has_unre_flag; - unsigned sr; /* * WARNING: the TXCOMP bit in the Status Register is NOT a clear on @@ -466,7 +487,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) dev->transfer_status = 0; /* Clear pending interrupts, such as NACK. */ - sr = at91_twi_read(dev, AT91_TWI_SR); + at91_twi_read(dev, AT91_TWI_SR); if (!dev->buf_len) { at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_QUICK); @@ -474,11 +495,6 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) } else if (dev->msg->flags & I2C_M_RD) { unsigned start_flags = AT91_TWI_START; - if (sr & AT91_TWI_RXRDY) { - dev_err(dev->dev, "RXRDY still set!"); - at91_twi_read(dev, AT91_TWI_RHR); - } - /* if only one byte is to be read, immediately stop transfer */ if (dev->buf_len <= 1 && !(dev->msg->flags & I2C_M_RECV_LEN)) start_flags |= AT91_TWI_STOP; From 4fbf1009acdd8cbc10011e574e6b4929a1c87b4d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 1 Dec 2015 16:31:08 +0100 Subject: [PATCH 199/252] ipv4: igmp: Allow removing groups from a removed interface commit 4eba7bb1d72d9bde67d810d09bf62dc207b63c5c upstream. When a multicast group is joined on a socket, a struct ip_mc_socklist is appended to the sockets mc_list containing information about the joined group. If the interface is hot unplugged, this entry becomes stale. Prior to commit 52ad353a5344f ("igmp: fix the problem when mc leave group") it was possible to remove the stale entry by performing a IP_DROP_MEMBERSHIP, passing either the old ifindex or ip address on the interface. However, this fix enforces that the interface must still exist. Thus with time, the number of stale entries grows, until sysctl_igmp_max_memberships is reached and then it is not possible to join and more groups. The previous patch fixes an issue where a IP_DROP_MEMBERSHIP is performed without specifying the interface, either by ifindex or ip address. However here we do supply one of these. So loosen the restriction on device existence to only apply when the interface has not been specified. This then restores the ability to clean up the stale entries. Signed-off-by: Andrew Lunn Fixes: 52ad353a5344f "(igmp: fix the problem when mc leave group") Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/igmp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 155adf8729c..b0178b04bd8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1874,7 +1874,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); - if (!in_dev) { + if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) { ret = -ENODEV; goto out; } @@ -1895,8 +1895,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) *imlp = iml->next_rcu; - ip_mc_dec_group(in_dev, group); + if (in_dev) + ip_mc_dec_group(in_dev, group); rtnl_unlock(); + /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); kfree_rcu(iml, rcu); From b8054bf654c6ab4d3c9a773808334fab52f98d3c Mon Sep 17 00:00:00 2001 From: "bsegall@google.com" Date: Fri, 7 Apr 2017 16:04:51 -0700 Subject: [PATCH 200/252] ptrace: fix PTRACE_LISTEN race corrupting task->state commit 5402e97af667e35e54177af8f6575518bf251d51 upstream. In PT_SEIZED + LISTEN mode STOP/CONT signals cause a wakeup against __TASK_TRACED. If this races with the ptrace_unfreeze_traced at the end of a PTRACE_LISTEN, this can wake the task /after/ the check against __TASK_TRACED, but before the reset of state to TASK_TRACED. This causes it to instead clobber TASK_WAKING, allowing a subsequent wakeup against TRACED while the task is still on the rq wake_list, corrupting it. Oleg said: "The kernel can crash or this can lead to other hard-to-debug problems. In short, "task->state = TASK_TRACED" in ptrace_unfreeze_traced() assumes that nobody else can wake it up, but PTRACE_LISTEN breaks the contract. Obviusly it is very wrong to manipulate task->state if this task is already running, or WAKING, or it sleeps again" [akpm@linux-foundation.org: coding-style fixes] Fixes: 9899d11f ("ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL") Link: http://lkml.kernel.org/r/xm26y3vfhmkp.fsf_-_@bsegall-linux.mtv.corp.google.com Signed-off-by: Ben Segall Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- kernel/ptrace.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 72b0b3e0e06..d34c05ac6f9 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -150,11 +150,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task) WARN_ON(!task->ptrace || task->parent != current); + /* + * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. + * Recheck state under the lock to close this race. + */ spin_lock_irq(&task->sighand->siglock); - if (__fatal_signal_pending(task)) - wake_up_state(task, __TASK_TRACED); - else - task->state = TASK_TRACED; + if (task->state == __TASK_TRACED) { + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); + else + task->state = TASK_TRACED; + } spin_unlock_irq(&task->sighand->siglock); } From 13e03cf812b2af8f99fca24944d4ef78a562fdf1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 17 Jun 2016 17:33:59 +0000 Subject: [PATCH 201/252] ring-buffer: Fix return value check in test_ringbuffer() commit 62277de758b155dc04b78f195a1cb5208c37b2df upstream. In case of error, the function kthread_run() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Link: http://lkml.kernel.org/r/1466184839-14927-1-git-send-email-weiyj_lk@163.com Fixes: 6c43e554a ("ring-buffer: Add ring buffer startup selftest") Signed-off-by: Wei Yongjun Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Willy Tarreau --- kernel/trace/ring_buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c4ce3a951a1..2948be99b20 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4837,9 +4837,9 @@ static __init int test_ringbuffer(void) rb_data[cpu].cnt = cpu; rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu], "rbtester/%d", cpu); - if (WARN_ON(!rb_threads[cpu])) { + if (WARN_ON(IS_ERR(rb_threads[cpu]))) { pr_cont("FAILED\n"); - ret = -1; + ret = PTR_ERR(rb_threads[cpu]); goto out_free; } @@ -4849,9 +4849,9 @@ static __init int test_ringbuffer(void) /* Now create the rb hammer! */ rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer"); - if (WARN_ON(!rb_hammer)) { + if (WARN_ON(IS_ERR(rb_hammer))) { pr_cont("FAILED\n"); - ret = -1; + ret = PTR_ERR(rb_hammer); goto out_free; } From 994c8e5af5c712ad6132fccbbea2ebf7dbabfe02 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 11:23:18 +0100 Subject: [PATCH 202/252] metag/usercopy: Fix alignment error checking commit 2257211942bbbf6c798ab70b487d7e62f7835a1a upstream. Fix the error checking of the alignment adjustment code in raw_copy_from_user(), which mistakenly considers it safe to skip the error check when aligning the source buffer on a 2 or 4 byte boundary. If the destination buffer was unaligned it may have started to copy using byte or word accesses, which could well be at the start of a new (valid) source page. This would result in it appearing to have copied 1 or 2 bytes at the end of the first (invalid) page rather than none at all. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- arch/metag/lib/usercopy.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index b3ebfe9c8e8..a26c8897b2a 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -830,6 +830,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, if ((unsigned long) src & 1) { __asm_copy_from_user_1(dst, src, retn); n--; + if (retn) + goto copy_exception_bytes; } if ((unsigned long) dst & 1) { /* Worst case - byte copy */ @@ -843,6 +845,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, if (((unsigned long) src & 2) && n >= 2) { __asm_copy_from_user_2(dst, src, retn); n -= 2; + if (retn) + goto copy_exception_bytes; } if ((unsigned long) dst & 2) { /* Second worst case - word copy */ @@ -854,12 +858,6 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, } } - /* We only need one check after the unalignment-adjustments, - because if both adjustments were done, either both or - neither reference had an exception. */ - if (retn != 0) - goto copy_exception_bytes; - #ifdef USE_RAPF /* 64 bit copy loop */ if (!(((unsigned long) src | (unsigned long) dst) & 7)) { From 9a83add8a964a6ccb9da9094856b702049c46173 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 13:35:01 +0100 Subject: [PATCH 203/252] metag/usercopy: Add early abort to copy_to_user commit fb8ea062a8f2e85256e13f55696c5c5f0dfdcc8b upstream. When copying to userland on Meta, if any faults are encountered immediately abort the copy instead of continuing on and repeatedly faulting, and worse potentially copying further bytes successfully to subsequent valid pages. Fixes: 373cd784d0fc ("metag: Memory handling") Reported-by: Al Viro Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- arch/metag/lib/usercopy.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index a26c8897b2a..212c99a1b76 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -538,23 +538,31 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, if ((unsigned long) src & 1) { __asm_copy_to_user_1(dst, src, retn); n--; + if (retn) + return retn + n; } if ((unsigned long) dst & 1) { /* Worst case - byte copy */ while (n > 0) { __asm_copy_to_user_1(dst, src, retn); n--; + if (retn) + return retn + n; } } if (((unsigned long) src & 2) && n >= 2) { __asm_copy_to_user_2(dst, src, retn); n -= 2; + if (retn) + return retn + n; } if ((unsigned long) dst & 2) { /* Second worst case - word copy */ while (n >= 2) { __asm_copy_to_user_2(dst, src, retn); n -= 2; + if (retn) + return retn + n; } } @@ -569,6 +577,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 8) { __asm_copy_to_user_8x64(dst, src, retn); n -= 8; + if (retn) + return retn + n; } } if (n >= RAPF_MIN_BUF_SIZE) { @@ -581,6 +591,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 8) { __asm_copy_to_user_8x64(dst, src, retn); n -= 8; + if (retn) + return retn + n; } } #endif @@ -588,11 +600,15 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 16) { __asm_copy_to_user_16(dst, src, retn); n -= 16; + if (retn) + return retn + n; } while (n >= 4) { __asm_copy_to_user_4(dst, src, retn); n -= 4; + if (retn) + return retn + n; } switch (n) { @@ -609,6 +625,10 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, break; } + /* + * If we get here, retn correctly reflects the number of failing + * bytes. + */ return retn; } EXPORT_SYMBOL(__copy_user); From cabb4ce1458c43c74f250c6f95dd8fce0a3c21f9 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 4 Apr 2017 11:43:26 +0100 Subject: [PATCH 204/252] metag/usercopy: Set flags before ADDZ commit fd40eee1290ad7add7aa665e3ce6b0f9fe9734b4 upstream. The fixup code for the copy_to_user rapf loops reads TXStatus.LSM_STEP to decide how far to rewind the source pointer. There is a special case for the last execution of an MGETL/MGETD, since it leaves LSM_STEP=0 even though the number of MGETLs/MGETDs attempted was 4. This uses ADDZ which is conditional upon the Z condition flag, but the AND instruction which masked the TXStatus.LSM_STEP field didn't set the condition flags based on the result. Fix that now by using ANDS which does set the flags, and also marking the condition codes as clobbered by the inline assembly. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- arch/metag/lib/usercopy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index 212c99a1b76..c3697a1a6f0 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -316,7 +316,7 @@ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ - : "D1Ar1", "D0Ar2", "memory") + : "D1Ar1", "D0Ar2", "cc", "memory") /* rewind 'to' and 'from' pointers when a fault occurs * @@ -342,7 +342,7 @@ #define __asm_copy_to_user_64bit_rapf_loop(to, from, ret, n, id)\ __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \ "LSR D0Ar2, D0Ar2, #8\n" \ - "AND D0Ar2, D0Ar2, #0x7\n" \ + "ANDS D0Ar2, D0Ar2, #0x7\n" \ "ADDZ D0Ar2, D0Ar2, #4\n" \ "SUB D0Ar2, D0Ar2, #1\n" \ "MOV D1Ar1, #4\n" \ @@ -487,7 +487,7 @@ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ - : "D1Ar1", "D0Ar2", "memory") + : "D1Ar1", "D0Ar2", "cc", "memory") /* rewind 'to' and 'from' pointers when a fault occurs * @@ -513,7 +513,7 @@ #define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\ __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \ "LSR D0Ar2, D0Ar2, #8\n" \ - "AND D0Ar2, D0Ar2, #0x7\n" \ + "ANDS D0Ar2, D0Ar2, #0x7\n" \ "ADDZ D0Ar2, D0Ar2, #4\n" \ "SUB D0Ar2, D0Ar2, #1\n" \ "MOV D1Ar1, #4\n" \ From 816a1aec47661696de464dea9b7afaa8814a38d8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 3 Apr 2017 17:41:40 +0100 Subject: [PATCH 205/252] metag/usercopy: Fix src fixup in from user rapf loops commit 2c0b1df88b987a12d95ea1d6beaf01894f3cc725 upstream. The fixup code to rewind the source pointer in __asm_copy_from_user_{32,64}bit_rapf_loop() always rewound the source by a single unit (4 or 8 bytes), however this is insufficient if the fault didn't occur on the first load in the loop, as the source pointer will have been incremented but nothing will have been stored until all 4 register [pairs] are loaded. Read the LSM_STEP field of TXSTATUS (which is already loaded into a register), a bit like the copy_to_user versions, to determine how many iterations of MGET[DL] have taken place, all of which need rewinding. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- arch/metag/lib/usercopy.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index c3697a1a6f0..4adbab22e13 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -809,29 +809,49 @@ EXPORT_SYMBOL(__copy_user); * * Rationale: * A fault occurs while reading from user buffer, which is the - * source. Since the fault is at a single address, we only - * need to rewind by 8 bytes. + * source. * Since we don't write to kernel buffer until we read first, * the kernel buffer is at the right state and needn't be - * corrected. + * corrected, but the source must be rewound to the beginning of + * the block, which is LSM_STEP*8 bytes. + * LSM_STEP is bits 10:8 in TXSTATUS which is already read + * and stored in D0Ar2 + * + * NOTE: If a fault occurs at the last operation in M{G,S}ETL + * LSM_STEP will be 0. ie: we do 4 writes in our case, if + * a fault happens at the 4th write, LSM_STEP will be 0 + * instead of 4. The code copes with that. */ #define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id) \ __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \ - "SUB %1, %1, #8\n") + "LSR D0Ar2, D0Ar2, #5\n" \ + "ANDS D0Ar2, D0Ar2, #0x38\n" \ + "ADDZ D0Ar2, D0Ar2, #32\n" \ + "SUB %1, %1, D0Ar2\n") /* rewind 'from' pointer when a fault occurs * * Rationale: * A fault occurs while reading from user buffer, which is the - * source. Since the fault is at a single address, we only - * need to rewind by 4 bytes. + * source. * Since we don't write to kernel buffer until we read first, * the kernel buffer is at the right state and needn't be - * corrected. + * corrected, but the source must be rewound to the beginning of + * the block, which is LSM_STEP*4 bytes. + * LSM_STEP is bits 10:8 in TXSTATUS which is already read + * and stored in D0Ar2 + * + * NOTE: If a fault occurs at the last operation in M{G,S}ETL + * LSM_STEP will be 0. ie: we do 4 writes in our case, if + * a fault happens at the 4th write, LSM_STEP will be 0 + * instead of 4. The code copes with that. */ #define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id) \ __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \ - "SUB %1, %1, #4\n") + "LSR D0Ar2, D0Ar2, #6\n" \ + "ANDS D0Ar2, D0Ar2, #0x1c\n" \ + "ADDZ D0Ar2, D0Ar2, #16\n" \ + "SUB %1, %1, D0Ar2\n") /* Copy from user to kernel, zeroing the bytes that were inaccessible in From bc37f9a5e4db87f0548565676f81bad436598766 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 4 Apr 2017 08:51:34 +0100 Subject: [PATCH 206/252] metag/usercopy: Add missing fixups commit b884a190afcecdbef34ca508ea5ee88bb7c77861 upstream. The rapf copy loops in the Meta usercopy code is missing some extable entries for HTP cores with unaligned access checking enabled, where faults occur on the instruction immediately after the faulting access. Add the fixup labels and extable entries for these cases so that corner case user copy failures don't cause kernel crashes. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- arch/metag/lib/usercopy.c | 84 +++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index 4adbab22e13..dfe77b26bea 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -260,27 +260,31 @@ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "22:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #32\n" \ "23:\n" \ - "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "SUB %3, %3, #32\n" \ "24:\n" \ + "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "25:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "26:\n" \ "SUB %3, %3, #32\n" \ "DCACHE [%1+#-64], D0Ar6\n" \ "BR $Lloop"id"\n" \ \ "MOV RAPF, %1\n" \ - "25:\n" \ - "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "26:\n" \ - "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #32\n" \ "27:\n" \ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "28:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %0, %0, #8\n" \ "29:\n" \ + "SUB %3, %3, #32\n" \ + "30:\n" \ + "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "31:\n" \ + "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "32:\n" \ + "SUB %0, %0, #8\n" \ + "33:\n" \ "SETL [%0++], D0.7, D1.7\n" \ "SUB %3, %3, #32\n" \ "1:" \ @@ -312,7 +316,11 @@ " .long 26b,3b\n" \ " .long 27b,3b\n" \ " .long 28b,3b\n" \ - " .long 29b,4b\n" \ + " .long 29b,3b\n" \ + " .long 30b,3b\n" \ + " .long 31b,3b\n" \ + " .long 32b,3b\n" \ + " .long 33b,4b\n" \ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ @@ -403,47 +411,55 @@ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "22:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #16\n" \ "23:\n" \ - "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "24:\n" \ - "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ "SUB %3, %3, #16\n" \ - "25:\n" \ + "24:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "26:\n" \ + "25:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "26:\n" \ "SUB %3, %3, #16\n" \ "27:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "28:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "29:\n" \ + "SUB %3, %3, #16\n" \ + "30:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "31:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "32:\n" \ "SUB %3, %3, #16\n" \ "DCACHE [%1+#-64], D0Ar6\n" \ "BR $Lloop"id"\n" \ \ "MOV RAPF, %1\n" \ - "29:\n" \ - "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "30:\n" \ - "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #16\n" \ - "31:\n" \ - "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "32:\n" \ - "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #16\n" \ "33:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "34:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #16\n" \ "35:\n" \ - "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "SUB %3, %3, #16\n" \ "36:\n" \ - "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %0, %0, #4\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "37:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "38:\n" \ + "SUB %3, %3, #16\n" \ + "39:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "40:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "41:\n" \ + "SUB %3, %3, #16\n" \ + "42:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "43:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "44:\n" \ + "SUB %0, %0, #4\n" \ + "45:\n" \ "SETD [%0++], D0.7\n" \ "SUB %3, %3, #16\n" \ "1:" \ @@ -483,7 +499,15 @@ " .long 34b,3b\n" \ " .long 35b,3b\n" \ " .long 36b,3b\n" \ - " .long 37b,4b\n" \ + " .long 37b,3b\n" \ + " .long 38b,3b\n" \ + " .long 39b,3b\n" \ + " .long 40b,3b\n" \ + " .long 41b,3b\n" \ + " .long 42b,3b\n" \ + " .long 43b,3b\n" \ + " .long 44b,3b\n" \ + " .long 45b,4b\n" \ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ From 6f68a8dac494cc56d990307ef513cc5e95abc24c Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Cerri Date: Mon, 13 Mar 2017 12:14:58 -0300 Subject: [PATCH 207/252] s390/decompressor: fix initrd corruption caused by bss clear commit d82c0d12c92705ef468683c9b7a8298dd61ed191 upstream. Reorder the operations in decompress_kernel() to ensure initrd is moved to a safe location before the bss section is zeroed. During decompression bss can overlap with the initrd and this can corrupt the initrd contents depending on the size of the compressed kernel (which affects where the initrd is placed by the bootloader) and the size of the bss section of the decompressor. Also use the correct initrd size when checking for overlaps with parmblock. Fixes: 06c0dd72aea3 ([S390] fix boot failures with compressed kernels) Reviewed-by: Joy Latten Reviewed-by: Vineetha HariPai Signed-off-by: Marcelo Henrique Cerri Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Willy Tarreau --- arch/s390/boot/compressed/misc.c | 35 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index c4c6a1cf221..05ab8824925 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -138,31 +138,34 @@ static void check_ipl_parmblock(void *start, unsigned long size) unsigned long decompress_kernel(void) { - unsigned long output_addr; - unsigned char *output; + void *output, *kernel_end; - output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL; - check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start); - memset(&_bss, 0, &_ebss - &_bss); - free_mem_ptr = (unsigned long)&_end; - free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - output = (unsigned char *) output_addr; + output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE); + kernel_end = output + SZ__bss_start; + check_ipl_parmblock((void *) 0, (unsigned long) kernel_end); #ifdef CONFIG_BLK_DEV_INITRD /* * Move the initrd right behind the end of the decompressed - * kernel image. + * kernel image. This also prevents initrd corruption caused by + * bss clearing since kernel_end will always be located behind the + * current bss section.. */ - if (INITRD_START && INITRD_SIZE && - INITRD_START < (unsigned long) output + SZ__bss_start) { - check_ipl_parmblock(output + SZ__bss_start, - INITRD_START + INITRD_SIZE); - memmove(output + SZ__bss_start, - (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) output + SZ__bss_start; + if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) { + check_ipl_parmblock(kernel_end, INITRD_SIZE); + memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = (unsigned long) kernel_end; } #endif + /* + * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be + * initialized afterwards since they reside in bss. + */ + memset(&_bss, 0, &_ebss - &_bss); + free_mem_ptr = (unsigned long) &_end; + free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; + puts("Uncompressing Linux... "); decompress(input_data, input_len, NULL, NULL, output, NULL, error); puts("Ok, booting the kernel.\n"); From 2b3d0c063672971522e2c35c3e9dc9ea8667c737 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 29 Dec 2016 18:37:10 +0200 Subject: [PATCH 208/252] net/mlx4_en: Fix bad WQE issue commit 6496bbf0ec481966ef9ffe5b6660d8d1b55c60cc upstream. Single send WQE in RX buffer should be stamped with software ownership in order to prevent the flow of QP in error in FW once UPDATE_QP is called. Fixes: 9f519f68cfff ('mlx4_en: Not using Shared Receive Queues') Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sumit Semwal Signed-off-by: Willy Tarreau --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 02aee1ebd20..2a541500ad2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -350,8 +350,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring->cqn = priv->rx_cq[ring_ind].mcq.cqn; ring->stride = stride; - if (ring->stride <= TXBB_SIZE) + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ ring->buf += TXBB_SIZE; + } ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; From d5efe5c2f9654572ff9585a24cbcb06749815d30 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 16 Jan 2017 18:31:37 +0200 Subject: [PATCH 209/252] net/mlx4_core: Fix racy CQ (Completion Queue) free commit 291c566a28910614ce42d0ffe82196eddd6346f4 upstream. In function mlx4_cq_completion() and mlx4_cq_event(), the radix_tree_lookup requires a rcu_read_lock. This is mandatory: if another core frees the CQ, it could run the radix_tree_node_rcu_free() call_rcu() callback while its being used by the radix tree lookup function. Additionally, in function mlx4_cq_event(), since we are adding the rcu lock around the radix-tree lookup, we no longer need to take the spinlock. Also, the synchronize_irq() call for the async event eliminates the need for incrementing the cq reference count in mlx4_cq_event(). Other changes: 1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock: we no longer take this spinlock in the interrupt context. The spinlock here, therefore, simply protects against different threads simultaneously invoking mlx4_cq_free() for different cq's. 2. In function mlx4_cq_free(), we move the radix tree delete to before the synchronize_irq() calls. This guarantees that we will not access this cq during any subsequent interrupts, and therefore can safely free the CQ after the synchronize_irq calls. The rcu_read_lock in the interrupt handlers only needs to protect against corrupting the radix tree; the interrupt handlers may access the cq outside the rcu_read_lock due to the synchronize_irq calls which protect against premature freeing of the cq. 3. In function mlx4_cq_event(), we change the mlx_warn message to mlx4_dbg. 4. We leave the cq reference count mechanism in place, because it is still needed for the cq completion tasklet mechanism. Fixes: 6d90aa5cf17b ("net/mlx4_core: Make sure there are no pending async events when freeing CQ") Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sumit Semwal Signed-off-by: Willy Tarreau --- drivers/net/ethernet/mellanox/mlx4/cq.c | 38 +++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 004e4231af6..528597f6593 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -57,13 +57,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) { struct mlx4_cq *cq; + rcu_read_lock(); cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, cqn & (dev->caps.num_cqs - 1)); + rcu_read_unlock(); + if (!cq) { mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); return; } + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ ++cq->arm_sn; cq->comp(cq); @@ -74,23 +80,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; - spin_lock(&cq_table->lock); - + rcu_read_lock(); cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); - - spin_unlock(&cq_table->lock); + rcu_read_unlock(); if (!cq) { - mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); + mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn); return; } + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ cq->event(cq, event_type); - - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); } static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, @@ -261,9 +263,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, if (err) return err; - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); if (err) goto err_icm; @@ -303,9 +305,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, return 0; err_radix: - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); err_icm: mlx4_cq_free_icm(dev, cq->cqn); @@ -324,11 +326,11 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); - synchronize_irq(priv->eq_table.eq[cq->vector].irq); - - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); + + synchronize_irq(priv->eq_table.eq[cq->vector].irq); if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); From 6800d881c1d1117cc7cc5cd6e9400aaf3eecfe25 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Apr 2016 10:40:55 +0200 Subject: [PATCH 210/252] char: Drop bogus dependency of DEVPORT on !M68K commit 309124e2648d668a0c23539c5078815660a4a850 upstream. According to full-history-linux commit d3794f4fa7c3edc3 ("[PATCH] M68k update (part 25)"), port operations are allowed on m68k if CONFIG_ISA is defined. However, commit 153dcc54df826d2f ("[PATCH] mem driver: fix conditional on isa i/o support") accidentally changed an "||" into an "&&", disabling it completely on m68k. This logic was retained when introducing the DEVPORT symbol in commit 4f911d64e04a44c4 ("Make /dev/port conditional on config symbol"). Drop the bogus dependency on !M68K to fix this. Fixes: 153dcc54df826d2f ("[PATCH] mem driver: fix conditional on isa i/o support") Signed-off-by: Geert Uytterhoeven Tested-by: Al Stone Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/char/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 3bb6fa3930b..30878924e65 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -580,7 +580,6 @@ config TELCLOCK config DEVPORT bool - depends on !M68K depends on ISA || PCI default y From 797971a00afd4b24b0b64938a514e8c547644623 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 20 Mar 2017 17:49:03 +1100 Subject: [PATCH 211/252] powerpc: Disable HFSCR[TM] if TM is not supported commit 7ed23e1bae8bf7e37fd555066550a00b95a3a98b upstream. On Power8 & Power9 the early CPU inititialisation in __init_HFSCR() turns on HFSCR[TM] (Hypervisor Facility Status and Control Register [Transactional Memory]), but that doesn't take into account that TM might be disabled by CPU features, or disabled by the kernel being built with CONFIG_PPC_TRANSACTIONAL_MEM=n. So later in boot, when we have setup the CPU features, clear HSCR[TM] if the TM CPU feature has been disabled. We use CPU_FTR_TM_COMP to account for the CONFIG_PPC_TRANSACTIONAL_MEM=n case. Without this a KVM guest might try use TM, even if told not to, and cause an oops in the host kernel. Typically the oops is seen in __kvmppc_vcore_entry() and may or may not be fatal to the host, but is always bad news. In practice all shipping CPU revisions do support TM, and all host kernels we are aware of build with TM support enabled, so no one should actually be able to hit this in the wild. Fixes: 2a3563b023e5 ("powerpc: Setup in HFSCR for POWER8") Signed-off-by: Benjamin Herrenschmidt Tested-by: Sam Bobroff [mpe: Rewrite change log with input from Sam, add Fixes/stable] Signed-off-by: Michael Ellerman [sb: Backported to linux-4.4.y: adjusted context] Signed-off-by: Sam Bobroff Signed-off-by: Willy Tarreau --- arch/powerpc/kernel/setup_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 389fb8077cc..1d3d3d65367 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -142,6 +142,15 @@ static void check_smt_enabled(void) of_node_put(dn); } } + + /* + * Fixup HFSCR:TM based on CPU features. The bit is set by our + * early asm init because at that point we haven't updated our + * CPU features from firmware and device-tree. Here we have, + * so let's do it. + */ + if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP)) + mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM); } /* Look for smt-enabled= cmdline option */ From 6a1d611e9c4b871f44ef7a1ff8069461b2792add Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:56:03 +0000 Subject: [PATCH 212/252] pegasus: Use heap buffers for all register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5593523f968bc86d42a035c6df47d5e0979b5ace upstream. Allocating USB buffers on the stack is not portable, and no longer works on x86_64 (with VMAP_STACK enabled as per default). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") References: https://bugs.debian.org/852556 Reported-by: Lisandro Damián Nicanor Pérez Meyer Tested-by: Lisandro Damián Nicanor Pérez Meyer Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Cc: Brad Spengler Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/net/usb/pegasus.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 03e8a15d7de..f32a57ed1d1 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -126,40 +126,61 @@ static void async_ctrl_callback(struct urb *urb) static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) { + u8 *buf; int ret; + buf = kmalloc(size, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_rcvctrlpipe(pegasus->usb, 0), PEGASUS_REQ_GET_REGS, PEGASUS_REQT_READ, 0, - indx, data, size, 1000); + indx, buf, size, 1000); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + else if (ret <= size) + memcpy(data, buf, ret); + kfree(buf); return ret; } -static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) +static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, + const void *data) { + u8 *buf; int ret; + buf = kmemdup(data, size, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0), PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, - indx, data, size, 100); + indx, buf, size, 100); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + kfree(buf); return ret; } static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) { + u8 *buf; int ret; + buf = kmemdup(&data, 1, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0), PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, - indx, &data, 1, 1000); + indx, buf, 1, 1000); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + kfree(buf); return ret; } From 57420afadca5f7da172c6bdfcbefe8910a9a9125 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:56:32 +0000 Subject: [PATCH 213/252] rtl8150: Use heap buffers for all register access commit 7926aff5c57b577ab0f43364ff0c59d968f6a414 upstream. Allocating USB buffers on the stack is not portable, and no longer works on x86_64 (with VMAP_STACK enabled as per default). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Cc: Brad Spengler Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/net/usb/rtl8150.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 6cbdac67f3a..59d6a3a5830 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -156,16 +156,36 @@ static const char driver_name [] = "rtl8150"; */ static int get_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) { - return usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), - RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, - indx, 0, data, size, 500); + void *buf; + int ret; + + buf = kmalloc(size, GFP_NOIO); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), + RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, + indx, 0, buf, size, 500); + if (ret > 0 && ret <= size) + memcpy(data, buf, ret); + kfree(buf); + return ret; } -static int set_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) +static int set_registers(rtl8150_t * dev, u16 indx, u16 size, const void *data) { - return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, - indx, 0, data, size, 500); + void *buf; + int ret; + + buf = kmemdup(data, size, GFP_NOIO); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), + RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, + indx, 0, buf, size, 500); + kfree(buf); + return ret; } static void async_set_reg_cb(struct urb *urb) From 95948b1e75decc8bc577cc2bd38157108b27d4cb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 19 Apr 2017 12:07:08 -0400 Subject: [PATCH 214/252] tracing: Allocate the snapshot buffer before enabling probe commit df62db5be2e5f070ecd1a5ece5945b590ee112e0 upstream. Currently the snapshot trigger enables the probe and then allocates the snapshot. If the probe triggers before the allocation, it could cause the snapshot to fail and turn tracing off. It's best to allocate the snapshot buffer first, and then enable the trigger. If something goes wrong in the enabling of the trigger, the snapshot buffer is still allocated, but it can also be freed by the user by writting zero into the snapshot buffer file. Also add a check of the return status of alloc_snapshot(). Fixes: 77fd5c15e3 ("tracing: Add snapshot trigger to function probes") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Willy Tarreau --- kernel/trace/trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d6e72522fc4..edffb6781c0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5468,11 +5468,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash, return ret; out_reg: + ret = alloc_snapshot(&global_trace); + if (ret < 0) + goto out; + ret = register_ftrace_function_probe(glob, ops, count); - if (ret >= 0) - alloc_snapshot(&global_trace); - + out: return ret < 0 ? ret : 0; } From 6d5f7804da1dd0b31007c11df89d1792cde45a3f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 19 Apr 2017 14:29:46 -0400 Subject: [PATCH 215/252] ring-buffer: Have ring_buffer_iter_empty() return true when empty commit 78f7a45dac2a2d2002f98a3a95f7979867868d73 upstream. I noticed that reading the snapshot file when it is empty no longer gives a status. It suppose to show the status of the snapshot buffer as well as how to allocate and use it. For example: ># cat snapshot # tracer: nop # # # * Snapshot is allocated * # # Snapshot commands: # echo 0 > snapshot : Clears and frees snapshot buffer # echo 1 > snapshot : Allocates snapshot buffer, if not already allocated. # Takes a snapshot of the main buffer. # echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free) # (Doesn't have to be '2' works with any number that # is not a '0' or '1') But instead it just showed an empty buffer: ># cat snapshot # tracer: nop # # entries-in-buffer/entries-written: 0/0 #P:4 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | What happened was that it was using the ring_buffer_iter_empty() function to see if it was empty, and if it was, it showed the status. But that function was returning false when it was empty. The reason was that the iter header page was on the reader page, and the reader page was empty, but so was the buffer itself. The check only tested to see if the iter was on the commit page, but the commit page was no longer pointing to the reader page, but as all pages were empty, the buffer is also. Fixes: 651e22f2701b ("ring-buffer: Always reset iterator to reader page") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Willy Tarreau --- kernel/trace/ring_buffer.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2948be99b20..1caa75585fa 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3402,11 +3402,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); int ring_buffer_iter_empty(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer; + struct buffer_page *reader; + struct buffer_page *head_page; + struct buffer_page *commit_page; + unsigned commit; cpu_buffer = iter->cpu_buffer; - return iter->head_page == cpu_buffer->commit_page && - iter->head == rb_commit_index(cpu_buffer); + /* Remember, trace recording is off when iterator is in use */ + reader = cpu_buffer->reader_page; + head_page = cpu_buffer->head_page; + commit_page = cpu_buffer->commit_page; + commit = rb_page_commit(commit_page); + + return ((iter->head_page == commit_page && iter->head == commit) || + (iter->head_page == reader && commit_page == head_page && + head_page->read == commit && + iter->head == rb_page_commit(cpu_buffer->reader_page))); } EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); From 9bc89352bac927c260b49b21519565fa14ed3795 Mon Sep 17 00:00:00 2001 From: Hongxu Jia Date: Tue, 29 Nov 2016 21:56:26 -0500 Subject: [PATCH 216/252] netfilter: arp_tables: fix invoking 32bit "iptable -P INPUT ACCEPT" failed in 64bit kernel commit 17a49cd549d9dc8707dc9262210166455c612dde upstream. Since 09d9686047db ("netfilter: x_tables: do compat validation via translate_table"), it used compatr structure to assign newinfo structure. In translate_compat_table of ip_tables.c and ip6_tables.c, it used compatr->hook_entry to replace info->hook_entry and compatr->underflow to replace info->underflow, but not do the same replacement in arp_tables.c. It caused invoking 32-bit "arptbale -P INPUT ACCEPT" failed in 64bit kernel. -------------------------------------- root@qemux86-64:~# arptables -P INPUT ACCEPT root@qemux86-64:~# arptables -P INPUT ACCEPT ERROR: Policy for `INPUT' offset 448 != underflow 0 arptables: Incompatible with this kernel -------------------------------------- Fixes: 09d9686047db ("netfilter: x_tables: do compat validation via translate_table") Signed-off-by: Hongxu Jia Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/arp_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 95a5f261fe8..f8f0518772d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1309,8 +1309,8 @@ static int translate_compat_table(struct xt_table_info **pinfo, newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; From 509fda4739438283dbbb9787a4e29dd791e4690c Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Wed, 22 Mar 2017 15:27:01 -0500 Subject: [PATCH 217/252] net: phy: handle state correctly in phy_stop_machine commit 49d52e8108a21749dc2114b924c907db43358984 upstream. If the PHY is halted on stop, then do not set the state to PHY_UP. This ensures the phy will be restarted later in phy_start when the machine is started again. Fixes: 00db8189d984 ("This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details.") Signed-off-by: Nathan Sullivan Signed-off-by: Brad Mouring Acked-by: Xander Huff Acked-by: Kyle Roeschley Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 299d35552a3..bd245c3039e 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -474,7 +474,7 @@ void phy_stop_machine(struct phy_device *phydev) cancel_delayed_work_sync(&phydev->state_queue); mutex_lock(&phydev->lock); - if (phydev->state > PHY_UP) + if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); From 43f8c8b058214682ec49a6c9a4a3d6bb1a0d6b84 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Apr 2017 12:03:13 +0200 Subject: [PATCH 218/252] l2tp: take reference on sessions being dumped commit e08293a4ccbcc993ded0fdc46f1e57926b833d63 upstream. Take a reference on the sessions returned by l2tp_session_find_nth() (and rename it l2tp_session_get_nth() to reflect this change), so that caller is assured that the session isn't going to disappear while processing it. For procfs and debugfs handlers, the session is held in the .start() callback and dropped in .show(). Given that pppol2tp_seq_session_show() dereferences the associated PPPoL2TP socket and that l2tp_dfs_seq_session_show() might call pppol2tp_show(), we also need to call the session's .ref() callback to prevent the socket from going away from under us. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Fixes: 0ad6614048cf ("l2tp: Add debugfs files for dumping l2tp debug info") Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: Willy Tarreau --- net/l2tp/l2tp_core.c | 8 ++++++-- net/l2tp/l2tp_core.h | 3 ++- net/l2tp/l2tp_debugfs.c | 10 +++++++--- net/l2tp/l2tp_netlink.c | 7 +++++-- net/l2tp/l2tp_ppp.c | 10 +++++++--- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 797ff373e48..787ac0ef182 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -280,7 +280,8 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn } EXPORT_SYMBOL_GPL(l2tp_session_find); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref) { int hash; struct l2tp_session *session; @@ -290,6 +291,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -300,7 +304,7 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_nth); +EXPORT_SYMBOL_GPL(l2tp_session_get_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a98c854c252..54f89f38386 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -236,7 +236,8 @@ out: extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); extern void l2tp_tunnel_sock_put(struct sock *sk); extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); -extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); +extern struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref); extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 072d7202e18..c6bd783cfb1 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -237,10 +237,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) } /* Show the tunnel or session context */ - if (pd->session == NULL) + if (!pd->session) { l2tp_dfs_seq_tunnel_show(m, pd->tunnel); - else + } else { l2tp_dfs_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0825ff26e11..490024eaece 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -719,7 +719,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_find_nth(tunnel, si); + session = l2tp_session_get_nth(tunnel, si, false); if (session == NULL) { ti++; tunnel = NULL; @@ -729,8 +729,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session) <= 0) + session) <= 0) { + l2tp_session_dec_refcount(session); break; + } + l2tp_session_dec_refcount(session); si++; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index c3ae2411650..c06c7ed47b6 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1576,7 +1576,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -1703,10 +1703,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) /* Show the tunnel or session context. */ - if (pd->session == NULL) + if (!pd->session) { pppol2tp_seq_tunnel_show(m, pd->tunnel); - else + } else { pppol2tp_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; From d1f922a5e27bc8f1471ccd4a77f439194a9a0e11 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 30 Mar 2017 16:06:02 +0100 Subject: [PATCH 219/252] MIPS: KGDB: Use kernel context for sleeping threads commit 162b270c664dca2e0944308e92f9fcc887151a72 upstream. KGDB is a kernel debug stub and it can't be used to debug userland as it can only safely access kernel memory. On MIPS however KGDB has always got the register state of sleeping processes from the userland register context at the beginning of the kernel stack. This is meaningless for kernel threads (which never enter userland), and for user threads it prevents the user seeing what it is doing while in the kernel: (gdb) info threads Id Target Id Frame ... 3 Thread 2 (kthreadd) 0x0000000000000000 in ?? () 2 Thread 1 (init) 0x000000007705c4b4 in ?? () 1 Thread -2 (shadowCPU0) 0xffffffff8012524c in arch_kgdb_breakpoint () at arch/mips/kernel/kgdb.c:201 Get the register state instead from the (partial) kernel register context stored in the task's thread_struct for resume() to restore. All threads now correctly appear to be in context_switch(): (gdb) info threads Id Target Id Frame ... 3 Thread 2 (kthreadd) context_switch (rq=, cookie=..., next=, prev=0x0) at kernel/sched/core.c:2903 2 Thread 1 (init) context_switch (rq=, cookie=..., next=, prev=0x0) at kernel/sched/core.c:2903 1 Thread -2 (shadowCPU0) 0xffffffff8012524c in arch_kgdb_breakpoint () at arch/mips/kernel/kgdb.c:201 Call clobbered registers which aren't saved and exception registers (BadVAddr & Cause) which can't be easily determined without stack unwinding are reported as 0. The PC is taken from the return address, such that the state presented matches that found immediately after returning from resume(). Fixes: 8854700115ec ("[MIPS] kgdb: add arch support for the kernel's kgdb core") Signed-off-by: James Hogan Cc: Jason Wessel Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15829/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/kgdb.c | 46 ++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index fcaac2f132f..910db386d9e 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -236,9 +236,6 @@ static int compute_signal(int tt) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { int reg; - struct thread_info *ti = task_thread_info(p); - unsigned long ksp = (unsigned long)ti + THREAD_SIZE - 32; - struct pt_regs *regs = (struct pt_regs *)ksp - 1; #if (KGDB_GDB_REG_SIZE == 32) u32 *ptr = (u32 *)gdb_regs; #else @@ -246,25 +243,46 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) #endif for (reg = 0; reg < 16; reg++) - *(ptr++) = regs->regs[reg]; + *(ptr++) = 0; /* S0 - S7 */ - for (reg = 16; reg < 24; reg++) - *(ptr++) = regs->regs[reg]; + *(ptr++) = p->thread.reg16; + *(ptr++) = p->thread.reg17; + *(ptr++) = p->thread.reg18; + *(ptr++) = p->thread.reg19; + *(ptr++) = p->thread.reg20; + *(ptr++) = p->thread.reg21; + *(ptr++) = p->thread.reg22; + *(ptr++) = p->thread.reg23; for (reg = 24; reg < 28; reg++) *(ptr++) = 0; /* GP, SP, FP, RA */ - for (reg = 28; reg < 32; reg++) - *(ptr++) = regs->regs[reg]; + *(ptr++) = (long)p; + *(ptr++) = p->thread.reg29; + *(ptr++) = p->thread.reg30; + *(ptr++) = p->thread.reg31; - *(ptr++) = regs->cp0_status; - *(ptr++) = regs->lo; - *(ptr++) = regs->hi; - *(ptr++) = regs->cp0_badvaddr; - *(ptr++) = regs->cp0_cause; - *(ptr++) = regs->cp0_epc; + *(ptr++) = p->thread.cp0_status; + + /* lo, hi */ + *(ptr++) = 0; + *(ptr++) = 0; + + /* + * BadVAddr, Cause + * Ideally these would come from the last exception frame up the stack + * but that requires unwinding, otherwise we can't know much for sure. + */ + *(ptr++) = 0; + *(ptr++) = 0; + + /* + * PC + * use return address (RA), i.e. the moment after return from resume() + */ + *(ptr++) = p->thread.reg31; } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) From 3e721e29e8f63c5191e149e1e0ddb04533dc23be Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 26 Sep 2016 03:03:41 +0300 Subject: [PATCH 220/252] ARM: dts: imx31: move CCM device node to AIPS2 bus devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1f87aee6a2e55eda466a43ba6248a8b75eede153 upstream. i.MX31 Clock Control Module controller is found on AIPS2 bus, move it there from SPBA bus to avoid a conflict of device IO space mismatch. Fixes: ef0e4a606fb6 ("ARM: mx31: Replace clk_register_clkdev with clock DT lookup") Signed-off-by: Vladimir Zapolskiy Acked-by: Uwe Kleine-König Signed-off-by: Shawn Guo Signed-off-by: Willy Tarreau --- arch/arm/boot/dts/imx31.dtsi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index 3085ac20423..e765571661f 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -93,13 +93,6 @@ clock-names = "ipg", "per"; status = "disabled"; }; - - clks: ccm@53f80000{ - compatible = "fsl,imx31-ccm"; - reg = <0x53f80000 0x4000>; - interrupts = <31>, <53>; - #clock-cells = <1>; - }; }; aips@53f00000 { /* AIPS2 */ @@ -109,6 +102,13 @@ reg = <0x53f00000 0x100000>; ranges; + clks: ccm@53f80000{ + compatible = "fsl,imx31-ccm"; + reg = <0x53f80000 0x4000>; + interrupts = <31>, <53>; + #clock-cells = <1>; + }; + gpt: timer@53f90000 { compatible = "fsl,imx31-gpt"; reg = <0x53f90000 0x4000>; From bfac3620dc5335d65dfc4c9e308f0558c1763f29 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 17 Nov 2016 03:30:51 +0200 Subject: [PATCH 221/252] ARM: dts: imx31: fix AVIC base address commit af92305e567b7f4c9cf48b9e46c1f48ec9ffb1fb upstream. On i.MX31 AVIC interrupt controller base address is at 0x68000000. The problem was shadowed by the AVIC driver, which takes the correct base address from a SoC specific header file. Fixes: d2a37b3d91f4 ("ARM i.MX31: Add devicetree support") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Willy Tarreau --- arch/arm/boot/dts/imx31.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index e765571661f..b73190d08ba 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -20,11 +20,11 @@ serial4 = &uart5; }; - avic: avic-interrupt-controller@60000000 { + avic: interrupt-controller@68000000 { compatible = "fsl,imx31-avic", "fsl,avic"; interrupt-controller; #interrupt-cells = <1>; - reg = <0x60000000 0x100000>; + reg = <0x68000000 0x100000>; }; soc { From bda0832822183c326b2ee65cd7f8442532adbe36 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Nov 2014 04:30:14 +0800 Subject: [PATCH 222/252] tun: Fix TUN_PKT_STRIP setting commit 2eb783c43e7cf807a45899c10ed556b6dc116625 upstream. We set the flag TUN_PKT_STRIP if the user buffer provided is too small to contain the entire packet plus meta-data. However, this has been broken ever since we added GSO meta-data. VLAN acceleration also has the same problem. This patch fixes this by taking both into account when setting the TUN_PKT_STRIP flag. The fact that this has been broken for six years without anyone realising means that nobody actually uses this flag. Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller [wt: no tuntap VLAN offloading in 3.10] Signed-off-by: Willy Tarreau --- drivers/net/tun.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 582497103fe..ea6ada39db1 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1272,12 +1272,16 @@ static ssize_t tun_put_user(struct tun_struct *tun, { struct tun_pi pi = { 0, skb->protocol }; ssize_t total = 0; + int vnet_hdr_sz = 0; + + if (tun->flags & TUN_VNET_HDR) + vnet_hdr_sz = tun->vnet_hdr_sz; if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) < 0) return -EINVAL; - if (len < skb->len) { + if (len < skb->len + vnet_hdr_sz) { /* Packet will be striped */ pi.flags |= TUN_PKT_STRIP; } @@ -1287,9 +1291,9 @@ static ssize_t tun_put_user(struct tun_struct *tun, total += sizeof(pi); } - if (tun->flags & TUN_VNET_HDR) { + if (vnet_hdr_sz) { struct virtio_net_hdr gso = { 0 }; /* no info leak */ - if ((len -= tun->vnet_hdr_sz) < 0) + if ((len -= vnet_hdr_sz) < 0) return -EINVAL; if (skb_is_gso(skb)) { @@ -1332,7 +1336,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total, sizeof(gso)))) return -EFAULT; - total += tun->vnet_hdr_sz; + total += vnet_hdr_sz; } len = min_t(int, skb->len, len); From ad1336d2b48c0dfb8f90da13b1cc864fc95fd167 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Nov 2013 10:55:43 +0300 Subject: [PATCH 223/252] Staging: vt6655-6: potential NULL dereference in hostap_disable_hostapd() commit cb4855b49deb1acce27706ad9509d63c4fe8e988 upstream. We fixed this to use free_netdev() instead of kfree() but unfortunately free_netdev() doesn't accept NULL pointers. Smatch complains about this, it's not something I discovered through testing. Fixes: 3030d40b5036 ('staging: vt6655: use free_netdev instead of kfree') Fixes: 0a438d5b381e ('staging: vt6656: use free_netdev instead of kfree') Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman [wt: only vt6656 was converted to free_netdev in 3.10] Signed-off-by: Willy Tarreau --- drivers/staging/vt6656/hostap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vt6656/hostap.c b/drivers/staging/vt6656/hostap.c index c699a3058b3..cfffdd20e43 100644 --- a/drivers/staging/vt6656/hostap.c +++ b/drivers/staging/vt6656/hostap.c @@ -133,7 +133,8 @@ static int hostap_disable_hostapd(struct vnt_private *pDevice, int rtnl_locked) DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "%s: Netdevice %s unregistered\n", pDevice->dev->name, pDevice->apdev->name); } - free_netdev(pDevice->apdev); + if (pDevice->apdev) + free_netdev(pDevice->apdev); pDevice->apdev = NULL; pDevice->bEnable8021x = false; pDevice->bEnableHostWEP = false; From 8ceaec02b17397d12719a282945abc95f8149d9f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 20 Feb 2014 20:51:06 +0100 Subject: [PATCH 224/252] net: sctp: rework multihoming retransmission path selection to rfc4960 commit 4c47af4d5eb2c2f78f886079a3920a7078a6f0a0 upstream. Problem statement: 1) both paths (primary path1 and alternate path2) are up after the association has been established i.e., HB packets are normally exchanged, 2) path2 gets inactive after path_max_retrans * max_rto timed out (i.e. path2 is down completely), 3) now, if a transmission times out on the only surviving/active path1 (any ~1sec network service impact could cause this like a channel bonding failover), then the retransmitted packets are sent over the inactive path2; this happens with partial failover and without it. Besides not being optimal in the above scenario, a small failure or timeout in the only existing path has the potential to cause long delays in the retransmission (depending on RTO_MAX) until the still active path is reselected. Further, when the T3-timeout occurs, we have active_patch == retrans_path, and even though the timeout occurred on the initial transmission of data, not a retransmit, we end up updating retransmit path. RFC4960, section 6.4. "Multi-Homed SCTP Endpoints" states under 6.4.1. "Failover from an Inactive Destination Address" the following: Some of the transport addresses of a multi-homed SCTP endpoint may become inactive due to either the occurrence of certain error conditions (see Section 8.2) or adjustments from the SCTP user. When there is outbound data to send and the primary path becomes inactive (e.g., due to failures), or where the SCTP user explicitly requests to send data to an inactive destination transport address, before reporting an error to its ULP, the SCTP endpoint should try to send the data to an alternate __active__ destination transport address if one exists. When retransmitting data that timed out, if the endpoint is multihomed, it should consider each source-destination address pair in its retransmission selection policy. When retransmitting timed-out data, the endpoint should attempt to pick the most divergent source-destination pair from the original source-destination pair to which the packet was transmitted. Note: Rules for picking the most divergent source-destination pair are an implementation decision and are not specified within this document. So, we should first reconsider to take the current active retransmission transport if we cannot find an alternative active one. If all of that fails, we can still round robin through unkown, partial failover, and inactive ones in the hope to find something still suitable. Commit 4141ddc02a92 ("sctp: retran_path update bug fix") broke that behaviour by selecting the next inactive transport when no other active transport was found besides the current assoc's peer.retran_path. Before commit 4141ddc02a92, we would have traversed through the list until we reach our peer.retran_path again, and in case that is still in state SCTP_ACTIVE, we would take it and return. Only if that is not the case either, we take the next inactive transport. Besides all that, another issue is that transports in state SCTP_UNKNOWN could be preferred over transports in state SCTP_ACTIVE in case a SCTP_ACTIVE transport appears after SCTP_UNKNOWN in the transport list yielding a weaker transport state to be used in retransmission. This patch mostly reverts 4141ddc02a92, but also rewrites this function to introduce more clarity and strictness into the code. A strict priority of transport states is enforced in this patch, hence selection is active > unkown > partial failover > inactive. Fixes: 4141ddc02a92 ("sctp: retran_path update bug fix") Signed-off-by: Daniel Borkmann Cc: Gui Jianfeng Acked-by: Vlad Yasevich Signed-off-by: David S. Miller [wt: picked updated function from 3.12 except the debug statement] Signed-off-by: Willy Tarreau --- net/sctp/associola.c | 157 +++++++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 64 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 6360a14edea..59ab0c40e15 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1301,82 +1301,111 @@ void sctp_assoc_update(struct sctp_association *asoc, } /* Update the retran path for sending a retransmitted packet. - * Round-robin through the active transports, else round-robin - * through the inactive transports as this is the next best thing - * we can try. + * See also RFC4960, 6.4. Multi-Homed SCTP Endpoints: + * + * When there is outbound data to send and the primary path + * becomes inactive (e.g., due to failures), or where the + * SCTP user explicitly requests to send data to an + * inactive destination transport address, before reporting + * an error to its ULP, the SCTP endpoint should try to send + * the data to an alternate active destination transport + * address if one exists. + * + * When retransmitting data that timed out, if the endpoint + * is multihomed, it should consider each source-destination + * address pair in its retransmission selection policy. + * When retransmitting timed-out data, the endpoint should + * attempt to pick the most divergent source-destination + * pair from the original source-destination pair to which + * the packet was transmitted. + * + * Note: Rules for picking the most divergent source-destination + * pair are an implementation decision and are not specified + * within this document. + * + * Our basic strategy is to round-robin transports in priorities + * according to sctp_state_prio_map[] e.g., if no such + * transport with state SCTP_ACTIVE exists, round-robin through + * SCTP_UNKNOWN, etc. You get the picture. */ -void sctp_assoc_update_retran_path(struct sctp_association *asoc) +static const u8 sctp_trans_state_to_prio_map[] = { + [SCTP_ACTIVE] = 3, /* best case */ + [SCTP_UNKNOWN] = 2, + [SCTP_PF] = 1, + [SCTP_INACTIVE] = 0, /* worst case */ +}; + +static u8 sctp_trans_score(const struct sctp_transport *trans) { - struct sctp_transport *t, *next; - struct list_head *head = &asoc->peer.transport_addr_list; - struct list_head *pos; - - if (asoc->peer.transport_count == 1) - return; - - /* Find the next transport in a round-robin fashion. */ - t = asoc->peer.retran_path; - pos = &t->transports; - next = NULL; - - while (1) { - /* Skip the head. */ - if (pos->next == head) - pos = head->next; - else - pos = pos->next; - - t = list_entry(pos, struct sctp_transport, transports); - - /* We have exhausted the list, but didn't find any - * other active transports. If so, use the next - * transport. - */ - if (t == asoc->peer.retran_path) { - t = next; - break; - } - - /* Try to find an active transport. */ - - if ((t->state == SCTP_ACTIVE) || - (t->state == SCTP_UNKNOWN)) { - break; - } else { - /* Keep track of the next transport in case - * we don't find any active transport. - */ - if (t->state != SCTP_UNCONFIRMED && !next) - next = t; - } - } - - if (t) - asoc->peer.retran_path = t; - else - t = asoc->peer.retran_path; - - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" - " %p addr: ", - " port: %d\n", - asoc, - (&t->ipaddr), - ntohs(t->ipaddr.v4.sin_port)); + return sctp_trans_state_to_prio_map[trans->state]; } -/* Choose the transport for sending retransmit packet. */ -struct sctp_transport *sctp_assoc_choose_alter_transport( - struct sctp_association *asoc, struct sctp_transport *last_sent_to) +static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr, + struct sctp_transport *best) +{ + if (best == NULL) + return curr; + + return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best; +} + +void sctp_assoc_update_retran_path(struct sctp_association *asoc) +{ + struct sctp_transport *trans = asoc->peer.retran_path; + struct sctp_transport *trans_next = NULL; + + /* We're done as we only have the one and only path. */ + if (asoc->peer.transport_count == 1) + return; + /* If active_path and retran_path are the same and active, + * then this is the only active path. Use it. + */ + if (asoc->peer.active_path == asoc->peer.retran_path && + asoc->peer.active_path->state == SCTP_ACTIVE) + return; + + /* Iterate from retran_path's successor back to retran_path. */ + for (trans = list_next_entry(trans, transports); 1; + trans = list_next_entry(trans, transports)) { + /* Manually skip the head element. */ + if (&trans->transports == &asoc->peer.transport_addr_list) + continue; + if (trans->state == SCTP_UNCONFIRMED) + continue; + trans_next = sctp_trans_elect_best(trans, trans_next); + /* Active is good enough for immediate return. */ + if (trans_next->state == SCTP_ACTIVE) + break; + /* We've reached the end, time to update path. */ + if (trans == asoc->peer.retran_path) + break; + } + + if (trans_next != NULL) + asoc->peer.retran_path = trans_next; + + SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" + " %p updated new path to addr: ", + " port: %d\n", + asoc, + (&asoc->peer.retran_path->ipaddr), + ntohs(asoc->peer.retran_path->ipaddr.v4.sin_port)); +} + +struct sctp_transport * +sctp_assoc_choose_alter_transport(struct sctp_association *asoc, + struct sctp_transport *last_sent_to) { /* If this is the first time packet is sent, use the active path, * else use the retran path. If the last packet was sent over the * retran path, update the retran path and use it. */ - if (!last_sent_to) + if (last_sent_to == NULL) { return asoc->peer.active_path; - else { + } else { if (last_sent_to == asoc->peer.retran_path) sctp_assoc_update_retran_path(asoc); + return asoc->peer.retran_path; } } From 8c11dcea41df099c585dab4490e92deb8ae7f9eb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Oct 2016 11:28:32 -0300 Subject: [PATCH 225/252] perf trace: Use the syscall raw_syscalls:sys_enter timestamp commit ecf1e2253ea79c6204f4d6a5e756e8fb4aed5a7e upstream. Instead of the one when another syscall takes place while another is being processed (in another CPU, but we show it serialized, so need to "interrupt" the other), and also when finally showing the sys_enter + sys_exit + duration, where we were showing the sample->time for the sys_exit, duh. Before: # perf trace sleep 1 0.373 ( 0.001 ms): close(fd: 3 ) = 0 1000.626 (1000.211 ms): nanosleep(rqtp: 0x7ffd6ddddfb0) = 0 1000.653 ( 0.003 ms): close(fd: 1 ) = 0 1000.657 ( 0.002 ms): close(fd: 2 ) = 0 1000.667 ( 0.000 ms): exit_group( ) # After: # perf trace sleep 1 0.336 ( 0.001 ms): close(fd: 3 ) = 0 0.373 (1000.086 ms): nanosleep(rqtp: 0x7ffe303e9550) = 0 1000.481 ( 0.002 ms): close(fd: 1 ) = 0 1000.485 ( 0.001 ms): close(fd: 2 ) = 0 1000.494 ( 0.000 ms): exit_group( ) [root@jouet linux]# [js] no trace__printf_interrupted_entry in 3.12 yet Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ecbzgmu2ni6glc6zkw8p1zmx@git.kernel.org Fixes: 752fde44fd1c ("perf trace: Support interrupted syscalls") Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Slaby [wt: 3.10 uses stdout instead of trace->output ; no trace__printf_interrupted_entry() function ] Signed-off-by: Willy Tarreau --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ab3ed4af146..9f2afbd3370 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -330,7 +330,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { if (!trace->duration_filter) { - trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout); + trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, stdout); printf("%-70s\n", ttrace->entry_str); } } else @@ -364,7 +364,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, } else if (trace->duration_filter) goto out; - trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout); + trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, stdout); if (ttrace->entry_pending) { printf("%-70s", ttrace->entry_str); From 3d46433da69cf192317c0368082caa33177a3939 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 14 Mar 2017 17:55:45 +0100 Subject: [PATCH 226/252] USB: usbtmc: add missing endpoint sanity check commit 687e0687f71ec00e0132a21fef802dee88c2f1ad upstream. USBTMC devices are required to have a bulk-in and a bulk-out endpoint, but the driver failed to verify this, something which could lead to the endpoint addresses being taken from uninitialised memory. Make sure to zero all private data as part of allocation, and add the missing endpoint sanity check. Note that this also addresses a more recently introduced issue, where the interrupt-in-presence flag would also be uninitialised whenever the optional interrupt-in endpoint is not present. This in turn could lead to an interrupt urb being allocated, initialised and submitted based on uninitialised values. Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Fixes: 5b775f672cc9 ("USB: add USB test and measurement class driver") Signed-off-by: Johan Hovold [ johan: backport to v4.4 ] Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/class/usbtmc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 4c5506ae5e4..64317898a7c 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -989,7 +989,7 @@ static int usbtmc_probe(struct usb_interface *intf, dev_dbg(&intf->dev, "%s called\n", __func__); - data = kmalloc(sizeof(struct usbtmc_device_data), GFP_KERNEL); + data = kzalloc(sizeof(struct usbtmc_device_data), GFP_KERNEL); if (!data) { dev_err(&intf->dev, "Unable to allocate kernel memory\n"); return -ENOMEM; @@ -1035,6 +1035,12 @@ static int usbtmc_probe(struct usb_interface *intf, } } + if (!data->bulk_out || !data->bulk_in) { + dev_err(&intf->dev, "bulk endpoints not found\n"); + retcode = -ENODEV; + goto err_put; + } + retcode = get_capabilities(data); if (retcode) dev_err(&intf->dev, "can't read capabilities\n"); @@ -1058,6 +1064,7 @@ static int usbtmc_probe(struct usb_interface *intf, error_register: sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp); sysfs_remove_group(&intf->dev.kobj, &data_attr_grp); +err_put: kref_put(&data->kref, usbtmc_delete); return retcode; } From 60f704bdd703447fd0e5a3e0a09d9a84d523292e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Mar 2017 19:36:13 -0700 Subject: [PATCH 227/252] ping: implement proper locking commit 43a6684519ab0a6c52024b5e25322476cabad893 upstream. We got a report of yet another bug in ping http://www.openwall.com/lists/oss-security/2017/03/24/6 ->disconnect() is not called with socket lock held. Fix this by acquiring ping rwlock earlier. Thanks to Daniel, Alexander and Andrey for letting us know this problem. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Reported-by: Daniel Jiang Reported-by: Solar Designer Reported-by: Andrey Konovalov Signed-off-by: David S. Miller [wt: the function is ping_v4_unhash() in 3.10] Signed-off-by: Willy Tarreau --- net/ipv4/ping.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 431d597515d..0b778d75e38 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -135,17 +135,18 @@ static void ping_v4_hash(struct sock *sk) static void ping_v4_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); + pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + write_lock_bh(&ping_table.lock); if (sk_hashed(sk)) { - write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&ping_table.lock); } + write_unlock_bh(&ping_table.lock); } static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, From 15668b4354b38b41b316571deed2763d631b2977 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 19 Dec 2016 12:03:41 -0500 Subject: [PATCH 228/252] USB: fix problems with duplicate endpoint addresses commit 0a8fd1346254974c3a852338508e4a4cddbb35f1 upstream. When checking a new device's descriptors, the USB core does not check for duplicate endpoint addresses. This can cause a problem when the sysfs files for those endpoints are created; trying to create multiple files with the same name will provoke a WARNING: WARNING: CPU: 2 PID: 865 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x8a/0xa0 sysfs: cannot create duplicate filename '/devices/platform/dummy_hcd.0/usb2/2-1/2-1:64.0/ep_05' Kernel panic - not syncing: panic_on_warn set ... CPU: 2 PID: 865 Comm: kworker/2:1 Not tainted 4.9.0-rc7+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event ffff88006bee64c8 ffffffff81f96b8a ffffffff00000001 1ffff1000d7dcc2c ffffed000d7dcc24 0000000000000001 0000000041b58ab3 ffffffff8598b510 ffffffff81f968f8 ffffffff850fee20 ffffffff85cff020 dffffc0000000000 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x292/0x398 lib/dump_stack.c:51 [] panic+0x1cb/0x3a9 kernel/panic.c:179 [] __warn+0x1c4/0x1e0 kernel/panic.c:542 [] warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:565 [] sysfs_warn_dup+0x8a/0xa0 fs/sysfs/dir.c:30 [] sysfs_create_dir_ns+0x178/0x1d0 fs/sysfs/dir.c:59 [< inline >] create_dir lib/kobject.c:71 [] kobject_add_internal+0x227/0xa60 lib/kobject.c:229 [< inline >] kobject_add_varg lib/kobject.c:366 [] kobject_add+0x139/0x220 lib/kobject.c:411 [] device_add+0x353/0x1660 drivers/base/core.c:1088 [] device_register+0x1d/0x20 drivers/base/core.c:1206 [] usb_create_ep_devs+0x163/0x260 drivers/usb/core/endpoint.c:195 [] create_intf_ep_devs+0x13b/0x200 drivers/usb/core/message.c:1030 [] usb_set_configuration+0x1083/0x18d0 drivers/usb/core/message.c:1937 [] generic_probe+0x6e/0xe0 drivers/usb/core/generic.c:172 [] usb_probe_device+0xaa/0xe0 drivers/usb/core/driver.c:263 This patch prevents the problem by checking for duplicate endpoint addresses during enumeration and skipping any duplicates. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/usb/core/config.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 3252bb2dcb8..d6481cb469c 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -207,6 +207,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, if (ifp->desc.bNumEndpoints >= num_ep) goto skip_to_next_endpoint_or_interface_descriptor; + /* Check for duplicate endpoint addresses */ + for (i = 0; i < ifp->desc.bNumEndpoints; ++i) { + if (ifp->endpoint[i].desc.bEndpointAddress == + d->bEndpointAddress) { + dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", + cfgno, inum, asnum, d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; + } + } + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; ++ifp->desc.bNumEndpoints; From 5765ee44877fbc13bc112878a807048b0e66a8ad Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 14 Dec 2016 14:55:56 -0500 Subject: [PATCH 229/252] USB: dummy-hcd: fix bug in stop_activity (handle ep0) commit bcdbeb844773333d2d1c08004f3b3e25921040e5 upstream. The stop_activity() routine in dummy-hcd is supposed to unlink all active requests for every endpoint, among other things. But it doesn't handle ep0. As a result, fuzz testing can generate a WARNING like the following: WARNING: CPU: 0 PID: 4410 at drivers/usb/gadget/udc/dummy_hcd.c:672 dummy_free_request+0x153/0x170 Modules linked in: CPU: 0 PID: 4410 Comm: syz-executor Not tainted 4.9.0-rc7+ #32 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006a64ed10 ffffffff81f96b8a ffffffff41b58ab3 1ffff1000d4c9d35 ffffed000d4c9d2d ffff880065f8ac00 0000000041b58ab3 ffffffff8598b510 ffffffff81f968f8 0000000041b58ab3 ffffffff859410e0 ffffffff813f0590 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x292/0x398 lib/dump_stack.c:51 [] __warn+0x19f/0x1e0 kernel/panic.c:550 [] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 [] dummy_free_request+0x153/0x170 drivers/usb/gadget/udc/dummy_hcd.c:672 [] usb_ep_free_request+0xc0/0x420 drivers/usb/gadget/udc/core.c:195 [] gadgetfs_unbind+0x131/0x190 drivers/usb/gadget/legacy/inode.c:1612 [] usb_gadget_remove_driver+0x10f/0x2b0 drivers/usb/gadget/udc/core.c:1228 [] usb_gadget_unregister_driver+0x154/0x240 drivers/usb/gadget/udc/core.c:1357 This patch fixes the problem by iterating over all the endpoints in the driver's ep array instead of iterating over the gadget's ep_list, which explicitly leaves out ep0. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/gadget/dummy_hcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c index ac0e79e2c2e..644c1053d65 100644 --- a/drivers/usb/gadget/dummy_hcd.c +++ b/drivers/usb/gadget/dummy_hcd.c @@ -266,7 +266,7 @@ static void nuke(struct dummy *dum, struct dummy_ep *ep) /* caller must hold lock */ static void stop_activity(struct dummy *dum) { - struct dummy_ep *ep; + int i; /* prevent any more requests */ dum->address = 0; @@ -274,8 +274,8 @@ static void stop_activity(struct dummy *dum) /* The timer is left running so that outstanding URBs can fail */ /* nuke any pending requests first, so driver i/o is quiesced */ - list_for_each_entry(ep, &dum->gadget.ep_list, ep.ep_list) - nuke(dum, ep); + for (i = 0; i < DUMMY_ENDPOINTS; ++i) + nuke(dum, &dum->ep[i]); /* driver now does any non-usb quiescing necessary */ } From 414989d25f3501383a60d21f2f6e7999ffb6f5ab Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 26 Jul 2016 15:22:17 -0700 Subject: [PATCH 230/252] mm/init: fix zone boundary creation commit 90cae1fe1c3540f791d5b8e025985fa5e699b2bb upstream. As a part of memory initialisation the architecture passes an array to free_area_init_nodes() which specifies the max PFN of each memory zone. This array is not necessarily monotonic (due to unused zones) so this array is parsed to build monotonic lists of the min and max PFN for each zone. ZONE_MOVABLE is special cased here as its limits are managed by the mm subsystem rather than the architecture. Unfortunately, this special casing is broken when ZONE_MOVABLE is the not the last zone in the zone list. The core of the issue is: if (i == ZONE_MOVABLE) continue; arch_zone_lowest_possible_pfn[i] = arch_zone_highest_possible_pfn[i-1]; As ZONE_MOVABLE is skipped the lowest_possible_pfn of the next zone will be set to zero. This patch fixes this bug by adding explicitly tracking where the next zone should start rather than relying on the contents arch_zone_highest_possible_pfn[]. Thie is low priority. To get bitten by this you need to enable a zone that appears after ZONE_MOVABLE in the zone_type enum. As far as I can tell this means running a kernel with ZONE_DEVICE or ZONE_CMA enabled, so I can't see this affecting too many people. I only noticed this because I've been fiddling with ZONE_DEVICE on powerpc and 4.6 broke my test kernel. This bug, in conjunction with the changes in Taku Izumi's kernelcore=mirror patch (d91749c1dda71) and powerpc being the odd architecture which initialises max_zone_pfn[] to ~0ul instead of 0 caused all of system memory to be placed into ZONE_DEVICE at boot, followed a panic since device memory cannot be used for kernel allocations. I've already submitted a patch to fix the powerpc specific bits, but I figured this should be fixed too. Link: http://lkml.kernel.org/r/1462435033-15601-1-git-send-email-oohall@gmail.com Signed-off-by: Oliver O'Halloran Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Arnd Bergmann Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- mm/page_alloc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 494a081ec5e..4e892753929 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5060,15 +5060,18 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) sizeof(arch_zone_lowest_possible_pfn)); memset(arch_zone_highest_possible_pfn, 0, sizeof(arch_zone_highest_possible_pfn)); - arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions(); - arch_zone_highest_possible_pfn[0] = max_zone_pfn[0]; - for (i = 1; i < MAX_NR_ZONES; i++) { + + start_pfn = find_min_pfn_with_active_regions(); + + for (i = 0; i < MAX_NR_ZONES; i++) { if (i == ZONE_MOVABLE) continue; - arch_zone_lowest_possible_pfn[i] = - arch_zone_highest_possible_pfn[i-1]; - arch_zone_highest_possible_pfn[i] = - max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]); + + end_pfn = max(max_zone_pfn[i], start_pfn); + arch_zone_lowest_possible_pfn[i] = start_pfn; + arch_zone_highest_possible_pfn[i] = end_pfn; + + start_pfn = end_pfn; } arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0; arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0; From 801c8a021ef6a9bd05f3976f40e8b314a248ca0a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Jan 2017 08:11:44 -0800 Subject: [PATCH 231/252] can: Fix kernel panic at security_sock_rcv_skb commit f1712c73714088a7252d276a57126d56c7d37e64 upstream. Zhang Yanmin reported crashes [1] and provided a patch adding a synchronize_rcu() call in can_rx_unregister() The main problem seems that the sockets themselves are not RCU protected. If CAN uses RCU for delivery, then sockets should be freed only after one RCU grace period. Recent kernels could use sock_set_flag(sk, SOCK_RCU_FREE), but let's ease stable backports with the following fix instead. [1] BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] selinux_socket_sock_rcv_skb+0x65/0x2a0 Call Trace: [] security_sock_rcv_skb+0x4c/0x60 [] sk_filter+0x41/0x210 [] sock_queue_rcv_skb+0x53/0x3a0 [] raw_rcv+0x2a3/0x3c0 [] can_rcv_filter+0x12b/0x370 [] can_receive+0xd9/0x120 [] can_rcv+0xab/0x100 [] __netif_receive_skb_core+0xd8c/0x11f0 [] __netif_receive_skb+0x24/0xb0 [] process_backlog+0x127/0x280 [] net_rx_action+0x33b/0x4f0 [] __do_softirq+0x184/0x440 [] do_softirq_own_stack+0x1c/0x30 [] do_softirq.part.18+0x3b/0x40 [] do_softirq+0x1d/0x20 [] netif_rx_ni+0xe5/0x110 [] slcan_receive_buf+0x507/0x520 [] flush_to_ldisc+0x21c/0x230 [] process_one_work+0x24f/0x670 [] worker_thread+0x9d/0x6f0 [] ? rescuer_thread+0x480/0x480 [] kthread+0x12c/0x150 [] ret_from_fork+0x3f/0x70 Reported-by: Zhang Yanmin Signed-off-by: Eric Dumazet Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- include/linux/can/core.h | 7 +++---- net/can/af_can.c | 12 ++++++++++-- net/can/af_can.h | 3 ++- net/can/bcm.c | 4 ++-- net/can/gw.c | 2 +- net/can/raw.c | 4 ++-- 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 78c6c52073a..6bdc00b6df0 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -45,10 +45,9 @@ struct can_proto { extern int can_proto_register(const struct can_proto *cp); extern void can_proto_unregister(const struct can_proto *cp); -extern int can_rx_register(struct net_device *dev, canid_t can_id, - canid_t mask, - void (*func)(struct sk_buff *, void *), - void *data, char *ident); +int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, + void (*func)(struct sk_buff *, void *), + void *data, char *ident, struct sock *sk); extern void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, diff --git a/net/can/af_can.c b/net/can/af_can.c index d3668c55b08..34064aa88f0 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -425,6 +425,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * @func: callback function on filter match * @data: returned parameter for callback function * @ident: string for calling module indentification + * @sk: socket pointer (might be NULL) * * Description: * Invokes the callback function with the received sk_buff and the given @@ -448,7 +449,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, */ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, - char *ident) + char *ident, struct sock *sk) { struct receiver *r; struct hlist_head *rl; @@ -476,6 +477,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, r->func = func; r->data = data; r->ident = ident; + r->sk = sk; hlist_add_head_rcu(&r->list, rl); d->entries++; @@ -500,8 +502,11 @@ EXPORT_SYMBOL(can_rx_register); static void can_rx_delete_receiver(struct rcu_head *rp) { struct receiver *r = container_of(rp, struct receiver, rcu); + struct sock *sk = r->sk; kmem_cache_free(rcv_cache, r); + if (sk) + sock_put(sk); } /** @@ -576,8 +581,11 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, spin_unlock(&can_rcvlists_lock); /* schedule the receiver item for deletion */ - if (r) + if (r) { + if (r->sk) + sock_hold(r->sk); call_rcu(&r->rcu, can_rx_delete_receiver); + } } EXPORT_SYMBOL(can_rx_unregister); diff --git a/net/can/af_can.h b/net/can/af_can.h index 1dccb4c3389..0e95be42358 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -50,13 +50,14 @@ struct receiver { struct hlist_node list; - struct rcu_head rcu; canid_t can_id; canid_t mask; unsigned long matches; void (*func)(struct sk_buff *, void *); void *data; char *ident; + struct sock *sk; + struct rcu_head rcu; }; enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX }; diff --git a/net/can/bcm.c b/net/can/bcm.c index dd0781c49eb..725ce812cfb 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1169,7 +1169,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, err = can_rx_register(dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, - "bcm"); + "bcm", sk); op->rx_reg_dev = dev; dev_put(dev); @@ -1178,7 +1178,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } else err = can_rx_register(NULL, op->can_id, REGMASK(op->can_id), - bcm_rx_handler, op, "bcm"); + bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ list_del(&op->list); diff --git a/net/can/gw.c b/net/can/gw.c index de25455b4e3..2ad8aa4f9f0 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -435,7 +435,7 @@ static inline int cgw_register_filter(struct cgw_job *gwj) { return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, - gwj, "gw"); + gwj, "gw", NULL); } static inline void cgw_unregister_filter(struct cgw_job *gwj) diff --git a/net/can/raw.c b/net/can/raw.c index 1085e65f848..f4d86485571 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -168,7 +168,7 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk, for (i = 0; i < count; i++) { err = can_rx_register(dev, filter[i].can_id, filter[i].can_mask, - raw_rcv, sk, "raw"); + raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) @@ -189,7 +189,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, if (err_mask) err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG, - raw_rcv, sk, "raw"); + raw_rcv, sk, "raw", sk); return err; } From 9391c802ff00fc5873e91ce26dd08c9f4e2a8e92 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 3 Jun 2016 17:09:22 -0700 Subject: [PATCH 232/252] Drivers: hv: avoid vfree() on crash commit a9f61ca793becabdefab03b77568d6c6f8c1bc79 upstream. When we crash from NMI context (e.g. after NMI injection from host when 'sysctl -w kernel.unknown_nmi_panic=1' is set) we hit kernel BUG at mm/vmalloc.c:1530! as vfree() is denied. While the issue could be solved with in_nmi() check instead I opted for skipping vfree on all sorts of crashes to reduce the amount of work which can cause consequent crashes. We don't really need to free anything on crash. [js] no tsc and kexec in 3.12 yet Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Cc: Sumit Semwal Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/hv/hv.c | 5 +++-- drivers/hv/hyperv_vmbus.h | 2 +- drivers/hv/vmbus_drv.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index ae4923756d9..b1039552b62 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -193,7 +193,7 @@ cleanup: * * This routine is called normally during driver unloading or exiting. */ -void hv_cleanup(void) +void hv_cleanup(bool crash) { union hv_x64_msr_hypercall_contents hypercall_msr; @@ -203,7 +203,8 @@ void hv_cleanup(void) if (hv_context.hypercall_page) { hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - vfree(hv_context.hypercall_page); + if (!crash) + vfree(hv_context.hypercall_page); hv_context.hypercall_page = NULL; } } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 12f2f9e989f..11d4e6222f5 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -519,7 +519,7 @@ extern struct hv_context hv_context; extern int hv_init(void); -extern void hv_cleanup(void); +extern void hv_cleanup(bool crash); extern int hv_post_message(union hv_connection_id connection_id, enum hv_message_type message_type, diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 80754e2d808..3190a1fc7bc 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -618,7 +618,7 @@ err_unregister: bus_unregister(&hv_bus); err_cleanup: - hv_cleanup(); + hv_cleanup(false); return ret; } @@ -841,7 +841,7 @@ static void __exit vmbus_exit(void) free_irq(irq, hv_acpi_dev); vmbus_free_channels(); bus_unregister(&hv_bus); - hv_cleanup(); + hv_cleanup(false); acpi_bus_unregister_driver(&vmbus_acpi_driver); hv_cpu_hotplug_quirk(false); } From bb4884f2eee5e7acae86541578210bb6802797ae Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 28 Jan 2016 09:22:44 -0200 Subject: [PATCH 233/252] xc2028: avoid use after free commit 8dfbcc4351a0b6d2f2d77f367552f48ffefafe18 upstream. If struct xc2028_config is passed without a firmware name, the following trouble may happen: [11009.907205] xc2028 5-0061: type set to XCeive xc2028/xc3028 tuner [11009.907491] ================================================================== [11009.907750] BUG: KASAN: use-after-free in strcmp+0x96/0xb0 at addr ffff8803bd78ab40 [11009.907992] Read of size 1 by task modprobe/28992 [11009.907994] ============================================================================= [11009.907997] BUG kmalloc-16 (Tainted: G W ): kasan: bad access detected [11009.907999] ----------------------------------------------------------------------------- [11009.908008] INFO: Allocated in xhci_urb_enqueue+0x214/0x14c0 [xhci_hcd] age=0 cpu=3 pid=28992 [11009.908012] ___slab_alloc+0x581/0x5b0 [11009.908014] __slab_alloc+0x51/0x90 [11009.908017] __kmalloc+0x27b/0x350 [11009.908022] xhci_urb_enqueue+0x214/0x14c0 [xhci_hcd] [11009.908026] usb_hcd_submit_urb+0x1e8/0x1c60 [11009.908029] usb_submit_urb+0xb0e/0x1200 [11009.908032] usb_serial_generic_write_start+0xb6/0x4c0 [11009.908035] usb_serial_generic_write+0x92/0xc0 [11009.908039] usb_console_write+0x38a/0x560 [11009.908045] call_console_drivers.constprop.14+0x1ee/0x2c0 [11009.908051] console_unlock+0x40d/0x900 [11009.908056] vprintk_emit+0x4b4/0x830 [11009.908061] vprintk_default+0x1f/0x30 [11009.908064] printk+0x99/0xb5 [11009.908067] kasan_report_error+0x10a/0x550 [11009.908070] __asan_report_load1_noabort+0x43/0x50 [11009.908074] INFO: Freed in xc2028_set_config+0x90/0x630 [tuner_xc2028] age=1 cpu=3 pid=28992 [11009.908077] __slab_free+0x2ec/0x460 [11009.908080] kfree+0x266/0x280 [11009.908083] xc2028_set_config+0x90/0x630 [tuner_xc2028] [11009.908086] xc2028_attach+0x310/0x8a0 [tuner_xc2028] [11009.908090] em28xx_attach_xc3028.constprop.7+0x1f9/0x30d [em28xx_dvb] [11009.908094] em28xx_dvb_init.part.3+0x8e4/0x5cf4 [em28xx_dvb] [11009.908098] em28xx_dvb_init+0x81/0x8a [em28xx_dvb] [11009.908101] em28xx_register_extension+0xd9/0x190 [em28xx] [11009.908105] em28xx_dvb_register+0x10/0x1000 [em28xx_dvb] [11009.908108] do_one_initcall+0x141/0x300 [11009.908111] do_init_module+0x1d0/0x5ad [11009.908114] load_module+0x6666/0x9ba0 [11009.908117] SyS_finit_module+0x108/0x130 [11009.908120] entry_SYSCALL_64_fastpath+0x16/0x76 [11009.908123] INFO: Slab 0xffffea000ef5e280 objects=25 used=25 fp=0x (null) flags=0x2ffff8000004080 [11009.908126] INFO: Object 0xffff8803bd78ab40 @offset=2880 fp=0x0000000000000001 [11009.908130] Bytes b4 ffff8803bd78ab30: 01 00 00 00 2a 07 00 00 9d 28 00 00 01 00 00 00 ....*....(...... [11009.908133] Object ffff8803bd78ab40: 01 00 00 00 00 00 00 00 b0 1d c3 6a 00 88 ff ff ...........j.... [11009.908137] CPU: 3 PID: 28992 Comm: modprobe Tainted: G B W 4.5.0-rc1+ #43 [11009.908140] Hardware name: /NUC5i7RYB, BIOS RYBDWi35.86A.0350.2015.0812.1722 08/12/2015 [11009.908142] ffff8803bd78a000 ffff8802c273f1b8 ffffffff81932007 ffff8803c6407a80 [11009.908148] ffff8802c273f1e8 ffffffff81556759 ffff8803c6407a80 ffffea000ef5e280 [11009.908153] ffff8803bd78ab40 dffffc0000000000 ffff8802c273f210 ffffffff8155ccb4 [11009.908158] Call Trace: [11009.908162] [] dump_stack+0x4b/0x64 [11009.908165] [] print_trailer+0xf9/0x150 [11009.908168] [] object_err+0x34/0x40 [11009.908171] [] kasan_report_error+0x230/0x550 [11009.908175] [] ? trace_hardirqs_off_caller+0x21/0x290 [11009.908179] [] ? kasan_unpoison_shadow+0x36/0x50 [11009.908182] [] __asan_report_load1_noabort+0x43/0x50 [11009.908185] [] ? __asan_register_globals+0x50/0xa0 [11009.908189] [] ? strcmp+0x96/0xb0 [11009.908192] [] strcmp+0x96/0xb0 [11009.908196] [] xc2028_set_config+0x15c/0x630 [tuner_xc2028] [11009.908200] [] xc2028_attach+0x310/0x8a0 [tuner_xc2028] [11009.908203] [] ? memset+0x28/0x30 [11009.908206] [] ? xc2028_set_config+0x630/0x630 [tuner_xc2028] [11009.908211] [] em28xx_attach_xc3028.constprop.7+0x1f9/0x30d [em28xx_dvb] [11009.908215] [] ? em28xx_dvb_init.part.3+0x37c/0x5cf4 [em28xx_dvb] [11009.908219] [] ? hauppauge_hvr930c_init+0x487/0x487 [em28xx_dvb] [11009.908222] [] ? lgdt330x_attach+0x1cc/0x370 [lgdt330x] [11009.908226] [] ? i2c_read_demod_bytes.isra.2+0x210/0x210 [lgdt330x] [11009.908230] [] ? ref_module.part.15+0x10/0x10 [11009.908233] [] ? module_assert_mutex_or_preempt+0x80/0x80 [11009.908238] [] em28xx_dvb_init.part.3+0x8e4/0x5cf4 [em28xx_dvb] [11009.908242] [] ? em28xx_attach_xc3028.constprop.7+0x30d/0x30d [em28xx_dvb] [11009.908245] [] ? string+0x14d/0x1f0 [11009.908249] [] ? symbol_string+0xff/0x1a0 [11009.908253] [] ? uuid_string+0x6f0/0x6f0 [11009.908257] [] ? __kernel_text_address+0x7e/0xa0 [11009.908260] [] ? print_context_stack+0x7f/0xf0 [11009.908264] [] ? __module_address+0xb6/0x360 [11009.908268] [] ? is_ftrace_trampoline+0x99/0xe0 [11009.908271] [] ? __kernel_text_address+0x7e/0xa0 [11009.908275] [] ? debug_check_no_locks_freed+0x290/0x290 [11009.908278] [] ? dump_trace+0x11b/0x300 [11009.908282] [] ? em28xx_register_extension+0x23/0x190 [em28xx] [11009.908285] [] ? trace_hardirqs_off_caller+0x21/0x290 [11009.908289] [] ? trace_hardirqs_on_caller+0x16/0x590 [11009.908292] [] ? trace_hardirqs_on+0xd/0x10 [11009.908296] [] ? em28xx_register_extension+0x23/0x190 [em28xx] [11009.908299] [] ? mutex_trylock+0x400/0x400 [11009.908302] [] ? do_one_initcall+0x131/0x300 [11009.908306] [] ? call_rcu_sched+0x17/0x20 [11009.908309] [] ? put_object+0x48/0x70 [11009.908314] [] em28xx_dvb_init+0x81/0x8a [em28xx_dvb] [11009.908317] [] em28xx_register_extension+0xd9/0x190 [em28xx] [11009.908320] [] ? 0xffffffffa0150000 [11009.908324] [] em28xx_dvb_register+0x10/0x1000 [em28xx_dvb] [11009.908327] [] do_one_initcall+0x141/0x300 [11009.908330] [] ? try_to_run_init_process+0x40/0x40 [11009.908333] [] ? trace_hardirqs_on_caller+0x16/0x590 [11009.908337] [] ? kasan_unpoison_shadow+0x36/0x50 [11009.908340] [] ? kasan_unpoison_shadow+0x36/0x50 [11009.908343] [] ? kasan_unpoison_shadow+0x36/0x50 [11009.908346] [] ? __asan_register_globals+0x87/0xa0 [11009.908350] [] do_init_module+0x1d0/0x5ad [11009.908353] [] load_module+0x6666/0x9ba0 [11009.908356] [] ? symbol_put_addr+0x50/0x50 [11009.908361] [] ? em28xx_dvb_init.part.3+0x5989/0x5cf4 [em28xx_dvb] [11009.908366] [] ? module_frob_arch_sections+0x20/0x20 [11009.908369] [] ? open_exec+0x50/0x50 [11009.908374] [] ? ns_capable+0x5b/0xd0 [11009.908377] [] SyS_finit_module+0x108/0x130 [11009.908379] [] ? SyS_init_module+0x1f0/0x1f0 [11009.908383] [] ? lockdep_sys_exit_thunk+0x12/0x14 [11009.908394] [] entry_SYSCALL_64_fastpath+0x16/0x76 [11009.908396] Memory state around the buggy address: [11009.908398] ffff8803bd78aa00: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc [11009.908401] ffff8803bd78aa80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [11009.908403] >ffff8803bd78ab00: fc fc fc fc fc fc fc fc 00 00 fc fc fc fc fc fc [11009.908405] ^ [11009.908407] ffff8803bd78ab80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [11009.908409] ffff8803bd78ac00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [11009.908411] ================================================================== In order to avoid it, let's set the cached value of the firmware name to NULL after freeing it. While here, return an error if the memory allocation fails. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/tuners/tuner-xc2028.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c index 9771cd83c06..38afc54ef34 100644 --- a/drivers/media/tuners/tuner-xc2028.c +++ b/drivers/media/tuners/tuner-xc2028.c @@ -1385,11 +1385,12 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) * in order to avoid troubles during device release. */ kfree(priv->ctrl.fname); + priv->ctrl.fname = NULL; memcpy(&priv->ctrl, p, sizeof(priv->ctrl)); if (p->fname) { priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL); if (priv->ctrl.fname == NULL) - rc = -ENOMEM; + return -ENOMEM; } /* From d9a854e53bed006a57a64d67888486d57f3490d0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Feb 2016 13:34:00 -0200 Subject: [PATCH 234/252] xc2028: unlock on error in xc2028_set_config() commit 210bd104c6acd31c3c6b8b075b3f12d4a9f6b60d upstream. We have to unlock before returning -ENOMEM. Fixes: 8dfbcc4351a0 ('[media] xc2028: avoid use after free') Signed-off-by: Dan Carpenter Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/tuners/tuner-xc2028.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c index 38afc54ef34..ab0bfc46f99 100644 --- a/drivers/media/tuners/tuner-xc2028.c +++ b/drivers/media/tuners/tuner-xc2028.c @@ -1389,8 +1389,10 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) memcpy(&priv->ctrl, p, sizeof(priv->ctrl)); if (p->fname) { priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL); - if (priv->ctrl.fname == NULL) - return -ENOMEM; + if (priv->ctrl.fname == NULL) { + rc = -ENOMEM; + goto unlock; + } } /* @@ -1422,6 +1424,7 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) } else priv->state = XC2028_WAITING_FIRMWARE; } +unlock: mutex_unlock(&priv->lock); return rc; From 3bdb157105639fdcdff744432760c3f25c545678 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 17 Nov 2016 10:49:31 +0100 Subject: [PATCH 235/252] xc2028: Fix use-after-free bug properly commit 22a1e7783e173ab3d86018eb590107d68df46c11 upstream. The commit 8dfbcc4351a0 ("[media] xc2028: avoid use after free") tried to address the reported use-after-free by clearing the reference. However, it's clearing the wrong pointer; it sets NULL to priv->ctrl.fname, but it's anyway overwritten by the next line memcpy(&priv->ctrl, p, sizeof(priv->ctrl)). OTOH, the actual code accessing the freed string is the strcmp() call with priv->fname: if (!firmware_name[0] && p->fname && priv->fname && strcmp(p->fname, priv->fname)) free_firmware(priv); where priv->fname points to the previous file name, and this was already freed by kfree(). For fixing the bug properly, this patch does the following: - Keep the copy of firmware file name in only priv->fname, priv->ctrl.fname isn't changed; - The allocation is done only when the firmware gets loaded; - The kfree() is called in free_firmware() commonly Fixes: commit 8dfbcc4351a0 ('[media] xc2028: avoid use after free') Signed-off-by: Takashi Iwai Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/tuners/tuner-xc2028.c | 36 +++++++++++++---------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c index ab0bfc46f99..3a615e4c499 100644 --- a/drivers/media/tuners/tuner-xc2028.c +++ b/drivers/media/tuners/tuner-xc2028.c @@ -289,6 +289,14 @@ static void free_firmware(struct xc2028_data *priv) int i; tuner_dbg("%s called\n", __func__); + /* free allocated f/w string */ + if (priv->fname != firmware_name) + kfree(priv->fname); + priv->fname = NULL; + + priv->state = XC2028_NO_FIRMWARE; + memset(&priv->cur_fw, 0, sizeof(priv->cur_fw)); + if (!priv->firm) return; @@ -299,9 +307,6 @@ static void free_firmware(struct xc2028_data *priv) priv->firm = NULL; priv->firm_size = 0; - priv->state = XC2028_NO_FIRMWARE; - - memset(&priv->cur_fw, 0, sizeof(priv->cur_fw)); } static int load_all_firmwares(struct dvb_frontend *fe, @@ -890,9 +895,9 @@ read_not_reliable: return 0; fail: + free_firmware(priv); priv->state = XC2028_SLEEP; - memset(&priv->cur_fw, 0, sizeof(priv->cur_fw)); if (retry_count < 8) { msleep(50); retry_count++; @@ -1314,11 +1319,8 @@ static int xc2028_dvb_release(struct dvb_frontend *fe) mutex_lock(&xc2028_list_mutex); /* only perform final cleanup if this is the last instance */ - if (hybrid_tuner_report_instance_count(priv) == 1) { + if (hybrid_tuner_report_instance_count(priv) == 1) free_firmware(priv); - kfree(priv->ctrl.fname); - priv->ctrl.fname = NULL; - } if (priv) hybrid_tuner_release_state(priv); @@ -1381,19 +1383,8 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) /* * Copy the config data. - * For the firmware name, keep a local copy of the string, - * in order to avoid troubles during device release. */ - kfree(priv->ctrl.fname); - priv->ctrl.fname = NULL; memcpy(&priv->ctrl, p, sizeof(priv->ctrl)); - if (p->fname) { - priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL); - if (priv->ctrl.fname == NULL) { - rc = -ENOMEM; - goto unlock; - } - } /* * If firmware name changed, frees firmware. As free_firmware will @@ -1408,10 +1399,15 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) if (priv->state == XC2028_NO_FIRMWARE) { if (!firmware_name[0]) - priv->fname = priv->ctrl.fname; + priv->fname = kstrdup(p->fname, GFP_KERNEL); else priv->fname = firmware_name; + if (!priv->fname) { + rc = -ENOMEM; + goto unlock; + } + rc = request_firmware_nowait(THIS_MODULE, 1, priv->fname, priv->i2c_props.adap->dev.parent, From 72bbf335e7aad09c88c50dbdd238f4faabd12174 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jan 2017 16:43:06 -0800 Subject: [PATCH 236/252] ipv6: fix ip6_tnl_parse_tlv_enc_lim() commit fbfa743a9d2a0ffa24251764f10afc13eb21e739 upstream. This function suffers from multiple issues. First one is that pskb_may_pull() may reallocate skb->head, so the 'raw' pointer needs either to be reloaded or not used at all. Second issue is that NEXTHDR_DEST handling does not validate that the options are present in skb->data, so we might read garbage or access non existent memory. With help from Willem de Bruijn. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/ip6_tunnel.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 12984e6794b..33bf1c1f8ee 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -394,18 +394,19 @@ ip6_tnl_dev_uninit(struct net_device *dev) __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; - __u8 nexthdr = ipv6h->nexthdr; - __u16 off = sizeof (*ipv6h); + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; + unsigned int nhoff = raw - skb->data; + unsigned int off = nhoff + sizeof(*ipv6h); + u8 next, nexthdr = ipv6h->nexthdr; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { - __u16 optlen = 0; struct ipv6_opt_hdr *hdr; - if (raw + off + sizeof (*hdr) > skb->data && - !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) + u16 optlen; + + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; - hdr = (struct ipv6_opt_hdr *) (raw + off); + hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; if (frag_hdr->frag_off) @@ -416,20 +417,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) } else { optlen = ipv6_optlen(hdr); } + /* cache hdr->nexthdr, since pskb_may_pull() might + * invalidate hdr + */ + next = hdr->nexthdr; if (nexthdr == NEXTHDR_DEST) { - __u16 i = off + 2; + u16 i = 2; + + /* Remember : hdr is no longer valid at this point. */ + if (!pskb_may_pull(skb, off + optlen)) + break; + while (1) { struct ipv6_tlv_tnl_enc_lim *tel; /* No more room for encapsulation limit */ - if (i + sizeof (*tel) > off + optlen) + if (i + sizeof(*tel) > optlen) break; - tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; + tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i; /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) - return i; + return i + off - nhoff; /* else jump to next option */ if (tel->type) i += tel->length + 2; @@ -437,7 +447,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) i++; } } - nexthdr = hdr->nexthdr; + nexthdr = next; off += optlen; } return 0; From e246c0986e5c48b97d1532720de2ff4dfaccc3ac Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 1 Feb 2017 11:46:32 +0300 Subject: [PATCH 237/252] ipv6: pointer math error in ip6_tnl_parse_tlv_enc_lim() commit 63117f09c768be05a0bf465911297dc76394f686 upstream. Casting is a high precedence operation but "off" and "i" are in terms of bytes so we need to have some parenthesis here. Fixes: fbfa743a9d2a ("ipv6: fix ip6_tnl_parse_tlv_enc_lim()") Signed-off-by: Dan Carpenter Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 33bf1c1f8ee..61a89951e7f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -435,7 +435,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) if (i + sizeof(*tel) > optlen) break; - tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i; + tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) From abcfcd047b66d66de0cbe2f79b30127274154623 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 2 Jan 2014 13:20:12 +0800 Subject: [PATCH 238/252] ipv6: fix the use of pcpu_tstats in ip6_tunnel commit abb6013cca147ad940b0e9fee260d2d9e93b7018 upstream. when read/write the 64bit data, the correct lock should be hold. Fixes: 87b6d218f3adb ("tunnel: implement 64 bits statistics") Cc: Stephen Hemminger Cc: Eric Dumazet Signed-off-by: Li RongQing Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/ip6_tunnel.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 61a89951e7f..efc77acbe9e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -103,16 +103,25 @@ struct ip6_tnl_net { static struct net_device_stats *ip6_get_stats(struct net_device *dev) { - struct pcpu_tstats sum = { 0 }; + struct pcpu_tstats tmp, sum = { 0 }; int i; for_each_possible_cpu(i) { + unsigned int start; const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + tmp.rx_packets = tstats->rx_packets; + tmp.rx_bytes = tstats->rx_bytes; + tmp.tx_packets = tstats->tx_packets; + tmp.tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + sum.rx_packets += tmp.rx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.tx_bytes += tmp.tx_bytes; } dev->stats.rx_packets = sum.rx_packets; dev->stats.rx_bytes = sum.rx_bytes; @@ -832,8 +841,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); From 1f7cb732afeda60fb18012a79759400f611ab31f Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 6 Feb 2017 18:10:31 -0200 Subject: [PATCH 239/252] sctp: avoid BUG_ON on sctp_wait_for_sndbuf commit 2dcab598484185dea7ec22219c76dcdd59e3cb90 upstream. Alexander Popov reported that an application may trigger a BUG_ON in sctp_wait_for_sndbuf if the socket tx buffer is full, a thread is waiting on it to queue more data and meanwhile another thread peels off the association being used by the first thread. This patch replaces the BUG_ON call with a proper error handling. It will return -EPIPE to the original sendmsg call, similarly to what would have been done if the association wasn't found in the first place. Acked-by: Alexander Popov Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ede7c540ea2..152ab4be820 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6724,7 +6724,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, */ sctp_release_sock(sk); current_timeo = schedule_timeout(current_timeo); - BUG_ON(sk != asoc->base.sk); + if (sk != asoc->base.sk) + goto do_error; sctp_lock_sock(sk); *timeo_p = current_timeo; From 620d73a9b795eea14c24b743423633fe54d65c98 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 23 Feb 2017 09:31:18 -0300 Subject: [PATCH 240/252] sctp: deny peeloff operation on asocs with threads sleeping on it commit dfcb9f4f99f1e9a49e43398a7bfbf56927544af1 upstream. commit 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf") attempted to avoid a BUG_ON call when the association being used for a sendmsg() is blocked waiting for more sndbuf and another thread did a peeloff operation on such asoc, moving it to another socket. As Ben Hutchings noticed, then in such case it would return without locking back the socket and would cause two unlocks in a row. Further analysis also revealed that it could allow a double free if the application managed to peeloff the asoc that is created during the sendmsg call, because then sctp_sendmsg() would try to free the asoc that was created only for that call. This patch takes another approach. It will deny the peeloff operation if there is a thread sleeping on the asoc, so this situation doesn't exist anymore. This avoids the issues described above and also honors the syscalls that are already being handled (it can be multiple sendmsg calls). Joint work with Xin Long. Fixes: 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf") Cc: Alexander Popov Cc: Ben Hutchings Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/sctp/socket.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 152ab4be820..4178cf387d2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4310,6 +4310,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) if (!asoc) return -EINVAL; + /* If there is a thread waiting on more sndbuf space for + * sending on this asoc, it cannot be peeled. + */ + if (waitqueue_active(&asoc->wait)) + return -EBUSY; + /* An association cannot be branched off from an already peeled-off * socket, nor is this supported for tcp style sockets. */ @@ -6724,8 +6730,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, */ sctp_release_sock(sk); current_timeo = schedule_timeout(current_timeo); - if (sk != asoc->base.sk) - goto do_error; sctp_lock_sock(sk); *timeo_p = current_timeo; From dd8db8538c8aee325e08d65843d0824e4ee03938 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:17 +0800 Subject: [PATCH 241/252] KVM: x86: clear bus pointer when destroyed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit df630b8c1e851b5e265dc2ca9c87222e342c093b upstream. When releasing the bus, let's clear the bus pointers to mark it out. If any further device unregister happens on this bus, we know that we're done if we found the bus being released already. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář Signed-off-by: Willy Tarreau --- virt/kvm/kvm_main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f71c4ad425c..e9128725ff1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -607,8 +607,10 @@ static void kvm_destroy_vm(struct kvm *kvm) list_del(&kvm->vm_list); raw_spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); - for (i = 0; i < KVM_NR_BUSES; i++) + for (i = 0; i < KVM_NR_BUSES; i++) { kvm_io_bus_destroy(kvm->buses[i]); + kvm->buses[i] = NULL; + } kvm_coalesced_mmio_free(kvm); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); @@ -2959,6 +2961,14 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + + /* + * It's possible the bus being released before hand. If so, + * we're done here. + */ + if (!bus) + return 0; + r = -ENOENT; for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { From d8abb8b8b7035e1c01a4630e71ab3aae619d774a Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Sat, 25 May 2013 06:44:15 +0800 Subject: [PATCH 242/252] kvm: exclude ioeventfd from counting kvm_io_range limit commit 6ea34c9b78c10289846db0abeebd6b84d5aca084 upstream. We can easily reach the 1000 limit by start VM with a couple hundred I/O devices (multifunction=on). The hardcode limit already been adjusted 3 times (6 ~ 200 ~ 300 ~ 1000). In userspace, we already have maximum file descriptor to limit ioeventfd count. But kvm_io_bus devices also are used for pit, pic, ioapic, coalesced_mmio. They couldn't be limited by maximum file descriptor. Currently only ioeventfds take too much kvm_io_bus devices, so just exclude it from counting kvm_io_range limit. Also fixed one indent issue in kvm_host.h Signed-off-by: Amos Kong Reviewed-by: Stefan Hajnoczi Signed-off-by: Gleb Natapov [wt: next patch depends on this one] Signed-off-by: Willy Tarreau --- include/linux/kvm_host.h | 3 ++- virt/kvm/eventfd.c | 2 ++ virt/kvm/kvm_main.c | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8db53cfaccd..cbe9083128a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -145,7 +145,8 @@ struct kvm_io_range { #define NR_IOBUS_DEVS 1000 struct kvm_io_bus { - int dev_count; + int dev_count; + int ioeventfd_count; struct kvm_io_range range[]; }; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 64ee720b75c..1550637d1b1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -753,6 +753,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (ret < 0) goto unlock_fail; + kvm->buses[bus_idx]->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); mutex_unlock(&kvm->slots_lock); @@ -798,6 +799,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); + kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e9128725ff1..36d14e50f25 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2936,7 +2936,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - if (bus->dev_count > NR_IOBUS_DEVS - 1) + /* exclude ioeventfd which is limited by maximum fd */ + if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * From 211bcbcc7866cc4c0243fa04e664866b3d5b4d73 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 23 Mar 2017 18:24:19 +0100 Subject: [PATCH 243/252] KVM: kvm_io_bus_unregister_dev() should never fail commit 90db10434b163e46da413d34db8d0e77404cc645 upstream. No caller currently checks the return value of kvm_io_bus_unregister_dev(). This is evil, as all callers silently go on freeing their device. A stale reference will remain in the io_bus, getting at least used again, when the iobus gets teared down on kvm_destroy_vm() - leading to use after free errors. There is nothing the callers could do, except retrying over and over again. So let's simply remove the bus altogether, print an error and make sure no one can access this broken bus again (returning -ENOMEM on any attempt to access it). Fixes: e93f8a0f821e ("KVM: convert io_bus to SRCU") Reported-by: Dmitry Vyukov Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini [wt: no kvm_io_bus_read_cookie in 3.10, slightly different constructs] Signed-off-by: Willy Tarreau --- include/linux/kvm_host.h | 4 ++-- virt/kvm/eventfd.c | 3 ++- virt/kvm/kvm_main.c | 38 +++++++++++++++++++++----------------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cbe9083128a..71bcaf585ed 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -163,8 +163,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev); +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev); #ifdef CONFIG_KVM_ASYNC_PF struct kvm_async_pf { diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 1550637d1b1..362908c5f6c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -799,7 +799,8 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - kvm->buses[bus_idx]->ioeventfd_count--; + if (kvm->buses[bus_idx]) + kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 36d14e50f25..0715673b696 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -608,7 +608,8 @@ static void kvm_destroy_vm(struct kvm *kvm) raw_spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - kvm_io_bus_destroy(kvm->buses[i]); + if (kvm->buses[i]) + kvm_io_bus_destroy(kvm->buses[i]); kvm->buses[i] = NULL; } kvm_coalesced_mmio_free(kvm); @@ -2887,6 +2888,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + if (!bus) + return -ENOMEM; idx = kvm_io_bus_get_first_dev(bus, addr, len); if (idx < 0) return -EOPNOTSUPP; @@ -2915,6 +2918,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + if (!bus) + return -ENOMEM; idx = kvm_io_bus_get_first_dev(bus, addr, len); if (idx < 0) return -EOPNOTSUPP; @@ -2936,6 +2941,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + if (!bus) + return -ENOMEM; + /* exclude ioeventfd which is limited by maximum fd */ if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; @@ -2955,45 +2963,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, } /* Caller must hold slots_lock. */ -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev) { - int i, r; + int i; struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - - /* - * It's possible the bus being released before hand. If so, - * we're done here. - */ if (!bus) - return 0; + return; - r = -ENOENT; for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { - r = 0; break; } - if (r) - return r; + if (i == bus->dev_count) + return; new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); - if (!new_bus) - return -ENOMEM; + if (!new_bus) { + pr_err("kvm: failed to shrink bus, removing it completely\n"); + goto broken; + } memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); new_bus->dev_count--; memcpy(new_bus->range + i, bus->range + i + 1, (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); +broken: rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); - return r; + return; } static struct notifier_block kvm_cpu_notifier = { From a9511e79f774fb01c4e06722d375059f1f7431e1 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 26 Nov 2015 19:28:26 +0100 Subject: [PATCH 244/252] TTY: n_hdlc, fix lockdep false positive commit e9b736d88af1a143530565929390cadf036dc799 upstream. The class of 4 n_hdls buf locks is the same because a single function n_hdlc_buf_list_init is used to init all the locks. But since flush_tx_queue takes n_hdlc->tx_buf_list.spinlock and then calls n_hdlc_buf_put which takes n_hdlc->tx_free_buf_list.spinlock, lockdep emits a warning: ============================================= [ INFO: possible recursive locking detected ] 4.3.0-25.g91e30a7-default #1 Not tainted --------------------------------------------- a.out/1248 is trying to acquire lock: (&(&list->spinlock)->rlock){......}, at: [] n_hdlc_buf_put+0x20/0x60 [n_hdlc] but task is already holding lock: (&(&list->spinlock)->rlock){......}, at: [] n_hdlc_tty_ioctl+0x127/0x1d0 [n_hdlc] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&list->spinlock)->rlock); lock(&(&list->spinlock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by a.out/1248: #0: (&tty->ldisc_sem){++++++}, at: [] tty_ldisc_ref_wait+0x20/0x50 #1: (&(&list->spinlock)->rlock){......}, at: [] n_hdlc_tty_ioctl+0x127/0x1d0 [n_hdlc] ... Call Trace: ... [] _raw_spin_lock_irqsave+0x50/0x70 [] n_hdlc_buf_put+0x20/0x60 [n_hdlc] [] n_hdlc_tty_ioctl+0x144/0x1d0 [n_hdlc] [] tty_ioctl+0x3f1/0xe40 ... Fix it by initializing the spin_locks separately. This removes also reduntand memset of a freshly kzallocated space. Signed-off-by: Jiri Slaby Reported-by: Dmitry Vyukov Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- drivers/tty/n_hdlc.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 1b2db9a3038..f26657c0687 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -159,7 +159,6 @@ struct n_hdlc { /* * HDLC buffer list manipulation functions */ -static void n_hdlc_buf_list_init(struct n_hdlc_buf_list *list); static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, struct n_hdlc_buf *buf); static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); @@ -855,10 +854,10 @@ static struct n_hdlc *n_hdlc_alloc(void) memset(n_hdlc, 0, sizeof(*n_hdlc)); - n_hdlc_buf_list_init(&n_hdlc->rx_free_buf_list); - n_hdlc_buf_list_init(&n_hdlc->tx_free_buf_list); - n_hdlc_buf_list_init(&n_hdlc->rx_buf_list); - n_hdlc_buf_list_init(&n_hdlc->tx_buf_list); + spin_lock_init(&n_hdlc->rx_free_buf_list.spinlock); + spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock); + spin_lock_init(&n_hdlc->rx_buf_list.spinlock); + spin_lock_init(&n_hdlc->tx_buf_list.spinlock); /* allocate free rx buffer list */ for(i=0;ispinlock); -} /* end of n_hdlc_buf_list_init() */ - /** * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list * @list - pointer to buffer list From 030e0b58183ab3d7260e5246589427139067c3c4 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Tue, 28 Feb 2017 19:54:40 +0300 Subject: [PATCH 245/252] tty: n_hdlc: get rid of racy n_hdlc.tbuf commit 82f2341c94d270421f383641b7cd670e474db56b upstream. Currently N_HDLC line discipline uses a self-made singly linked list for data buffers and has n_hdlc.tbuf pointer for buffer retransmitting after an error. The commit be10eb7589337e5defbe214dae038a53dd21add8 ("tty: n_hdlc add buffer flushing") introduced racy access to n_hdlc.tbuf. After tx error concurrent flush_tx_queue() and n_hdlc_send_frames() can put one data buffer to tx_free_buf_list twice. That causes double free in n_hdlc_release(). Let's use standard kernel linked list and get rid of n_hdlc.tbuf: in case of tx error put current data buffer after the head of tx_buf_list. Signed-off-by: Alexander Popov Signed-off-by: Willy Tarreau --- drivers/tty/n_hdlc.c | 132 ++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index f26657c0687..66fb0768413 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -114,7 +114,7 @@ #define DEFAULT_TX_BUF_COUNT 3 struct n_hdlc_buf { - struct n_hdlc_buf *link; + struct list_head list_item; int count; char buf[1]; }; @@ -122,8 +122,7 @@ struct n_hdlc_buf { #define N_HDLC_BUF_SIZE (sizeof(struct n_hdlc_buf) + maxframe) struct n_hdlc_buf_list { - struct n_hdlc_buf *head; - struct n_hdlc_buf *tail; + struct list_head list; int count; spinlock_t spinlock; }; @@ -136,7 +135,6 @@ struct n_hdlc_buf_list { * @backup_tty - TTY to use if tty gets closed * @tbusy - reentrancy flag for tx wakeup code * @woke_up - FIXME: describe this field - * @tbuf - currently transmitting tx buffer * @tx_buf_list - list of pending transmit frame buffers * @rx_buf_list - list of received frame buffers * @tx_free_buf_list - list unused transmit frame buffers @@ -149,7 +147,6 @@ struct n_hdlc { struct tty_struct *backup_tty; int tbusy; int woke_up; - struct n_hdlc_buf *tbuf; struct n_hdlc_buf_list tx_buf_list; struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; @@ -159,6 +156,8 @@ struct n_hdlc { /* * HDLC buffer list manipulation functions */ +static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, + struct n_hdlc_buf *buf); static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, struct n_hdlc_buf *buf); static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); @@ -208,16 +207,9 @@ static void flush_tx_queue(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty2n_hdlc(tty); struct n_hdlc_buf *buf; - unsigned long flags; while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list))) n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf); - spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); - if (n_hdlc->tbuf) { - n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, n_hdlc->tbuf); - n_hdlc->tbuf = NULL; - } - spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); } static struct tty_ldisc_ops n_hdlc_ldisc = { @@ -283,7 +275,6 @@ static void n_hdlc_release(struct n_hdlc *n_hdlc) } else break; } - kfree(n_hdlc->tbuf); kfree(n_hdlc); } /* end of n_hdlc_release() */ @@ -402,13 +393,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) n_hdlc->woke_up = 0; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); - /* get current transmit buffer or get new transmit */ - /* buffer from list of pending transmit buffers */ - - tbuf = n_hdlc->tbuf; - if (!tbuf) - tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); - + tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); while (tbuf) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)sending frame %p, count=%d\n", @@ -420,7 +405,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* rollback was possible and has been done */ if (actual == -ERESTARTSYS) { - n_hdlc->tbuf = tbuf; + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } /* if transmit error, throw frame away by */ @@ -435,10 +420,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* free current transmit buffer */ n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf); - - /* this tx buffer is done */ - n_hdlc->tbuf = NULL; - + /* wait up sleeping writers */ wake_up_interruptible(&tty->write_wait); @@ -448,10 +430,12 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)frame %p pending\n", __FILE__,__LINE__,tbuf); - - /* buffer not accepted by driver */ - /* set this buffer as pending buffer */ - n_hdlc->tbuf = tbuf; + + /* + * the buffer was not accepted by driver, + * return it back into tx queue + */ + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } } @@ -749,7 +733,8 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, int error = 0; int count; unsigned long flags; - + struct n_hdlc_buf *buf = NULL; + if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)n_hdlc_tty_ioctl() called %d\n", __FILE__,__LINE__,cmd); @@ -763,8 +748,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, /* report count of read data available */ /* in next available frame (if any) */ spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags); - if (n_hdlc->rx_buf_list.head) - count = n_hdlc->rx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count = buf->count; else count = 0; spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags); @@ -776,8 +763,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, count = tty_chars_in_buffer(tty); /* add size of next output frame in queue */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags); - if (n_hdlc->tx_buf_list.head) - count += n_hdlc->tx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count += buf->count; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags); error = put_user(count, (int __user *)arg); break; @@ -825,14 +814,14 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_wait(filp, &tty->write_wait, wait); /* set bits for operations that won't block */ - if (n_hdlc->rx_buf_list.head) + if (!list_empty(&n_hdlc->rx_buf_list.list)) mask |= POLLIN | POLLRDNORM; /* readable */ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= POLLHUP; if (tty_hung_up_p(filp)) mask |= POLLHUP; if (!tty_is_writelocked(tty) && - n_hdlc->tx_free_buf_list.head) + !list_empty(&n_hdlc->tx_free_buf_list.list)) mask |= POLLOUT | POLLWRNORM; /* writable */ } return mask; @@ -858,7 +847,12 @@ static struct n_hdlc *n_hdlc_alloc(void) spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock); spin_lock_init(&n_hdlc->rx_buf_list.spinlock); spin_lock_init(&n_hdlc->tx_buf_list.spinlock); - + + INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list); + /* allocate free rx buffer list */ for(i=0;ispinlock, flags); + + list_add(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); +} + /** * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list - * @list - pointer to buffer list + * @buf_list - pointer to buffer list * @buf - pointer to buffer */ -static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, +static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list, struct n_hdlc_buf *buf) { unsigned long flags; - spin_lock_irqsave(&list->spinlock,flags); - - buf->link=NULL; - if (list->tail) - list->tail->link = buf; - else - list->head = buf; - list->tail = buf; - (list->count)++; - - spin_unlock_irqrestore(&list->spinlock,flags); - + + spin_lock_irqsave(&buf_list->spinlock, flags); + + list_add_tail(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); } /* end of n_hdlc_buf_put() */ /** * n_hdlc_buf_get - remove and return an HDLC buffer from list - * @list - pointer to HDLC buffer list + * @buf_list - pointer to HDLC buffer list * * Remove and return an HDLC buffer from the head of the specified HDLC buffer * list. * Returns a pointer to HDLC buffer if available, otherwise %NULL. */ -static struct n_hdlc_buf* n_hdlc_buf_get(struct n_hdlc_buf_list *list) +static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list) { unsigned long flags; struct n_hdlc_buf *buf; - spin_lock_irqsave(&list->spinlock,flags); - - buf = list->head; + + spin_lock_irqsave(&buf_list->spinlock, flags); + + buf = list_first_entry_or_null(&buf_list->list, + struct n_hdlc_buf, list_item); if (buf) { - list->head = buf->link; - (list->count)--; + list_del(&buf->list_item); + buf_list->count--; } - if (!list->head) - list->tail = NULL; - - spin_unlock_irqrestore(&list->spinlock,flags); + + spin_unlock_irqrestore(&buf_list->spinlock, flags); return buf; - } /* end of n_hdlc_buf_get() */ static char hdlc_banner[] __initdata = From 8676954ba619bb7f538a7ff170a044cecd3ffd72 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 Dec 2016 11:16:22 -0500 Subject: [PATCH 246/252] ipv6: handle -EFAULT from skb_copy_bits commit a98f91758995cb59611e61318dddd8a6956b52c3 upstream. By setting certain socket options on ipv6 raw sockets, we can confuse the length calculation in rawv6_push_pending_frames triggering a BUG_ON. RIP: 0010:[] [] rawv6_sendmsg+0xc30/0xc40 RSP: 0018:ffff881f6c4a7c18 EFLAGS: 00010282 RAX: 00000000fffffff2 RBX: ffff881f6c681680 RCX: 0000000000000002 RDX: ffff881f6c4a7cf8 RSI: 0000000000000030 RDI: ffff881fed0f6a00 RBP: ffff881f6c4a7da8 R08: 0000000000000000 R09: 0000000000000009 R10: ffff881fed0f6a00 R11: 0000000000000009 R12: 0000000000000030 R13: ffff881fed0f6a00 R14: ffff881fee39ba00 R15: ffff881fefa93a80 Call Trace: [] ? unmap_page_range+0x693/0x830 [] inet_sendmsg+0x67/0xa0 [] sock_sendmsg+0x38/0x50 [] SYSC_sendto+0xef/0x170 [] SyS_sendto+0xe/0x10 [] do_syscall_64+0x50/0xa0 [] entry_SYSCALL64_slow_path+0x25/0x25 Handle by jumping to the failure path if skb_copy_bits gets an EFAULT. Reproducer: #include #include #include #include #include #include #include #define LEN 504 int main(int argc, char* argv[]) { int fd; int zero = 0; char buf[LEN]; memset(buf, 0, LEN); fd = socket(AF_INET6, SOCK_RAW, 7); setsockopt(fd, SOL_IPV6, IPV6_CHECKSUM, &zero, 4); setsockopt(fd, SOL_IPV6, IPV6_DSTOPTS, &buf, LEN); sendto(fd, buf, 1, 0, (struct sockaddr *) buf, 110); } Signed-off-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/raw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 464b1c9c08e..989bd798798 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -578,8 +578,11 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } offset += skb_transport_offset(skb); - if (skb_copy_bits(skb, offset, &csum, 2)) - BUG(); + err = skb_copy_bits(skb, offset, &csum, 2); + if (err < 0) { + ip6_flush_pending_frames(sk); + goto out; + } /* in case cksum was not initialized */ if (unlikely(csum)) From 43fe8188c534b22c036b3f02ac8843d7ddd7ac3b Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 21 Dec 2016 16:26:24 +1100 Subject: [PATCH 247/252] fs: exec: apply CLOEXEC before changing dumpable task flags commit 613cc2b6f272c1a8ad33aefa21cad77af23139f7 upstream. If you have a process that has set itself to be non-dumpable, and it then undergoes exec(2), any CLOEXEC file descriptors it has open are "exposed" during a race window between the dumpable flags of the process being reset for exec(2) and CLOEXEC being applied to the file descriptors. This can be exploited by a process by attempting to access /proc//fd/... during this window, without requiring CAP_SYS_PTRACE. The race in question is after set_dumpable has been (for get_link, though the trace is basically the same for readlink): [vfs] -> proc_pid_link_inode_operations.get_link -> proc_pid_get_link -> proc_fd_access_allowed -> ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); Which will return 0, during the race window and CLOEXEC file descriptors will still be open during this window because do_close_on_exec has not been called yet. As a result, the ordering of these calls should be reversed to avoid this race window. This is of particular concern to container runtimes, where joining a PID namespace with file descriptors referring to the host filesystem can result in security issues (since PRCTL_SET_DUMPABLE doesn't protect against access of CLOEXEC file descriptors -- file descriptors which may reference filesystem objects the container shouldn't have access to). Cc: dev@opencontainers.org Reported-by: Michael Crosby Signed-off-by: Aleksa Sarai Signed-off-by: Al Viro Signed-off-by: Willy Tarreau --- fs/exec.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index acbd7ac2ded..c945a555eb2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -19,7 +19,7 @@ * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary - * formats. + * formats. */ #include @@ -1091,6 +1091,13 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); current->personality &= ~bprm->per_clear; + /* + * We have to apply CLOEXEC before we change whether the process is + * dumpable (in setup_new_exec) to avoid a race with a process in userspace + * trying to access the should-be-closed file descriptors of a process + * undergoing exec(2). + */ + do_close_on_exec(current->files); return 0; out: @@ -1141,7 +1148,6 @@ void setup_new_exec(struct linux_binprm * bprm) current->self_exec_id++; flush_signal_handlers(current, 0); - do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); From e64530fd1b86d7cc992dbb3d2c023f43f95c3b74 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 24 Jan 2017 15:17:48 -0800 Subject: [PATCH 248/252] mm/huge_memory.c: respect FOLL_FORCE/FOLL_COW for thp commit 8310d48b125d19fcd9521d83b8293e63eb1646aa upstream. In commit 19be0eaffa3a ("mm: remove gup_flags FOLL_WRITE games from __get_user_pages()"), the mm code was changed from unsetting FOLL_WRITE after a COW was resolved to setting the (newly introduced) FOLL_COW instead. Simultaneously, the check in gup.c was updated to still allow writes with FOLL_FORCE set if FOLL_COW had also been set. However, a similar check in huge_memory.c was forgotten. As a result, remote memory writes to ro regions of memory backed by transparent huge pages cause an infinite loop in the kernel (handle_mm_fault sets FOLL_COW and returns 0 causing a retry, but follow_trans_huge_pmd bails out immidiately because `(flags & FOLL_WRITE) && !pmd_write(*pmd)` is true. While in this state the process is stil SIGKILLable, but little else works (e.g. no ptrace attach, no other signals). This is easily reproduced with the following code (assuming thp are set to always): #include #include #include #include #include #include #include #include #include #include #define TEST_SIZE 5 * 1024 * 1024 int main(void) { int status; pid_t child; int fd = open("/proc/self/mem", O_RDWR); void *addr = mmap(NULL, TEST_SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); assert(addr != MAP_FAILED); pid_t parent_pid = getpid(); if ((child = fork()) == 0) { void *addr2 = mmap(NULL, TEST_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); assert(addr2 != MAP_FAILED); memset(addr2, 'a', TEST_SIZE); pwrite(fd, addr2, TEST_SIZE, (uintptr_t)addr); return 0; } assert(child == waitpid(child, &status, 0)); assert(WIFEXITED(status) && WEXITSTATUS(status) == 0); return 0; } Fix this by updating follow_trans_huge_pmd in huge_memory.c analogously to the update in gup.c in the original commit. The same pattern exists in follow_devmap_pmd. However, we should not be able to reach that check with FOLL_COW set, so add WARN_ONCE to make sure we notice if we ever do. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170106015025.GA38411@juliacomputing.com Signed-off-by: Keno Fischer Acked-by: Kirill A. Shutemov Cc: Greg Thelen Cc: Nicholas Piggin Cc: Willy Tarreau Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Michal Hocko Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [bwh: Backported to 3.2: - Drop change to follow_devmap_pmd() - pmd_dirty() is not available; check the page flags as in can_follow_write_pte() - Adjust context] Signed-off-by: Ben Hutchings [mhocko: This has been forward ported from the 3.2 stable tree. And fixed to return NULL.] Reviewed-by: Michal Hocko Signed-off-by: Jiri Slaby Signed-off-by: Willy Tarreau --- mm/huge_memory.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d21c9ef0943..3877483a20f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1235,6 +1235,18 @@ out_unlock: return ret; } +/* + * foll_force can write to even unwritable pmd's, but only + * after we've gone through a cow cycle and they are dirty. + */ +static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page, + unsigned int flags) +{ + return pmd_write(pmd) || + ((flags & FOLL_FORCE) && (flags & FOLL_COW) && + page && PageAnon(page)); +} + struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, @@ -1245,15 +1257,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, assert_spin_locked(&mm->page_table_lock); - if (flags & FOLL_WRITE && !pmd_write(*pmd)) - goto out; - /* Avoid dumping huge zero page */ if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) return ERR_PTR(-EFAULT); page = pmd_page(*pmd); VM_BUG_ON(!PageHead(page)); + + if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, page, flags)) + return NULL; + if (flags & FOLL_TOUCH) { pmd_t _pmd; /* From 2f954a6249770ef5f2588aab0ca182238028628f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 May 2017 06:29:19 -0700 Subject: [PATCH 249/252] dccp/tcp: do not inherit mc_list from parent commit 657831ffc38e30092a2d5f03d385d710eb88b09a upstream. syzkaller found a way to trigger double frees from ip_mc_drop_socket() It turns out that leave a copy of parent mc_list at accept() time, which is very bad. Very similar to commit 8b485ce69876 ("tcp: do not inherit fastopen_req from parent") Initial report from Pray3r, completed by Andrey one. Thanks a lot to them ! Signed-off-by: Eric Dumazet Reported-by: Pray3r Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/inet_connection_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6acb541c909..40ac1e2cbb3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -688,6 +688,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; + inet_sk(newsk)->mc_list = NULL; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; From 66cb32401b6041356f0be44a302cdb42c20de6c2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 16 May 2017 19:18:55 +0200 Subject: [PATCH 250/252] char: lp: fix possible integer overflow in lp_setup() commit 3e21f4af170bebf47c187c1ff8bf155583c9f3b1 upstream. The lp_setup() code doesn't apply any bounds checking when passing "lp=none", and only in this case, resulting in an overflow of the parport_nr[] array. All versions in Git history are affected. Reported-By: Roee Hay Cc: Ben Hutchings Signed-off-by: Willy Tarreau --- drivers/char/lp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 0913d79424d..6b619105dea 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -857,7 +857,11 @@ static int __init lp_setup (char *str) } else if (!strcmp(str, "auto")) { parport_nr[0] = LP_PARPORT_AUTO; } else if (!strcmp(str, "none")) { - parport_nr[parport_ptr++] = LP_PARPORT_NONE; + if (parport_ptr < LP_NO) + parport_nr[parport_ptr++] = LP_PARPORT_NONE; + else + printk(KERN_INFO "lp: too many ports, %s ignored.\n", + str); } else if (!strcmp(str, "reset")) { reset = 1; } From 4155279174e8cbe4d06b256e4e66b5491a897e5e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 16 Feb 2017 17:22:46 +0100 Subject: [PATCH 251/252] dccp: fix freeing skb too early for IPV6_RECVPKTINFO [ Upstream commit 5edabca9d4cff7f1f2b68f0bac55ef99d9798ba4 ] In the current DCCP implementation an skb for a DCCP_PKT_REQUEST packet is forcibly freed via __kfree_skb in dccp_rcv_state_process if dccp_v6_conn_request successfully returns. However, if IPV6_RECVPKTINFO is set on a socket, the address of the skb is saved to ireq->pktopts and the ref count for skb is incremented in dccp_v6_conn_request, so skb is still in use. Nevertheless, it gets freed in dccp_rcv_state_process. Fix by calling consume_skb instead of doing goto discard and therefore calling __kfree_skb. Similar fixes for TCP: fb7e2399ec17f1004c0e0ccfd17439f8759ede01 [TCP]: skb is unexpectedly freed. 0aea76d35c9651d55bbaf746e7914e5f9ae5a25d tcp: SYN packets are now simply consumed Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/dccp/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 14cdafad7a9..e511ccc74a0 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,7 +606,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - goto discard; + consume_skb(skb); + return 0; } if (dh->dccph_type == DCCP_PKT_RESET) goto discard; From a07ea939d7e0406e97739c18e2db6b402eb04cdc Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 15 Jun 2017 19:56:54 +0200 Subject: [PATCH 252/252] Linux 3.10.106 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 80e180e1e4a..2f87f67fb9f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 105 +SUBLEVEL = 106 EXTRAVERSION = NAME = TOSSUG Baby Fish