From 4f402ee59232ea59b0364a290ba4efd46b03510d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 27 Aug 2015 20:16:37 +0200 Subject: [PATCH 01/55] scsi: fix scsi_error_handler vs. scsi_host_dev_release race commit 537b604c8b3aa8b96fe35f87dd085816552e294c upstream. b9d5c6b7ef57 ("[SCSI] cleanup setting task state in scsi_error_handler()") has introduced a race between scsi_error_handler and scsi_host_dev_release resulting in the hang when the device goes away because scsi_error_handler might miss a wake up: CPU0 CPU1 scsi_error_handler scsi_host_dev_release kthread_stop() kthread_should_stop() test_bit(KTHREAD_SHOULD_STOP) set_bit(KTHREAD_SHOULD_STOP) wake_up_process() wait_for_completion() set_current_state(TASK_INTERRUPTIBLE) schedule() The most straightforward solution seems to be to invert the ordering of the set_current_state and kthread_should_stop. The issue has been noticed during reboot test on a 3.0 based kernel but the current code seems to be affected in the same way. [jejb: additional comment added] Reported-and-debugged-by: Mike Mayer Signed-off-by: Michal Hocko Reviewed-by: Dan Williams Reviewed-by: Hannes Reinecke Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_error.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 3668b1b23b5..9acbc885239 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1849,8 +1849,17 @@ int scsi_error_handler(void *data) * We never actually get interrupted because kthread_run * disables signal delivery for the created thread. */ - while (!kthread_should_stop()) { + while (true) { + /* + * The sequence in kthread_stop() sets the stop flag first + * then wakes the process. To avoid missed wakeups, the task + * should always be in a non running state before the stop + * flag is checked + */ set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || shost->host_failed != shost->host_busy) { SCSI_LOG_ERROR_RECOVERY(1, From b200a84e6b618fc37d04f74d7228cf5acd6fb221 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Sep 2015 12:36:12 -0300 Subject: [PATCH 02/55] perf header: Fixup reading of HEADER_NRCPUS feature commit caa470475d9b59eeff093ae650800d34612c4379 upstream. The original patch introducing this header wrote the number of CPUs available and online in one order and then swapped those values when reading, fix it. Before: # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 4 # echo 0 > /sys/devices/system/cpu/cpu2/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 3 # echo 0 > /sys/devices/system/cpu/cpu1/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 2 After the fix, bringing back the CPUs online: # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 2 # nrcpus avail : 4 # echo 1 > /sys/devices/system/cpu/cpu2/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 3 # nrcpus avail : 4 # echo 1 > /sys/devices/system/cpu/cpu1/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 4 Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Kan Liang Cc: Stephane Eranian Cc: Wang Nan Fixes: fbe96f29ce4b ("perf tools: Make perf.data more self-descriptive (v8)") Link: http://lkml.kernel.org/r/20150911153323.GP23511@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/header.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 326068a593a..bb34199d545 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1729,7 +1729,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_online = nr; + ph->env.nr_cpus_avail = nr; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1738,7 +1738,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_avail = nr; + ph->env.nr_cpus_online = nr; return 0; } From a18006390cf8f53f73a7e5e08e7102212807e2e8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Sep 2015 13:24:40 +0100 Subject: [PATCH 03/55] ARM: 8429/1: disable GCC SRA optimization commit a077224fd35b2f7fbc93f14cf67074fc792fbac2 upstream. While working on the 32-bit ARM port of UEFI, I noticed a strange corruption in the kernel log. The following snprintf() statement (in drivers/firmware/efi/efi.c:efi_md_typeattr_format()) snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", was producing the following output in the log: | | | | | |WB|WT|WC|UC] | | | | | |WB|WT|WC|UC] | | | | | |WB|WT|WC|UC] |RUN| | | | |WB|WT|WC|UC]* |RUN| | | | |WB|WT|WC|UC]* | | | | | |WB|WT|WC|UC] |RUN| | | | |WB|WT|WC|UC]* | | | | | |WB|WT|WC|UC] |RUN| | | | | | | |UC] |RUN| | | | | | | |UC] As it turns out, this is caused by incorrect code being emitted for the string() function in lib/vsprintf.c. The following code if (!(spec.flags & LEFT)) { while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; } } for (i = 0; i < len; ++i) { if (buf < end) *buf = *s; ++buf; ++s; } while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; } when called with len == 0, triggers an issue in the GCC SRA optimization pass (Scalar Replacement of Aggregates), which handles promotion of signed struct members incorrectly. This is a known but as yet unresolved issue. (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932). In this particular case, it is causing the second while loop to be executed erroneously a single time, causing the additional space characters to be printed. So disable the optimization by passing -fno-ipa-sra. Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1ba358ba16b..7d4ce431107 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -55,6 +55,14 @@ endif comma = , +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes From a5c58721ab7b93861043b9c1336901a0862dfa6a Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Fri, 31 Jul 2015 14:08:58 +0200 Subject: [PATCH 04/55] windfarm: decrement client count when unregistering commit fe2b592173ff0274e70dc44d1d28c19bb995aa7c upstream. wf_unregister_client() increments the client count when a client unregisters. That is obviously incorrect. Decrement that client count instead. Fixes: 75722d3992f5 ("[PATCH] ppc64: Thermal control for SMU based machines") Signed-off-by: Paul Bolle Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/macintosh/windfarm_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 3ee198b6584..cc7ece1712b 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -435,7 +435,7 @@ int wf_unregister_client(struct notifier_block *nb) { mutex_lock(&wf_lock); blocking_notifier_chain_unregister(&wf_client_list, nb); - wf_client_count++; + wf_client_count--; if (wf_client_count == 0) wf_stop_thread(); mutex_unlock(&wf_lock); From ffebdff74ff04666b221c568daabd878f5939738 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 30 Jul 2015 16:24:43 -0700 Subject: [PATCH 05/55] x86/apic: Serialize LVTT and TSC_DEADLINE writes commit 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 upstream. The APIC LVTT register is MMIO mapped but the TSC_DEADLINE register is an MSR. The write to the TSC_DEADLINE MSR is not serializing, so it's not guaranteed that the write to LVTT has reached the APIC before the TSC_DEADLINE MSR is written. In such a case the write to the MSR is ignored and as a consequence the local timer interrupt never fires. The SDM decribes this issue for xAPIC and x2APIC modes. The serialization methods recommended by the SDM differ. xAPIC: "1. Memory-mapped write to LVT Timer Register, setting bits 18:17 to 10b. 2. WRMSR to the IA32_TSC_DEADLINE MSR a value much larger than current time-stamp counter. 3. If RDMSR of the IA32_TSC_DEADLINE MSR returns zero, go to step 2. 4. WRMSR to the IA32_TSC_DEADLINE MSR the desired deadline." x2APIC: "To allow for efficient access to the APIC registers in x2APIC mode, the serializing semantics of WRMSR are relaxed when writing to the APIC registers. Thus, system software should not use 'WRMSR to APIC registers in x2APIC mode' as a serializing instruction. Read and write accesses to the APIC registers will occur in program order. A WRMSR to an APIC register may complete before all preceding stores are globally visible; software can prevent this by inserting a serializing instruction, an SFENCE, or an MFENCE before the WRMSR." The xAPIC method is to just wait for the memory mapped write to hit the LVTT by checking whether the MSR write has reached the hardware. There is no reason why a proper MFENCE after the memory mapped write would not do the same. Andi Kleen confirmed that MFENCE is sufficient for the xAPIC case as well. Issue MFENCE before writing to the TSC_DEADLINE MSR. This can be done unconditionally as all CPUs which have TSC_DEADLINE also have MFENCE support. [ tglx: Massaged the changelog ] Signed-off-by: Shaohua Li Reviewed-by: Ingo Molnar Cc: Cc: Cc: Cc: Andi Kleen Cc: H. Peter Anvin Link: http://lkml.kernel.org/r/20150909041352.GA2059853@devbig257.prn2.facebook.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 033eb44dc66..9620d18cb63 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -350,6 +350,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here. + */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } From 9870892fc277debf23fd5f6bef2fef5ae77b97fb Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 16 Sep 2015 14:10:03 +0100 Subject: [PATCH 06/55] x86/platform: Fix Geode LX timekeeping in the generic x86 build commit 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d upstream. In 2007, commit 07190a08eef36 ("Mark TSC on GeodeLX reliable") bypassed verification of the TSC on Geode LX. However, this code (now in the check_system_tsc_reliable() function in arch/x86/kernel/tsc.c) was only present if CONFIG_MGEODE_LX was set. OpenWRT has recently started building its generic Geode target for Geode GX, not LX, to include support for additional platforms. This broke the timekeeping on LX-based devices, because the TSC wasn't marked as reliable: https://dev.openwrt.org/ticket/20531 By adding a runtime check on is_geode_lx(), we can also include the fix if CONFIG_MGEODEGX1 or CONFIG_X86_GENERIC are set, thus fixing the problem. Signed-off-by: David Woodhouse Cc: Andres Salomon Cc: Linus Torvalds Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1442409003.131189.87.camel@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 27e3a14fc91..9714a7aa32f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -20,6 +20,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -806,15 +807,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; From fb7eff9cfdffc4cb2f3d75024bda6c2a56ab12e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20M=C3=BCller?= Date: Thu, 1 Oct 2015 13:43:42 +0200 Subject: [PATCH 07/55] Use WARN_ON_ONCE for missing X86_FEATURE_NRIPS commit d2922422c48df93f3edff7d872ee4f3191fefb08 upstream. The cpu feature flags are not ever going to change, so warning everytime can cause a lot of kernel log spam (in our case more than 10GB/hour). The warning seems to only occur when nested virtualization is enabled, so it's probably triggered by a KVM bug. This is a sensible and safe change anyway, and the KVM bug fix might not be suitable for stable releases anyway. Signed-off-by: Dirk Mueller Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 224d2ef754c..3deddd796f7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -496,7 +496,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (svm->vmcb->control.next_rip != 0) { - WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } From 4f9d53595caf9c801c8b4389bc0c64e9c16488f0 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 1 Oct 2015 09:04:22 -0400 Subject: [PATCH 08/55] x86/mm: Set NX on gap between __ex_table and rodata commit ab76f7b4ab2397ffdd2f1eb07c55697d19991d10 upstream. Unused space between the end of __ex_table and the start of rodata can be left W+x in the kernel page tables. Extend the setting of the NX bit to cover this gap by starting from text_end rather than rodata_start. Before: ---[ High Kernel Mapping ]--- 0xffffffff80000000-0xffffffff81000000 16M pmd 0xffffffff81000000-0xffffffff81600000 6M ro PSE GLB x pmd 0xffffffff81600000-0xffffffff81754000 1360K ro GLB x pte 0xffffffff81754000-0xffffffff81800000 688K RW GLB x pte 0xffffffff81800000-0xffffffff81a00000 2M ro PSE GLB NX pmd 0xffffffff81a00000-0xffffffff81b3b000 1260K ro GLB NX pte 0xffffffff81b3b000-0xffffffff82000000 4884K RW GLB NX pte 0xffffffff82000000-0xffffffff82200000 2M RW PSE GLB NX pmd 0xffffffff82200000-0xffffffffa0000000 478M pmd After: ---[ High Kernel Mapping ]--- 0xffffffff80000000-0xffffffff81000000 16M pmd 0xffffffff81000000-0xffffffff81600000 6M ro PSE GLB x pmd 0xffffffff81600000-0xffffffff81754000 1360K ro GLB x pte 0xffffffff81754000-0xffffffff81800000 688K RW GLB NX pte 0xffffffff81800000-0xffffffff81a00000 2M ro PSE GLB NX pmd 0xffffffff81a00000-0xffffffff81b3b000 1260K ro GLB NX pte 0xffffffff81b3b000-0xffffffff82000000 4884K RW GLB NX pte 0xffffffff82000000-0xffffffff82200000 2M RW PSE GLB NX pmd 0xffffffff82200000-0xffffffffa0000000 478M pmd Signed-off-by: Stephen Smalley Acked-by: Kees Cook Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443704662-3138-1-git-send-email-sds@tycho.nsa.gov Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2db3f30bed7..b04e5026208 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1163,7 +1163,7 @@ void mark_rodata_ro(void) * has been zapped already via cleanup_highmem(). */ all_end = roundup((unsigned long)_brk_end, PMD_SIZE); - set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); rodata_test(); From 919845cb331e066a042a8060e84bb9dc6ba390bf Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 25 Sep 2015 11:59:52 +0200 Subject: [PATCH 09/55] x86/xen: Support kexec/kdump in HVM guests by doing a soft reset commit 0b34a166f291d255755be46e43ed5497cdd194f2 upstream. Currently there is a number of issues preventing PVHVM Xen guests from doing successful kexec/kdump: - Bound event channels. - Registered vcpu_info. - PIRQ/emuirq mappings. - shared_info frame after XENMAPSPACE_shared_info operation. - Active grant mappings. Basically, newly booted kernel stumbles upon already set up Xen interfaces and there is no way to reestablish them. In Xen-4.7 a new feature called 'soft reset' is coming. A guest performing kexec/kdump operation is supposed to call SCHEDOP_shutdown hypercall with SHUTDOWN_soft_reset reason before jumping to new kernel. Hypervisor (with some help from toolstack) will do full domain cleanup (but keeping its memory and vCPU contexts intact) returning the guest to the state it had when it was first booted and thus allowing it to start over. Doing SHUTDOWN_soft_reset on Xen hypervisors which don't support it is probably OK as by default all unknown shutdown reasons cause domain destroy with a message in toolstack log: 'Unknown shutdown reason code 5. Destroying domain.' which gives a clue to what the problem is and eliminates false expectations. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten.c | 23 +++++++++++++++++++++++ include/xen/interface/sched.h | 8 ++++++++ 2 files changed, 31 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 13d926282c8..511630db00a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,10 @@ #include #include +#ifdef CONFIG_KEXEC_CORE +#include +#endif + #include #include #include @@ -1744,6 +1748,21 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { .notifier_call = xen_hvm_cpu_notify, }; +#ifdef CONFIG_KEXEC_CORE +static void xen_hvm_shutdown(void) +{ + native_machine_shutdown(); + if (kexec_in_progress) + xen_reboot(SHUTDOWN_soft_reset); +} + +static void xen_hvm_crash_shutdown(struct pt_regs *regs) +{ + native_machine_crash_shutdown(regs); + xen_reboot(SHUTDOWN_soft_reset); +} +#endif + static void __init xen_hvm_guest_init(void) { init_hvm_pv_info(); @@ -1758,6 +1777,10 @@ static void __init xen_hvm_guest_init(void) x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); +#ifdef CONFIG_KEXEC_CORE + machine_ops.shutdown = xen_hvm_shutdown; + machine_ops.crash_shutdown = xen_hvm_crash_shutdown; +#endif } static bool __init xen_hvm_platform(void) diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h index 9ce083960a2..f18490985fc 100644 --- a/include/xen/interface/sched.h +++ b/include/xen/interface/sched.h @@ -107,5 +107,13 @@ struct sched_watchdog { #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ #define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ +/* + * Domain asked to perform 'soft reset' for it. The expected behavior is to + * reset internal Xen state for the domain returning it to the point where it + * was created but leaving the domain's memory contents and vCPU contexts + * intact. This will allow the domain to start over and set up all Xen specific + * interfaces again. + */ +#define SHUTDOWN_soft_reset 5 #endif /* __XEN_PUBLIC_SCHED_H__ */ From 69e686057711bf17907ba68e19ceffdfca80d788 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 6 Sep 2015 01:46:54 +0300 Subject: [PATCH 10/55] spi: Fix documentation of spi_alloc_master() commit a394d635193b641f2c86ead5ada5b115d57c51f8 upstream. Actually, spi_master_put() after spi_alloc_master() must _not_ be followed by kfree(). The memory is already freed with the call to spi_master_put() through spi_master_class, which registers a release function. Calling both spi_master_put() and kfree() results in often nasty (and delayed) crashes elsewhere in the kernel, often in the networking stack. This reverts commit eb4af0f5349235df2e4a5057a72fc8962d00308a. Link to patch and concerns: https://lkml.org/lkml/2012/9/3/269 or http://lkml.iu.edu/hypermail/linux/kernel/1209.0/00790.html Alexey Klimov: This revert becomes valid after 94c69f765f1b4a658d96905ec59928e3e3e07e6a when spi-imx.c has been fixed and there is no need to call kfree() so comment for spi_alloc_master() should be fixed. Signed-off-by: Guenter Roeck Signed-off-by: Alexey Klimov Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 32b7bb111eb..7c159634aaa 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1030,8 +1030,7 @@ static struct class spi_master_class = { * * The caller is responsible for assigning the bus number and initializing * the master's methods before calling spi_register_master(); and (after errors - * adding the device) calling spi_master_put() and kfree() to prevent a memory - * leak. + * adding the device) calling spi_master_put() to prevent a memory leak. */ struct spi_master *spi_alloc_master(struct device *dev, unsigned size) { From 69155df6a85cd1672c032220a8619a7f9e5f403b Mon Sep 17 00:00:00 2001 From: "Tan, Jui Nee" Date: Tue, 1 Sep 2015 10:22:51 +0800 Subject: [PATCH 11/55] spi: spi-pxa2xx: Check status register to determine if SSSR_TINT is disabled commit 02bc933ebb59208f42c2e6305b2c17fd306f695d upstream. On Intel Baytrail, there is case when interrupt handler get called, no SPI message is captured. The RX FIFO is indeed empty when RX timeout pending interrupt (SSSR_TINT) happens. Use the BIOS version where both HSUART and SPI are on the same IRQ. Both drivers are using IRQF_SHARED when calling the request_irq function. When running two separate and independent SPI and HSUART application that generate data traffic on both components, user will see messages like below on the console: pxa2xx-spi pxa2xx-spi.0: bad message state in interrupt handler This commit will fix this by first checking Receiver Time-out Interrupt, if it is disabled, ignore the request and return without servicing. Signed-off-by: Tan, Jui Nee Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index cc42ee5e19f..787cfbaa775 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -546,6 +546,10 @@ static irqreturn_t ssp_int(int irq, void *dev_id) if (!(sccr1_reg & SSCR1_TIE)) mask &= ~SSSR_TFS; + /* Ignore RX timeout interrupt if it is disabled */ + if (!(sccr1_reg & SSCR1_TINTE)) + mask &= ~SSSR_TINT; + if (!(status & mask)) return IRQ_NONE; From 9834213fa8586963f8f85fcf08df94efcb72435f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 1 Oct 2015 15:36:57 -0700 Subject: [PATCH 12/55] mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault commit 2f84a8990ebbe235c59716896e017c6b2ca1200f upstream. SunDong reported the following on https://bugzilla.kernel.org/show_bug.cgi?id=103841 I think I find a linux bug, I have the test cases is constructed. I can stable recurring problems in fedora22(4.0.4) kernel version, arch for x86_64. I construct transparent huge page, when the parent and child process with MAP_SHARE, MAP_PRIVATE way to access the same huge page area, it has the opportunity to lead to huge page copy on write failure, and then it will munmap the child corresponding mmap area, but then the child mmap area with VM_MAYSHARE attributes, child process munmap this area can trigger VM_BUG_ON in set_vma_resv_flags functions (vma - > vm_flags & VM_MAYSHARE). There were a number of problems with the report (e.g. it's hugetlbfs that triggers this, not transparent huge pages) but it was fundamentally correct in that a VM_BUG_ON in set_vma_resv_flags() can be triggered that looks like this vma ffff8804651fd0d0 start 00007fc474e00000 end 00007fc475e00000 next ffff8804651fd018 prev ffff8804651fd188 mm ffff88046b1b1800 prot 8000000000000027 anon_vma (null) vm_ops ffffffff8182a7a0 pgoff 0 file ffff88106bdb9800 private_data (null) flags: 0x84400fb(read|write|shared|mayread|maywrite|mayexec|mayshare|dontexpand|hugetlb) ------------ kernel BUG at mm/hugetlb.c:462! SMP Modules linked in: xt_pkttype xt_LOG xt_limit [..] CPU: 38 PID: 26839 Comm: map Not tainted 4.0.4-default #1 Hardware name: Dell Inc. PowerEdge R810/0TT6JF, BIOS 2.7.4 04/26/2012 set_vma_resv_flags+0x2d/0x30 The VM_BUG_ON is correct because private and shared mappings have different reservation accounting but the warning clearly shows that the VMA is shared. When a private COW fails to allocate a new page then only the process that created the VMA gets the page -- all the children unmap the page. If the children access that data in the future then they get killed. The problem is that the same file is mapped shared and private. During the COW, the allocation fails, the VMAs are traversed to unmap the other private pages but a shared VMA is found and the bug is triggered. This patch identifies such VMAs and skips them. Signed-off-by: Mel Gorman Reported-by: SunDong Reviewed-by: Michal Hocko Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: David Rientjes Reviewed-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d9bc87ca062..e9fd382bf25 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2572,6 +2572,14 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, if (iter_vma == vma) continue; + /* + * Shared VMAs have their own reserves and do not affect + * MAP_PRIVATE accounting but it is possible that a shared + * VMA is using the same page so check and skip such VMAs. + */ + if (iter_vma->vm_flags & VM_MAYSHARE) + continue; + /* * Unmap the page from other VMAs without their own reserves. * They get marked to be SIGKILLed if they fault in these From c47197f13497a9b17d9b2b69fe1185fcef044166 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 5 Oct 2015 16:55:09 +0200 Subject: [PATCH 13/55] ALSA: synth: Fix conflicting OSS device registration on AWE32 commit 225db5762dc1a35b26850477ffa06e5cd0097243 upstream. When OSS emulation is loaded on ISA SB AWE32 chip, we get now kernel warnings like: WARNING: CPU: 0 PID: 2791 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x51/0x80() sysfs: cannot create duplicate filename '/devices/isa/sbawe.0/sound/card0/seq-oss-0-0' It's because both emux synth and opl3 drivers try to register their OSS device object with the same static index number 0. This hasn't been a big problem until the recent rewrite of device management code (that exposes sysfs at the same time), but it's been an obvious bug. This patch works around it just by using a different index number of emux synth object. There can be a more elegant way to fix, but it's enough for now, as this code won't be touched so often, in anyway. Reported-and-tested-by: Michael Shell Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/synth/emux/emux_oss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/synth/emux/emux_oss.c b/sound/synth/emux/emux_oss.c index daf61abc367..646b66703bd 100644 --- a/sound/synth/emux/emux_oss.c +++ b/sound/synth/emux/emux_oss.c @@ -69,7 +69,8 @@ snd_emux_init_seq_oss(struct snd_emux *emu) struct snd_seq_oss_reg *arg; struct snd_seq_device *dev; - if (snd_seq_device_new(emu->card, 0, SNDRV_SEQ_DEV_ID_OSS, + /* using device#1 here for avoiding conflicts with OPL3 */ + if (snd_seq_device_new(emu->card, 1, SNDRV_SEQ_DEV_ID_OSS, sizeof(struct snd_seq_oss_reg), &dev) < 0) return; From 9f6425cad8634c76c9055a3f4e85733f8f234419 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 15 Sep 2015 20:51:31 +0200 Subject: [PATCH 14/55] ASoC: fix broken pxa SoC support commit 3c8f7710c1c44fb650bc29b6ef78ed8b60cfaa28 upstream. The previous fix of pxa library support, which was introduced to fix the library dependency, broke the previous SoC behavior, where a machine code binding pxa2xx-ac97 with a coded relied on : - sound/soc/pxa/pxa2xx-ac97.c - sound/soc/codecs/XXX.c For example, the mioa701_wm9713.c machine code is currently broken. The "select ARM" statement wrongly selects the soc/arm/pxa2xx-ac97 for compilation, as per an unfortunate fate SND_PXA2XX_AC97 is both declared in sound/arm/Kconfig and sound/soc/pxa/Kconfig. Fix this by ensuring that SND_PXA2XX_SOC correctly triggers the correct pxa2xx-ac97 compilation. Fixes: 846172dfe33c ("ASoC: fix SND_PXA2XX_LIB Kconfig warning") Signed-off-by: Robert Jarzmik Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/arm/Kconfig | 15 ++++++++------- sound/soc/pxa/Kconfig | 2 -- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/sound/arm/Kconfig b/sound/arm/Kconfig index 885683a3b0b..e0406211716 100644 --- a/sound/arm/Kconfig +++ b/sound/arm/Kconfig @@ -9,6 +9,14 @@ menuconfig SND_ARM Drivers that are implemented on ASoC can be found in "ALSA for SoC audio support" section. +config SND_PXA2XX_LIB + tristate + select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97 + select SND_DMAENGINE_PCM + +config SND_PXA2XX_LIB_AC97 + bool + if SND_ARM config SND_ARMAACI @@ -21,13 +29,6 @@ config SND_PXA2XX_PCM tristate select SND_PCM -config SND_PXA2XX_LIB - tristate - select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97 - -config SND_PXA2XX_LIB_AC97 - bool - config SND_PXA2XX_AC97 tristate "AC97 driver for the Intel PXA2xx chip" depends on ARCH_PXA diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig index 4d2e46fae77..20a57c0060b 100644 --- a/sound/soc/pxa/Kconfig +++ b/sound/soc/pxa/Kconfig @@ -1,7 +1,6 @@ config SND_PXA2XX_SOC tristate "SoC Audio for the Intel PXA2xx chip" depends on ARCH_PXA - select SND_ARM select SND_PXA2XX_LIB help Say Y or M if you want to add support for codecs attached to @@ -24,7 +23,6 @@ config SND_PXA2XX_AC97 config SND_PXA2XX_SOC_AC97 tristate select AC97_BUS - select SND_ARM select SND_PXA2XX_LIB_AC97 select SND_SOC_AC97_BUS From b7ab3afe207c8de114c9f546f8f9906f4ea7ecc3 Mon Sep 17 00:00:00 2001 From: Yitian Bu Date: Fri, 2 Oct 2015 15:18:41 +0800 Subject: [PATCH 15/55] ASoC: dwc: correct irq clear method commit 4873867e5f2bd90faad861dd94865099fc3140f3 upstream. from Designware I2S datasheet, tx/rx XRUN irq is cleared by reading register TOR/ROR, rather than by writing into them. Signed-off-by: Yitian Bu Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/dwc/designware_i2s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c index 489a9abf112..6a530afbb7e 100644 --- a/sound/soc/dwc/designware_i2s.c +++ b/sound/soc/dwc/designware_i2s.c @@ -100,10 +100,10 @@ static inline void i2s_clear_irqs(struct dw_i2s_dev *dev, u32 stream) if (stream == SNDRV_PCM_STREAM_PLAYBACK) { for (i = 0; i < 4; i++) - i2s_write_reg(dev->i2s_base, TOR(i), 0); + i2s_read_reg(dev->i2s_base, TOR(i)); } else { for (i = 0; i < 4; i++) - i2s_write_reg(dev->i2s_base, ROR(i), 0); + i2s_read_reg(dev->i2s_base, ROR(i)); } } From 6ec7d68798b8103a120b5b650995d7766ee3e374 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 11 Sep 2015 21:44:17 -0400 Subject: [PATCH 16/55] btrfs: skip waiting on ordered range for special files commit a30e577c96f59b1e1678ea5462432b09bf7d5cbc upstream. In btrfs_evict_inode, we properly truncate the page cache for evicted inodes but then we call btrfs_wait_ordered_range for every inode as well. It's the right thing to do for regular files but results in incorrect behavior for device inodes for block devices. filemap_fdatawrite_range gets called with inode->i_mapping which gets resolved to the block device inode before getting passed to wbc_attach_fdatawrite_inode and ultimately to inode_to_bdi. What happens next depends on whether there's an open file handle associated with the inode. If there is, we write to the block device, which is unexpected behavior. If there isn't, we through normally and inode->i_data is used. We can also end up racing against open/close which can result in crashes when i_mapping points to a block device inode that has been closed. Since there can't be any page cache associated with special file inodes, it's safe to skip the btrfs_wait_ordered_range call entirely and avoid the problem. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100911 Tested-by: Christoph Biedl Signed-off-by: Jeff Mahoney Reviewed-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d20db643772..f22beda91ff 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4650,7 +4650,8 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (!special_file(inode->i_mode)) + btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, From ba45b48d9bd1b01229c4fa3d94ba2a89045f23bd Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 11 Aug 2015 13:05:10 +0100 Subject: [PATCH 17/55] staging: comedi: adl_pci7x3x: fix digital output on PCI-7230 commit ad83dbd974feb2e2a8cc071a1d28782bd4d2c70e upstream. The "adl_pci7x3x" driver replaced the "adl_pci7230" and "adl_pci7432" drivers in commits 8f567c373c4b ("staging: comedi: new adl_pci7x3x driver") and 657f77d173d3 ("staging: comedi: remove adl_pci7230 and adl_pci7432 drivers"). Although the new driver code agrees with the user manuals for the respective boards, digital outputs stopped working on the PCI-7230. This has 16 digital output channels and the previous adl_pci7230 driver shifted the 16 bit output state left by 16 bits before writing to the hardware register. The new adl_pci7x3x driver doesn't do that. Fix it in `adl_pci7x3x_do_insn_bits()` by checking for the special case of the subdevice having only 16 channels and duplicating the 16 bit output state into both halves of the 32-bit register. That should work both for what the board actually does and for what the user manual says it should do. Fixes: 8f567c373c4b ("staging: comedi: new adl_pci7x3x driver") Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/adl_pci7x3x.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/adl_pci7x3x.c b/drivers/staging/comedi/drivers/adl_pci7x3x.c index e3960745f50..49cb6920689 100644 --- a/drivers/staging/comedi/drivers/adl_pci7x3x.c +++ b/drivers/staging/comedi/drivers/adl_pci7x3x.c @@ -119,10 +119,21 @@ static int adl_pci7x3x_do_insn_bits(struct comedi_device *dev, unsigned int bits = data[1]; if (mask) { + unsigned int val; + s->state &= ~mask; s->state |= (bits & mask); - - outl(s->state, dev->iobase + reg); + val = s->state; + if (s->n_chan == 16) { + /* + * It seems the PCI-7230 needs the 16-bit DO state + * to be shifted left by 16 bits before being written + * to the 32-bit register. Set the value in both + * halves of the register to be sure. + */ + val |= val << 16; + } + outl(val, dev->iobase + reg); } /* From 249fbae374c90d6d29ed26f11bbacaf0bd0be827 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 12 Aug 2015 15:12:09 +0100 Subject: [PATCH 18/55] dm btree: add ref counting ops for the leaves of top level btrees commit b0dc3c8bc157c60b1d470163882be8c13e1950af upstream. When using nested btrees, the top leaves of the top levels contain block addresses for the root of the next tree down. If we shadow a shared leaf node the leaf values (sub tree roots) should be incremented accordingly. This is only an issue if there is metadata sharing in the top levels. Which only occurs if metadata snapshots are being used (as is possible with dm-thinp). And could result in a block from the thinp metadata snap being reused early, thus corrupting the thinp metadata snap. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- .../md/persistent-data/dm-btree-internal.h | 6 +++ drivers/md/persistent-data/dm-btree-remove.c | 12 ++---- drivers/md/persistent-data/dm-btree-spine.c | 37 +++++++++++++++++++ drivers/md/persistent-data/dm-btree.c | 7 +--- 4 files changed, 47 insertions(+), 15 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index bf2b80d5c47..8731b6ea026 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -138,4 +138,10 @@ int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; +/* + * Value type for upper levels of multi-level btrees. + */ +extern void init_le64_type(struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt); + #endif /* DM_BTREE_INTERNAL_H */ diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index a03178e91a7..7c0d75547cc 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -544,14 +544,6 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, return r; } -static struct dm_btree_value_type le64_type = { - .context = NULL, - .size = sizeof(__le64), - .inc = NULL, - .dec = NULL, - .equal = NULL -}; - int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, dm_block_t *new_root) { @@ -559,12 +551,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, int index = 0, r = 0; struct shadow_spine spine; struct btree_node *n; + struct dm_btree_value_type le64_vt; + init_le64_type(info->tm, &le64_vt); init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { r = remove_raw(&spine, info, (level == last_level ? - &info->value_type : &le64_type), + &info->value_type : &le64_vt), root, keys[level], (unsigned *)&index); if (r < 0) break; diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index 1b5e13ec7f9..0dee514ba4c 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -249,3 +249,40 @@ int shadow_root(struct shadow_spine *s) { return s->root; } + +static void le64_inc(void *context, const void *value_le) +{ + struct dm_transaction_manager *tm = context; + __le64 v_le; + + memcpy(&v_le, value_le, sizeof(v_le)); + dm_tm_inc(tm, le64_to_cpu(v_le)); +} + +static void le64_dec(void *context, const void *value_le) +{ + struct dm_transaction_manager *tm = context; + __le64 v_le; + + memcpy(&v_le, value_le, sizeof(v_le)); + dm_tm_dec(tm, le64_to_cpu(v_le)); +} + +static int le64_equal(void *context, const void *value1_le, const void *value2_le) +{ + __le64 v1_le, v2_le; + + memcpy(&v1_le, value1_le, sizeof(v1_le)); + memcpy(&v2_le, value2_le, sizeof(v2_le)); + return v1_le == v2_le; +} + +void init_le64_type(struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt) +{ + vt->context = tm; + vt->size = sizeof(__le64); + vt->inc = le64_inc; + vt->dec = le64_dec; + vt->equal = le64_equal; +} diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index e3ecb0b824b..79233b051da 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -651,12 +651,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, struct btree_node *n; struct dm_btree_value_type le64_type; - le64_type.context = NULL; - le64_type.size = sizeof(__le64); - le64_type.inc = NULL; - le64_type.dec = NULL; - le64_type.equal = NULL; - + init_le64_type(info->tm, &le64_type); init_shadow_spine(&spine, info); for (level = 0; level < (info->levels - 1); level++) { From 03fbf7001db5ecf209a9f36faa5a607e39cd54e9 Mon Sep 17 00:00:00 2001 From: "Liu.Zhao" Date: Mon, 24 Aug 2015 08:36:12 -0700 Subject: [PATCH 19/55] USB: option: add ZTE PIDs commit 19ab6bc5674a30fdb6a2436b068d19a3c17dc73e upstream. This is intended to add ZTE device PIDs on kernel. Signed-off-by: Liu.Zhao [johan: sort the new entries ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 096438e4fb0..c918075e5ea 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -276,6 +276,10 @@ static void option_instat_callback(struct urb *urb); #define ZTE_PRODUCT_MF622 0x0001 #define ZTE_PRODUCT_MF628 0x0015 #define ZTE_PRODUCT_MF626 0x0031 +#define ZTE_PRODUCT_ZM8620_X 0x0396 +#define ZTE_PRODUCT_ME3620_MBIM 0x0426 +#define ZTE_PRODUCT_ME3620_X 0x1432 +#define ZTE_PRODUCT_ME3620_L 0x1433 #define ZTE_PRODUCT_AC2726 0xfff1 #define ZTE_PRODUCT_CDMA_TECH 0xfffe #define ZTE_PRODUCT_AC8710T 0xffff @@ -549,6 +553,18 @@ static const struct option_blacklist_info zte_mc2716_z_blacklist = { .sendsetup = BIT(1) | BIT(2) | BIT(3), }; +static const struct option_blacklist_info zte_me3620_mbim_blacklist = { + .reserved = BIT(2) | BIT(3) | BIT(4), +}; + +static const struct option_blacklist_info zte_me3620_xl_blacklist = { + .reserved = BIT(3) | BIT(4) | BIT(5), +}; + +static const struct option_blacklist_info zte_zm8620_x_blacklist = { + .reserved = BIT(3) | BIT(4) | BIT(5), +}; + static const struct option_blacklist_info huawei_cdc12_blacklist = { .reserved = BIT(1) | BIT(2), }; @@ -1579,6 +1595,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&zte_ad3812_z_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2716, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_mc2716_z_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_L), + .driver_info = (kernel_ulong_t)&zte_me3620_xl_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_MBIM), + .driver_info = (kernel_ulong_t)&zte_me3620_mbim_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_X), + .driver_info = (kernel_ulong_t)&zte_me3620_xl_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ZM8620_X), + .driver_info = (kernel_ulong_t)&zte_zm8620_x_blacklist }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x86, 0x10) }, From bad5bfcd07d6bcd73d8e4e141c9c1904b21d7053 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 2 Oct 2015 11:17:37 -0400 Subject: [PATCH 20/55] dm raid: fix round up of default region size commit 042745ee53a0a7c1f5aff191a4a24213c6dcfb52 upstream. Commit 3a0f9aaee028 ("dm raid: round region_size to power of two") intended to make sure that the default region size is a power of two. However, the logic in that commit is incorrect and sets the variable region_size to 0 or 1, depending on whether min_region_size is a power of two. Fix this logic, using roundup_pow_of_two(), so that region_size is properly rounded up to the next power of two. Signed-off-by: Mikulas Patocka Fixes: 3a0f9aaee028 ("dm raid: round region_size to power of two") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 84cddccc024..4805c15185c 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -325,8 +325,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) */ if (min_region_size > (1 << 13)) { /* If not a power of 2, make it the next power of 2 */ - if (min_region_size & (min_region_size - 1)) - region_size = 1 << fls(region_size); + region_size = roundup_pow_of_two(min_region_size); DMINFO("Choosing default region size of %lu sectors", region_size); } else { From 6709d8bdf704a67f8930374fd001d79eb4f1f41b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 21 Jul 2015 21:37:31 -0700 Subject: [PATCH 21/55] netfilter: nf_conntrack: Support expectations in different zones commit 4b31814d20cbe5cd4ccf18089751e77a04afe4f2 upstream. When zones were originally introduced, the expectation functions were all extended to perform lookup using the zone. However, insertion was not modified to check the zone. This means that two expectations which are intended to apply for different connections that have the same tuple but exist in different zones cannot both be tracked. Fixes: 5d0aa2ccd4 (netfilter: nf_conntrack: add support for "conntrack zones") Signed-off-by: Joe Stringer Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_expect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index c63b618cd61..95578da760d 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -202,7 +202,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } static inline int expect_matches(const struct nf_conntrack_expect *a, From 10ff4a0ca662575c9815b0bc6553d3f263efc24d Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 22 Sep 2015 09:29:38 -0500 Subject: [PATCH 22/55] disabling oplocks/leases via module parm enable_oplocks broken for SMB3 commit e0ddde9d44e37fbc21ce893553094ecf1a633ab5 upstream. leases (oplocks) were always requested for SMB2/SMB3 even when oplocks disabled in the cifs.ko module. Signed-off-by: Steve French Reviewed-by: Chandrika Srinivasan Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e12f258a5ff..66202da4c96 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -48,9 +48,13 @@ change_conf(struct TCP_Server_Info *server) break; default: server->echoes = true; - server->oplocks = true; + if (enable_oplocks) { + server->oplocks = true; + server->oplock_credits = 1; + } else + server->oplocks = false; + server->echo_credits = 1; - server->oplock_credits = 1; } server->credits -= server->echo_credits + server->oplock_credits; return 0; From 253db046eb2812dfe976203394e51f1a36fcbc67 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 23 Jun 2015 11:34:21 +0200 Subject: [PATCH 23/55] drm: Reject DRI1 hw lock ioctl functions for kms drivers commit da168d81b44898404d281d5dbe70154ab5f117c1 upstream. I've done some extensive history digging across libdrm, mesa and xf86-video-{intel,nouveau,ati}. The only potential user of this with kms drivers I could find was ttmtest, which once used drmGetLock still. But that mistake was quickly fixed up. Even the intel xvmc library (which otherwise was really good with using dri1 stuff in kms mode) managed to never take the hw lock for dri2 (and hence kms). Hence it should be save to unconditionally disallow this. Cc: Peter Antoine Reviewed-by: Peter Antoine Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_lock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index d752c96d609..bdceb60998d 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -58,6 +58,9 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) struct drm_master *master = file_priv->master; int ret = 0; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + ++file_priv->lock_count; if (lock->context == DRM_KERNEL_CONTEXT) { @@ -151,6 +154,9 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) struct drm_lock *lock = data; struct drm_master *master = file_priv->master; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", task_pid_nr(current), lock->context); From 0bccecfc32cb896d49ebd226b22cf6bfed45b35d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Sep 2015 11:41:42 -0700 Subject: [PATCH 24/55] USB: whiteheat: fix potential null-deref at probe commit cbb4be652d374f64661137756b8f357a1827d6a4 upstream. Fix potential null-pointer dereference at probe by making sure that the required endpoints are present. The whiteheat driver assumes there are at least five pairs of bulk endpoints, of which the final pair is used for the "command port". An attempt to bind to an interface with fewer bulk endpoints would currently lead to an oops. Fixes CVE-2015-5257. Reported-by: Moein Ghasemzadeh Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/whiteheat.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 5e3dd9f87ff..ae79c2245a7 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -81,6 +81,8 @@ static int whiteheat_firmware_download(struct usb_serial *serial, static int whiteheat_firmware_attach(struct usb_serial *serial); /* function prototypes for the Connect Tech WhiteHEAT serial converter */ +static int whiteheat_probe(struct usb_serial *serial, + const struct usb_device_id *id); static int whiteheat_attach(struct usb_serial *serial); static void whiteheat_release(struct usb_serial *serial); static int whiteheat_port_probe(struct usb_serial_port *port); @@ -117,6 +119,7 @@ static struct usb_serial_driver whiteheat_device = { .description = "Connect Tech - WhiteHEAT", .id_table = id_table_std, .num_ports = 4, + .probe = whiteheat_probe, .attach = whiteheat_attach, .release = whiteheat_release, .port_probe = whiteheat_port_probe, @@ -218,6 +221,34 @@ static int whiteheat_firmware_attach(struct usb_serial *serial) /***************************************************************************** * Connect Tech's White Heat serial driver functions *****************************************************************************/ + +static int whiteheat_probe(struct usb_serial *serial, + const struct usb_device_id *id) +{ + struct usb_host_interface *iface_desc; + struct usb_endpoint_descriptor *endpoint; + size_t num_bulk_in = 0; + size_t num_bulk_out = 0; + size_t min_num_bulk; + unsigned int i; + + iface_desc = serial->interface->cur_altsetting; + + for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { + endpoint = &iface_desc->endpoint[i].desc; + if (usb_endpoint_is_bulk_in(endpoint)) + ++num_bulk_in; + if (usb_endpoint_is_bulk_out(endpoint)) + ++num_bulk_out; + } + + min_num_bulk = COMMAND_PORT + 1; + if (num_bulk_in < min_num_bulk || num_bulk_out < min_num_bulk) + return -ENODEV; + + return 0; +} + static int whiteheat_attach(struct usb_serial *serial) { struct usb_serial_port *command_port; From 5eaec968f3377c20e7da49f8e3aef1d874a0591b Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 21 Sep 2015 17:46:13 +0300 Subject: [PATCH 25/55] usb: xhci: Clear XHCI_STATE_DYING on start commit e5bfeab0ad515b4f6df39fe716603e9dc6d3dfd0 upstream. For whatever reason if XHCI died in the previous instant then it will never recover on the next xhci_start unless we clear the DYING flag. Signed-off-by: Roger Quadros Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1f901fc2559..1a22d186bef 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -139,7 +139,8 @@ static int xhci_start(struct xhci_hcd *xhci) "waited %u microseconds.\n", XHCI_MAX_HALT_USEC); if (!ret) - xhci->xhc_state &= ~XHCI_STATE_HALTED; + xhci->xhc_state &= ~(XHCI_STATE_HALTED | XHCI_STATE_DYING); + return ret; } From 560db83c3e96376df8c6b8874c65f5d28031a493 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 21 Sep 2015 17:46:16 +0300 Subject: [PATCH 26/55] xhci: change xhci 1.0 only restrictions to support xhci 1.1 commit dca7794539eff04b786fb6907186989e5eaaa9c2 upstream. Some changes between xhci 0.96 and xhci 1.0 specifications forced us to check the hci version in code, some of these checks were implemented as hci_version == 1.0, which will not work with new xhci 1.1 controllers. xhci 1.1 behaves similar to xhci 1.0 in these cases, so change these checks to hci_version >= 1.0 Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 6 +++--- drivers/usb/host/xhci-ring.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 31bed5f7d0e..87e82e6b0c3 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1473,10 +1473,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, * use Event Data TRBs, and we don't chain in a link TRB on short * transfers, we're basically dividing by 1. * - * xHCI 1.0 specification indicates that the Average TRB Length should - * be set to 8 for control endpoints. + * xHCI 1.0 and 1.1 specification indicates that the Average TRB Length + * should be set to 8 for control endpoints. */ - if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version == 0x100) + if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) ep_ctx->tx_info |= cpu_to_le32(AVG_TRB_LENGTH_FOR_EP(8)); else ep_ctx->tx_info |= diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index fde0277adc2..fe223cfaab0 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3511,8 +3511,8 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (start_cycle == 0) field |= 0x1; - /* xHCI 1.0 6.4.1.2.1: Transfer Type field */ - if (xhci->hci_version == 0x100) { + /* xHCI 1.0/1.1 6.4.1.2.1: Transfer Type field */ + if (xhci->hci_version >= 0x100) { if (urb->transfer_buffer_length > 0) { if (setup->bRequestType & USB_DIR_IN) field |= TRB_TX_TYPE(TRB_DATA_IN); From 760f9dc129d41a76934d149ccc762590006a6f6a Mon Sep 17 00:00:00 2001 From: Reyad Attiyat Date: Thu, 6 Aug 2015 19:23:58 +0300 Subject: [PATCH 27/55] usb: xhci: Add support for URB_ZERO_PACKET to bulk/sg transfers commit 4758dcd19a7d9ba9610b38fecb93f65f56f86346 upstream. This commit checks for the URB_ZERO_PACKET flag and creates an extra zero-length td if the urb transfer length is a multiple of the endpoint's max packet length. Signed-off-by: Reyad Attiyat Signed-off-by: Mathias Nyman Cc: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 66 ++++++++++++++++++++++++++++-------- drivers/usb/host/xhci.c | 5 +++ 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index fe223cfaab0..4ba6974dd4b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3167,9 +3167,11 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct xhci_td *td; struct scatterlist *sg; int num_sgs; - int trb_buff_len, this_sg_len, running_total; + int trb_buff_len, this_sg_len, running_total, ret; unsigned int total_packet_count; + bool zero_length_needed; bool first_trb; + int last_trb_num; u64 addr; bool more_trbs_coming; @@ -3185,13 +3187,27 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, total_packet_count = DIV_ROUND_UP(urb->transfer_buffer_length, usb_endpoint_maxp(&urb->ep->desc)); - trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id], + ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, urb->stream_id, num_trbs, urb, 0, mem_flags); - if (trb_buff_len < 0) - return trb_buff_len; + if (ret < 0) + return ret; urb_priv = urb->hcpriv; + + /* Deal with URB_ZERO_PACKET - need one more td/trb */ + zero_length_needed = urb->transfer_flags & URB_ZERO_PACKET && + urb_priv->length == 2; + if (zero_length_needed) { + num_trbs++; + xhci_dbg(xhci, "Creating zero length td.\n"); + ret = prepare_transfer(xhci, xhci->devs[slot_id], + ep_index, urb->stream_id, + 1, urb, 1, mem_flags); + if (ret < 0) + return ret; + } + td = urb_priv->td[0]; /* @@ -3221,6 +3237,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length; first_trb = true; + last_trb_num = zero_length_needed ? 2 : 1; /* Queue the first TRB, even if it's zero-length */ do { u32 field = 0; @@ -3238,12 +3255,15 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, /* Chain all the TRBs together; clear the chain bit in the last * TRB to indicate it's the last TRB in the chain. */ - if (num_trbs > 1) { + if (num_trbs > last_trb_num) { field |= TRB_CHAIN; - } else { - /* FIXME - add check for ZERO_PACKET flag before this */ + } else if (num_trbs == last_trb_num) { td->last_trb = ep_ring->enqueue; field |= TRB_IOC; + } else if (zero_length_needed && num_trbs == 1) { + trb_buff_len = 0; + urb_priv->td[1]->last_trb = ep_ring->enqueue; + field |= TRB_IOC; } /* Only set interrupt on short packet for IN endpoints */ @@ -3305,7 +3325,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (running_total + trb_buff_len > urb->transfer_buffer_length) trb_buff_len = urb->transfer_buffer_length - running_total; - } while (running_total < urb->transfer_buffer_length); + } while (num_trbs > 0); check_trb_math(urb, num_trbs, running_total); giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, @@ -3323,7 +3343,9 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, int num_trbs; struct xhci_generic_trb *start_trb; bool first_trb; + int last_trb_num; bool more_trbs_coming; + bool zero_length_needed; int start_cycle; u32 field, length_field; @@ -3354,7 +3376,6 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, num_trbs++; running_total += TRB_MAX_BUFF_SIZE; } - /* FIXME: this doesn't deal with URB_ZERO_PACKET - need one more */ ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, urb->stream_id, @@ -3363,6 +3384,20 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, return ret; urb_priv = urb->hcpriv; + + /* Deal with URB_ZERO_PACKET - need one more td/trb */ + zero_length_needed = urb->transfer_flags & URB_ZERO_PACKET && + urb_priv->length == 2; + if (zero_length_needed) { + num_trbs++; + xhci_dbg(xhci, "Creating zero length td.\n"); + ret = prepare_transfer(xhci, xhci->devs[slot_id], + ep_index, urb->stream_id, + 1, urb, 1, mem_flags); + if (ret < 0) + return ret; + } + td = urb_priv->td[0]; /* @@ -3384,7 +3419,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length; first_trb = true; - + last_trb_num = zero_length_needed ? 2 : 1; /* Queue the first TRB, even if it's zero-length */ do { u32 remainder = 0; @@ -3401,12 +3436,15 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, /* Chain all the TRBs together; clear the chain bit in the last * TRB to indicate it's the last TRB in the chain. */ - if (num_trbs > 1) { + if (num_trbs > last_trb_num) { field |= TRB_CHAIN; - } else { - /* FIXME - add check for ZERO_PACKET flag before this */ + } else if (num_trbs == last_trb_num) { td->last_trb = ep_ring->enqueue; field |= TRB_IOC; + } else if (zero_length_needed && num_trbs == 1) { + trb_buff_len = 0; + urb_priv->td[1]->last_trb = ep_ring->enqueue; + field |= TRB_IOC; } /* Only set interrupt on short packet for IN endpoints */ @@ -3444,7 +3482,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length - running_total; if (trb_buff_len > TRB_MAX_BUFF_SIZE) trb_buff_len = TRB_MAX_BUFF_SIZE; - } while (running_total < urb->transfer_buffer_length); + } while (num_trbs > 0); check_trb_math(urb, num_trbs, running_total); giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1a22d186bef..a3431e90345 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1300,6 +1300,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) if (usb_endpoint_xfer_isoc(&urb->ep->desc)) size = urb->number_of_packets; + else if (usb_endpoint_is_bulk_out(&urb->ep->desc) && + urb->transfer_buffer_length > 0 && + urb->transfer_flags & URB_ZERO_PACKET && + !(urb->transfer_buffer_length % usb_endpoint_maxp(&urb->ep->desc))) + size = 2; else size = 1; From 162d3c2fd8b7ca5971fc1a366013463d7511afdd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Sep 2015 12:48:40 -0400 Subject: [PATCH 28/55] Initialize msg/shm IPC objects before doing ipc_addid() commit b9a532277938798b53178d5a66af6e2915cb27cf upstream. As reported by Dmitry Vyukov, we really shouldn't do ipc_addid() before having initialized the IPC object state. Yes, we initialize the IPC object in a locked state, but with all the lockless RCU lookup work, that IPC object lock no longer means that the state cannot be seen. We already did this for the IPC semaphore code (see commit e8577d1f0329: "ipc/sem.c: fully initialize sem_array before making it visible") but we clearly forgot about msg and shm. Reported-by: Dmitry Vyukov Cc: Manfred Spraul Cc: Davidlohr Bueso Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/msg.c | 14 +++++++------- ipc/shm.c | 12 ++++++------ ipc/util.c | 8 ++++---- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 52770bfde2a..32aaaab15c5 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -202,13 +202,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } - /* ipc_addid() locks msq upon success. */ - id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); - if (id < 0) { - ipc_rcu_putref(msq, msg_rcu_free); - return id; - } - msq->q_stime = msq->q_rtime = 0; msq->q_ctime = get_seconds(); msq->q_cbytes = msq->q_qnum = 0; @@ -218,6 +211,13 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); + /* ipc_addid() locks msq upon success. */ + id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); + if (id < 0) { + ipc_rcu_putref(msq, msg_rcu_free); + return id; + } + ipc_unlock_object(&msq->q_perm); rcu_read_unlock(); diff --git a/ipc/shm.c b/ipc/shm.c index 6dc55af8a29..08b14f69d6c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -544,12 +544,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (IS_ERR(file)) goto no_file; - id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); - if (id < 0) { - error = id; - goto no_id; - } - shp->shm_cprid = task_tgid_vnr(current); shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; @@ -559,6 +553,12 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_file = file; shp->shm_creator = current; + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); + if (id < 0) { + error = id; + goto no_id; + } + /* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. diff --git a/ipc/util.c b/ipc/util.c index 7684f41bce7..735342570a8 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -292,6 +292,10 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) rcu_read_lock(); spin_lock(&new->lock); + current_euid_egid(&euid, &egid); + new->cuid = new->uid = euid; + new->gid = new->cgid = egid; + id = idr_alloc(&ids->ipcs_idr, new, (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, GFP_NOWAIT); @@ -304,10 +308,6 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) ids->in_use++; - current_euid_egid(&euid, &egid); - new->cuid = new->uid = euid; - new->gid = new->cgid = egid; - if (next_id < 0) { new->seq = ids->seq++; if (ids->seq > ids->seq_max) From 212c45ac20229c1752dd56fa38e9a8d57127974b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 27 Jun 2015 14:39:30 +0300 Subject: [PATCH 29/55] ipvs: do not use random local source address for tunnels commit 4754957f04f5f368792a0eb7dab0ae89fb93dcfd upstream. Michael Vallaly reports about wrong source address used in rare cases for tunneled traffic. Looks like __ip_vs_get_out_rt in 3.10+ is providing uninitialized dest_dst->dst_saddr.ip because ip_vs_dest_dst_alloc uses kmalloc. While we retry after seeing EINVAL from routing for data that does not look like valid local address, it still succeeded when this memory was previously used from other dests and with different local addresses. As result, we can use valid local address that is not suitable for our real server. Fix it by providing 0.0.0.0 every time our cache is refreshed. By this way we will get preferred source address from routing. Reported-by: Michael Vallaly Fixes: 026ace060dfe ("ipvs: optimize dst usage for real server") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_xmit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 1692e753475..c3d204973db 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -129,7 +129,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; From 2fc9fc91a54a04877e441e261efcfe38aea882bf Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 8 Jul 2015 08:31:33 +0300 Subject: [PATCH 30/55] ipvs: fix crash with sync protocol v0 and FTP commit 56184858d1fc95c46723436b455cb7261cd8be6f upstream. Fix crash in 3.5+ if FTP is used after switching sync_version to 0. Fixes: 749c42b620a9 ("ipvs: reduce sync rate with time thresholds") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e476cc7dc80..19f9aa4e698 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -599,7 +599,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp->control, pkts); + ip_vs_sync_conn(net, cp, pkts); } } From 4d2c033d846d0cf835ecb2ab6f3e3d0e002b915d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 7 Jan 2015 13:49:08 +0100 Subject: [PATCH 31/55] udf: Check length of extended attributes and allocation descriptors commit 23b133bdc452aa441fcb9b82cbf6dd05cfd342d0 upstream. Check length of extended attributes and allocation descriptors when loading inodes from disk. Otherwise corrupted filesystems could confuse the code and make the kernel oops. Reported-by: Carl Henrik Lunde Cc: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Jiri Slaby [Jan and Jiri fixed it in 3.12 stable, i ported it to 3.10 stable, replaced bs by inode->i_sb->s_blocksize] Signed-off-by: Zhang Zhen Signed-off-by: Greg Kroah-Hartman --- fs/udf/inode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index aa023283cc8..789814f2743 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1495,6 +1495,16 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); } + /* + * Sanity check length of allocation descriptors and extended attrs to + * avoid integer overflows + */ + if (iinfo->i_lenEAttr > inode->i_sb->s_blocksize || iinfo->i_lenAlloc > inode->i_sb->s_blocksize) + return; + /* Now do exact checks */ + if (udf_file_entry_alloc_offset(inode) + iinfo->i_lenAlloc > inode->i_sb->s_blocksize) + return; + switch (fe->icbTag.fileType) { case ICBTAG_FILE_TYPE_DIRECTORY: inode->i_op = &udf_dir_inode_operations; From 789ef3db34ad5bbb3660dd7cf16810974021f9f1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 19 Sep 2015 07:00:18 -0700 Subject: [PATCH 32/55] regmap: debugfs: Ensure we don't underflow when printing access masks commit b763ec17ac762470eec5be8ebcc43e4f8b2c2b82 upstream. If a read is attempted which is smaller than the line length then we may underflow the subtraction we're doing with the unsigned size_t type so move some of the calculation to be additions on the right hand side instead in order to avoid this. Reported-by: Rasmus Villemoes Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index b41994fd846..d576dc90aa2 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -419,7 +419,7 @@ static ssize_t regmap_access_read_file(struct file *file, /* If we're in the region the user is trying to read */ if (p >= *ppos) { /* ...but not beyond it */ - if (buf_pos >= count - 1 - tot_len) + if (buf_pos + tot_len + 1 >= count) break; /* Format the register */ From 0bdf7953e7c97de5100d5f4ecad7a5d0c760503c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 19 Sep 2015 07:12:34 -0700 Subject: [PATCH 33/55] regmap: debugfs: Don't bother actually printing when calculating max length commit 176fc2d5770a0990eebff903ba680d2edd32e718 upstream. The in kernel snprintf() will conveniently return the actual length of the printed string even if not given an output beffer at all so just do that rather than relying on the user to pass in a suitable buffer, ensuring that we don't need to worry if the buffer was truncated due to the size of the buffer passed in. Reported-by: Rasmus Villemoes Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-debugfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index d576dc90aa2..2670bebb058 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -23,8 +23,7 @@ static struct dentry *regmap_debugfs_root; /* Calculate the length of a fixed format */ static size_t regmap_calc_reg_len(int max_val, char *buf, size_t buf_size) { - snprintf(buf, buf_size, "%x", max_val); - return strlen(buf); + return snprintf(NULL, 0, "%x", max_val); } static ssize_t regmap_name_read_file(struct file *file, From db3611bc16541c7684220ffe43ded0093e7f0567 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 18 Sep 2015 23:41:23 +0200 Subject: [PATCH 34/55] security: fix typo in security_task_prctl commit b7f76ea2ef6739ee484a165ffbac98deb855d3d3 upstream. Signed-off-by: Jann Horn Reviewed-by: Andy Lutomirski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/security.h b/include/linux/security.h index 4686491852a..4e50307c4c6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2394,7 +2394,7 @@ static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg4, unsigned long arg5) { - return cap_task_prctl(option, arg2, arg3, arg3, arg5); + return cap_task_prctl(option, arg2, arg3, arg4, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) From 560fac952b6eefd68535c09d971f2d4b9a4e28b6 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 21 Sep 2015 17:46:09 +0300 Subject: [PATCH 35/55] usb: Use the USB_SS_MULT() macro to get the burst multiplier. commit ff30cbc8da425754e8ab96904db1d295bd034f27 upstream. Bits 1:0 of the bmAttributes are used for the burst multiplier. The rest of the bits used to be reserved (zero), but USB3.1 takes bit 7 into use. Use the existing USB_SS_MULT() macro instead to make sure the mult value and hence max packet calculations are correct for USB3.1 devices. Note that burst multiplier in bmAttributes is zero based and that the USB_SS_MULT() macro adds one. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 65243832519..85756bd3674 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -114,7 +114,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 16; } else if (usb_endpoint_xfer_isoc(&ep->desc) && - desc->bmAttributes > 2) { + USB_SS_MULT(desc->bmAttributes) > 3) { dev_warn(ddev, "Isoc endpoint has Mult of %d in " "config %d interface %d altsetting %d ep %d: " "setting to 3\n", desc->bmAttributes + 1, @@ -123,7 +123,8 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, } if (usb_endpoint_xfer_isoc(&ep->desc)) - max_tx = (desc->bMaxBurst + 1) * (desc->bmAttributes + 1) * + max_tx = (desc->bMaxBurst + 1) * + (USB_SS_MULT(desc->bmAttributes)) * usb_endpoint_maxp(&ep->desc); else if (usb_endpoint_xfer_int(&ep->desc)) max_tx = usb_endpoint_maxp(&ep->desc) * From fcb7ae4e6596f78e80ca3947f3290a062206e8fd Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Thu, 1 Oct 2015 14:10:22 -0700 Subject: [PATCH 36/55] usb: Add device quirk for Logitech PTZ cameras commit 72194739f54607bbf8cfded159627a2015381557 upstream. Add a device quirk for the Logitech PTZ Pro Camera and its sibling the ConferenceCam CC3000e Camera. This fixes the failed camera enumeration on some boot, particularly on machines with fast CPU. Tested by connecting a Logitech PTZ Pro Camera to a machine with a Haswell Core i7-4600U CPU @ 2.10GHz, and doing thousands of reboot cycles while recording the kernel logs and taking camera picture after each boot. Before the patch, more than 7% of the boots show some enumeration transfer failures and in a few of them, the kernel is giving up before actually enumerating the webcam. After the patch, the enumeration has been correct on every reboot. Signed-off-by: Vincent Palatin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index b73f3031a66..ff987e8d793 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -53,6 +53,13 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech ConferenceCam CC3000e */ + { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0848), .driver_info = USB_QUIRK_DELAY_INIT }, + + /* Logitech PTZ Pro Camera */ + { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, From b196b1125c7d54fc5df7e0a2a2525540cf66857d Mon Sep 17 00:00:00 2001 From: Yao-Wen Mao Date: Mon, 31 Aug 2015 14:24:09 +0800 Subject: [PATCH 37/55] USB: Add reset-resume quirk for two Plantronics usb headphones. commit 8484bf2981b3d006426ac052a3642c9ce1d8d980 upstream. These two headphones need a reset-resume quirk to properly resume to original volume level. Signed-off-by: Yao-Wen Mao Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index ff987e8d793..d4db4ea4a92 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -84,6 +84,12 @@ static const struct usb_device_id usb_quirk_list[] = { /* Philips PSC805 audio device */ { USB_DEVICE(0x0471, 0x0155), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Plantronic Audio 655 DSP */ + { USB_DEVICE(0x047f, 0xc008), .driver_info = USB_QUIRK_RESET_RESUME }, + + /* Plantronic Audio 648 USB */ + { USB_DEVICE(0x047f, 0xc013), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Artisman Watchdog Dongle */ { USB_DEVICE(0x04b4, 0x0526), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, From 1335a48ac221194597294b5012aaca14484da0ad Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 27 Mar 2015 08:33:43 +0000 Subject: [PATCH 38/55] MIPS: dma-default: Fix 32-bit fall back to GFP_DMA commit 53960059d56ecef67d4ddd546731623641a3d2d1 upstream. If there is a DMA zone (usually 24bit = 16MB I believe), but no DMA32 zone, as is the case for some 32-bit kernels, then massage_gfp_flags() will cause DMA memory allocated for devices with a 32..63-bit coherent_dma_mask to fall back to using __GFP_DMA, even though there may only be 32-bits of physical address available anyway. Correct that case to compare against a mask the size of phys_addr_t instead of always using a 64-bit mask. Signed-off-by: James Hogan Fixes: a2e715a86c6d ("MIPS: DMA: Fix computation of DMA flags from device's coherent_dma_mask.") Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9610/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/dma-default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 23129d1005d..5fa55b80b7b 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -91,7 +91,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) else #endif #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8)) dma_flag = __GFP_DMA; else #endif From d7b00aadea64b68ef9025c6622a78a7b703b47e1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 22 Jul 2015 10:20:07 +1000 Subject: [PATCH 39/55] md: flush ->event_work before stopping array. commit ee5d004fd0591536a061451eba2b187092e9127c upstream. The 'event_work' worker used by dm-raid may still be running when the array is stopped. This can result in an oops. So flush the workqueue on which it is run after detaching and before destroying the device. Reported-by: Heinz Mauelshagen Signed-off-by: NeilBrown Fixes: 9d09e663d550 ("dm: raid456 basic support") Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 37ff00d014b..7c45286e266 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5306,6 +5306,8 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void __md_stop(struct mddev *mddev) { mddev->ready = 0; + /* Ensure ->event_work is done */ + flush_workqueue(md_misc_wq); mddev->pers->stop(mddev); if (mddev->pers->sync_request && mddev->to_remove == NULL) mddev->to_remove = &md_redundancy_group; From 892e053cd7afcc54b2958130cab94ee2123be2d7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 10 Sep 2015 14:36:21 +1000 Subject: [PATCH 40/55] powerpc/MSI: Fix race condition in tearing down MSI interrupts commit e297c939b745e420ef0b9dc989cb87bda617b399 upstream. This fixes a race which can result in the same virtual IRQ number being assigned to two different MSI interrupts. The most visible consequence of that is usually a warning and stack trace from the sysfs code about an attempt to create a duplicate entry in sysfs. The race happens when one CPU (say CPU 0) is disposing of an MSI while another CPU (say CPU 1) is setting up an MSI. CPU 0 calls (for example) pnv_teardown_msi_irqs(), which calls msi_bitmap_free_hwirqs() to indicate that the MSI (i.e. its hardware IRQ number) is no longer in use. Then, before CPU 0 gets to calling irq_dispose_mapping() to free up the virtal IRQ number, CPU 1 comes in and calls msi_bitmap_alloc_hwirqs() to allocate an MSI, and gets the same hardware IRQ number that CPU 0 just freed. CPU 1 then calls irq_create_mapping() to get a virtual IRQ number, which sees that there is currently a mapping for that hardware IRQ number and returns the corresponding virtual IRQ number (which is the same virtual IRQ number that CPU 0 was using). CPU 0 then calls irq_dispose_mapping() and frees that virtual IRQ number. Now, if another CPU comes along and calls irq_create_mapping(), it is likely to get the virtual IRQ number that was just freed, resulting in the same virtual IRQ number apparently being used for two different hardware interrupts. To fix this race, we just move the call to msi_bitmap_free_hwirqs() to after the call to irq_dispose_mapping(). Since virq_to_hw() doesn't work for the virtual IRQ number after irq_dispose_mapping() has been called, we need to call it before irq_dispose_mapping() and remember the result for the msi_bitmap_free_hwirqs() call. The pattern of calling msi_bitmap_free_hwirqs() before irq_dispose_mapping() appears in 5 places under arch/powerpc, and appears to have originated in commit 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend") from 2007. Fixes: 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend") Reported-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci.c | 5 +++-- arch/powerpc/sysdev/fsl_msi.c | 5 +++-- arch/powerpc/sysdev/mpic_pasemi_msi.c | 6 ++++-- arch/powerpc/sysdev/mpic_u3msi.c | 5 +++-- arch/powerpc/sysdev/ppc4xx_msi.c | 5 +++-- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 8ee842ce3ab..0473d31b3a4 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -106,6 +106,7 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -113,10 +114,10 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index ab02db3d02d..6616fa61994 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -108,15 +108,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 38e62382070..9e14d82287a 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -74,6 +74,7 @@ static int pasemi_msi_check_device(struct pci_dev *pdev, int nvec, int type) static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -81,10 +82,11 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, + hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 9a7aa0ed9c1..dfc3486bf80 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -124,15 +124,16 @@ static int u3msi_msi_check_device(struct pci_dev *pdev, int nvec, int type) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 43948da837a..c3e65129940 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -121,16 +121,17 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); list_for_each_entry(entry, &dev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } From a5bae33a3530cdfa59e86899e06e4213372b2bce Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 22 Sep 2015 23:58:07 +0200 Subject: [PATCH 41/55] UBI: Validate data_size commit 281fda27673f833a01d516658a64d22a32c8e072 upstream. Make sure that data_size is less than LEB size. Otherwise a handcrafted UBI image is able to trigger an out of bounds memory access in ubi_compare_lebs(). Signed-off-by: Richard Weinberger Reviewed-by: David Gstir Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/io.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index bf79def4012..8822e880833 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -931,6 +931,11 @@ static int validate_vid_hdr(const struct ubi_device *ubi, goto bad; } + if (data_size > ubi->leb_size) { + ubi_err("bad data_size"); + goto bad; + } + if (vol_type == UBI_VID_STATIC) { /* * Although from high-level point of view static volumes may From 480ed182e39ca90b19282dbc3e3c77a3517c41fc Mon Sep 17 00:00:00 2001 From: shengyong Date: Mon, 28 Sep 2015 17:57:19 +0000 Subject: [PATCH 42/55] UBI: return ENOSPC if no enough space available commit 7c7feb2ebfc9c0552c51f0c050db1d1a004faac5 upstream. UBI: attaching mtd1 to ubi0 UBI: scanning is finished UBI error: init_volumes: not enough PEBs, required 706, available 686 UBI error: ubi_wl_init: no enough physical eraseblocks (-20, need 1) UBI error: ubi_attach_mtd_dev: failed to attach mtd1, error -12 <= NOT ENOMEM UBI error: ubi_init: cannot attach mtd1 If available PEBs are not enough when initializing volumes, return -ENOSPC directly. If available PEBs are not enough when initializing WL, return -ENOSPC instead of -ENOMEM. Signed-off-by: Sheng Yong Signed-off-by: Richard Weinberger Reviewed-by: David Gstir Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/vtbl.c | 1 + drivers/mtd/ubi/wl.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index d77b1c1d7c7..bebf49e0dbe 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -651,6 +651,7 @@ static int init_volumes(struct ubi_device *ubi, if (ubi->corr_peb_count) ubi_err("%d PEBs are corrupted and not used", ubi->corr_peb_count); + return -ENOSPC; } ubi->rsvd_pebs += reserved_pebs; ubi->avail_pebs -= reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index c08254016fe..3375bfb1b24 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1978,6 +1978,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->corr_peb_count) ubi_err("%d PEBs are corrupted and not used", ubi->corr_peb_count); + err = -ENOSPC; goto out_free; } ubi->avail_pebs -= reserved_pebs; From ef5844a015293453e4031ad336e459729cfe3cd9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 21 Jul 2015 08:36:07 -0400 Subject: [PATCH 43/55] IB/qib: Change lkey table allocation to support more MRs commit d6f1c17e162b2a11e708f28fa93f2f79c164b442 upstream. The lkey table is allocated with with a get_user_pages() with an order based on a number of index bits from a module parameter. The underlying kernel code cannot allocate that many contiguous pages. There is no reason the underlying memory needs to be physically contiguous. This patch: - switches the allocation/deallocation to vmalloc/vfree - caps the number of bits to 23 to insure at least 1 generation bit o this matches the module parameter description Reviewed-by: Vinit Agnihotri Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib.h | 27 +++++++++++---------------- drivers/infiniband/hw/qib/qib_keys.c | 4 ++++ drivers/infiniband/hw/qib/qib_verbs.c | 14 ++++++++++---- drivers/infiniband/hw/qib/qib_verbs.h | 2 ++ 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index d1b30c66d60..727a9dc44b9 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1467,27 +1467,22 @@ extern struct mutex qib_mutex; * first to avoid possible serial port delays from printk. */ #define qib_early_err(dev, fmt, ...) \ - do { \ - dev_err(dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_err(dev, fmt, ##__VA_ARGS__) #define qib_dev_err(dd, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ - qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define qib_dev_warn(dd, fmt, ...) \ + dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) #define qib_dev_porterr(dd, port, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__); \ - } while (0) - + dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ + qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ + ##__VA_ARGS__) #define qib_devinfo(pcidev, fmt, ...) \ - do { \ - dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 3b9afccaaad..eabe54738be 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -86,6 +86,10 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) * unrestricted LKEY. */ rkt->gen++; + /* + * bits are capped in qib_verbs.c to insure enough bits + * for generation number + */ mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) << 8); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 904c384aa36..6c809bf5012 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "qib.h" #include "qib_common.h" @@ -2084,10 +2085,16 @@ int qib_register_ib_device(struct qib_devdata *dd) * the LKEY). The remaining bits act as a generation number or tag. */ spin_lock_init(&dev->lk_table.lock); + /* insure generation is at least 4 bits see keys.c */ + if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) { + qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n", + ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS); + ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS; + } dev->lk_table.max = 1 << ib_qib_lkey_table_size; lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); dev->lk_table.table = (struct qib_mregion __rcu **) - __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); + vmalloc(lk_tab_size); if (dev->lk_table.table == NULL) { ret = -ENOMEM; goto err_lk; @@ -2260,7 +2267,7 @@ err_tx: sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); err_hdrs: - free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size)); + vfree(dev->lk_table.table); err_lk: kfree(dev->qp_table); err_qpt: @@ -2314,8 +2321,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - free_pages((unsigned long) dev->lk_table.table, - get_order(lk_tab_size)); + vfree(dev->lk_table.table); kfree(dev->qp_table); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index aff8b2c1788..e4f9fff5189 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -645,6 +645,8 @@ struct qib_qpn_table { struct qpn_map map[QPNMAP_ENTRIES]; }; +#define MAX_LKEY_TABLE_BITS 23 + struct qib_lkey_table { spinlock_t lock; /* protect changes in this struct */ u32 next; /* next unused index (speeds search) */ From 91397d567646df4ede8619beb678252e82fafb2a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 15 Aug 2015 13:36:12 -0500 Subject: [PATCH 44/55] dcache: Handle escaped paths in prepend_path commit cde93be45a8a90d8c264c776fab63487b5038a65 upstream. A rename can result in a dentry that by walking up d_parent will never reach it's mnt_root. For lack of a better term I call this an escaped path. prepend_path is called by four different functions __d_path, d_absolute_path, d_path, and getcwd. __d_path only wants to see paths are connected to the root it passes in. So __d_path needs prepend_path to return an error. d_absolute_path similarly wants to see paths that are connected to some root. Escaped paths are not connected to any mnt_root so d_absolute_path needs prepend_path to return an error greater than 1. So escaped paths will be treated like paths on lazily unmounted mounts. getcwd needs to prepend "(unreachable)" so getcwd also needs prepend_path to return an error. d_path is the interesting hold out. d_path just wants to print something, and does not care about the weird cases. Which raises the question what should be printed? Given that / should result in -ENOENT I believe it is desirable for escaped paths to be printed as empty paths. As there are not really any meaninful path components when considered from the perspective of a mount tree. So tweak prepend_path to return an empty path with an new error code of 3 when it encounters an escaped path. Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index f1e80178597..17222fa5bdc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2534,6 +2534,8 @@ static int prepend_path(const struct path *path, struct dentry *dentry = path->dentry; struct vfsmount *vfsmnt = path->mnt; struct mount *mnt = real_mount(vfsmnt); + char *orig_buffer = *buffer; + int orig_len = *buflen; bool slash = false; int error = 0; @@ -2541,6 +2543,14 @@ static int prepend_path(const struct path *path, struct dentry * parent; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + /* Escaped? */ + if (dentry != vfsmnt->mnt_root) { + *buffer = orig_buffer; + *buflen = orig_len; + slash = false; + error = 3; + goto global_root; + } /* Global root? */ if (!mnt_has_parent(mnt)) goto global_root; From fa6ef735862a25daff2a3ee3aba1e32f278f21c5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 15 Aug 2015 20:27:13 -0500 Subject: [PATCH 45/55] vfs: Test for and handle paths that are unreachable from their mnt_root commit 397d425dc26da728396e66d392d5dcb8dac30c37 upstream. In rare cases a directory can be renamed out from under a bind mount. In those cases without special handling it becomes possible to walk up the directory tree to the root dentry of the filesystem and down from the root dentry to every other file or directory on the filesystem. Like division by zero .. from an unconnected path can not be given a useful semantic as there is no predicting at which path component the code will realize it is unconnected. We certainly can not match the current behavior as the current behavior is a security hole. Therefore when encounting .. when following an unconnected path return -ENOENT. - Add a function path_connected to verify path->dentry is reachable from path->mnt.mnt_root. AKA to validate that rename did not do something nasty to the bind mount. To avoid races path_connected must be called after following a path component to it's next path component. Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 036c21246d6..157c3dbacf6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -473,6 +473,24 @@ void path_put(const struct path *path) } EXPORT_SYMBOL(path_put); +/** + * path_connected - Verify that a path->dentry is below path->mnt.mnt_root + * @path: nameidate to verify + * + * Rename can sometimes move a file or directory outside of a bind + * mount, path_connected allows those cases to be detected. + */ +static bool path_connected(const struct path *path) +{ + struct vfsmount *mnt = path->mnt; + + /* Only bind mounts can have disconnected paths */ + if (mnt->mnt_root == mnt->mnt_sb->s_root) + return true; + + return is_subdir(path->dentry, mnt->mnt_root); +} + /* * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't @@ -1148,6 +1166,8 @@ static int follow_dotdot_rcu(struct nameidata *nd) goto failed; nd->path.dentry = parent; nd->seq = seq; + if (unlikely(!path_connected(&nd->path))) + goto failed; break; } if (!follow_up_rcu(&nd->path)) @@ -1231,7 +1251,7 @@ static void follow_mount(struct path *path) } } -static void follow_dotdot(struct nameidata *nd) +static int follow_dotdot(struct nameidata *nd) { set_root(nd); @@ -1246,6 +1266,10 @@ static void follow_dotdot(struct nameidata *nd) /* rare case of legitimate dget_parent()... */ nd->path.dentry = dget_parent(nd->path.dentry); dput(old); + if (unlikely(!path_connected(&nd->path))) { + path_put(&nd->path); + return -ENOENT; + } break; } if (!follow_up(&nd->path)) @@ -1253,6 +1277,7 @@ static void follow_dotdot(struct nameidata *nd) } follow_mount(&nd->path); nd->inode = nd->path.dentry->d_inode; + return 0; } /* @@ -1476,7 +1501,7 @@ static inline int handle_dots(struct nameidata *nd, int type) if (follow_dotdot_rcu(nd)) return -ECHILD; } else - follow_dotdot(nd); + return follow_dotdot(nd); } return 0; } From 339cb27fc56dd226e6ec59b15010527b12b0411a Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Mon, 21 Sep 2015 21:39:50 +0100 Subject: [PATCH 46/55] arm64: readahead: fault retry breaks mmap file read random detection commit 569ba74a7ba69f46ce2950bf085b37fea2408385 upstream. This is the arm64 portion of commit 45cac65b0fcd ("readahead: fault retry breaks mmap file read random detection"), which was absent from the initial port and has since gone unnoticed. The original commit says: > .fault now can retry. The retry can break state machine of .fault. In > filemap_fault, if page is miss, ra->mmap_miss is increased. In the second > try, since the page is in page cache now, ra->mmap_miss is decreased. And > these are done in one fault, so we can't detect random mmap file access. > > Add a new flag to indicate .fault is tried once. In the second try, skip > ra->mmap_miss decreasing. The filemap_fault state machine is ok with it. With this change, Mark reports that: > Random read improves by 250%, sequential read improves by 40%, and > random write by 400% to an eMMC device with dm crypto wrapped around it. Cc: Shaohua Li Cc: Rik van Riel Cc: Wu Fengguang Signed-off-by: Mark Salyzyn Signed-off-by: Riley Andrews Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/fault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b5d458769b6..250319040de 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -278,6 +278,7 @@ retry: * starvation. */ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; goto retry; } } From 46409b7e4b56dd58779fb148d870353588a925ae Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Wed, 23 Sep 2015 23:12:09 +0200 Subject: [PATCH 47/55] m68k: Define asmlinkage_protect commit 8474ba74193d302e8340dddd1e16c85cc4b98caf upstream. Make sure the compiler does not modify arguments of syscall functions. This can happen if the compiler generates a tailcall to another function. For example, without asmlinkage_protect sys_openat is compiled into this function: sys_openat: clr.l %d0 move.w 18(%sp),%d0 move.l %d0,16(%sp) jbra do_sys_open Note how the fourth argument is modified in place, modifying the register %d4 that gets restored from this stack slot when the function returns to user-space. The caller may expect the register to be unmodified across system calls. Signed-off-by: Andreas Schwab Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- arch/m68k/include/asm/linkage.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/m68k/include/asm/linkage.h b/arch/m68k/include/asm/linkage.h index 5a822bb790f..066e74f666a 100644 --- a/arch/m68k/include/asm/linkage.h +++ b/arch/m68k/include/asm/linkage.h @@ -4,4 +4,34 @@ #define __ALIGN .align 4 #define __ALIGN_STR ".align 4" +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "m" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) + #endif From 495d9de1ad472cd215ac72bcfa8ac6252af02e16 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Thu, 16 Jul 2015 16:30:02 +0800 Subject: [PATCH 48/55] bonding: correct the MAC address for "follow" fail_over_mac policy [ Upstream commit a951bc1e6ba58f11df5ed5ddc41311e10f5fd20b ] The "follow" fail_over_mac policy is useful for multiport devices that either become confused or incur a performance penalty when multiple ports are programmed with the same MAC address, but the same MAC address still may happened by this steps for this policy: 1) echo +eth0 > /sys/class/net/bond0/bonding/slaves bond0 has the same mac address with eth0, it is MAC1. 2) echo +eth1 > /sys/class/net/bond0/bonding/slaves eth1 is backup, eth1 has MAC2. 3) ifconfig eth0 down eth1 became active slave, bond will swap MAC for eth0 and eth1, so eth1 has MAC1, and eth0 has MAC2. 4) ifconfig eth1 down there is no active slave, and eth1 still has MAC1, eth2 has MAC2. 5) ifconfig eth0 up the eth0 became active slave again, the bond set eth0 to MAC1. Something wrong here, then if you set eth1 up, the eth0 and eth1 will have the same MAC address, it will break this policy for ACTIVE_BACKUP mode. This patch will fix this problem by finding the old active slave and swap them MAC address before change active slave. Signed-off-by: Ding Tianhong Tested-by: Nikolay Aleksandrov Signed-off-by: David S. Miller [bwh: Backported to 3.10: bond_for_each_slave() takes an extra int paramter] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6b5baf01512..c0ed7c80281 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -876,6 +876,23 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, } } +static struct slave *bond_get_old_active(struct bonding *bond, + struct slave *new_active) +{ + struct slave *slave; + int i; + + bond_for_each_slave(bond, slave, i) { + if (slave == new_active) + continue; + + if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr)) + return slave; + } + + return NULL; +} + /* * bond_do_fail_over_mac * @@ -919,6 +936,9 @@ static void bond_do_fail_over_mac(struct bonding *bond, write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); + if (!old_active) + old_active = bond_get_old_active(bond, new_active); + if (old_active) { memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); memcpy(saddr.sa_data, old_active->dev->dev_addr, From e9346b9a2f5cd4ff470ee58d1bf271d0a2ca27a0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 5 Oct 2015 10:29:28 -0700 Subject: [PATCH 49/55] fib_rules: Fix dump_rules() not to exit early Backports of 41fc014332d9 ("fib_rules: fix fib rule dumps across multiple skbs") introduced a regression in "ip rule show" - it ends up dumping the first rule over and over and never exiting, because 3.19 and earlier are missing commit 053c095a82cf ("netlink: make nlmsg_end() and genlmsg_end() void"), so fib_nl_fill_rule() ends up returning skb->len (i.e. > 0) in the success case. Fix this by checking the return code for < 0 instead of != 0. Signed-off-by: Roland Dreier Signed-off-by: Greg Kroah-Hartman --- net/core/fib_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 627e517077e..84340a2605e 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -606,7 +606,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI, ops); - if (err) + if (err < 0) break; skip: idx++; From 8938c10543801cbb9d85efd3f317184e405b45bc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 26 Sep 2015 12:23:56 +0100 Subject: [PATCH 50/55] genirq: Fix race in register_irq_proc() commit 95c2b17534654829db428f11bcf4297c059a2a7e upstream. Per-IRQ directories in procfs are created only when a handler is first added to the irqdesc, not when the irqdesc is created. In the case of a shared IRQ, multiple tasks can race to create a directory. This race condition seems to have been present forever, but is easier to hit with async probing. Signed-off-by: Ben Hutchings Link: http://lkml.kernel.org/r/1443266636.2004.2.camel@decadent.org.uk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/proc.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 19ed5c425c3..349e5bbdb31 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "internals.h" @@ -309,18 +310,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_MUTEX(register_lock); char name [MAX_NAMELEN]; - if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) + if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) return; + /* + * irq directories are registered only when a handler is + * added, not when the descriptor is created, so multiple + * tasks might try to register at the same time. + */ + mutex_lock(®ister_lock); + + if (desc->dir) + goto out_unlock; + memset(name, 0, MAX_NAMELEN); sprintf(name, "%d", irq); /* create /proc/irq/1234 */ desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) - return; + goto out_unlock; #ifdef CONFIG_SMP /* create /proc/irq//smp_affinity */ @@ -341,6 +353,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("spurious", 0444, desc->dir, &irq_spurious_proc_fops, (void *)(long)irq); + +out_unlock: + mutex_unlock(®ister_lock); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) From f7c7bb9f06eb2af9c03b29e019d45ad4e943f900 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 16 Aug 2013 14:17:19 -0700 Subject: [PATCH 51/55] x86: Add 1/2/4/8 byte optimization to 64bit __copy_{from,to}_user_inatomic commit ff47ab4ff3cddfa7bc1b25b990e24abe2ae474ff upstream. The 64bit __copy_{from,to}_user_inatomic always called copy_from_user_generic, but skipped the special optimizations for 1/2/4/8 byte accesses. This especially hurts the futex call, which accesses the 4 byte futex user value with a complicated fast string operation in a function call, instead of a single movl. Use __copy_{from,to}_user for _inatomic instead to get the same optimizations. The only problem was the might_fault() in those functions. So move that to new wrapper and call __copy_{f,t}_user_nocheck() from *_inatomic directly. 32bit already did this correctly by duplicating the code. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1376687844-19857-2-git-send-email-andi@firstfloor.org Signed-off-by: H. Peter Anvin Cc: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess_64.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 142810c457d..34df5c22df9 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -77,11 +77,10 @@ int copy_to_user(void __user *dst, const void *src, unsigned size) } static __always_inline __must_check -int __copy_from_user(void *dst, const void __user *src, unsigned size) +int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) { int ret = 0; - might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -121,11 +120,17 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size) } static __always_inline __must_check -int __copy_to_user(void __user *dst, const void *src, unsigned size) +int __copy_from_user(void *dst, const void __user *src, unsigned size) +{ + might_fault(); + return __copy_from_user_nocheck(dst, src, size); +} + +static __always_inline __must_check +int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size) { int ret = 0; - might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -164,6 +169,13 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size) } } +static __always_inline __must_check +int __copy_to_user(void __user *dst, const void *src, unsigned size) +{ + might_fault(); + return __copy_to_user_nocheck(dst, src, size); +} + static __always_inline __must_check int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { @@ -220,13 +232,13 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) static __must_check __always_inline int __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) { - return copy_user_generic(dst, (__force const void *)src, size); + return __copy_from_user_nocheck(dst, (__force const void *)src, size); } static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) { - return copy_user_generic((__force void *)dst, src, size); + return __copy_to_user_nocheck((__force void *)dst, src, size); } extern long __copy_user_nocache(void *dst, const void __user *src, From d60cb61235d4df7eef0bb048664f0bbcc615dc71 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 9 Oct 2015 14:03:38 +0100 Subject: [PATCH 52/55] dm cache: fix NULL pointer when switching from cleaner policy commit 2bffa1503c5c06192eb1459180fac4416575a966 upstream. The cleaner policy doesn't make use of the per cache block hint space in the metadata (unlike the other policies). When switching from the cleaner policy to mq or smq a NULL pointer crash (in dm_tm_new_block) was observed. The crash was caused by bugs in dm-cache-metadata.c when trying to skip creation of the hint btree. The minimal fix is to change hint size for the cleaner policy to 4 bytes (only hint size supported). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-policy-cleaner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c index b04d1f904d0..2eca9084def 100644 --- a/drivers/md/dm-cache-policy-cleaner.c +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -434,7 +434,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size, static struct dm_cache_policy_type wb_policy_type = { .name = "cleaner", .version = {1, 0, 0}, - .hint_size = 0, + .hint_size = 4, .owner = THIS_MODULE, .create = wb_create }; From 613df159a3db2421407d69ba4473bf4830060347 Mon Sep 17 00:00:00 2001 From: "covici@ccs.covici.com" Date: Wed, 20 May 2015 05:44:11 -0400 Subject: [PATCH 53/55] staging: speakup: fix speakup-r regression commit b1d562acc78f0af46de0dfe447410bc40bdb7ece upstream. Here is a patch to make speakup-r work again. It broke in 3.6 due to commit 4369c64c79a22b98d3b7eff9d089196cd878a10a "Input: Send events one packet at a time) The problem was that the fakekey.c routine to fake a down arrow no longer functioned properly and putting the input_sync fixed it. Fixes: 4369c64c79a22b98d3b7eff9d089196cd878a10a Acked-by: Samuel Thibault Signed-off-by: John Covici Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/fakekey.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/speakup/fakekey.c b/drivers/staging/speakup/fakekey.c index 4299cf45f94..5e1f16c36b4 100644 --- a/drivers/staging/speakup/fakekey.c +++ b/drivers/staging/speakup/fakekey.c @@ -81,6 +81,7 @@ void speakup_fake_down_arrow(void) __this_cpu_write(reporting_keystroke, true); input_report_key(virt_keyboard, KEY_DOWN, PRESSED); input_report_key(virt_keyboard, KEY_DOWN, RELEASED); + input_sync(virt_keyboard); __this_cpu_write(reporting_keystroke, false); /* reenable preemption */ From c3dadb26a96bd1895a6ab69e0042a52014b7986a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 19:16:07 +0200 Subject: [PATCH 54/55] 3w-9xxx: don't unmap bounce buffered commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 15e3d5a285ab9283136dba34bbf72886d9146706 upstream. 3w controller don't dma map small single SGL entry commands but instead bounce buffer them. Add a helper to identify these commands and don't call scsi_dma_unmap for them. Based on an earlier patch from James Bottomley. Fixes: 118c85 ("3w-9xxx: fix command completion race") Reported-by: Tóth Attila Tested-by: Tóth Attila Signed-off-by: Christoph Hellwig Acked-by: Adam Radford Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-9xxx.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 5f57e3d35e2..6adf9abdf95 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -225,6 +225,17 @@ static const struct file_operations twa_fops = { .llseek = noop_llseek, }; +/* + * The controllers use an inline buffer instead of a mapped SGL for small, + * single entry buffers. Note that we treat a zero-length transfer like + * a mapped SGL. + */ +static bool twa_command_mapped(struct scsi_cmnd *cmd) +{ + return scsi_sg_count(cmd) != 1 || + scsi_bufflen(cmd) >= TW_MIN_SGL_LENGTH; +} + /* This function will complete an aen request from the isr */ static int twa_aen_complete(TW_Device_Extension *tw_dev, int request_id) { @@ -1351,7 +1362,8 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance) } /* Now complete the io */ - scsi_dma_unmap(cmd); + if (twa_command_mapped(cmd)) + scsi_dma_unmap(cmd); cmd->scsi_done(cmd); tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); @@ -1594,7 +1606,8 @@ static int twa_reset_device_extension(TW_Device_Extension *tw_dev) struct scsi_cmnd *cmd = tw_dev->srb[i]; cmd->result = (DID_RESET << 16); - scsi_dma_unmap(cmd); + if (twa_command_mapped(cmd)) + scsi_dma_unmap(cmd); cmd->scsi_done(cmd); } } @@ -1777,12 +1790,14 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_ retval = twa_scsiop_execute_scsi(tw_dev, request_id, NULL, 0, NULL); switch (retval) { case SCSI_MLQUEUE_HOST_BUSY: - scsi_dma_unmap(SCpnt); + if (twa_command_mapped(SCpnt)) + scsi_dma_unmap(SCpnt); twa_free_request_id(tw_dev, request_id); break; case 1: SCpnt->result = (DID_ERROR << 16); - scsi_dma_unmap(SCpnt); + if (twa_command_mapped(SCpnt)) + scsi_dma_unmap(SCpnt); done(SCpnt); tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); @@ -1843,8 +1858,7 @@ static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, /* Map sglist from scsi layer to cmd packet */ if (scsi_sg_count(srb)) { - if ((scsi_sg_count(srb) == 1) && - (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) { + if (!twa_command_mapped(srb)) { if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) scsi_sg_copy_to_buffer(srb, @@ -1917,7 +1931,7 @@ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int re { struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - if (scsi_bufflen(cmd) < TW_MIN_SGL_LENGTH && + if (!twa_command_mapped(cmd) && (cmd->sc_data_direction == DMA_FROM_DEVICE || cmd->sc_data_direction == DMA_BIDIRECTIONAL)) { if (scsi_sg_count(cmd) == 1) { From 61ce1520149bb1cfdc9a2946a1b6a33119742881 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 Oct 2015 14:38:04 -0700 Subject: [PATCH 55/55] Linux 3.10.91 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ce741a9f5b1..ba6a94cf354 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 90 +SUBLEVEL = 91 EXTRAVERSION = NAME = TOSSUG Baby Fish