import PULS_20160108

2018-03-13 20:29:02 +01:00
parent a8d97b1bd0
commit 6fa3eb70c0
6257 changed files with 2910927 additions and 5489 deletions
@@ -1,93 +0,0 @@
-#
-# NOTE! Don't add files that are generated in specific
-# subdirectories here. Add them in the ".gitignore" file
-# in that subdirectory instead.
-#
-# NOTE! Please use 'git ls-files -i --exclude-standard'
-# command after changing this file, to see if there are
-# any tracked files which get ignored after the change.
-#
-# Normal rules
-#
-.*
-*.o
-*.o.*
-*.a
-*.s
-*.ko
-*.so
-*.so.dbg
-*.mod.c
-*.i
-*.lst
-*.symtypes
-*.order
-modules.builtin
-*.elf
-*.bin
-*.gz
-*.bz2
-*.lzma
-*.xz
-*.lzo
-*.patch
-*.gcno
-
-#
-# Top-level generic files
-#
-/tags
-/TAGS
-/linux
-/vmlinux
-/vmlinuz
-/System.map
-/Module.markers
-/Module.symvers
-
-#
-# Debian directory (make deb-pkg)
-#
-/debian/
-
-#
-# git files that we don't want to ignore even it they are dot-files
-#
-!.gitignore
-!.mailmap
-
-#
-# Generated include files
-#
-include/config
-include/generated
-arch/*/include/generated
-
-# stgit generated dirs
-patches-*
-
-# quilt's files
-patches
-series
-
-# cscope files
-cscope.*
-ncscope.*
-
-# gnu global files
-GPATH
-GRTAGS
-GSYMS
-GTAGS
-
-*.orig
-*~
-\#*#
-
-#
-# Leavings from module signing
-#
-extra_certificates
-signing_key.priv
-signing_key.x509
-x509.genkey
@@ -0,0 +1,100 @@
+#
+# Copyright (C) 2009-2011 The Android-x86 Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+ifeq ($(LINUX_KERNEL_VERSION),kernel-3.10)
+ifneq ($(strip $(MTK_EMULATOR_SUPPORT)),yes)
+ifneq ($(strip $(MTK_PROJECT_NAME)),)
+
+ifneq ($(wildcard $(call my-dir)/arch/$(TARGET_ARCH)/configs/$(KERNEL_DEFCONFIG)),)
+
+KERNEL_DIR := $(call my-dir)
+ROOTDIR := $(abspath $(TOP))
+
+ifneq ($(filter /% ~%,$(OUT_DIR)),)
+KERNEL_OUT := $(TARGET_OUT_INTERMEDIATES)/KERNEL_OBJ
+else
+KERNEL_OUT := $(ROOTDIR)/$(TARGET_OUT_INTERMEDIATES)/KERNEL_OBJ
+endif
+
+ifeq ($(TARGET_ARCH), arm64)
+  ifeq ($(MTK_APPENDED_DTB_SUPPORT), yes)
+    TARGET_PREBUILT_KERNEL := $(KERNEL_OUT)/arch/$(TARGET_ARCH)/boot/Image.gz-dtb
+  else
+    TARGET_PREBUILT_KERNEL := $(KERNEL_OUT)/arch/$(TARGET_ARCH)/boot/Image.gz
+  endif
+else
+  ifeq ($(MTK_APPENDED_DTB_SUPPORT), yes)
+    TARGET_PREBUILT_KERNEL := $(KERNEL_OUT)/arch/$(TARGET_ARCH)/boot/zImage-dtb
+  else
+    TARGET_PREBUILT_KERNEL := $(KERNEL_OUT)/arch/$(TARGET_ARCH)/boot/zImage
+  endif
+endif
+TARGET_PREBUILT_KERNEL_BIN := $(KERNEL_OUT)/arch/$(TARGET_ARCH)/boot/zImage.bin
+
+TARGET_KERNEL_CONFIG := $(KERNEL_OUT)/.config
+KERNEL_HEADERS_INSTALL := $(KERNEL_OUT)/usr
+
+ifneq ($(strip $(TARGET_NO_KERNEL)),true)
+  INSTALLED_KERNEL_TARGET := $(PRODUCT_OUT)/kernel
+else
+  INSTALLED_KERNEL_TARGET :=
+endif
+
+ifeq ($(KERNEL_CROSS_COMPILE),)
+ifeq ($(TARGET_ARCH), arm64)
+KERNEL_CROSS_COMPILE := aarch64-linux-android-
+else
+KERNEL_CROSS_COMPILE := arm-eabi-
+endif
+endif
+
+$(KERNEL_OUT):
+	mkdir -p $@
+
+.PHONY: kernel kernel-defconfig kernel-menuconfig clean-kernel
+kernel-menuconfig: | $(KERNEL_OUT)
+	$(MAKE) -C $(KERNEL_DIR) O=$(KERNEL_OUT) ARCH=$(TARGET_ARCH) MTK_TARGET_PROJECT=${MTK_TARGET_PROJECT} CROSS_COMPILE=$(KERNEL_CROSS_COMPILE) ROOTDIR=$(ROOTDIR) menuconfig
+
+kernel-savedefconfig: | $(KERNEL_OUT)
+	cp $(TARGET_KERNEL_CONFIG) $(KERNEL_DIR)/arch/$(TARGET_ARCH)/configs/$(KERNEL_DEFCONFIG)
+
+$(TARGET_PREBUILT_KERNEL): kernel
+	@echo Done kernel
+
+$(TARGET_KERNEL_CONFIG) kernel-defconfig: $(KERNEL_DIR)/arch/$(TARGET_ARCH)/configs/$(KERNEL_DEFCONFIG) | $(KERNEL_OUT)
+	$(MAKE) -C $(KERNEL_DIR) O=$(KERNEL_OUT) ARCH=$(TARGET_ARCH) MTK_TARGET_PROJECT=${MTK_TARGET_PROJECT} CROSS_COMPILE=$(KERNEL_CROSS_COMPILE) ROOTDIR=$(ROOTDIR) $(KERNEL_DEFCONFIG)
+	$(MAKE) -C $(KERNEL_DIR) O=$(KERNEL_OUT) ARCH=$(TARGET_ARCH) MTK_TARGET_PROJECT=${MTK_TARGET_PROJECT} CROSS_COMPILE=$(KERNEL_CROSS_COMPILE) ROOTDIR=$(ROOTDIR) oldconfig
+
+$(KERNEL_HEADERS_INSTALL): $(TARGET_KERNEL_CONFIG) | $(KERNEL_OUT)
+	$(MAKE) -C $(KERNEL_DIR) O=$(KERNEL_OUT) ARCH=$(TARGET_ARCH) MTK_TARGET_PROJECT=${MTK_TARGET_PROJECT} CROSS_COMPILE=$(KERNEL_CROSS_COMPILE) ROOTDIR=$(ROOTDIR) headers_install
+
+kernel: $(TARGET_KERNEL_CONFIG) $(KERNEL_HEADERS_INSTALL) | $(KERNEL_OUT)
+	$(MAKE) -C $(KERNEL_DIR) O=$(KERNEL_OUT) ARCH=$(TARGET_ARCH) MTK_TARGET_PROJECT=${MTK_TARGET_PROJECT} CROSS_COMPILE=$(KERNEL_CROSS_COMPILE) ROOTDIR=$(ROOTDIR) 
+
+$(INSTALLED_KERNEL_TARGET): kernel
+
+ifeq ($(strip $(MTK_HEADER_SUPPORT)),yes)
+$(TARGET_PREBUILT_KERNEL_BIN): $(TARGET_PREBUILT_KERNEL) | $(HOST_OUT_EXECUTABLES)/mkimage
+	$(HOST_OUT_EXECUTABLES)/mkimage $< KERNEL 0xffffffff > $@ 	
+
+$(INSTALLED_KERNEL_TARGET): $(TARGET_PREBUILT_KERNEL_BIN) | $(ACP)
+	$(copy-file-to-target)
+else
+$(INSTALLED_KERNEL_TARGET): $(TARGET_PREBUILT_KERNEL) | $(ACP)
+	$(copy-file-to-target)
+endif
+
+clean-kernel:
+	@rm -rf $(KERNEL_OUT)
+
+endif
+endif
+endif
+endif # Ifeq ($(LINUX_KERNEL_VERSION),kernel-3.10)
+
@@ -0,0 +1,2 @@
+# dummy file
+# To avoid scanning kernel when using findleaves.py to search CleanSpec.mk
@@ -1,6 +1,5 @@
 *.xml
 *.ps
-*.pdf
 *.html
 *.9.gz
 *.9
@@ -0,0 +1,121 @@
+				=============
+				A N D R O I D
+				=============
+
+Copyright (C) 2009 Google, Inc.
+Written by Mike Chan <mike@android.com>
+
+CONTENTS:
+---------
+
+1. Android
+  1.1 Required enabled config options
+  1.2 Required disabled config options
+  1.3 Recommended enabled config options
+2. Contact
+
+
+1. Android
+==========
+
+Android (www.android.com) is an open source operating system for mobile devices.
+This document describes configurations needed to run the Android framework on
+top of the Linux kernel.
+
+To see a working defconfig look at msm_defconfig or goldfish_defconfig
+which can be found at http://android.git.kernel.org in kernel/common.git
+and kernel/msm.git
+
+
+1.1 Required enabled config options
+-----------------------------------
+After building a standard defconfig, ensure that these options are enabled in
+your .config or defconfig if they are not already. Based off the msm_defconfig.
+You should keep the rest of the default options enabled in the defconfig
+unless you know what you are doing.
+
+ANDROID_PARANOID_NETWORK
+ASHMEM
+CONFIG_FB_MODE_HELPERS
+CONFIG_FONT_8x16
+CONFIG_FONT_8x8
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM
+DAB
+EARLYSUSPEND
+FB
+FB_CFB_COPYAREA
+FB_CFB_FILLRECT
+FB_CFB_IMAGEBLIT
+FB_DEFERRED_IO
+FB_TILEBLITTING
+HIGH_RES_TIMERS
+INOTIFY
+INOTIFY_USER
+INPUT_EVDEV
+INPUT_GPIO
+INPUT_MISC
+LEDS_CLASS
+LEDS_GPIO
+LOCK_KERNEL
+LkOGGER
+LOW_MEMORY_KILLER
+MISC_DEVICES
+NEW_LEDS
+NO_HZ
+POWER_SUPPLY
+PREEMPT
+RAMFS
+RTC_CLASS
+RTC_LIB
+SWITCH
+SWITCH_GPIO
+TMPFS
+UID_STAT
+UID16
+USB_FUNCTION
+USB_FUNCTION_ADB
+USER_WAKELOCK
+VIDEO_OUTPUT_CONTROL
+WAKELOCK
+YAFFS_AUTO_YAFFS2
+YAFFS_FS
+YAFFS_YAFFS1
+YAFFS_YAFFS2
+
+
+1.2 Required disabled config options
+------------------------------------
+CONFIG_YAFFS_DISABLE_LAZY_LOAD
+DNOTIFY
+
+
+1.3 Recommended enabled config options
+------------------------------
+ANDROID_PMEM
+PSTORE_CONSOLE
+PSTORE_RAM
+SCHEDSTATS
+DEBUG_PREEMPT
+DEBUG_MUTEXES
+DEBUG_SPINLOCK_SLEEP
+DEBUG_INFO
+FRAME_POINTER
+CPU_FREQ
+CPU_FREQ_TABLE
+CPU_FREQ_DEFAULT_GOV_ONDEMAND
+CPU_FREQ_GOV_ONDEMAND
+CRC_CCITT
+EMBEDDED
+INPUT_TOUCHSCREEN
+I2C
+I2C_BOARDINFO
+LOG_BUF_SHIFT=17
+SERIAL_CORE
+SERIAL_CORE_CONSOLE
+
+
+2. Contact
+==========
+website: http://android.git.kernel.org
+
+mailing-lists: android-kernel@googlegroups.com
@@ -0,0 +1,34 @@
+		Tagged virtual addresses in AArch64 Linux
+		=========================================
+
+Author: Will Deacon <will.deacon@arm.com>
+Date  : 12 June 2013
+
+This document briefly describes the provision of tagged virtual
+addresses in the AArch64 translation system and their potential uses
+in AArch64 Linux.
+
+The kernel configures the translation tables so that translations made
+via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
+the virtual address ignored by the translation hardware. This frees up
+this byte for application use, with the following caveats:
+
+	(1) The kernel requires that all user addresses passed to EL1
+	    are tagged with tag 0x00. This means that any syscall
+	    parameters containing user virtual addresses *must* have
+	    their top byte cleared before trapping to the kernel.
+
+	(2) Non-zero tags are not preserved when delivering signals.
+	    This means that signal handlers in applications making use
+	    of tags cannot rely on the tag information for user virtual
+	    addresses being maintained for fields inside siginfo_t.
+	    One exception to this rule is for signals raised in response
+	    to watchpoint debug exceptions, where the tag information
+	    will be preserved.
+
+	(3) Special care should be taken when using tagged pointers,
+	    since it is likely that C compilers will not hazard two
+	    virtual addresses differing only in the upper byte.
+
+The architecture prevents the use of a tagged PC, so the upper byte will
+be set to a sign-extension of bit 55 on exception return.
@@ -598,6 +598,15 @@ is completely unused; @cgrp->parent is still valid. (Note - can also
 be called for a newly-created cgroup if an error occurs after this
 subsystem's create() method has been called for the new cgroup).

+int allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+(cgroup_mutex held by caller)
+
+Called prior to moving a task into a cgroup; if the subsystem
+returns an error, this will abort the attach operation.  Used
+to extend the permission checks - if all subsystems in a cgroup
+return 0, the attach will be allowed to proceed, even if the
+default permission check (root or same user) fails.
+
 int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)

@@ -28,6 +28,7 @@ Contents:
 2.3  Userspace
 2.4  Ondemand
 2.5  Conservative
+2.6  Interactive

 3.   The Governor Interface in the CPUfreq Core

@@ -218,6 +219,90 @@ a decision on when to decrease the frequency while running in any
 speed. Load for frequency increase is still evaluated every
 sampling rate.

+2.6 Interactive
+---------------
+
+The CPUfreq governor "interactive" is designed for latency-sensitive,
+interactive workloads. This governor sets the CPU speed depending on
+usage, similar to "ondemand" and "conservative" governors, but with a
+different set of configurable behaviors.
+
+The tuneable values for this governor are:
+
+target_loads: CPU load values used to adjust speed to influence the
+current CPU load toward that value.  In general, the lower the target
+load, the more often the governor will raise CPU speeds to bring load
+below the target.  The format is a single target load, optionally
+followed by pairs of CPU speeds and CPU loads to target at or above
+those speeds.  Colons can be used between the speeds and associated
+target loads for readability.  For example:
+
+   85 1000000:90 1700000:99
+
+targets CPU load 85% below speed 1GHz, 90% at or above 1GHz, until
+1.7GHz and above, at which load 99% is targeted.  If speeds are
+specified these must appear in ascending order.  Higher target load
+values are typically specified for higher speeds, that is, target load
+values also usually appear in an ascending order. The default is
+target load 90% for all speeds.
+
+min_sample_time: The minimum amount of time to spend at the current
+frequency before ramping down. Default is 80000 uS.
+
+hispeed_freq: An intermediate "hi speed" at which to initially ramp
+when CPU load hits the value specified in go_hispeed_load.  If load
+stays high for the amount of time specified in above_hispeed_delay,
+then speed may be bumped higher.  Default is the maximum speed
+allowed by the policy at governor initialization time.
+
+go_hispeed_load: The CPU load at which to ramp to hispeed_freq.
+Default is 99%.
+
+above_hispeed_delay: When speed is at or above hispeed_freq, wait for
+this long before raising speed in response to continued high load.
+The format is a single delay value, optionally followed by pairs of
+CPU speeds and the delay to use at or above those speeds.  Colons can
+be used between the speeds and associated delays for readability.  For
+example:
+
+   80000 1300000:200000 1500000:40000
+
+uses delay 80000 uS until CPU speed 1.3 GHz, at which speed delay
+200000 uS is used until speed 1.5 GHz, at which speed (and above)
+delay 40000 uS is used.  If speeds are specified these must appear in
+ascending order.  Default is 20000 uS.
+
+timer_rate: Sample rate for reevaluating CPU load when the CPU is not
+idle.  A deferrable timer is used, such that the CPU will not be woken
+from idle to service this timer until something else needs to run.
+(The maximum time to allow deferring this timer when not running at
+minimum speed is configurable via timer_slack.)  Default is 20000 uS.
+
+timer_slack: Maximum additional time to defer handling the governor
+sampling timer beyond timer_rate when running at speeds above the
+minimum.  For platforms that consume additional power at idle when
+CPUs are running at speeds greater than minimum, this places an upper
+bound on how long the timer will be deferred prior to re-evaluating
+load and dropping speed.  For example, if timer_rate is 20000uS and
+timer_slack is 10000uS then timers will be deferred for up to 30msec
+when not at lowest speed.  A value of -1 means defer timers
+indefinitely at all speeds.  Default is 80000 uS.
+
+boost: If non-zero, immediately boost speed of all CPUs to at least
+hispeed_freq until zero is written to this attribute.  If zero, allow
+CPU speeds to drop below hispeed_freq according to load as usual.
+Default is zero.
+
+boostpulse: On each write, immediately boost speed of all CPUs to
+hispeed_freq for at least the period of time specified by
+boostpulse_duration, after which speeds are allowed to drop below
+hispeed_freq according to load as usual.
+
+boostpulse_duration: Length of time to hold CPU speed at hispeed_freq
+on a write to boostpulse, before allowing speed to drop according to
+load as usual.  Default is 80000 uS.
+
+
 3. The Governor Interface in the CPUfreq Core
 =============================================

@@ -0,0 +1,172 @@
+=======================================================
+ARM CCI cache coherent interconnect binding description
+=======================================================
+
+ARM multi-cluster systems maintain intra-cluster coherency through a
+cache coherent interconnect (CCI) that is capable of monitoring bus
+transactions and manage coherency, TLB invalidations and memory barriers.
+
+It allows snooping and distributed virtual memory message broadcast across
+clusters, through memory mapped interface, with a global control register
+space and multiple sets of interface control registers, one per slave
+interface.
+
+Bindings for the CCI node follow the ePAPR standard, available from:
+
+www.power.org/documentation/epapr-version-1-1/
+
+with the addition of the bindings described in this document which are
+specific to ARM.
+
+* CCI interconnect node
+
+	Description: Describes a CCI cache coherent Interconnect component
+
+	Node name must be "cci".
+	Node's parent must be the root node /, and the address space visible
+	through the CCI interconnect is the same as the one seen from the
+	root node (ie from CPUs perspective as per DT standard).
+	Every CCI node has to define the following properties:
+
+	- compatible
+		Usage: required
+		Value type: <string>
+		Definition: must be set to
+			    "arm,cci-400"
+
+	- reg
+		Usage: required
+		Value type: <prop-encoded-array>
+		Definition: A standard property. Specifies base physical
+			    address of CCI control registers common to all
+			    interfaces.
+
+	- ranges:
+		Usage: required
+		Value type: <prop-encoded-array>
+		Definition: A standard property. Follow rules in the ePAPR for
+			    hierarchical bus addressing. CCI interfaces
+			    addresses refer to the parent node addressing
+			    scheme to declare their register bases.
+
+	CCI interconnect node can define the following child nodes:
+
+	- CCI control interface nodes
+
+		Node name must be "slave-if".
+		Parent node must be CCI interconnect node.
+
+		A CCI control interface node must contain the following
+		properties:
+
+		- compatible
+			Usage: required
+			Value type: <string>
+			Definition: must be set to
+				    "arm,cci-400-ctrl-if"
+
+		- interface-type:
+			Usage: required
+			Value type: <string>
+			Definition: must be set to one of {"ace", "ace-lite"}
+				    depending on the interface type the node
+				    represents.
+
+		- reg:
+			Usage: required
+			Value type: <prop-encoded-array>
+			Definition: the base address and size of the
+				    corresponding interface programming
+				    registers.
+
+* CCI interconnect bus masters
+
+	Description: masters in the device tree connected to a CCI port
+		     (inclusive of CPUs and their cpu nodes).
+
+	A CCI interconnect bus master node must contain the following
+	properties:
+
+	- cci-control-port:
+		Usage: required
+		Value type: <phandle>
+		Definition: a phandle containing the CCI control interface node
+			    the master is connected to.
+
+Example:
+
+	cpus {
+		#size-cells = <0>;
+		#address-cells = <1>;
+
+		CPU0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			cci-control-port = <&cci_control1>;
+			reg = <0x0>;
+		};
+
+		CPU1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			cci-control-port = <&cci_control1>;
+			reg = <0x1>;
+		};
+
+		CPU2: cpu@100 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			cci-control-port = <&cci_control2>;
+			reg = <0x100>;
+		};
+
+		CPU3: cpu@101 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			cci-control-port = <&cci_control2>;
+			reg = <0x101>;
+		};
+
+	};
+
+	dma0: dma@3000000 {
+		compatible = "arm,pl330", "arm,primecell";
+		cci-control-port = <&cci_control0>;
+		reg = <0x0 0x3000000 0x0 0x1000>;
+		interrupts = <10>;
+		#dma-cells = <1>;
+		#dma-channels = <8>;
+		#dma-requests = <32>;
+	};
+
+	cci@2c090000 {
+		compatible = "arm,cci-400";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x0 0x2c090000 0 0x1000>;
+		ranges = <0x0 0x0 0x2c090000 0x6000>;
+
+		cci_control0: slave-if@1000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace-lite";
+			reg = <0x1000 0x1000>;
+		};
+
+		cci_control1: slave-if@4000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x4000 0x1000>;
+		};
+
+		cci_control2: slave-if@5000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x5000 0x1000>;
+		};
+	};
+
+This CCI node corresponds to a CCI component whose control registers sits
+at address 0x000000002c090000.
+CCI slave interface @0x000000002c091000 is connected to dma controller dma0.
+CCI slave interface @0x000000002c094000 is connected to CPUs {CPU0, CPU1};
+CCI slave interface @0x000000002c095000 is connected to CPUs {CPU2, CPU3};
@@ -16,6 +16,9 @@ Required properties:
 	"arm,arm1176-pmu"
 	"arm,arm1136-pmu"
 - interrupts : 1 combined interrupt or 1 per core.
+- cluster : a phandle to the cluster to which it belongs
+	If there are more than one cluster with same CPU type
+	then there should be separate PMU nodes per cluster.

 Example:

@@ -369,6 +369,8 @@ is not associated with a file:
 [stack:1001]             = the stack of the thread with tid 1001
 [vdso]                   = the "virtual dynamic shared object",
                            the kernel system call handler
+ [anon:<name>]            = an anonymous mapping that has been
+                            named by userspace

 or if empty, the mapping is anonymous.

@@ -419,6 +421,7 @@ KernelPageSize:        4 kB
 MMUPageSize:           4 kB
 Locked:              374 kB
 VmFlags: rd ex mr mw me de
+Name:           name from userspace

 the first of these lines shows the same information as is displayed for the
 mapping in /proc/PID/maps.  The remaining lines show the size of the mapping
@@ -469,6 +472,9 @@ Note that there is no guarantee that every flag and associated mnemonic will
 be present in all further kernel releases. Things get changed, the flags may
 be vanished or the reverse -- new added.

+The "Name" field will only be present on a mapping that has been named by
+userspace, and will show the name passed in by userspace.
+
 This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.

@@ -3217,6 +3217,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 					HIGHMEM regardless of setting
 					of CONFIG_HIGHPTE.

+	uuid_debug=	(Boolean) whether to enable debugging of TuxOnIce's
+			uuid support.
+
 	vdso=		[X86,SH]
 			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
 			vdso=1: enable VDSO (default)
@@ -22,6 +22,15 @@ ip_no_pmtu_disc - BOOLEAN
 min_pmtu - INTEGER
 	default 552 - minimum discovered Path MTU

+fwmark_reflect - BOOLEAN
+	Controls the fwmark of kernel-generated IPv4 reply packets that are not
+	associated with a socket for example, TCP RSTs or ICMP echo replies).
+	If unset, these packets have a fwmark of zero. If set, they have the
+	fwmark of the packet they are replying to. Similarly affects the fwmark
+	used by internal routing lookups triggered by incoming packets, such as
+	the ones used for Path MTU Discovery.
+	Default: 0
+
 route/max_size - INTEGER
 	Maximum number of routes allowed in the kernel.  Increase
 	this when using large numbers of interfaces and/or routes.
@@ -468,6 +477,16 @@ tcp_fastopen - INTEGER

 	See include/net/tcp.h and the code for more details.

+tcp_fwmark_accept - BOOLEAN
+	If set, incoming connections to listening sockets that do not have a
+	socket mark will set the mark of the accepting socket to the fwmark of
+	the incoming SYN packet. This will cause all packets on that connection
+	(starting from the first SYNACK) to be sent with that fwmark. The
+	listening socket's mark is unchanged. Listening sockets that already
+	have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
+	unaffected.
+	Default: 0
+
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 255. Default value
@@ -1093,6 +1112,15 @@ conf/all/forwarding - BOOLEAN
 proxy_ndp - BOOLEAN
 	Do proxy ndp.

+fwmark_reflect - BOOLEAN
+	Controls the fwmark of kernel-generated IPv6 reply packets that are not
+	associated with a socket for example, TCP RSTs or ICMPv6 echo replies).
+	If unset, these packets have a fwmark of zero. If set, they have the
+	fwmark of the packet they are replying to. Similarly affects the fwmark
+	used by internal routing lookups triggered by incoming packets, such as
+	the ones used for Path MTU Discovery.
+	Default: 0
+
 conf/interface/*:
 	Change special settings per interface.

@@ -0,0 +1,477 @@
+		   TuxOnIce 3.0 Internal Documentation.
+			Updated to 26 March 2009
+
+1.  Introduction.
+
+    TuxOnIce 3.0 is an addition to the Linux Kernel, designed to
+    allow the user to quickly shutdown and quickly boot a computer, without
+    needing to close documents or programs. It is equivalent to the
+    hibernate facility in some laptops. This implementation, however,
+    requires no special BIOS or hardware support.
+
+    The code in these files is based upon the original implementation
+    prepared by Gabor Kuti and additional work by Pavel Machek and a
+    host of others. This code has been substantially reworked by Nigel
+    Cunningham, again with the help and testing of many others, not the
+    least of whom is Michael Frank. At its heart, however, the operation is
+    essentially the same as Gabor's version.
+
+2.  Overview of operation.
+
+    The basic sequence of operations is as follows:
+
+	a. Quiesce all other activity.
+	b. Ensure enough memory and storage space are available, and attempt
+	   to free memory/storage if necessary.
+	c. Allocate the required memory and storage space.
+	d. Write the image.
+	e. Power down.
+
+    There are a number of complicating factors which mean that things are
+    not as simple as the above would imply, however...
+
+    o The activity of each process must be stopped at a point where it will
+    not be holding locks necessary for saving the image, or unexpectedly
+    restart operations due to something like a timeout and thereby make
+    our image inconsistent.
+
+    o It is desirous that we sync outstanding I/O to disk before calculating
+    image statistics. This reduces corruption if one should suspend but
+    then not resume, and also makes later parts of the operation safer (see
+    below).
+
+    o We need to get as close as we can to an atomic copy of the data.
+    Inconsistencies in the image will result in inconsistent memory contents at
+    resume time, and thus in instability of the system and/or file system
+    corruption. This would appear to imply a maximum image size of one half of
+    the amount of RAM, but we have a solution... (again, below).
+
+    o In 2.6, we choose to play nicely with the other suspend-to-disk
+    implementations.
+
+3.  Detailed description of internals.
+
+    a. Quiescing activity.
+
+    Safely quiescing the system is achieved using three separate but related
+    aspects.
+
+    First, we note that the vast majority of processes don't need to run during
+    suspend. They can be 'frozen'. We therefore implement a refrigerator
+    routine, which processes enter and in which they remain until the cycle is
+    complete. Processes enter the refrigerator via try_to_freeze() invocations
+    at appropriate places.  A process cannot be frozen in any old place. It
+    must not be holding locks that will be needed for writing the image or
+    freezing other processes. For this reason, userspace processes generally
+    enter the refrigerator via the signal handling code, and kernel threads at
+    the place in their event loops where they drop locks and yield to other
+    processes or sleep.
+
+    The task of freezing processes is complicated by the fact that there can be
+    interdependencies between processes. Freezing process A before process B may
+    mean that process B cannot be frozen, because it stops at waiting for
+    process A rather than in the refrigerator. This issue is seen where
+    userspace waits on freezeable kernel threads or fuse filesystem threads. To
+    address this issue, we implement the following algorithm for quiescing
+    activity:
+
+	- Freeze filesystems (including fuse - userspace programs starting
+		new requests are immediately frozen; programs already running
+		requests complete their work before being frozen in the next
+		step)
+	- Freeze userspace
+	- Thaw filesystems (this is safe now that userspace is frozen and no
+		fuse requests are outstanding).
+	- Invoke sys_sync (noop on fuse).
+	- Freeze filesystems
+	- Freeze kernel threads
+
+    If we need to free memory, we thaw kernel threads and filesystems, but not
+    userspace. We can then free caches without worrying about deadlocks due to
+    swap files being on frozen filesystems or such like.
+
+    b. Ensure enough memory & storage are available.
+
+    We have a number of constraints to meet in order to be able to successfully
+    suspend and resume.
+
+    First, the image will be written in two parts, described below. One of these
+    parts needs to have an atomic copy made, which of course implies a maximum
+    size of one half of the amount of system memory. The other part ('pageset')
+    is not atomically copied, and can therefore be as large or small as desired.
+
+    Second, we have constraints on the amount of storage available. In these
+    calculations, we may also consider any compression that will be done. The
+    cryptoapi module allows the user to configure an expected compression ratio.
+
+    Third, the user can specify an arbitrary limit on the image size, in
+    megabytes. This limit is treated as a soft limit, so that we don't fail the
+    attempt to suspend if we cannot meet this constraint.
+
+    c. Allocate the required memory and storage space.
+
+    Having done the initial freeze, we determine whether the above constraints
+    are met, and seek to allocate the metadata for the image. If the constraints
+    are not met, or we fail to allocate the required space for the metadata, we
+    seek to free the amount of memory that we calculate is needed and try again.
+    We allow up to four iterations of this loop before aborting the cycle. If we
+    do fail, it should only be because of a bug in TuxOnIce's calculations.
+
+    These steps are merged together in the prepare_image function, found in
+    prepare_image.c. The functions are merged because of the cyclical nature
+    of the problem of calculating how much memory and storage is needed. Since
+    the data structures containing the information about the image must
+    themselves take memory and use storage, the amount of memory and storage
+    required changes as we prepare the image. Since the changes are not large,
+    only one or two iterations will be required to achieve a solution.
+
+    The recursive nature of the algorithm is miminised by keeping user space
+    frozen while preparing the image, and by the fact that our records of which
+    pages are to be saved and which pageset they are saved in use bitmaps (so
+    that changes in number or fragmentation of the pages to be saved don't
+    feedback via changes in the amount of memory needed for metadata). The
+    recursiveness is thus limited to any extra slab pages allocated to store the
+    extents that record storage used, and the effects of seeking to free memory.
+
+    d. Write the image.
+
+    We previously mentioned the need to create an atomic copy of the data, and
+    the half-of-memory limitation that is implied in this. This limitation is
+    circumvented by dividing the memory to be saved into two parts, called
+    pagesets.
+
+    Pageset2 contains most of the page cache - the pages on the active and
+    inactive LRU lists that aren't needed or modified while TuxOnIce is
+    running, so they can be safely written without an atomic copy. They are
+    therefore saved first and reloaded last. While saving these pages,
+    TuxOnIce carefully ensures that the work of writing the pages doesn't make
+    the image inconsistent. With the support for Kernel (Video) Mode Setting
+    going into the kernel at the time of writing, we need to check for pages
+    on the LRU that are used by KMS, and exclude them from pageset2. They are
+    atomically copied as part of pageset 1.
+
+    Once pageset2 has been saved, we prepare to do the atomic copy of remaining
+    memory. As part of the preparation, we power down drivers, thereby providing
+    them with the opportunity to have their state recorded in the image. The
+    amount of memory allocated by drivers for this is usually negligible, but if
+    DRI is in use, video drivers may require significants amounts. Ideally we
+    would be able to query drivers while preparing the image as to the amount of
+    memory they will need. Unfortunately no such mechanism exists at the time of
+    writing. For this reason, TuxOnIce allows the user to set an
+    'extra_pages_allowance', which is used to seek to ensure sufficient memory
+    is available for drivers at this point. TuxOnIce also lets the user set this
+    value to 0. In this case, a test driver suspend is done while preparing the
+    image, and the difference (plus a margin) used instead. TuxOnIce will also
+    automatically restart the hibernation process (twice at most) if it finds
+    that the extra pages allowance is not sufficient. It will then use what was
+    actually needed (plus a margin, again). Failure to hibernate should thus
+    be an extremely rare occurence.
+
+    Having suspended the drivers, we save the CPU context before making an
+    atomic copy of pageset1, resuming the drivers and saving the atomic copy.
+    After saving the two pagesets, we just need to save our metadata before
+    powering down.
+
+    As we mentioned earlier, the contents of pageset2 pages aren't needed once
+    they've been saved. We therefore use them as the destination of our atomic
+    copy. In the unlikely event that pageset1 is larger, extra pages are
+    allocated while the image is being prepared. This is normally only a real
+    possibility when the system has just been booted and the page cache is
+    small.
+
+    This is where we need to be careful about syncing, however. Pageset2 will
+    probably contain filesystem meta data. If this is overwritten with pageset1
+    and then a sync occurs, the filesystem will be corrupted - at least until
+    resume time and another sync of the restored data. Since there is a
+    possibility that the user might not resume or (may it never be!) that
+    TuxOnIce might oops, we do our utmost to avoid syncing filesystems after
+    copying pageset1.
+
+    e. Power down.
+
+    Powering down uses standard kernel routines. TuxOnIce supports powering down
+    using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off.
+    Supporting suspend to ram (S3) as a power off option might sound strange,
+    but it allows the user to quickly get their system up and running again if
+    the battery doesn't run out (we just need to re-read the overwritten pages)
+    and if the battery does run out (or the user removes power), they can still
+    resume.
+
+4.  Data Structures.
+
+    TuxOnIce uses three main structures to store its metadata and configuration
+    information:
+
+    a) Pageflags bitmaps.
+
+    TuxOnIce records which pages will be in pageset1, pageset2, the destination
+    of the atomic copy and the source of the atomically restored image using
+    bitmaps. The code used is that written for swsusp, with small improvements
+    to match TuxOnIce's requirements.
+
+    The pageset1 bitmap is thus easily stored in the image header for use at
+    resume time.
+
+    As mentioned above, using bitmaps also means that the amount of memory and
+    storage required for recording the above information is constant. This
+    greatly simplifies the work of preparing the image. In earlier versions of
+    TuxOnIce, extents were used to record which pages would be stored. In that
+    case, however, eating memory could result in greater fragmentation of the
+    lists of pages, which in turn required more memory to store the extents and
+    more storage in the image header. These could in turn require further
+    freeing of memory, and another iteration. All of this complexity is removed
+    by having bitmaps.
+
+    Bitmaps also make a lot of sense because TuxOnIce only ever iterates
+    through the lists. There is therefore no cost to not being able to find the
+    nth page in order 0 time. We only need to worry about the cost of finding
+    the n+1th page, given the location of the nth page. Bitwise optimisations
+    help here.
+
+    b) Extents for block data.
+
+    TuxOnIce supports writing the image to multiple block devices. In the case
+    of swap, multiple partitions and/or files may be in use, and we happily use
+    them all (with the exception of compcache pages, which we allocate but do
+    not use). This use of multiple block devices is accomplished as follows:
+
+    Whatever the actual source of the allocated storage, the destination of the
+    image can be viewed in terms of one or more block devices, and on each
+    device, a list of sectors. To simplify matters, we only use contiguous,
+    PAGE_SIZE aligned sectors, like the swap code does.
+
+    Since sector numbers on each bdev may well not start at 0, it makes much
+    more sense to use extents here. Contiguous ranges of pages can thus be
+    represented in the extents by contiguous values.
+
+    Variations in block size are taken account of in transforming this data
+    into the parameters for bio submission.
+
+    We can thus implement a layer of abstraction wherein the core of TuxOnIce
+    doesn't have to worry about which device we're currently writing to or
+    where in the device we are. It simply requests that the next page in the
+    pageset or header be written, leaving the details to this lower layer.
+    The lower layer remembers where in the sequence of devices and blocks each
+    pageset starts. The header always starts at the beginning of the allocated
+    storage.
+
+    So extents are:
+
+    struct extent {
+      unsigned long minimum, maximum;
+      struct extent *next;
+    }
+
+    These are combined into chains of extents for a device:
+
+    struct extent_chain {
+      int size; /* size of the extent ie sum (max-min+1) */
+      int allocs, frees;
+      char *name;
+      struct extent *first, *last_touched;
+    };
+
+    For each bdev, we need to store a little more info:
+
+    struct suspend_bdev_info {
+       struct block_device *bdev;
+       dev_t dev_t;
+       int bmap_shift;
+       int blocks_per_page;
+    };
+
+    The dev_t is used to identify the device in the stored image. As a result,
+    we expect devices at resume time to have the same major and minor numbers
+    as they had while suspending.  This is primarily a concern where the user
+    utilises LVM for storage, as they will need to dmsetup their partitions in
+    such a way as to maintain this consistency at resume time.
+
+    bmap_shift and blocks_per_page apply the effects of variations in blocks
+    per page settings for the filesystem and underlying bdev. For most
+    filesystems, these are the same, but for xfs, they can have independant
+    values.
+
+    Combining these two structures together, we have everything we need to
+    record what devices and what blocks on each device are being used to
+    store the image, and to submit i/o using bio_submit.
+
+    The last elements in the picture are a means of recording how the storage
+    is being used.
+
+    We do this first and foremost by implementing a layer of abstraction on
+    top of the devices and extent chains which allows us to view however many
+    devices there might be as one long storage tape, with a single 'head' that
+    tracks a 'current position' on the tape:
+
+    struct extent_iterate_state {
+      struct extent_chain *chains;
+      int num_chains;
+      int current_chain;
+      struct extent *current_extent;
+      unsigned long current_offset;
+    };
+
+    That is, *chains points to an array of size num_chains of extent chains.
+    For the filewriter, this is always a single chain. For the swapwriter, the
+    array is of size MAX_SWAPFILES.
+
+    current_chain, current_extent and current_offset thus point to the current
+    index in the chains array (and into a matching array of struct
+    suspend_bdev_info), the current extent in that chain (to optimise access),
+    and the current value in the offset.
+
+    The image is divided into three parts:
+    - The header
+    - Pageset 1
+    - Pageset 2
+
+    The header always starts at the first device and first block. We know its
+    size before we begin to save the image because we carefully account for
+    everything that will be stored in it.
+
+    The second pageset (LRU) is stored first. It begins on the next page after
+    the end of the header.
+
+    The first pageset is stored second. It's start location is only known once
+    pageset2 has been saved, since pageset2 may be compressed as it is written.
+    This location is thus recorded at the end of saving pageset2. It is page
+    aligned also.
+
+    Since this information is needed at resume time, and the location of extents
+    in memory will differ at resume time, this needs to be stored in a portable
+    way:
+
+    struct extent_iterate_saved_state {
+        int chain_num;
+        int extent_num;
+        unsigned long offset;
+    };
+
+    We can thus implement a layer of abstraction wherein the core of TuxOnIce
+    doesn't have to worry about which device we're currently writing to or
+    where in the device we are. It simply requests that the next page in the
+    pageset or header be written, leaving the details to this layer, and
+    invokes the routines to remember and restore the position, without having
+    to worry about the details of how the data is arranged on disk or such like.
+
+    c) Modules
+
+    One aim in designing TuxOnIce was to make it flexible. We wanted to allow
+    for the implementation of different methods of transforming a page to be
+    written to disk and different methods of getting the pages stored.
+
+    In early versions (the betas and perhaps Suspend1), compression support was
+    inlined in the image writing code, and the data structures and code for
+    managing swap were intertwined with the rest of the code. A number of people
+    had expressed interest in implementing image encryption, and alternative
+    methods of storing the image.
+
+    In order to achieve this, TuxOnIce was given a modular design.
+
+    A module is a single file which encapsulates the functionality needed
+    to transform a pageset of data (encryption or compression, for example),
+    or to write the pageset to a device. The former type of module is called
+    a 'page-transformer', the later a 'writer'.
+
+    Modules are linked together in pipeline fashion. There may be zero or more
+    page transformers in a pipeline, and there is always exactly one writer.
+    The pipeline follows this pattern:
+
+		---------------------------------
+		|          TuxOnIce Core        |
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|	Page transformer 1	|
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|	Page transformer 2	|
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|            Writer		|
+		---------------------------------
+
+    During the writing of an image, the core code feeds pages one at a time
+    to the first module. This module performs whatever transformations it
+    implements on the incoming data, completely consuming the incoming data and
+    feeding output in a similar manner to the next module.
+
+    All routines are SMP safe, and the final result of the transformations is
+    written with an index (provided by the core) and size of the output by the
+    writer. As a result, we can have multithreaded I/O without needing to
+    worry about the sequence in which pages are written (or read).
+
+    During reading, the pipeline works in the reverse direction. The core code
+    calls the first module with the address of a buffer which should be filled.
+    (Note that the buffer size is always PAGE_SIZE at this time). This module
+    will in turn request data from the next module and so on down until the
+    writer is made to read from the stored image.
+
+    Part of definition of the structure of a module thus looks like this:
+
+        int (*rw_init) (int rw, int stream_number);
+        int (*rw_cleanup) (int rw);
+        int (*write_chunk) (struct page *buffer_page);
+        int (*read_chunk) (struct page *buffer_page, int sync);
+
+    It should be noted that the _cleanup routine may be called before the
+    full stream of data has been read or written. While writing the image,
+    the user may (depending upon settings) choose to abort suspending, and
+    if we are in the midst of writing the last portion of the image, a portion
+    of the second pageset may be reread. This may also happen if an error
+    occurs and we seek to abort the process of writing the image.
+
+    The modular design is also useful in a number of other ways. It provides
+    a means where by we can add support for:
+
+    - providing overall initialisation and cleanup routines;
+    - serialising configuration information in the image header;
+    - providing debugging information to the user;
+    - determining memory and image storage requirements;
+    - dis/enabling components at run-time;
+    - configuring the module (see below);
+
+    ...and routines for writers specific to their work:
+    - Parsing a resume= location;
+    - Determining whether an image exists;
+    - Marking a resume as having been attempted;
+    - Invalidating an image;
+
+    Since some parts of the core - the user interface and storage manager
+    support - have use for some of these functions, they are registered as
+    'miscellaneous' modules as well.
+
+    d) Sysfs data structures.
+
+    This brings us naturally to support for configuring TuxOnIce. We desired to
+    provide a way to make TuxOnIce as flexible and configurable as possible.
+    The user shouldn't have to reboot just because they want to now hibernate to
+    a file instead of a partition, for example.
+
+    To accomplish this, TuxOnIce implements a very generic means whereby the
+    core and modules can register new sysfs entries. All TuxOnIce entries use
+    a single _store and _show routine, both of which are found in
+    tuxonice_sysfs.c in the kernel/power directory. These routines handle the
+    most common operations - getting and setting the values of bits, integers,
+    longs, unsigned longs and strings in one place, and allow overrides for
+    customised get and set options as well as side-effect routines for all
+    reads and writes.
+
+    When combined with some simple macros, a new sysfs entry can then be defined
+    in just a couple of lines:
+
+        SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
+                        2048, 0, NULL),
+
+    This defines a sysfs entry named "progress_granularity" which is rw and
+    allows the user to access an integer stored at &progress_granularity, giving
+    it a value between 1 and 2048 inclusive.
+
+    Sysfs entries are registered under /sys/power/tuxonice, and entries for
+    modules are located in a subdirectory named after the module.
+
@@ -0,0 +1,948 @@
+	--- TuxOnIce, version 3.0 ---
+
+1.  What is it?
+2.  Why would you want it?
+3.  What do you need to use it?
+4.  Why not just use the version already in the kernel?
+5.  How do you use it?
+6.  What do all those entries in /sys/power/tuxonice do?
+7.  How do you get support?
+8.  I think I've found a bug. What should I do?
+9.  When will XXX be supported?
+10  How does it work?
+11. Who wrote TuxOnIce?
+
+1. What is it?
+
+   Imagine you're sitting at your computer, working away. For some reason, you
+   need to turn off your computer for a while - perhaps it's time to go home
+   for the day. When you come back to your computer next, you're going to want
+   to carry on where you left off. Now imagine that you could push a button and
+   have your computer store the contents of its memory to disk and power down.
+   Then, when you next start up your computer, it loads that image back into
+   memory and you can carry on from where you were, just as if you'd never
+   turned the computer off. You have far less time to start up, no reopening of
+   applications or finding what directory you put that file in yesterday.
+   That's what TuxOnIce does.
+
+   TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who,
+   with some help from Pavel Machek, got an early version going in 1999. The
+   project was then taken over by Florent Chabaud while still in alpha version
+   numbers. Nigel Cunningham came on the scene when Florent was unable to
+   continue, moving the project into betas, then 1.0, 2.0 and so on up to
+   the present series. During the 2.0 series, the name was contracted to
+   Suspend2 and the website suspend2.net created. Beginning around July 2007,
+   a transition to calling the software TuxOnIce was made, to seek to help
+   make it clear that TuxOnIce is more concerned with hibernation than suspend
+   to ram.
+
+   Pavel Machek's swsusp code, which was merged around 2.5.17 retains the
+   original name, and was essentially a fork of the beta code until Rafael
+   Wysocki came on the scene in 2005 and began to improve it further.
+
+2. Why would you want it?
+
+   Why wouldn't you want it?
+
+   Being able to save the state of your system and quickly restore it improves
+   your productivity - you get a useful system in far less time than through
+   the normal boot process. You also get to be completely 'green', using zero
+   power, or as close to that as possible (the computer may still provide
+   minimal power to some devices, so they can initiate a power on, but that
+   will be the same amount of power as would be used if you told the computer
+   to shutdown.
+
+3. What do you need to use it?
+
+   a. Kernel Support.
+
+   i) The TuxOnIce patch.
+
+   TuxOnIce is part of the Linux Kernel. This version is not part of Linus's
+   2.6 tree at the moment, so you will need to download the kernel source and
+   apply the latest patch. Having done that, enable the appropriate options in
+   make [menu|x]config (under Power Management Options - look for "Enhanced
+   Hibernation"), compile and install your kernel. TuxOnIce works with SMP,
+   Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64.
+
+   TuxOnIce patches are available from http://tuxonice.net.
+
+   ii) Compression support.
+
+   Compression support is implemented via the cryptoapi. You will therefore want
+   to select any Cryptoapi transforms that you want to use on your image from
+   the Cryptoapi menu while configuring your kernel. We recommend the use of the
+   LZO compression method - it is very fast and still achieves good compression.
+
+   You can also tell TuxOnIce to write its image to an encrypted and/or
+   compressed filesystem/swap partition. In that case, you don't need to do
+   anything special for TuxOnIce when it comes to kernel configuration.
+
+   iii) Configuring other options.
+
+   While you're configuring your kernel, try to configure as much as possible
+   to build as modules. We recommend this because there are a number of drivers
+   that are still in the process of implementing proper power management
+   support. In those cases, the best way to work around their current lack is
+   to build them as modules and remove the modules while hibernating. You might
+   also bug the driver authors to get their support up to speed, or even help!
+
+   b. Storage.
+
+   i) Swap.
+
+   TuxOnIce can store the hibernation image in your swap partition, a swap file or
+   a combination thereof. Whichever combination you choose, you will probably
+   want to create enough swap space to store the largest image you could have,
+   plus the space you'd normally use for swap. A good rule of thumb would be
+   to calculate the amount of swap you'd want without using TuxOnIce, and then
+   add the amount of memory you have. This swapspace can be arranged in any way
+   you'd like. It can be in one partition or file, or spread over a number. The
+   only requirement is that they be active when you start a hibernation cycle.
+
+   There is one exception to this requirement. TuxOnIce has the ability to turn
+   on one swap file or partition at the start of hibernating and turn it back off
+   at the end. If you want to ensure you have enough memory to store a image
+   when your memory is fully used, you might want to make one swap partition or
+   file for 'normal' use, and another for TuxOnIce to activate & deactivate
+   automatically. (Further details below).
+
+   ii) Normal files.
+
+   TuxOnIce includes a 'file allocator'. The file allocator can store your
+   image in a simple file. Since Linux has the concept of everything being a
+   file, this is more powerful than it initially sounds. If, for example, you
+   were to set up a network block device file, you could hibernate to a network
+   server. This has been tested and works to a point, but nbd itself isn't
+   stateless enough for our purposes.
+
+   Take extra care when setting up the file allocator. If you just type
+   commands without thinking and then try to hibernate, you could cause
+   irreversible corruption on your filesystems! Make sure you have backups.
+
+   Most people will only want to hibernate to a local file. To achieve that, do
+   something along the lines of:
+
+   echo "TuxOnIce" > /hibernation-file
+   dd if=/dev/zero bs=1M count=512 >> /hibernation-file
+
+   This will create a 512MB file called /hibernation-file. To get TuxOnIce to use
+   it:
+
+   echo /hibernation-file > /sys/power/tuxonice/file/target
+
+   Then
+
+   cat /sys/power/tuxonice/resume
+
+   Put the results of this into your bootloader's configuration (see also step
+   C, below):
+
+   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
+   # cat /sys/power/tuxonice/resume
+   file:/dev/hda2:0x1e001
+
+   In this example, we would edit the append= line of our lilo.conf|menu.lst
+   so that it included:
+
+   resume=file:/dev/hda2:0x1e001
+   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
+
+   For those who are thinking 'Could I make the file sparse?', the answer is
+   'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in
+   a sparse file while hibernating. In the longer term (post merge!), I'd like
+   to change things so that the file could be dynamically resized and have
+   holes filled as needed. Right now, however, that's not possible and not a
+   priority.
+
+   c. Bootloader configuration.
+
+   Using TuxOnIce also requires that you add an extra parameter to
+   your lilo.conf or equivalent. Here's an example for a swap partition:
+
+   append="resume=swap:/dev/hda1"
+
+   This would tell TuxOnIce that /dev/hda1 is a swap partition you
+   have. TuxOnIce will use the swap signature of this partition as a
+   pointer to your data when you hibernate. This means that (in this example)
+   /dev/hda1 doesn't need to be _the_ swap partition where all of your data
+   is actually stored. It just needs to be a swap partition that has a
+   valid signature.
+
+   You don't need to have a swap partition for this purpose. TuxOnIce
+   can also use a swap file, but usage is a little more complex. Having made
+   your swap file, turn it on and do
+
+   cat /sys/power/tuxonice/swap/headerlocations
+
+   (this assumes you've already compiled your kernel with TuxOnIce
+   support and booted it). The results of the cat command will tell you
+   what you need to put in lilo.conf:
+
+   For swap partitions like /dev/hda1, simply use resume=/dev/hda1.
+   For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d.
+
+   If the swapfile changes for any reason (it is moved to a different
+   location, it is deleted and recreated, or the filesystem is
+   defragmented) then you will have to check
+   /sys/power/tuxonice/swap/headerlocations for a new resume_block value.
+
+   Once you've compiled and installed the kernel and adjusted your bootloader
+   configuration, you should only need to reboot for the most basic part
+   of TuxOnIce to be ready.
+
+   If you only compile in the swap allocator, or only compile in the file
+   allocator, you don't need to add the "swap:" part of the resume=
+   parameters above. resume=/dev/hda2:0x242d will work just as well. If you
+   have compiled both and your storage is on swap, you can also use this
+   format (the swap allocator is the default allocator).
+
+   When compiling your kernel, one of the options in the 'Power Management
+   Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is
+   called 'Default resume partition'. This can be used to set a default value
+   for the resume= parameter.
+
+   d. The hibernate script.
+
+   Since the driver model in 2.6 kernels is still being developed, you may need
+   to do more than just configure TuxOnIce. Users of TuxOnIce usually start the
+   process via a script which prepares for the hibernation cycle, tells the
+   kernel to do its stuff and then restore things afterwards. This script might
+   involve:
+
+   - Switching to a text console and back if X doesn't like the video card
+     status on resume.
+   - Un/reloading drivers that don't play well with hibernation.
+
+   Note that you might not be able to unload some drivers if there are
+   processes using them. You might have to kill off processes that hold
+   devices open. Hint: if your X server accesses an USB mouse, doing a
+   'chvt' to a text console releases the device and you can unload the
+   module.
+
+   Check out the latest script (available on tuxonice.net).
+
+   e. The userspace user interface.
+
+   TuxOnIce has very limited support for displaying status if you only apply
+   the kernel patch - it can printk messages, but that is all. In addition,
+   some of the functions mentioned in this document (such as cancelling a cycle
+   or performing interactive debugging) are unavailable. To utilise these
+   functions, or simply get a nice display, you need the 'userui' component.
+   Userui comes in three flavours, usplash, fbsplash and text. Text should
+   work on any console. Usplash and fbsplash require the appropriate
+   (distro specific?) support.
+
+   To utilise a userui, TuxOnIce just needs to be told where to find the
+   userspace binary:
+
+   echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program
+
+   The hibernate script can do this for you, and a default value for this
+   setting can be configured when compiling the kernel. This path is also
+   stored in the image header, so if you have an initrd or initramfs, you can
+   use the userui during the first part of resuming (prior to the atomic
+   restore) by putting the binary in the same path in your initrd/ramfs.
+   Alternatively, you can put it in a different location and do an echo
+   similar to the above prior to the echo > do_resume. The value saved in the
+   image header will then be ignored.
+
+4. Why not just use the version already in the kernel?
+
+   The version in the vanilla kernel has a number of drawbacks. The most
+   serious of these are:
+	- it has a maximum image size of 1/2 total memory;
+	- it doesn't allocate storage until after it has snapshotted memory.
+	  This means that you can't be sure hibernating will work until you
+	  see it start to write the image;
+	- it does not allow you to press escape to cancel a cycle;
+	- it does not allow you to press escape to cancel resuming;
+	- it does not allow you to automatically swapon a file when
+	  starting a cycle;
+	- it does not allow you to use multiple swap partitions or files;
+	- it does not allow you to use ordinary files;
+	- it just invalidates an image and continues to boot if you
+	  accidentally boot the wrong kernel after hibernating;
+	- it doesn't support any sort of nice display while hibernating;
+	- it is moving toward requiring that you have an initrd/initramfs
+	  to ever have a hope of resuming (uswsusp). While uswsusp will
+	  address some of the concerns above, it won't address all of them,
+          and will be more complicated to get set up;
+        - it doesn't have support for suspend-to-both (write a hibernation
+	  image, then suspend to ram; I think this is known as ReadySafe
+	  under M$).
+
+5. How do you use it?
+
+   A hibernation cycle can be started directly by doing:
+
+	echo > /sys/power/tuxonice/do_hibernate
+
+   In practice, though, you'll probably want to use the hibernate script
+   to unload modules, configure the kernel the way you like it and so on.
+   In that case, you'd do (as root):
+
+	hibernate
+
+   See the hibernate script's man page for more details on the options it
+   takes.
+
+   If you're using the text or splash user interface modules, one feature of
+   TuxOnIce that you might find useful is that you can press Escape at any time
+   during hibernating, and the process will be aborted.
+
+   Due to the way hibernation works, this means you'll have your system back and
+   perfectly usable almost instantly. The only exception is when it's at the
+   very end of writing the image. Then it will need to reload a small (usually
+   4-50MBs, depending upon the image characteristics) portion first.
+
+   Likewise, when resuming, you can press escape and resuming will be aborted.
+   The computer will then powerdown again according to settings at that time for
+   the powerdown method or rebooting.
+
+   You can change the settings for powering down while the image is being
+   written by pressing 'R' to toggle rebooting and 'O' to toggle between
+   suspending to ram and powering down completely).
+
+   If you run into problems with resuming, adding the "noresume" option to
+   the kernel command line will let you skip the resume step and recover your
+   system. This option shouldn't normally be needed, because TuxOnIce modifies
+   the image header prior to the atomic restore, and will thus prompt you
+   if it detects that you've tried to resume an image before (this flag is
+   removed if you press Escape to cancel a resume, so you won't be prompted
+   then).
+
+   Recent kernels (2.6.24 onwards) add support for resuming from a different
+   kernel to the one that was hibernated (thanks to Rafael for his work on
+   this - I've just embraced and enhanced the support for TuxOnIce). This
+   should further reduce the need for you to use the noresume option.
+
+6. What do all those entries in /sys/power/tuxonice do?
+
+   /sys/power/tuxonice is the directory which contains files you can use to
+   tune and configure TuxOnIce to your liking. The exact contents of
+   the directory will depend upon the version of TuxOnIce you're
+   running and the options you selected at compile time. In the following
+   descriptions, names in brackets refer to compile time options.
+   (Note that they're all dependant upon you having selected CONFIG_TUXONICE
+   in the first place!).
+
+   Since the values of these settings can open potential security risks, the
+   writeable ones are accessible only to the root user. You may want to
+   configure sudo to allow you to invoke your hibernate script as an ordinary
+   user.
+
+   - alloc/failure_test
+
+   This debugging option provides a way of testing TuxOnIce's handling of
+   memory allocation failures. Each allocation type that TuxOnIce makes has
+   been given a unique number (see the source code). Echo the appropriate
+   number into this entry, and when TuxOnIce attempts to do that allocation,
+   it will pretend there was a failure and act accordingly.
+
+   - alloc/find_max_mem_allocated
+
+   This debugging option will cause TuxOnIce to find the maximum amount of
+   memory it used during a cycle, and report that information in debugging
+   information at the end of the cycle.
+
+   - alt_resume_param
+
+   Instead of powering down after writing a hibernation image, TuxOnIce
+   supports resuming from a different image. This entry lets you set the
+   location of the signature for that image (the resume= value you'd use
+   for it). Using an alternate image and keep_image mode, you can do things
+   like using an alternate image to power down an uninterruptible power
+   supply.
+
+   - block_io/target_outstanding_io
+
+   This value controls the amount of memory that the block I/O code says it
+   needs when the core code is calculating how much memory is needed for
+   hibernating and for resuming. It doesn't directly control the amount of
+   I/O that is submitted at any one time - that depends on the amount of
+   available memory (we may have more available than we asked for), the
+   throughput that is being achieved and the ability of the CPU to keep up
+   with disk throughput (particularly where we're compressing pages).
+
+   - checksum/enabled
+
+   Use cryptoapi hashing routines to verify that Pageset2 pages don't change
+   while we're saving the first part of the image, and to get any pages that
+   do change resaved in the atomic copy. This should normally not be needed,
+   but if you're seeing issues, please enable this. If your issues stop you
+   being able to resume, enable this option, hibernate and cancel the cycle
+   after the atomic copy is done. If the debugging info shows a non-zero
+   number of pages resaved, please report this to Nigel.
+
+   - compression/algorithm
+
+   Set the cryptoapi algorithm used for compressing the image.
+
+   - compression/expected_compression
+
+   These values allow you to set an expected compression ratio, which TuxOnice
+   will use in calculating whether it meets constraints on the image size. If
+   this expected compression ratio is not attained, the hibernation cycle will
+   abort, so it is wise to allow some spare. You can see what compression
+   ratio is achieved in the logs after hibernating.
+
+   - debug_info:
+
+   This file returns information about your configuration that may be helpful
+   in diagnosing problems with hibernating.
+
+   - did_suspend_to_both:
+
+   This file can be used when you hibernate with powerdown method 3 (ie suspend
+   to ram after writing the image). There can be two outcomes in this case. We
+   can resume from the suspend-to-ram before the battery runs out, or we can run
+   out of juice and and up resuming like normal. This entry lets you find out,
+   post resume, which way we went. If the value is 1, we resumed from suspend
+   to ram. This can be useful when actions need to be run post suspend-to-ram
+   that don't need to be run if we did the normal resume from power off.
+
+   - do_hibernate:
+
+   When anything is written to this file, the kernel side of TuxOnIce will
+   begin to attempt to write an image to disk and power down. You'll normally
+   want to run the hibernate script instead, to get modules unloaded first.
+
+   - do_resume:
+
+   When anything is written to this file TuxOnIce will attempt to read and
+   restore an image. If there is no image, it will return almost immediately.
+   If an image exists, the echo > will never return. Instead, the original
+   kernel context will be restored and the original echo > do_hibernate will
+   return.
+
+   - */enabled
+
+   These option can be used to temporarily disable various parts of TuxOnIce.
+
+   - extra_pages_allowance
+
+   When TuxOnIce does its atomic copy, it calls the driver model suspend
+   and resume methods. If you have DRI enabled with a driver such as fglrx,
+   this can result in the driver allocating a substantial amount of memory
+   for storing its state. Extra_pages_allowance tells TuxOnIce how much
+   extra memory it should ensure is available for those allocations. If
+   your attempts at hibernating end with a message in dmesg indicating that
+   insufficient extra pages were allowed, you need to increase this value.
+
+   - file/target:
+
+   Read this value to get the current setting. Write to it to point TuxOnice
+   at a new storage location for the file allocator. See section 3.b.ii above
+   for details of how to set up the file allocator.
+
+   - freezer_test
+
+   This entry can be used to get TuxOnIce to just test the freezer and prepare
+   an image without actually doing a hibernation cycle. It is useful for
+   diagnosing freezing and image preparation issues.
+
+   - full_pageset2
+
+   TuxOnIce divides the pages that are stored in an image into two sets. The
+   difference between the two sets is that pages in pageset 1 are atomically
+   copied, and pages in pageset 2 are written to disk without being copied
+   first. A page CAN be written to disk without being copied first if and only
+   if its contents will not be modified or used at any time after userspace
+   processes are frozen. A page MUST be in pageset 1 if its contents are
+   modified or used at any time after userspace processes have been frozen.
+
+   Normally (ie if this option is enabled), TuxOnIce will put all pages on the
+   per-zone LRUs in pageset2, then remove those pages used by any userspace
+   user interface helper and TuxOnIce storage manager that are running,
+   together with pages used by the GEM memory manager introduced around 2.6.28
+   kernels.
+
+   If this option is disabled, a much more conservative approach will be taken.
+   The only pages in pageset2 will be those belonging to userspace processes,
+   with the exclusion of those belonging to the TuxOnIce userspace helpers
+   mentioned above. This will result in a much smaller pageset2, and will
+   therefore result in smaller images than are possible with this option
+   enabled.
+
+   - ignore_rootfs
+
+   TuxOnIce records which device is mounted as the root filesystem when
+   writing the hibernation image. It will normally check at resume time that
+   this device isn't already mounted - that would be a cause of filesystem
+   corruption. In some particular cases (RAM based root filesystems), you
+   might want to disable this check. This option allows you to do that.
+
+   - image_exists:
+
+   Can be used in a script to determine whether a valid image exists at the
+   location currently pointed to by resume=. Returns up to three lines.
+   The first is whether an image exists (-1 for unsure, otherwise 0 or 1).
+   If an image eixsts, additional lines will return the machine and version.
+   Echoing anything to this entry removes any current image.
+
+   - image_size_limit:
+
+   The maximum size of hibernation image written to disk, measured in megabytes
+   (1024*1024).
+
+   - last_result:
+
+   The result of the last hibernation cycle, as defined in
+   include/linux/suspend-debug.h with the values SUSPEND_ABORTED to
+   SUSPEND_KEPT_IMAGE. This is a bitmask.
+
+   - late_cpu_hotplug:
+
+   This sysfs entry controls whether cpu hotplugging is done - as normal - just
+   before (unplug) and after (replug) the atomic copy/restore (so that all
+   CPUs/cores are available for multithreaded I/O). The alternative is to
+   unplug all secondary CPUs/cores at the start of hibernating/resuming, and
+   replug them at the end of resuming. No multithreaded I/O will be possible in
+   this configuration, but the odd machine has been reported to require it.
+
+   - lid_file:
+
+   This determines which ACPI button file we look in to determine whether the
+   lid is open or closed after resuming from suspend to disk or power off.
+   If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state
+   and check its contents at the appropriate moment. See post_wake_state below
+   for more details on how this entry is used.
+
+   - log_everything (CONFIG_PM_DEBUG):
+
+   Setting this option results in all messages printed being logged. Normally,
+   only a subset are logged, so as to not slow the process and not clutter the
+   logs. Useful for debugging. It can be toggled during a cycle by pressing
+   'L'.
+
+   - no_load_direct:
+
+   This is a debugging option. If, when loading the atomically copied pages of
+   an image, TuxOnIce finds that the destination address for a page is free,
+   it will normally allocate the image, load the data directly into that
+   address and skip it in the atomic restore. If this option is disabled, the
+   page will be loaded somewhere else and atomically restored like other pages.
+
+   - no_flusher_thread:
+
+   When doing multithreaded I/O (see below), the first online CPU can be used
+   to _just_ submit compressed pages when writing the image, rather than
+   compressing and submitting data. This option is normally disabled, but has
+   been included because Nigel would like to see whether it will be more useful
+   as the number of cores/cpus in computers increases.
+
+   - no_multithreaded_io:
+
+   TuxOnIce will normally create one thread per cpu/core on your computer,
+   each of which will then perform I/O. This will generally result in
+   throughput that's the maximum the storage medium can handle. There
+   shouldn't be any reason to disable multithreaded I/O now, but this option
+   has been retained for debugging purposes.
+
+   - no_pageset2
+
+   See the entry for full_pageset2 above for an explanation of pagesets.
+   Enabling this option causes TuxOnIce to do an atomic copy of all pages,
+   thereby limiting the maximum image size to 1/2 of memory, as swsusp does.
+
+   - no_pageset2_if_unneeded
+
+   See the entry for full_pageset2 above for an explanation of pagesets.
+   Enabling this option causes TuxOnIce to act like no_pageset2 was enabled
+   if and only it isn't needed anyway. This option may still make TuxOnIce
+   less reliable because pageset2 pages are normally used to store the
+   atomic copy - drivers that want to do allocations of larger amounts of
+   memory in one shot will be more likely to find that those amounts aren't
+   available if this option is enabled.
+
+   - pause_between_steps (CONFIG_PM_DEBUG):
+
+   This option is used during debugging, to make TuxOnIce pause between
+   each step of the process. It is ignored when the nice display is on.
+
+   - post_wake_state:
+
+   TuxOnIce provides support for automatically waking after a user-selected
+   delay, and using a different powerdown method if the lid is still closed.
+   (Yes, we're assuming a laptop).  This entry lets you choose what state
+   should be entered next. The values are those described under
+   powerdown_method, below. It can be used to suspend to RAM after hibernating,
+   then powerdown properly (say) 20 minutes. It can also be used to power down
+   properly, then wake at (say) 6.30am and suspend to RAM until you're ready
+   to use the machine.
+
+   - powerdown_method:
+
+   Used to select a method by which TuxOnIce should powerdown after writing the
+   image. Currently:
+
+   0: Don't use ACPI to power off.
+   3: Attempt to enter Suspend-to-ram.
+   4: Attempt to enter ACPI S4 mode.
+   5: Attempt to power down via ACPI S5 mode.
+
+   Note that these options are highly dependant upon your hardware & software:
+
+   3: When succesful, your machine suspends to ram instead of powering off.
+      The advantage of using this mode is that it doesn't matter whether your
+      battery has enough charge to make it through to your next resume. If it
+      lasts, you will simply resume from suspend to ram (and the image on disk
+      will be discarded). If the battery runs out, you will resume from disk
+      instead. The disadvantage is that it takes longer than a normal
+      suspend-to-ram to enter the state, since the suspend-to-disk image needs
+      to be written first.
+   4/5: When successful, your machine will be off and comsume (almost) no power.
+      But it might still react to some external events like opening the lid or
+      trafic on  a network or usb device. For the bios, resume is then the same
+      as warm boot, similar to a situation where you used the command `reboot'
+      to reboot your machine. If your machine has problems on warm boot or if
+      you want to protect your machine with the bios password, this is probably
+      not the right choice. Mode 4 may be necessary on some machines where ACPI
+      wake up methods need to be run to properly reinitialise hardware after a
+      hibernation cycle.
+   0: Switch the machine completely off. The only possible wakeup is the power
+      button. For the bios, resume is then the same as a cold boot, in
+      particular you would  have to provide your bios boot password if your
+      machine uses that feature for booting.
+
+   - progressbar_granularity_limit:
+
+   This option can be used to limit the granularity of the progress bar
+   displayed with a bootsplash screen. The value is the maximum number of
+   steps. That is, 10 will make the progress bar jump in 10% increments.
+
+   - reboot:
+
+   This option causes TuxOnIce to reboot rather than powering down
+   at the end of saving an image. It can be toggled during a cycle by pressing
+   'R'.
+
+   - resume:
+
+   This sysfs entry can be used to read and set the location in which TuxOnIce
+   will look for the signature of an image - the value set using resume= at
+   boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By
+   writing to this file as well as modifying your bootloader's configuration
+   file (eg menu.lst), you can set or reset the location of your image or the
+   method of storing the image without rebooting.
+
+   - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP):
+
+   This option makes
+
+     echo disk > /sys/power/state
+
+   activate TuxOnIce instead of swsusp. Regardless of whether this option is
+   enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce
+   to check for an image too. This is due to the fact that at resume time, we
+   can't know whether this option was enabled until we see if an image is there
+   for us to resume from. (And when an image exists, we don't care whether we
+   did replace swsusp anyway - we just want to resume).
+
+   - resume_commandline:
+
+   This entry can be read after resuming to see the commandline that was used
+   when resuming began. You might use this to set up two bootloader entries
+   that are the same apart from the fact that one includes a extra append=
+   argument "at_work=1". You could then grep resume_commandline in your
+   post-resume scripts and configure networking (for example) differently
+   depending upon whether you're at home or work. resume_commandline can be
+   set to arbitrary text if you wish to remove sensitive contents.
+
+   - swap/swapfilename:
+
+   This entry is used to specify the swapfile or partition that
+   TuxOnIce will attempt to swapon/swapoff automatically. Thus, if
+   I normally use /dev/hda1 for swap, and want to use /dev/hda2 for specifically
+   for my hibernation image, I would
+
+   echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile
+
+   /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the
+   swapon and swapoff occur while other processes are frozen (including kswapd)
+   so this swap file will not be used up when attempting to free memory. The
+   parition/file is also given the highest priority, so other swapfiles/partitions
+   will only be used to save the image when this one is filled.
+
+   The value of this file is used by headerlocations along with any currently
+   activated swapfiles/partitions.
+
+   - swap/headerlocations:
+
+   This option tells you the resume= options to use for swap devices you
+   currently have activated. It is particularly useful when you only want to
+   use a swap file to store your image. See above for further details.
+
+   - test_bio
+
+   This is a debugging option. When enabled, TuxOnIce will not hibernate.
+   Instead, when asked to write an image, it will skip the atomic copy,
+   just doing the writing of the image and then returning control to the
+   user at the point where it would have powered off. This is useful for
+   testing throughput in different configurations.
+
+   - test_filter_speed
+
+   This is a debugging option. When enabled, TuxOnIce will not hibernate.
+   Instead, when asked to write an image, it will not write anything or do
+   an atomic copy, but will only run any enabled compression algorithm on the
+   data that would have been written (the source pages of the atomic copy in
+   the case of pageset 1). This is useful for comparing the performance of
+   compression algorithms and for determining the extent to which an upgrade
+   to your storage method would improve hibernation speed.
+
+   - user_interface/debug_sections (CONFIG_PM_DEBUG):
+
+   This value, together with the console log level, controls what debugging
+   information is displayed. The console log level determines the level of
+   detail, and this value determines what detail is displayed. This value is
+   a bit vector, and the meaning of the bits can be found in the kernel tree
+   in include/linux/tuxonice.h. It can be overridden using the kernel's
+   command line option suspend_dbg.
+
+   - user_interface/default_console_level (CONFIG_PM_DEBUG):
+
+   This determines the value of the console log level at the start of a
+   hibernation cycle. If debugging is compiled in, the console log level can be
+   changed during a cycle by pressing the digit keys. Meanings are:
+
+   0: Nice display.
+   1: Nice display plus numerical progress.
+   2: Errors only.
+   3: Low level debugging info.
+   4: Medium level debugging info.
+   5: High level debugging info.
+   6: Verbose debugging info.
+
+   - user_interface/enable_escape:
+
+   Setting this to "1" will enable you abort a hibernation cycle or resuming by
+   pressing escape, "0" (default) disables this feature. Note that enabling
+   this option means that you cannot initiate a hibernation cycle and then walk
+   away from your computer, expecting it to be secure. With feature disabled,
+   you can validly have this expectation once TuxOnice begins to write the
+   image to disk. (Prior to this point, it is possible that TuxOnice might
+   about because of failure to freeze all processes or because constraints
+   on its ability to save the image are not met).
+
+   - user_interface/program
+
+   This entry is used to tell TuxOnice what userspace program to use for
+   providing a user interface while hibernating. The program uses a netlink
+   socket to pass messages back and forward to the kernel, allowing all of the
+   functions formerly implemented in the kernel user interface components.
+
+   - version:
+
+   The version of TuxOnIce you have compiled into the currently running kernel.
+
+   - wake_alarm_dir:
+
+   As mentioned above (post_wake_state), TuxOnIce supports automatically waking
+   after some delay. This entry allows you to select which wake alarm to use.
+   It should contain the value "rtc0" if you're wanting to use
+   /sys/class/rtc/rtc0.
+
+   - wake_delay:
+
+   This value determines the delay from the end of writing the image until the
+   wake alarm is triggered. You can set an absolute time by writing the desired
+   time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values
+   empty.
+
+   Note that for the wakeup to actually occur, you may need to modify entries
+   in /proc/acpi/wakeup. This is done by echoing the name of the button in the
+   first column (eg PBTN) into the file.
+
+7. How do you get support?
+
+   Glad you asked. TuxOnIce is being actively maintained and supported
+   by Nigel (the guy doing most of the kernel coding at the moment), Bernard
+   (who maintains the hibernate script and userspace user interface components)
+   and its users.
+
+   Resources availble include HowTos, FAQs and a Wiki, all available via
+   tuxonice.net.  You can find the mailing lists there.
+
+8. I think I've found a bug. What should I do?
+
+   By far and a way, the most common problems people have with TuxOnIce
+   related to drivers not having adequate power management support. In this
+   case, it is not a bug with TuxOnIce, but we can still help you. As we
+   mentioned above, such issues can usually be worked around by building the
+   functionality as modules and unloading them while hibernating. Please visit
+   the Wiki for up-to-date lists of known issues and work arounds.
+
+   If this information doesn't help, try running:
+
+   hibernate --bug-report
+
+   ..and sending the output to the users mailing list.
+
+   Good information on how to provide us with useful information from an
+   oops is found in the file REPORTING-BUGS, in the top level directory
+   of the kernel tree. If you get an oops, please especially note the
+   information about running what is printed on the screen through ksymoops.
+   The raw information is useless.
+
+9. When will XXX be supported?
+
+   If there's a feature missing from TuxOnIce that you'd like, feel free to
+   ask. We try to be obliging, within reason.
+
+   Patches are welcome. Please send to the list.
+
+10. How does it work?
+
+   TuxOnIce does its work in a number of steps.
+
+   a. Freezing system activity.
+
+   The first main stage in hibernating is to stop all other activity. This is
+   achieved in stages. Processes are considered in fours groups, which we will
+   describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE
+   flag, kernel threads without this flag, userspace processes with the
+   PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are
+   untouched by the refrigerator code. They are allowed to run during hibernating
+   and resuming, and are used to support user interaction, storage access or the
+   like. Other kernel threads (those unneeded while hibernating) are frozen last.
+   This leaves us with userspace processes that need to be frozen. When a
+   process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on
+   that process for the duration of that call. Processes that have this flag are
+   frozen after processes without it, so that we can seek to ensure that dirty
+   data is synced to disk as quickly as possible in a situation where other
+   processes may be submitting writes at the same time. Freezing the processes
+   that are submitting data stops new I/O from being submitted. Syncthreads can
+   then cleanly finish their work. So the order is:
+
+   - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE;
+   - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE);
+   - Kernel processes without PF_NOFREEZE.
+
+   b. Eating memory.
+
+   For a successful hibernation cycle, you need to have enough disk space to store the
+   image and enough memory for the various limitations of TuxOnIce's
+   algorithm. You can also specify a maximum image size. In order to attain
+   to those constraints, TuxOnIce may 'eat' memory. If, after freezing
+   processes, the constraints aren't met, TuxOnIce will thaw all the
+   other processes and begin to eat memory until its calculations indicate
+   the constraints are met. It will then freeze processes again and recheck
+   its calculations.
+
+   c. Allocation of storage.
+
+   Next, TuxOnIce allocates the storage that will be used to save
+   the image.
+
+   The core of TuxOnIce knows nothing about how or where pages are stored. We
+   therefore request the active allocator (remember you might have compiled in
+   more than one!) to allocate enough storage for our expect image size. If
+   this request cannot be fulfilled, we eat more memory and try again. If it
+   is fulfiled, we seek to allocate additional storage, just in case our
+   expected compression ratio (if any) isn't achieved. This time, however, we
+   just continue if we can't allocate enough storage.
+
+   If these calls to our allocator change the characteristics of the image
+   such that we haven't allocated enough memory, we also loop. (The allocator
+   may well need to allocate space for its storage information).
+
+   d. Write the first part of the image.
+
+   TuxOnIce stores the image in two sets of pages called 'pagesets'.
+   Pageset 2 contains pages on the active and inactive lists; essentially
+   the page cache. Pageset 1 contains all other pages, including the kernel.
+   We use two pagesets for one important reason: We need to make an atomic copy
+   of the kernel to ensure consistency of the image. Without a second pageset,
+   that would limit us to an image that was at most half the amount of memory
+   available. Using two pagesets allows us to store a full image. Since pageset
+   2 pages won't be needed in saving pageset 1, we first save pageset 2 pages.
+   We can then make our atomic copy of the remaining pages using both pageset 2
+   pages and any other pages that are free. While saving both pagesets, we are
+   careful not to corrupt the image. Among other things, we use lowlevel block
+   I/O routines that don't change the pagecache contents.
+
+   The next step, then, is writing pageset 2.
+
+   e. Suspending drivers and storing processor context.
+
+   Having written pageset2, TuxOnIce calls the power management functions to
+   notify drivers of the hibernation, and saves the processor state in preparation
+   for the atomic copy of memory we are about to make.
+
+   f. Atomic copy.
+
+   At this stage, everything else but the TuxOnIce code is halted. Processes
+   are frozen or idling, drivers are quiesced and have stored (ideally and where
+   necessary) their configuration in memory we are about to atomically copy.
+   In our lowlevel architecture specific code, we have saved the CPU state.
+   We can therefore now do our atomic copy before resuming drivers etc.
+
+   g. Save the atomic copy (pageset 1).
+
+   TuxOnice can then write the atomic copy of the remaining pages. Since we
+   have copied the pages into other locations, we can continue to use the
+   normal block I/O routines without fear of corruption our image.
+
+   f. Save the image header.
+
+   Nearly there! We save our settings and other parameters needed for
+   reloading pageset 1 in an 'image header'. We also tell our allocator to
+   serialise its data at this stage, so that it can reread the image at resume
+   time.
+
+   g. Set the image header.
+
+   Finally, we edit the header at our resume= location. The signature is
+   changed by the allocator to reflect the fact that an image exists, and to
+   point to the start of that data if necessary (swap allocator).
+
+   h. Power down.
+
+   Or reboot if we're debugging and the appropriate option is selected.
+
+   Whew!
+
+   Reloading the image.
+   --------------------
+
+   Reloading the image is essentially the reverse of all the above. We load
+   our copy of pageset 1, being careful to choose locations that aren't going
+   to be overwritten as we copy it back (We start very early in the boot
+   process, so there are no other processes to quiesce here). We then copy
+   pageset 1 back to its original location in memory and restore the process
+   context. We are now running with the original kernel. Next, we reload the
+   pageset 2 pages, free the memory and swap used by TuxOnIce, restore
+   the pageset header and restart processes. Sounds easy in comparison to
+   hibernating, doesn't it!
+
+   There is of course more to TuxOnIce than this, but this explanation
+   should be a good start. If there's interest, I'll write further
+   documentation on range pages and the low level I/O.
+
+11. Who wrote TuxOnIce?
+
+   (Answer based on the writings of Florent Chabaud, credits in files and
+   Nigel's limited knowledge; apologies to anyone missed out!)
+
+   The main developers of TuxOnIce have been...
+
+   Gabor Kuti
+   Pavel Machek
+   Florent Chabaud
+   Bernard Blackham
+   Nigel Cunningham
+
+   Significant portions of swsusp, the code in the vanilla kernel which
+   TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should
+   also be expressed to him.
+
+   The above mentioned developers have been aided in their efforts by a host
+   of hundreds, if not thousands of testers and people who have submitted bug
+   fixes & suggestions. Of special note are the efforts of Michael Frank, who
+   had his computers repetitively hibernate and resume for literally tens of
+   thousands of cycles and developed scripts to stress the system and test
+   TuxOnIce far beyond the point most of us (Nigel included!) would consider
+   testing. His efforts have contributed as much to TuxOnIce as any of the
+   names above.
@@ -0,0 +1,75 @@
+Motivation:
+
+In complicated DMA pipelines such as graphics (multimedia, camera, gpu, display)
+a consumer of a buffer needs to know when the producer has finished producing
+it.  Likewise the producer needs to know when the consumer is finished with the
+buffer so it can reuse it.  A particular buffer may be consumed by multiple
+consumers which will retain the buffer for different amounts of time.  In
+addition, a consumer may consume multiple buffers atomically.
+The sync framework adds an API which allows synchronization between the
+producers and consumers in a generic way while also allowing platforms which
+have shared hardware synchronization primitives to exploit them.
+
+Goals:
+	* provide a generic API for expressing synchronization dependencies
+	* allow drivers to exploit hardware synchronization between hardware
+	  blocks
+	* provide a userspace API that allows a compositor to manage
+	  dependencies.
+	* provide rich telemetry data to allow debugging slowdowns and stalls of
+	   the graphics pipeline.
+
+Objects:
+	* sync_timeline
+	* sync_pt
+	* sync_fence
+
+sync_timeline:
+
+A sync_timeline is an abstract monotonically increasing counter. In general,
+each driver/hardware block context will have one of these.  They can be backed
+by the appropriate hardware or rely on the generic sw_sync implementation.
+Timelines are only ever created through their specific implementations
+(i.e. sw_sync.)
+
+sync_pt:
+
+A sync_pt is an abstract value which marks a point on a sync_timeline. Sync_pts
+have a single timeline parent.  They have 3 states: active, signaled, and error.
+They start in active state and transition, once, to either signaled (when the
+timeline counter advances beyond the sync_pt’s value) or error state.
+
+sync_fence:
+
+Sync_fences are the primary primitives used by drivers to coordinate
+synchronization of their buffers.  They are a collection of sync_pts which may
+or may not have the same timeline parent.  A sync_pt can only exist in one fence
+and the fence's list of sync_pts is immutable once created.  Fences can be
+waited on synchronously or asynchronously.  Two fences can also be merged to
+create a third fence containing a copy of the two fences’ sync_pts.  Fences are
+backed by file descriptors to allow userspace to coordinate the display pipeline
+dependencies.
+
+Use:
+
+A driver implementing sync support should have a work submission function which:
+     * takes a fence argument specifying when to begin work
+     * asynchronously queues that work to kick off when the fence is signaled
+     * returns a fence to indicate when its work will be done.
+     * signals the returned fence once the work is completed.
+
+Consider an imaginary display driver that has the following API:
+/*
+ * assumes buf is ready to be displayed.
+ * blocks until the buffer is on screen.
+ */
+    void display_buffer(struct dma_buf *buf);
+
+The new API will become:
+/*
+ * will display buf when fence is signaled.
+ * returns immediately with a fence that will signal when buf
+ * is no longer displayed.
+ */
+struct sync_fence* display_buffer(struct dma_buf *buf,
+                                 struct sync_fence *fence);
@@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm:
 - dirty_writeback_centisecs
 - drop_caches
 - extfrag_threshold
+- extra_free_kbytes
 - hugepages_treat_as_movable
 - hugetlb_shm_group
 - laptop_mode
@@ -198,6 +199,21 @@ fragmentation index is <= extfrag_threshold. The default value is 500.

 ==============================================================

+extra_free_kbytes
+
+This parameter tells the VM to keep extra free memory between the threshold
+where background reclaim (kswapd) kicks in, and the threshold where direct
+reclaim (by allocating processes) kicks in.
+
+This is useful for workloads that require low latency memory allocations
+and have a bounded burstiness in memory allocations, for example a
+realtime application that receives and transmits network traffic
+(causing in-kernel memory allocations) with a maximum total message burst
+size of 200MB may need 200MB of extra free memory to avoid direct reclaim
+related latencies.
+
+==============================================================
+
 hugepages_treat_as_movable

 This parameter is only useful when kernelcore= is specified at boot time to
@@ -358,11 +358,8 @@ Every arch has an init callback function.  If you need to do something early on
 to initialize some state, this is the time to do that.  Otherwise, this simple
 function below should be sufficient for most people:

-int __init ftrace_dyn_arch_init(void *data)
+int __init ftrace_dyn_arch_init(void)
 {
-	/* return value is done indirectly via data */
-	*(unsigned long *)data = 0;
-
 	return 0;
 }

@@ -2013,6 +2013,35 @@ will produce:
 1)   1.449 us    |             }


+You can disable the hierarchical function call formatting and instead print a
+flat list of function entry and return events.  This uses the format described
+in the Output Formatting section and respects all the trace options that
+control that formatting.  Hierarchical formatting is the default.
+
+	hierachical: echo nofuncgraph-flat > trace_options
+	flat: echo funcgraph-flat > trace_options
+
+  ie:
+
+  # tracer: function_graph
+  #
+  # entries-in-buffer/entries-written: 68355/68355   #P:2
+  #
+  #                              _-----=> irqs-off
+  #                             / _----=> need-resched
+  #                            | / _---=> hardirq/softirq
+  #                            || / _--=> preempt-depth
+  #                            ||| /     delay
+  #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+  #              | |       |   ||||       |         |
+                sh-1806  [001] d...   198.843443: graph_ent: func=_raw_spin_lock
+                sh-1806  [001] d...   198.843445: graph_ent: func=__raw_spin_lock
+                sh-1806  [001] d..1   198.843447: graph_ret: func=__raw_spin_lock
+                sh-1806  [001] d..1   198.843449: graph_ret: func=_raw_spin_lock
+                sh-1806  [001] d..1   198.843451: graph_ent: func=_raw_spin_unlock_irqrestore
+                sh-1806  [001] d...   198.843453: graph_ret: func=_raw_spin_unlock_irqrestore
+
+
 You might find other useful features for this tracer in the
 following "dynamic ftrace" section such as tracing only specific
 functions or tasks.
@@ -8314,6 +8314,13 @@ S:	Maintained
 F:	drivers/tc/
 F:	include/linux/tc.h

+TUXONICE (ENHANCED HIBERNATION)
+P:	Nigel Cunningham
+M:	nigel@tuxonice.net
+L:	tuxonice-devel@tuxonice.net
+W:	http://tuxonice.net
+S:	Maintained
+
 U14-34F SCSI DRIVER
 M:	Dario Ballabio <ballabio_dario@emc.com>
 L:	linux-scsi@vger.kernel.org
@@ -192,8 +192,15 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
 # "make" in the configured kernel build directory always uses that.
 # Default value for CROSS_COMPILE is not to prefix executables
 # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
-ARCH		?= $(SUBARCH)
-CROSS_COMPILE	?= $(CONFIG_CROSS_COMPILE:"%"=%)
+#ARCH		?= $(SUBARCH)
+#CROSS_COMPILE	?= $(CONFIG_CROSS_COMPILE:"%"=%)
+ifeq ($(TARGET_ARCH), arm)
+ARCH		?= arm
+CROSS_COMPILE	?= arm-eabi-
+else
+ARCH		?= arm64
+CROSS_COMPILE	?= aarch64-linux-android-
+endif

 # Architecture as present in compile.h
 UTS_MACHINE 	:= $(ARCH)
@@ -373,7 +380,9 @@ KBUILD_CFLAGS   := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		   -fno-strict-aliasing -fno-common \
 		   -Werror-implicit-function-declaration \
 		   -Wno-format-security \
-		   -fno-delete-null-pointer-checks
+		   -fno-delete-null-pointer-checks \
+		   -Werror=format -Werror=int-to-pointer-cast -Werror=pointer-to-int-cast
+
 KBUILD_AFLAGS_KERNEL :=
 KBUILD_CFLAGS_KERNEL :=
 KBUILD_AFLAGS   := -D__ASSEMBLY__
@@ -381,6 +390,13 @@ KBUILD_AFLAGS_MODULE  := -DMODULE
 KBUILD_CFLAGS_MODULE  := -DMODULE
 KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds

+-include $(srctree)/$(MTK_PROJECT)_mtk_cust.mak
+#MTK_INC += -I$(MTK_ROOT_CUSTOM)/$(MTK_PROJECT)/common
+LINUXINCLUDE    += $(MTK_INC)
+KBUILD_CFLAGS   += $(MTK_CFLAGS) $(MTK_CDEFS) -fno-pic
+KBUILD_CPPFLAGS += $(MTK_CPPFLAGS) $(MTK_CPPDEFS)
+KBUILD_AFLAGS   += $(MTK_AFLAGS) $(MTK_ADEFS)
+
 # Read KERNELRELEASE from include/config/kernel.release (if it exists)
 KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
 KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION)
@@ -589,6 +605,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-reorder-blocks,) \
 endif

 ifneq ($(CONFIG_FRAME_WARN),0)
+KBUILD_CFLAGS += $(call cc-option,-Werror=frame-larger-than=1)
 KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif

@@ -851,7 +868,9 @@ define filechk_utsrelease.h
 	  echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2;    \
 	  exit 1;                                                         \
 	fi;                                                               \
-	(echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";)
+	(echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";                  \
+	echo \#define BUILD_INFO \"$(MTK_BUILD_VERNO)\";                  \
+	echo \#define BUILD_FINGERPRINT \"$(TARGET_PRODUCT)\";)
 endef

 define filechk_version.h
@@ -987,6 +1006,29 @@ _modinst_post: _modinst_
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_modinst
 	$(call cmd,depmod)

+# MTK {
+# Target to install android modules
+
+AMODLIB = $(INSTALL_MOD_PATH)/lib/modules
+export AMODLIB
+AMODSYMLIB = $(INSTALL_MOD_PATH)/../symbols/system/lib/modules
+export AMODSYMLIB
+
+PHONY += android_modules_install
+android_modules_install: _android_modinst_
+
+PHONY += _android_modinst_
+_android_modinst_:
+	@if [ ! -d $(AMODLIB) ]; then \
+		mkdir -p $(AMODLIB); \
+	fi
+	@if [ ! -d $(AMODSYMLIB) ]; then \
+		mkdir -p $(AMODSYMLIB); \
+	fi
+	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.android.modinst
+# MTK }
+
+
 ifeq ($(CONFIG_MODULE_SIG), y)
 PHONY += modules_sign
 modules_sign:
@@ -0,0 +1,15 @@
+The files in this directory are meant to be used as a base for an Android
+kernel config. All devices should have the options in android-base.cfg enabled.
+While not mandatory, the options in android-recommended.cfg enable advanced
+Android features.
+
+Assuming you already have a minimalist defconfig for your device, a possible
+way to enable these options would be:
+
+     ARCH=<arch> scripts/kconfig/merge_config.sh <path_to>/<device>_defconfig android/configs/android-base.cfg android/configs/android-recommended.cfg
+
+This will generate a .config that can then be used to save a new defconfig or
+compile a new kernel with Android features enabled.
+
+Because there is no tool to consistently generate these config fragments,
+lets keep them alphabetically sorted instead of random.
@@ -0,0 +1,141 @@
+#  KEEP ALPHABETICALLY SORTED
+# CONFIG_INET_LRO is not set
+# CONFIG_MODULES is not set
+# CONFIG_OABI_COMPAT is not set
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ANDROID_INTF_ALARM_DEV=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_ARMV7_COMPAT=y
+CONFIG_ASHMEM=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_VERITY=y
+CONFIG_EMBEDDED=y
+CONFIG_FB=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_INET=y
+CONFIG_INET_ESP=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_TARGET_REJECT_SKERR=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_TARGET_REJECT_SKERR=y
+CONFIG_NET=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_TPROXY=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_KEY=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_NAT=y
+CONFIG_NO_HZ=y
+CONFIG_PACKET=y
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PPP=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PREEMPT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_RTC_CLASS=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_STAGING=y
+CONFIG_SWITCH=y
+CONFIG_SYNC=y
+CONFIG_SYSVIPC=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_G_ANDROID=y
+CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_XFRM_USER=y
@@ -0,0 +1,121 @@
+#  KEEP ALPHABETICALLY SORTED
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_NF_CONNTRACK_SIP is not set
+# CONFIG_PM_WAKELOCKS_GC is not set
+# CONFIG_VT is not set
+CONFIG_ANDROID_TIMED_GPIO=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_COMPACTION=y
+CONFIG_DM_UEVENT=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FUSE_FS=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HIDRAW=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_GPIO=y
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_KEYRESET=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_TABLET=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_ION=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KSM=y
+CONFIG_LOGIG940_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGITECH_FF=y
+CONFIG_MD=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_MSDOS_FS=y
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_PANTHERLORD_FF=y
+CONFIG_PERF_EVENTS=y
+CONFIG_PM_DEBUG=y
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+CONFIG_POWER_SUPPLY=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_SND=y
+CONFIG_SOUND=y
+CONFIG_SUSPEND_TIME=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_TABLET_USB_WACOM=y
+CONFIG_TIMER_STATS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_UHID=y
+CONFIG_UID_STAT=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_USBNET=y
+CONFIG_VFAT_FS=y
@@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER
 	  - secure_computing is called from a ptrace_event()-safe context
 	  - secure_computing return value is checked and a return value of -1
 	    results in the system call being skipped immediately.
+	  - seccomp syscall wired up

 config SECCOMP_FILTER
 	def_bool y
@@ -18,12 +18,6 @@
 #include <asm/clk.h>
 #include <asm/mach_desc.h>

-/* called from unflatten_device_tree() to bootstrap devicetree itself */
-void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
-{
-	return __va(memblock_alloc(size, align));
-}
-
 /**
 * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
 * @dt:		virtual address pointer to dt blob
@@ -7,6 +7,7 @@ config ARM
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select ARM_HAS_SG_CHAIN if (ARCH_MT6589 || ARCH_MT6582 || ARCH_MT6572 || ARCH_MT8135 || ARCH_MT6595 || ARCH_MT6795 || ARCH_MT6752 || ARCH_MT8127)
 	select BUILDTIME_EXTABLE_SORT if MMU
 	select CPU_PM if (SUSPEND || CPU_IDLE)
 	select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN && MMU
@@ -15,6 +16,7 @@ config ARM
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PCI_IOMAP
+	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_STRNCPY_FROM_USER
@@ -61,6 +63,7 @@ config ARM
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
 	select HAVE_CONTEXT_TRACKING
+	select CRYPTO_AES_ARM if (ARCH_MT6752)
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -633,6 +636,138 @@ config ARCH_MSM
 	  stack and controls some vital subsystems
 	  (clock and power control, etc).

+config ARCH_MT6572
+	bool "MediaTek MT6572"
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select ARCH_HAS_CPUFREQ
+	select ARM_AMBA
+	select CPU_V7
+	select HAVE_SMP
+	select NEED_MACH_MEMORY_H
+	select VFP_OPT
+	help
+	  This enable support for MediaTek MT6572
+
+config ARCH_MT6595
+	bool "MediaTek MT6595"
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select ARCH_HAS_CPUFREQ
+	select ARM_AMBA
+	select CPU_V7
+	select HAVE_SMP
+	select NEED_MACH_MEMORY_H
+	select FIQ_GLUE
+	select IRQ_DOMAIN
+	select IRQ_DOMAIN_DEBUG
+	select ARCH_REQUIRE_GPIOLIB
+	select ARM_ERRATA_828419
+	select ARM_ERRATA_828420
+	select ARM_ERRATA_831171
+	select VFP_OPT
+	select MTK_CPU_STRESS
+	select MTK_LASTPC
+	select MTK_SYSTRACKER
+	select ZONE_DMA if ARM_LPAE
+	help
+	  This enable support for MediaTek MT6595
+
+config ARCH_MT6582
+	bool "MediaTek MT6582"
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select ARCH_HAS_CPUFREQ
+	select ARM_AMBA
+	select CPU_V7
+	select HAVE_SMP
+	select NEED_MACH_MEMORY_H
+	select L1C_OPT
+	select VFP_OPT
+	select HAVE_TRUSTONIC_TEE_SUPPORT
+	select MTK_CPU_STRESS
+	select MTK_DBG_DUMP
+	select MTK_KERNEL_IN_SECURE_MODE if ((!TRUSTONIC_TEE_SUPPORT) && (!ARM_PSCI))
+	select FIQ_GLUE if TRUSTONIC_TEE_SUPPORT
+	help
+	  This enable support for MediaTek MT6582.
+
+config ARCH_MT6592
+	bool "MediaTek MT6592"
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select ARCH_HAS_CPUFREQ
+	select ARM_AMBA
+	select CPU_V7
+	select HAVE_SMP
+	select NEED_MACH_MEMORY_H
+	select ARM_HAS_SG_CHAIN
+	select VFP_OPT
+	select HAVE_TRUSTONIC_TEE_SUPPORT
+	select L1C_OPT
+	help
+	  This enable support for MediaTek MT6592.
+
+config ARCH_MT6752
+	bool "MediaTek MT6752"
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select ARCH_HAS_CPUFREQ
+	select ARM_AMBA
+	select CPU_V7
+	select HAVE_SMP
+	select NEED_MACH_MEMORY_H
+	select IRQ_DOMAIN
+	select IRQ_DOMAIN_DEBUG
+	select GENERIC_SCHED_CLOCK
+	select VFP_OPT
+	select MTK_SYSTRACKER
+	select MTK_L2C_SHARE
+	select ARM_ERRATA_824069
+	select ARM_ERRATA_826319
+	select ARCH_REQUIRE_GPIOLIB
+	select MTK_EIC
+	select MTK_ETM
+	help
+	  This enable support for MediaTek MT6752
+
+config ARCH_MT6795
+	bool "MediaTek MT6795"
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select ARCH_HAS_CPUFREQ
+	select ARM_AMBA
+	select CPU_V7
+	select HAVE_SMP
+	select NEED_MACH_MEMORY_H
+	select FIQ_GLUE
+	select IRQ_DOMAIN
+	select IRQ_DOMAIN_DEBUG
+	select GENERIC_SCHED_CLOCK
+	select ARM_ERRATA_828419
+	select ARM_ERRATA_828420
+	select VFP_OPT
+	help
+	  This enable support for MediaTek MT6795
+
+config ARCH_MT8127
+	bool "MediaTek MT8127"
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select ARCH_HAS_CPUFREQ
+	select ARM_AMBA
+	select CPU_V7
+	select HAVE_SMP
+	select NEED_MACH_MEMORY_H
+	select FIQ_GLUE
+	select IRQ_DOMAIN
+	select IRQ_DOMAIN_DEBUG
+	select ARCH_REQUIRE_GPIOLIB
+	select VFP_OPT
+	select HAVE_MTK_IN_HOUSE_TEE_SUPPORT
+	help
+	  This enable support for MediaTek MT8127
+
 config ARCH_SHMOBILE
 	bool "Renesas SH-Mobile / R-Mobile"
 	select CLKDEV_LOOKUP
@@ -955,6 +1090,24 @@ source "arch/arm/mach-ks8695/Kconfig"

 source "arch/arm/mach-msm/Kconfig"

+if ARCH_MT6572
+#source "arch/arm/mach-mt6572/Kconfig"
+endif
+
+if ARCH_MT6582
+#source "arch/arm/mach-mt6582/Kconfig"
+endif
+
+if ARCH_MT6592
+#source "arch/arm/mach-mt6592/Kconfig"
+endif
+
+if ARCH_MT8127
+source "arch/arm/mach-mt8127/Kconfig"
+endif
+
+source "drivers/misc/mediatek/mach/Kconfig"
+
 source "arch/arm/mach-mv78xx0/Kconfig"

 source "arch/arm/mach-imx/Kconfig"
@@ -1088,6 +1241,17 @@ if !MMU
 source "arch/arm/Kconfig-nommu"
 endif

+config MTK_KERNEL_IN_SECURE_MODE
+	bool "MTK's kernel runs in secure mode"
+	depends on ((!TRUSTONIC_TEE_SUPPORT) && (!ARM_PSCI))
+	help
+	  Indication to kernel's mode. (secure or non-secure)
+
+config L1C_OPT
+	bool "MTK's cache operation fixup"
+	help
+	  Use two stages of cache operations to do flush
+
 config PJ4B_ERRATA_4742
 	bool "PJ4B Errata 4742: IDLE Wake Up Commands can Cause the CPU Core to Cease Operation"
 	depends on CPU_PJ4B && MACH_ARMADA_370
@@ -1357,6 +1521,52 @@ config ARM_ERRATA_798181
 	  which sends an IPI to the CPUs that are running the same ASID
 	  as the one being invalidated.

+config ARM_ERRATA_828419
+	def_bool n
+	depends on CPU_V7 && SMP
+	help
+	  ARM errata: A Instruction Cache Maintenance Operation by MVA with an incorrect NS descriptor
+	  This workaround is to replace ICIMVAU operations by ICIALLUIS operations
+
+config ARM_ERRATA_828420
+	def_bool n
+	depends on CPU_V7 && SMP
+	help
+	  ARM errata: Cache Clean by MVA to PoC might not be correctly executed
+	  The software workaround is to treat every Data Clean by MVA to PoC
+	  as Data Clean Invalidate by MVA to PoC.
+
+config ARM_ERRATA_831171
+	def_bool n
+	depends on CPU_V7 && SMP
+	help
+	  ARM errata: Within rare timing constraints, a DSB following a TLB or ICache invalidation might complete 
+	  before the TLB or ICache invalidation has effectively completed on other CPUs within the cluster
+	  The software workaround is to issue the TLB or ICache maintenance invalidation twice before the DSB
+
+config ARM_ERRATA_824069
+	def_bool n
+	depends on SMP
+	help
+	  This option enables the workaround for erratum 824069 
+	  affecting Cortex-A53 MPCore with two or more processors (r0p0..r0p2). 
+	  If a Cortex-A53 processor is executing a store or PLDW instruction at the same time
+	  as a processor in another cluster is executing a cache maintenance operation
+	  to the same address, then this erratum might cause a clean cache line to be
+	  incorrectly marked as dirty. This workaround replaces all cache clean opeartion
+	  to clean & invalidate.
+							        
+config ARM_ERRATA_826319
+	def_bool n
+	depends on SMP
+	help
+	  This option enables the workaround for erratum 826319
+	  affecting Cortex-A53 MPCore with two or more processors (r0p0..r0p2).
+	  This erratum only affects configurations of the Cortex-A53 processor with an ABMA 4 ACE or AXI master interface and an L2 cache.
+	  To be affected by this erratum, the system that Cortex-A53 is connected to must also contain a peripheral or
+	  other component that contains a dependency between the read and write channels. The dependency must
+	  prevent the peripheral from accepting or responding to a write until it finishes processing a read.
+
 endmenu

 source "arch/arm/common/Kconfig"
@@ -1495,6 +1705,178 @@ config SCHED_SMT
 	  MultiThreading at a cost of slightly increased overhead in some
 	  places. If unsure say N here.

+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+	bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+	help
+	  Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+	bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling"
+	depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+	help
+	  Experimental scheduler optimizations for heterogeneous platforms.
+	  Attempts to introspectively select task affinity to optimize power
+	  and performance. Basic support for multiple (>2) cpu types is in place,
+	  but it has only been tested with two types of cpus.
+	  There is currently no support for migration of task groups, hence
+	  !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+	  between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+
+config SCHED_HMP_PRIO_FILTER
+	bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
+	depends on SCHED_HMP
+	help
+	  Enables task priority based HMP migration filter. Any task with
+	  a NICE value above the threshold will always be on low-power cpus
+	  with less compute capacity.
+
+config SCHED_HMP_PRIO_FILTER_VAL
+	int "NICE priority threshold"
+	default 5
+	depends on SCHED_HMP_PRIO_FILTER
+
+config HMP_FAST_CPU_MASK
+	string "HMP scheduler fast CPU mask"
+	depends on SCHED_HMP
+	help
+          Leave empty to use device tree information.
+	  Specify the cpuids of the fast CPUs in the system as a list string,
+	  e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+	string "HMP scheduler slow CPU mask"
+	depends on SCHED_HMP
+	help
+	  Leave empty to use device tree information.
+	  Specify the cpuids of the slow CPUs in the system as a list string,
+	  e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_VARIABLE_SCALE
+	bool "Allows changing the load tracking scale through sysfs"
+	depends on SCHED_HMP
+	help
+	  When turned on, this option exports the thresholds and load average
+	  period value for the load tracking patches through sysfs.
+	  The values can be modified to change the rate of load accumulation
+	  and the thresholds used for HMP migration.
+	  The load_avg_period_ms is the time in ms to reach a load average of
+	  0.5 for an idle task of 0 load average ratio that start a busy loop.
+	  The up_threshold and down_threshold is the value to go to a faster
+	  CPU or to go back to a slower cpu.
+	  The {up,down}_threshold are devided by 1024 before being compared
+	  to the load average.
+	  For examples, with load_avg_period_ms = 128 and up_threshold = 512,
+	  a running task with a load of 0 will be migrated to a bigger CPU after
+	  128ms, because after 128ms its load_avg_ratio is 0.5 and the real
+	  up_threshold is 0.5.
+	  This patch has the same behavior as changing the Y of the load
+	  average computation to
+	        (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
+	  but it remove intermadiate overflows in computation.
+
+config MET_SCHED_HMP
+	bool "(EXPERIMENTAL) MET SCHED HMP Info"
+	depends on SCHED_HMP_ENHANCEMENT
+	depends on HMP_TRACER
+	help
+	  MET SCHED HMP Info
+
+config HMP_FREQUENCY_INVARIANT_SCALE
+	bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
+	depends on HMP_VARIABLE_SCALE && CPU_FREQ
+	depends on !ARCH_SCALE_INVARIANT_CPU_CAPACITY
+	help
+	  Scales the current load contribution in line with the frequency
+	  of the CPU that the task was executed on.
+	  In this version, we use a simple linear scale derived from the
+	  maximum frequency reported by CPUFreq.
+	  Restricting tracked load to be scaled by the CPU's frequency
+	  represents the consumption of possible compute capacity
+	  (rather than consumption of actual instantaneous capacity as
+	  normal) and allows the HMP migration's simple threshold
+	  migration strategy to interact more predictably with CPUFreq's
+	  asynchronous compute capacity changes.
+
+config SCHED_HMP_ENHANCEMENT
+	bool "(EXPERIMENTAL) HMP Ennhancement"
+	depends on SCHED_HMP
+	help
+	  HMP Ennhancement
+
+config HMP_TRACER
+	bool "(EXPERIMENTAL) Profile HMP scheduler"
+	depends on SCHED_HMP_ENHANCEMENT
+	help
+	  Profile HMP scheduler
+
+config HMP_DYNAMIC_THRESHOLD
+	bool "(EXPERIMENTAL) Dynamically adjust task migration threshold"
+	depends on SCHED_HMP_ENHANCEMENT
+	help
+	  Dynamically adjust task migration threshold according to current system load
+
+config HMP_GLOBAL_BALANCE
+	bool "(EXPERIMENTAL) Enhance HMP global load balance"
+	depends on SCHED_HMP_ENHANCEMENT
+	help
+	  Enhance HMP global load balance
+
+config HMP_TASK_ASSIGNMENT
+	bool "(EXPERIMENTAL) Enhance HMP task assignment"
+	depends on SCHED_HMP_ENHANCEMENT
+	help
+	  Enhance HMP task assignment
+
+config HMP_DISCARD_CFS_SELECTION_RESULT
+	bool "(EXPERIMENTAL) Discard CFS runqueue selection result"
+	depends on SCHED_HMP_ENHANCEMENT && HMP_TASK_ASSIGNMENT
+	help
+	  Discard CFS runqueue selection result even if only one cluster exists
+
+config HMP_PACK_SMALL_TASK
+	bool "(EXPERIMENTAL) Packing Small Tasks"
+	depends on SCHED_HMP_ENHANCEMENT
+	help
+		This option enables Packing Small Tasks
+
+config HMP_PACK_BUDDY_INFO
+	bool "(EXPERIMENTAL) Packing Small Tasks Buddy Information Log"
+	depends on SCHED_HMP_ENHANCEMENT && HMP_PACK_SMALL_TASK
+	help
+		This option enables Packing Small Tasks Buddy Information Log
+    
+config HMP_LAZY_BALANCE
+	bool "(EXPERIMENTAL) Lazy Balance"
+	depends on SCHED_HMP_ENHANCEMENT && HMP_PACK_SMALL_TASK
+	help
+		This option enables Lazy Balance
+    
+config HMP_POWER_AWARE_CONTROLLER
+	bool "(EXPERIMENTAL) Power-aware Scheduler for b.L MP Controller"
+	depends on SCHED_HMP_ENHANCEMENT && HMP_PACK_SMALL_TASK && HMP_LAZY_BALANCE
+	help
+		Power-aware scheduler for b.L MP controller and status interface
+
+config HEVTASK_INTERFACE
+	bool "task status interface"
+	help
+	  The option provide an interface to show task status
+	  
+config ARCH_SCALE_INVARIANT_CPU_CAPACITY
+       bool "(EXPERIMENTAL) Scale-Invariant CPU Compute Capacity Recording"
+       depends on CPU_FREQ
+       help
+         Provides a new measure of maximum and instantaneous CPU compute
+         capacity, derived from a table of relative compute performance
+         for each core type present in the system. The table is an
+         estimate and specific core performance may be different for
+         any particular workload. The measure includes the relative
+         performance and a linear scale of current to maximum frequency
+         such that at maximum frequency (as expressed in the DTB) the
+         reported compute capacity will be equal to the estimated
+         performance from the table. Values range between 0 and 1023 where
+         1023 is the highest capacity available in the system.
+
 config HAVE_ARM_SCU
 	bool
 	help
@@ -1572,6 +1954,7 @@ config LOCAL_TIMERS
 	bool "Use local timer interrupts"
 	depends on SMP
 	default y
+	select HAVE_ARM_TWD if (!ARCH_MSM_SCORPIONMP && !EXYNOS4_MCT && !ARCH_MT6589 && !ARCH_MT6582 && !ARCH_MT8135 && !ARCH_MT6592 && !ARCH_MT6595 && !ARCH_MT6795 && !ARCH_MT6752 && !ARCH_MT8127 && !ARCH_MT6572)
 	help
 	  Enable support for local timers on SMP platforms, rather then the
 	  legacy IPI broadcast method.  Local timers allows the system
@@ -1820,6 +2203,15 @@ config XEN
 	help
 	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM.

+config ARM_FLUSH_CONSOLE_ON_RESTART
+	bool "Force flush the console on restart"
+	help
+	  If the console is locked while the system is rebooted, the messages
+	  in the temporary logbuffer would not have propogated to all the
+	  console drivers. This option forces the console lock to be
+	  released if it failed to be acquired, which will cause all the
+	  pending messages to be flushed.
+
 endmenu

 menu "Boot options"
@@ -1829,6 +2221,7 @@ config USE_OF
 	select IRQ_DOMAIN
 	select OF
 	select OF_EARLY_FLATTREE
+	select OF_RESERVED_MEM
 	help
 	  Include support for flattened device tree machine descriptions.

@@ -1849,6 +2242,21 @@ config DEPRECATED_PARAM_STRUCT
 	  This was deprecated in 2001 and announced to live on for 5 years.
 	  Some old boot loaders still use this way.

+config BUILD_ARM_APPENDED_DTB_IMAGE
+	bool "Build a concatenated zImage/dtb by default"
+	depends on OF
+	help
+	  Enabling this option will cause a concatenated zImage and list of
+	  DTBs to be built by default (instead of a standalone zImage.)
+	  The image will built in arch/arm/boot/zImage-dtb
+
+config BUILD_ARM_APPENDED_DTB_IMAGE_NAMES
+	string "Default dtb names"
+	depends on BUILD_ARM_APPENDED_DTB_IMAGE
+	help
+	  Space separated list of names of dtbs to append when
+	  building a concatenated zImage-dtb.
+
 # Compressed boot loader in ROM.  Yes, we really want to ask about
 # TEXT and BSS so we preserve their values in the config files.
 config ZBOOT_ROM_TEXT
@@ -1882,6 +2290,14 @@ config ZBOOT_ROM
 	  Say Y here if you intend to execute your compressed kernel image
 	  (zImage) directly from ROM or flash.  If unsure, say N.

+config COMPAT_CPUINFO
+	bool "Show /proc/cpuinfo in old form"
+	default n
+	help
+	  Show old style /proc/cpuinfo. Do not show cpu features with each
+	  cpu cores.
+	  If unsure, say N
+
 choice
 	prompt "Include SD/MMC loader in zImage (EXPERIMENTAL)"
 	depends on ZBOOT_ROM && ARCH_SH7372
@@ -2063,6 +2479,14 @@ config CRASH_DUMP

 	  For more details see Documentation/kdump/kdump.txt

+config RESTART_DISABLE_CACHE
+        bool "Disable cache in arm_machine_restart"
+	default y
+        help
+          Whther to disable DCache in arm_machine_restart().
+          This is a temporary solution before MTK change to use new kernel
+          restart flow.
+
 config AUTO_ZRELADDR
 	bool "Auto calculation of the decompressed kernel image address"
 	depends on !ZBOOT_ROM && !ARCH_U300
@@ -2186,6 +2610,12 @@ config VFP

 	  Say N if your target does not have VFP hardware.

+config VFP_OPT
+	def_bool n
+	depends on VFP
+	help
+	  Say Y if you want to enable VFP/NEON always
+
 config VFPv3
 	bool
 	depends on VFP
@@ -2229,6 +2659,11 @@ config ARCH_SUSPEND_POSSIBLE
 config ARM_CPU_SUSPEND
 	def_bool PM_SLEEP

+config ARCH_HIBERNATION_POSSIBLE
+        bool
+        depends on MMU
+        default y if ARCH_SUSPEND_POSSIBLE
+
 endmenu

 source "net/Kconfig"
@@ -2246,3 +2681,4 @@ source "crypto/Kconfig"
 source "lib/Kconfig"

 source "arch/arm/kvm/Kconfig"
+
@@ -63,6 +63,27 @@ config DEBUG_USER
 	      8 - SIGSEGV faults
 	     16 - SIGBUS faults

+config DEBUG_RODATA
+	bool "Write protect kernel text section"
+	default n
+	depends on DEBUG_KERNEL && MMU
+	---help---
+	  Mark the kernel text section as write-protected in the pagetables,
+	  in order to catch accidental (and incorrect) writes to such const
+	  data. This will cause the size of the kernel, plus up to 4MB, to
+	  be mapped as pages instead of sections, which will increase TLB
+	  pressure.
+	  If in doubt, say "N".
+
+config DEBUG_RODATA_TEST
+	bool "Testcase for the DEBUG_RODATA feature"
+	depends on DEBUG_RODATA
+	default n
+	---help---
+	  This option enables a testcase for the DEBUG_RODATA
+	  feature.
+	  If in doubt, say "N"
+
 # These options are only for real kernel hackers who want to get their hands dirty.
 config DEBUG_LL
 	bool "Kernel low-level debugging functions (read help!)"
@@ -669,6 +690,14 @@ config EARLY_PRINTK
 	  kernel low-level debugging functions. Add earlyprintk to your
 	  kernel parameters to enable this console.

+config EARLY_PRINTK_DIRECT
+	bool "Early printk direct"
+	depends on DEBUG_LL
+	help
+	  Say Y here if you want to have an early console using the
+	  kernel low-level debugging functions and EARLY_PRINTK is
+	  not early enough.
+
 config OC_ETM
 	bool "On-chip ETM and ETB"
 	depends on ARM_AMBA
@@ -158,6 +158,10 @@ machine-$(CONFIG_ARCH_KS8695)		+= ks8695
 machine-$(CONFIG_ARCH_LPC32XX)		+= lpc32xx
 machine-$(CONFIG_ARCH_MMP)		+= mmp
 machine-$(CONFIG_ARCH_MSM)		+= msm
+machine-$(CONFIG_ARCH_MT6582)		+= mt6582
+machine-$(CONFIG_ARCH_MT6592)		+= mt6592
+machine-$(CONFIG_ARCH_MT6572)		+= mt6572
+machine-$(CONFIG_ARCH_MT8127)		+= mt8127
 machine-$(CONFIG_ARCH_MV78XX0)		+= mv78xx0
 machine-$(CONFIG_ARCH_MXC)		+= imx
 machine-$(CONFIG_ARCH_MXS)		+= mxs
@@ -237,6 +241,11 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs))
 endif
 endif

+ifneq ($(filter y,$(CONFIG_ARCH_MT6752) $(CONFIG_ARCH_MT6732) $(CONFIG_ARCH_MT6595) $(CONFIG_ARCH_MT6795)),)
+MTK_PLATFORM := $(subst ",,$(CONFIG_MTK_PLATFORM))
+KBUILD_CPPFLAGS += -I$(srctree)/drivers/misc/mediatek/mach/$(MTK_PLATFORM)/include
+endif
+
 export	TEXT_OFFSET GZFLAGS MMUEXT

 # Do we have FASTFPE?
@@ -264,6 +273,8 @@ libs-y				:= arch/arm/lib/ $(libs-y)
 # Default target when executing plain make
 ifeq ($(CONFIG_XIP_KERNEL),y)
 KBUILD_IMAGE := xipImage
+else ifeq ($(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE),y)
+KBUILD_IMAGE := zImage-dtb
 else
 KBUILD_IMAGE := zImage
 endif
@@ -273,13 +284,19 @@ ifeq ($(CONFIG_USE_OF),y)
 KBUILD_DTBS := dtbs
 endif

+MTK_PLATFORM := $(CONFIG_MTK_PLATFORM:"%"=%)
+MTK_PROJECT := $(CONFIG_ARCH_MTK_PROJECT:"%"=%)
+export MTK_PLATFORM MTK_PROJECT MTK_TARGET_PROJECT
+
 all:	$(KBUILD_IMAGE) $(KBUILD_DTBS)

 boot := arch/arm/boot

 archprepare:
 	$(Q)$(MAKE) $(build)=arch/arm/tools include/generated/mach-types.h
-
+-include $(srctree)/scripts/ptgen/$(MTK_PLATFORM)/ptgen.mk
+-include $(srctree)/scripts/drvgen/drvgen.mk
+-include $(srctree)/scripts/ptgen/$(MTK_PLATFORM)/ptgen.mk
 # Convert bzImage to zImage
 bzImage: zImage

@@ -295,6 +312,9 @@ zinstall uinstall install: vmlinux
 dtbs: scripts
 	$(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) dtbs

+zImage-dtb: vmlinux scripts dtbs
+	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
+
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
@@ -4,3 +4,4 @@ xipImage
 bootpImage
 uImage
 *.dtb
+zImage-dtb
@@ -14,6 +14,7 @@
 ifneq ($(MACHINE),)
 include $(srctree)/$(MACHINE)/Makefile.boot
 endif
+include $(srctree)/arch/arm/boot/dts/Makefile

 # Note: the following conditions must always be true:
 #   ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET)
@@ -27,6 +28,14 @@ export ZRELADDR INITRD_PHYS PARAMS_PHYS

 targets := Image zImage xipImage bootpImage uImage

+DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES))
+ifneq ($(DTB_NAMES),)
+DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES))
+else
+DTB_LIST := $(dtb-y)
+endif
+DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST))
+
 ifeq ($(CONFIG_XIP_KERNEL),y)

 $(obj)/xipImage: vmlinux FORCE
@@ -55,6 +64,10 @@ $(obj)/zImage:	$(obj)/compressed/vmlinux FORCE
 	$(call if_changed,objcopy)
 	@$(kecho) '  Kernel: $@ is ready'

+$(obj)/zImage-dtb:	$(obj)/zImage $(DTB_OBJS) FORCE
+	$(call if_changed,cat)
+	@echo '  Kernel: $@ is ready'
+
 endif

 ifneq ($(LOADADDR),)
@@ -717,6 +717,8 @@ __armv7_mmu_cache_on:
 		bic     r6, r6, #1 << 31        @ 32-bit translation system
 		bic     r6, r6, #3 << 0         @ use only ttbr0
 		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
+		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
+		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
 		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
 #endif
@@ -91,6 +91,11 @@ dtb-$(CONFIG_ARCH_KIRKWOOD) += kirkwood-cloudbox.dtb \
 dtb-$(CONFIG_ARCH_MARCO) += marco-evb.dtb
 dtb-$(CONFIG_ARCH_MSM) += msm8660-surf.dtb \
 	msm8960-cdp.dtb
+dtb-$(CONFIG_ARCH_MT8135) += mt8135-ca7x2-ca15x2.dtb \
+	mt8135-ca7x2-ca15x1.dtb \
+	mt8135-ca7x1-ca15x1.dtb \
+	mt8135-ca7x1.dtb \
+	mt8135-ca7x2.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-370-db.dtb \
 	armada-370-mirabox.dtb \
 	armada-370-rd.dtb \
@@ -210,13 +215,31 @@ dtb-$(CONFIG_ARCH_VT8500) += vt8500-bv07.dtb \
 	wm8850-w70v2.dtb
 dtb-$(CONFIG_ARCH_ZYNQ) += zynq-zc702.dtb

+DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES))
+ifneq ($(DTB_NAMES),)
+DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES))
+else
+DTB_LIST := $(dtb-y)
+endif
+
 targets += dtbs
-targets += $(dtb-y)
+targets += $(DTB_LIST)
 endif

 # *.dtb used to be generated in the directory above. Clean out the
 # old build results so people don't accidentally use them.
-dtbs: $(addprefix $(obj)/, $(dtb-y))
+dtbs: $(addprefix $(obj)/, $(DTB_LIST))
 	$(Q)rm -f $(obj)/../*.dtb

 clean-files := *.dtb
+
+MTK_PLATFORM := $(subst ",,$(CONFIG_MTK_PLATFORM))
+ARCH_MTK_PROJECT := $(subst ",,$(CONFIG_ARCH_MTK_PROJECT))
+ifneq ($(filter y,$(CONFIG_ARCH_MT6752) $(CONFIG_ARCH_MT6732) $(CONFIG_ARCH_MT6595) $(CONFIG_ARCH_MT6795)),)
+# For K2/6795 arm32
+DCT_EXTRA_PATH := $(objtree)/drivers/misc/mediatek/mach/$(MTK_PLATFORM)/$(ARCH_MTK_PROJECT)/dct/dct/
+else
+DCT_EXTRA_PATH := $(objtree)/arch/arm/mach-$(MTK_PLATFORM)/$(ARCH_MTK_PROJECT)/dct/dct/
+endif
+
+DTC_FLAGS ?= -i $(DCT_EXTRA_PATH)
@@ -37,30 +37,35 @@
 			device_type = "cpu";
 			compatible = "arm,cortex-a15";
 			reg = <0>;
+			cci-control-port = <&cci_control1>;
 		};

 		cpu1: cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a15";
 			reg = <1>;
+			cci-control-port = <&cci_control1>;
 		};

 		cpu2: cpu@2 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x100>;
+			cci-control-port = <&cci_control2>;
 		};

 		cpu3: cpu@3 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x101>;
+			cci-control-port = <&cci_control2>;
 		};

 		cpu4: cpu@4 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x102>;
+			cci-control-port = <&cci_control2>;
 		};
 	};

@@ -104,6 +109,36 @@
 		interrupts = <1 9 0xf04>;
 	};

+	cci@2c090000 {
+		compatible = "arm,cci-400";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0 0x2c090000 0 0x1000>;
+		ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+		cci_control1: slave-if@4000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x4000 0x1000>;
+		};
+
+		cci_control2: slave-if@5000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x5000 0x1000>;
+		};
+	};
+
+	cci-pmu@2c099000 {
+		compatible = "arm,cci-400-pmu";
+		reg = <0 0x2c099000 0 0x6000>;
+		interrupts = <0 101 4>,
+			     <0 102 4>,
+			     <0 103 4>,
+			     <0 104 4>,
+			     <0 105 4>;
+	};
+
 	memory-controller@7ffd0000 {
 		compatible = "arm,pl354", "arm,primecell";
 		reg = <0 0x7ffd0000 0 0x1000>;
@@ -17,3 +17,7 @@ config SHARP_PARAM

 config SHARP_SCOOP
 	bool
+
+config FIQ_GLUE
+	bool
+	select FIQ
@@ -4,6 +4,7 @@

 obj-y				+= firmware.o

+obj-$(CONFIG_FIQ_GLUE)		+= fiq_glue.o fiq_glue_setup.o
 obj-$(CONFIG_ICST)		+= icst.o
 obj-$(CONFIG_SA1111)		+= sa1111.o
 obj-$(CONFIG_PCI_HOST_VIA82C505) += via82c505.o
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+		.global fiq_glue_end
+
+		/* fiq stack: r0-r15,cpsr,spsr of interrupted mode */
+
+ENTRY(fiq_glue)
+		/* store pc, cpsr from previous mode, reserve space for spsr */
+		mrs	r12, spsr
+		sub	lr, lr, #4
+		subs	r10, #1
+		bne	nested_fiq
+
+		str	r12, [sp, #-8]!
+		str	lr, [sp, #-4]!
+
+		/* store r8-r14 from previous mode */
+		sub	sp, sp, #(7 * 4)
+		stmia	sp, {r8-r14}^
+		nop
+
+		/* store r0-r7 from previous mode */
+		stmfd	sp!, {r0-r7}
+
+		/* setup func(data,regs) arguments */
+		mov	r0, r9
+		mov	r1, sp
+		mov	r3, r8
+
+		mov	r7, sp
+
+		/* Get sp and lr from non-user modes */
+		and	r4, r12, #MODE_MASK
+		cmp	r4, #USR_MODE
+		beq	fiq_from_usr_mode
+
+		mov	r7, sp
+		orr	r4, r4, #(PSR_I_BIT | PSR_F_BIT)
+		msr	cpsr_c, r4
+		str	sp, [r7, #(4 * 13)]
+		str	lr, [r7, #(4 * 14)]
+		mrs	r5, spsr
+		str	r5, [r7, #(4 * 17)]
+
+		cmp	r4, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT)
+		/* use fiq stack if we reenter this mode */
+		subne	sp, r7, #(4 * 3)
+
+fiq_from_usr_mode:
+		msr	cpsr_c, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT)
+		mov	r2, sp
+		sub	sp, r7, #12
+		stmfd	sp!, {r2, ip, lr}
+		/* call func(data,regs) */
+		blx	r3
+		ldmfd	sp, {r2, ip, lr}
+		mov	sp, r2
+
+		/* restore/discard saved state */
+		cmp	r4, #USR_MODE
+		beq	fiq_from_usr_mode_exit
+
+		msr	cpsr_c, r4
+		ldr	sp, [r7, #(4 * 13)]
+		ldr	lr, [r7, #(4 * 14)]
+		msr	spsr_cxsf, r5
+
+fiq_from_usr_mode_exit:
+		msr	cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)
+
+		ldmfd	sp!, {r0-r7}
+		ldr	lr, [sp, #(4 * 7)]
+		ldr	r12, [sp, #(4 * 8)]
+		add	sp, sp, #(10 * 4)
+exit_fiq:
+		msr	spsr_cxsf, r12
+		add	r10, #1
+		cmp	r11, #0
+		moveqs	pc, lr
+		bx	r11 /* jump to custom fiq return function */
+
+nested_fiq:
+		orr	r12, r12, #(PSR_F_BIT)
+		b	exit_fiq
+
+fiq_glue_end:
+
+ENTRY(fiq_glue_setup) /* func, data, sp, smc call number */
+		stmfd		sp!, {r4}
+		mrs		r4, cpsr
+		msr		cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)
+		movs		r8, r0
+		mov		r9, r1
+		mov		sp, r2
+		mov		r11, r3
+		moveq		r10, #0
+		movne		r10, #1
+		msr		cpsr_c, r4
+		ldmfd		sp!, {r4}
+		bx		lr
+
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/fiq.h>
+#include <asm/fiq_glue.h>
+
+extern unsigned char fiq_glue, fiq_glue_end;
+extern void fiq_glue_setup(void *func, void *data, void *sp,
+			   fiq_return_handler_t fiq_return_handler);
+
+static struct fiq_handler fiq_debbuger_fiq_handler = {
+	.name = "fiq_glue",
+};
+DEFINE_PER_CPU(void *, fiq_stack);
+static struct fiq_glue_handler *current_handler;
+static fiq_return_handler_t fiq_return_handler;
+static DEFINE_MUTEX(fiq_glue_lock);
+
+static void fiq_glue_setup_helper(void *info)
+{
+	struct fiq_glue_handler *handler = info;
+	fiq_glue_setup(handler->fiq, handler,
+		__get_cpu_var(fiq_stack) + THREAD_START_SP,
+		fiq_return_handler);
+}
+
+int fiq_glue_register_handler(struct fiq_glue_handler *handler)
+{
+	int ret;
+	int cpu;
+
+	if (!handler || !handler->fiq)
+		return -EINVAL;
+
+	mutex_lock(&fiq_glue_lock);
+	if (fiq_stack) {
+		ret = -EBUSY;
+		goto err_busy;
+	}
+
+	for_each_possible_cpu(cpu) {
+		void *stack;
+		stack = (void *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+		if (WARN_ON(!stack)) {
+			ret = -ENOMEM;
+			goto err_alloc_fiq_stack;
+		}
+		per_cpu(fiq_stack, cpu) = stack;
+	}
+
+	ret = claim_fiq(&fiq_debbuger_fiq_handler);
+	if (WARN_ON(ret))
+		goto err_claim_fiq;
+
+	current_handler = handler;
+	on_each_cpu(fiq_glue_setup_helper, handler, true);
+	set_fiq_handler(&fiq_glue, &fiq_glue_end - &fiq_glue);
+
+	mutex_unlock(&fiq_glue_lock);
+	return 0;
+
+err_claim_fiq:
+err_alloc_fiq_stack:
+	for_each_possible_cpu(cpu) {
+		__free_pages(per_cpu(fiq_stack, cpu), THREAD_SIZE_ORDER);
+		per_cpu(fiq_stack, cpu) = NULL;
+	}
+err_busy:
+	mutex_unlock(&fiq_glue_lock);
+	return ret;
+}
+
+static void fiq_glue_update_return_handler(void (*fiq_return)(void))
+{
+	fiq_return_handler = fiq_return;
+	if (current_handler)
+		on_each_cpu(fiq_glue_setup_helper, current_handler, true);
+}
+
+int fiq_glue_set_return_handler(void (*fiq_return)(void))
+{
+	int ret;
+
+	mutex_lock(&fiq_glue_lock);
+	if (fiq_return_handler) {
+		ret = -EBUSY;
+		goto err_busy;
+	}
+	fiq_glue_update_return_handler(fiq_return);
+	ret = 0;
+err_busy:
+	mutex_unlock(&fiq_glue_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(fiq_glue_set_return_handler);
+
+int fiq_glue_clear_return_handler(void (*fiq_return)(void))
+{
+	int ret;
+
+	mutex_lock(&fiq_glue_lock);
+	if (WARN_ON(fiq_return_handler != fiq_return)) {
+		ret = -EINVAL;
+		goto err_inval;
+	}
+	fiq_glue_update_return_handler(NULL);
+	ret = 0;
+err_inval:
+	mutex_unlock(&fiq_glue_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(fiq_glue_clear_return_handler);
+
+/**
+ * fiq_glue_resume - Restore fiqs after suspend or low power idle states
+ *
+ * This must be called before calling local_fiq_enable after returning from a
+ * power state where the fiq mode registers were lost. If a driver provided
+ * a resume hook when it registered the handler it will be called.
+ */
+
+void fiq_glue_resume(void)
+{
+	if (!current_handler)
+		return;
+	fiq_glue_setup(current_handler->fiq, current_handler,
+		__get_cpu_var(fiq_stack) + THREAD_START_SP,
+		fiq_return_handler);
+	if (current_handler->resume)
+		current_handler->resume(current_handler);
+}
+
@@ -5,5 +5,12 @@
 obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o

+
+ifdef CONFIG_CRYPTO_AES_ARM32_CE
+KBUILD_AFLAGS      :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=crypto-neon-fp-armv8)
+aes-arm-y  := aes-armv8-aarch32.o aes_glue.o aes-armv8-bcm.o aes-armv8-bcm-glue.o
+else
 aes-arm-y  := aes-armv4.o aes_glue.o
+endif
+
 sha1-arm-y := sha1-armv4-large.o sha1_glue.o
@@ -0,0 +1,525 @@
+#define __ARM_ARCH__ __LINUX_ARM_ARCH__
+
+#include <linux/linkage.h>
+
+#if __ARM_ARCH__>=7
+
+.text
+
+# AES assembly implementation for ARMv8 AArch32
+#   - AES_encrypt
+#   - AES_decrypt
+#   - private_AES_set_encrypt_key
+#   - private_AES_set_decrypt_key
+
+# void AES_encrypt(const unsigned char *in, unsigned char *out,
+#      const AES_KEY *key) {
+.align  5
+ENTRY(AES_encrypt)
+    vld1.8      {d24-d25}, [r2]!
+    vld1.8      {d0-d1}, [r0]
+    ldr         r3, [r2, #240-16]
+    vld1.8      {d26-d27}, [r2]!
+    aese.8      q0, q12
+    aesmc.8     q0, q0
+    sub         r3, r3, #4
+    vld1.8      {d28-d29}, [r2]!
+.LPrivateEncLoop:
+    subs        r3, r3, #2
+    aese.8      q0, q13
+    aesmc.8     q0, q0
+    vld1.8      {d26-d27}, [r2]!
+    aese.8      q0, q14
+    aesmc.8     q0, q0
+    vld1.8      {d28-d29}, [r2]!
+    bpl         .LPrivateEncLoop
+    aese.8      q0, q13
+    veor.8      q0, q0, q14
+    vst1.8      {d0-d1}, [r1]
+    bx          lr
+ENDPROC(AES_encrypt)
+
+# void AES_decrypt(const unsigned char *in, unsigned char *out,
+#      const AES_KEY *key) {
+.align  5
+ENTRY(AES_decrypt)
+    vld1.8      {d24-d25}, [r2]!
+    vld1.8      {d0-d1}, [r0]
+    ldr         r3, [r2, #240-16]
+    vld1.8      {d26-d27}, [r2]!
+    aesd.8      q0, q12
+    aesimc.8    q0, q0
+    sub         r3, r3, #4
+    vld1.8      {d28-d29}, [r2]!
+.LPrivateDecLoop:
+    subs        r3, r3, #2
+    aesd.8      q0, q13
+    aesimc.8    q0, q0
+    vld1.8      {d26-d27}, [r2]!
+    aesd.8      q0, q14
+    aesimc.8    q0, q0
+    vld1.8      {d28-d29}, [r2]!
+    bpl         .LPrivateDecLoop
+    aesd.8      q0, q13
+    veor.8      q0, q0, q14
+    vst1.8      {d0-d1}, [r1]
+    bx lr
+ENDPROC(AES_decrypt)
+
+
+.align 5
+private_rcon:
+.long   0x00000001,0x00000001,0x00000001,0x00000001
+.long   0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+.long   0x0000001b,0x0000001b,0x0000001b,0x0000001b
+
+
+# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+#      AES_KEY *key) {
+.align  5
+ENTRY(private_AES_set_encrypt_key)
+    adr       r3,  private_rcon
+    cmp       r1,  #192
+    veor      q0,  q0,  q0
+    vld1.8    {q8},[r0]!
+    mov       r1,  #8
+    vld1.32   {q1,q2},[r3]!
+    beq       .LPrivateExpandEnc192
+    bgt       .LPrivateExpandEnc256
+.LPrivateExpandEnc128:
+.LPrivateLoopEnc128:
+    vst1.32   {q8},[r2]!
+    vtbl.8    d6,  {q8},d4
+    vtbl.8    d7,  {q8},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    subs      r1,  r1,  #1
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    bne       .LPrivateLoopEnc128
+    vld1.32   {q1},[r3]
+    vst1.32   {q8},[r2]!
+    vtbl.8    d6,  {q8},d4
+    vtbl.8    d7,  {q8},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    vst1.32   {q8},[r2]!
+    vtbl.8    d6,  {q8},d4
+    vtbl.8    d7,  {q8},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    veor      q8,  q3,  q15
+    vst1.32   {q8},[r2]
+    add       r2,  r2,  #0x50
+    mov       r12, #10
+    str       r12, [r2]
+    eor       r0,  r0,  r0
+    bx        lr
+
+.align  4
+.LPrivateExpandEnc192:
+    vld1.8    {d18},[r0]
+    vmov.i8   q3,  #8
+    vst1.32   {q8},[r2]!
+    vsub.i8   q2,  q2,  q3
+    vst1.32   {d18},[r2]!
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vtbl.8    d6,  {q9},d4
+    vtbl.8    d7,  {q9},d5
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    mov       r1,  #3
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    vdup.32   q15, d31[1]
+    veor      q15, q15, q9
+    vext.8    q14, q0,  q9,  #12
+    veor      q15, q15, q14
+    vst1.32   {q8},[r2]!
+    veor      q9,  q3,  q15
+    vst1.32   {d18},[r2]!
+.LPrivateLoopEnc192:
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vtbl.8    d6,  {q9},d4
+    vtbl.8    d7,  {q9},d5
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    subs      r1,  r1,  #1
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q10, q3,  q15
+    vdup.32   q15, d31[1]
+    veor      q15, q15, q9
+    vext.8    q14, q0,  q9,  #12
+    veor      q15, q15, q14
+    vst1.32   {q10},[r2]!
+    veor      q11, q3,  q15
+    vst1.32   {d22},[r2]!
+    vext.8    q14, q0,  q10, #12
+    veor      q15, q10, q14
+    vtbl.8    d6,  {q11},d4
+    vtbl.8    d7,  {q11},d5
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    vdup.32   q15, d31[1]
+    veor      q15, q15, q11
+    vext.8    q14, q0,  q11, #12
+    veor      q15, q15, q14
+    vst1.32   {q8},[r2]!
+    veor      q9,  q3,  q15
+    vst1.32   {d18},[r2]!
+    bne       .LPrivateLoopEnc192
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vtbl.8    d6,  {q9},d4
+    vtbl.8    d7,  {q9},d5
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    veor      q8,  q3,  q15
+    vst1.32   {q8},[r2]!
+    mov       r12, #12
+    add       r2,  r2,  #0x20
+    str       r12, [r2]
+    eor       r0,  r0,  r0
+    bx        lr
+
+.align  4
+.LPrivateExpandEnc256:
+    vld1.8    {q9},[r0]
+    mov       r1,  #6
+.LPrivateLoopEnc256:
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vst1.32   {q8},[r2]!
+    vtbl.8    d6,  {q9},d4
+    vst1.32   {q9},[r2]!
+    vtbl.8    d7,  {q9},d5
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    subs      r1,  r1,  #1
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    vext.8    q14, q0,  q9,  #12
+    veor      q15, q9,  q14
+    vdup.32   q3,  d17[1]
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q9,  q3,  q15
+    bne       .LPrivateLoopEnc256
+    vst1.32   {q8},[r2]!
+    vtbl.8    d6,  {q9},d4
+    vst1.32   {q9},[r2]!
+    vtbl.8    d7,  {q9},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    veor      q8,  q3,  q15
+    vst1.32   {q8},[r2]!
+    mov       r12, #14
+    str       r12, [r2]
+    eor       r0,  r0,  r0
+    bx        lr
+ENDPROC(private_AES_set_encrypt_key)
+
+
+# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+#       AES_KEY *key) {
+.align  5
+ENTRY(private_AES_set_decrypt_key)
+    adr       r3,  private_rcon
+    cmp       r1,  #192
+    veor      q0,  q0,  q0
+    vld1.8    {q8},[r0]!
+    add       r2,  r2,  #160
+    vld1.32   {q1,q2},[r3]!
+    beq       .LPrivateExpandDec192
+    bgt       .LPrivateExpandDec256
+.LPrivateExpandDec128:
+    vst1.32   {q8},[r2]
+    vtbl.8    d6,  {q8},d4
+    vtbl.8    d7,  {q8},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    sub       r2,  r2,  #16
+    aese.8    q3,  q0
+    mov       r1,  #7
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    aesimc.8  q12, q8
+.LPrivateLoopDec128:
+    vst1.32   {q12},[r2]
+    vtbl.8    d6,  {q8},d4
+    vtbl.8    d7,  {q8},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    sub       r2,  r2,  #16
+    aese.8    q3,  q0
+    subs      r1,  r1,  #1
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    aesimc.8  q12, q8
+    bne       .LPrivateLoopDec128
+    vld1.32   {q1},[r3]
+    vst1.32   {q12},[r2]
+    vtbl.8    d6,  {q8},d4
+    vtbl.8    d7,  {q8},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    sub       r2,  r2,  #16
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    aesimc.8  q12, q8
+    vst1.32   {q12},[r2]
+    vtbl.8    d6,  {q8},d4
+    vtbl.8    d7,  {q8},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    sub       r2,  r2,  #16
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    veor      q8,  q3,  q15
+    vst1.32   {q8},[r2]
+    add       r2,  r2,  #240
+    mov       r12, #10
+    str       r12, [r2]
+    eor       r0,  r0,  r0
+    bx        lr
+
+.align  4
+.LPrivateExpandDec192:
+    vld1.8    {d18},[r0]
+    add       r2,  r2,  #32
+    vmov.i8   q3,  #8
+    mov       r1,  #3
+    vst1.32   {q8},[r2]
+    sub       r2,  r2,  #16
+    vsub.i8   q2,  q2,  q3
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vtbl.8    d6,  {q9},d4
+    vtbl.8    d7,  {q9},d5
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    vmov      d26, d18
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    vdup.32   q15, d31[1]
+    vmov      d27, d16
+    veor      q15, q15, q9
+    aesimc.8  q13, q13
+    vmov      d24, d17
+    vext.8    q14, q0,  q9,  #12
+    veor      q15, q15, q14
+    vst1.32   {q13},[r2]
+    sub       r2,  r2,  #16
+    veor      q9,  q3,  q15
+    vmov      d25, d18
+.LPrivateLoopDec192:
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    aesimc.8  q12, q12
+    vtbl.8    d6,  {q9},d4
+    vtbl.8    d7,  {q9},d5
+    vst1.32   {q12},[r2]
+    sub       r2,  r2,  #16
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    subs      r1,  r1,  #1
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q10, q3,  q15
+    vdup.32   q15, d31[1]
+    veor      q15, q15, q9
+    aesimc.8  q12, q10
+    vext.8    q14, q0,  q9,  #12
+    veor      q15, q15, q14
+    vst1.32   {q12},[r2]
+    sub       r2,  r2,  #16
+    veor      q11, q3,  q15
+    vext.8    q14, q0,  q10, #12
+    veor      q15, q10, q14
+    vtbl.8    d6,  {q11},d4
+    vtbl.8    d7,  {q11},d5
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    vmov      d26, d22
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    vdup.32   q15, d31[1]
+    vmov      d27, d16
+    veor      q15, q15, q11
+    aesimc.8  q13, q13
+    vmov      d24, d17
+    vext.8    q14, q0,  q11, #12
+    veor      q15, q15, q14
+    vst1.32   {q13},[r2]
+    sub       r2,  r2,  #16
+    veor      q9,  q3,  q15
+    vmov      d25, d18
+    bne       .LPrivateLoopDec192
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    aesimc.8  q12, q12
+    vtbl.8    d6,  {q9},d4
+    vtbl.8    d7,  {q9},d5
+    vst1.32   {q12},[r2]
+    sub       r2,  r2,  #16
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    veor      q8,  q3,  q15
+    vst1.32   {q8},[r2]
+    add       r2,  r2,  #240
+    mov       r12, #12
+    str       r12, [r2]
+    eor       r0,  r0,  r0
+    bx        lr
+
+.align  4
+.LPrivateExpandDec256:
+    vld1.8    {q9},[r0]
+    add       r2,  r2,  #80
+    vmov      q12, q8
+    mov       r1,  #14
+    str       r1,  [r2]
+    sub       r2,  r2,  #16
+    mov       r1,  #6
+.LPrivateLoopDec256:
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    aesimc.8  q13, q9
+    vst1.32   {q12},[r2]
+    sub       r2,  r2,  #16
+    vtbl.8    d6,  {q9},d4
+    vst1.32   {q13},[r2]
+    sub       r2,  r2,  #16
+    vtbl.8    d7,  {q9},d5
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    subs      r1,  r1,  #1
+    veor      q3,  q3,  q1
+    vshl.u8   q1,  q1,  #1
+    veor      q8,  q3,  q15
+    vext.8    q14, q0,  q9,  #12
+    veor      q15, q9,  q14
+    aesimc.8  q12, q8
+    vdup.32   q3,  d17[1]
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q9,  q3,  q15
+    bne       .LPrivateLoopDec256
+    aesimc.8  q13, q9
+    vst1.32   {q12},[r2]
+    sub       r2,  r2,  #16
+    vtbl.8    d6,  {q9},d4
+    vst1.32   {q13},[r2]
+    sub       r2,  r2,  #16
+    vtbl.8    d7,  {q9},d5
+    vext.8    q14, q0,  q8,  #12
+    veor      q15, q8,  q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    vext.8    q14, q0,  q14, #12
+    veor      q15, q15, q14
+    aese.8    q3,  q0
+    veor      q3,  q3,  q1
+    veor      q8,  q3,  q15
+    vst1.32   {q8},[r2]
+    eor       r0,  r0,  r0
+    bx        lr
+ENDPROC(private_AES_set_decrypt_key)
+
+.align  2
+
+#endif
@@ -0,0 +1,120 @@
+/*
+ * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+
+asmlinkage void aes_v8_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, u8 iv[], int first);
+asmlinkage void aes_v8_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, u8 iv[], int first);
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	//kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_v8_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+				first);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	//kernel_neon_end();
+	return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	//kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_v8_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
+				first);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	//kernel_neon_end();
+	return err;
+}
+
+static struct crypto_alg aes_algs[] = { {
+	.cra_name		    = "__cbc-aes-armv8" ,
+	.cra_driver_name	= "__driver-cbc-aes-armv8" ,
+	.cra_priority		= 0,
+	.cra_flags		    = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		    = &crypto_blkcipher_type,
+	.cra_module		    = THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		    = AES_BLOCK_SIZE,
+		.setkey		    = crypto_aes_set_key,
+		.encrypt	    = cbc_encrypt,
+		.decrypt	    = cbc_decrypt,
+	},
+}, {
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "cbc-aes-armv8",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+} };
+
+static int __init aes_init(void)
+{
+	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit aes_exit(void)
+{
+	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+module_init(aes_init);
+module_exit(aes_exit);
+
@@ -0,0 +1,598 @@
+#define __ARM_ARCH__ __LINUX_ARM_ARCH__
+
+#include <linux/linkage.h>
+
+#if __ARM_ARCH__>=7
+
+.text
+
+# AES assembly implementation for ARMv8 AArch32
+#   - aes_v8_cbc_encrypt
+#   - aes_v8_cbc_decrypt
+
+.align 5
+rcon:
+.long   0x00000001,0x00000001,0x00000001,0x00000001
+.long   0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+.long   0x0000001b,0x0000001b,0x0000001b,0x0000001b
+
+# void aes_v8_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+#      int rounds, int blocks, u8 iv[], int first);
+.align  5
+ENTRY(aes_v8_cbc_encrypt)
+    push      {r4, r5, r6}
+    vpush     {q4, q5, q6, q7}
+    ldr       r4, [sp, #76]     // blocks
+    ldr       r5, [sp, #80]     // iv
+    ldr       r6, [sp, #84]     // first
+
+    cmp       r6, #0
+    beq       .LcbcencDispatch
+    vld1.8    {d2-d3}, [r5]
+.LcbcencDispatch:
+    cmp       r3, #12
+    bhi       .Lcbcenc256
+    beq       .Lcbcenc192
+
+.Lcbcenc128:
+    // Load round keys
+    vld1.8    {d4-d5}, [r2]!
+    vld1.8    {d6-d7}, [r2]!
+    vld1.8    {d8-d9}, [r2]!
+    vld1.8    {d10-d11}, [r2]!
+    vld1.8    {d12-d13}, [r2]!
+    vld1.8    {d14-d15}, [r2]!
+    vld1.8    {d16-d17}, [r2]!
+    vld1.8    {d18-d19}, [r2]!
+    vld1.8    {d20-d21}, [r2]!
+    vld1.8    {d22-d23}, [r2]!
+    vld1.8    {d24-d25}, [r2]
+.Lcbcenc128Loop:
+    // load input 16 bytes, and eor with iv
+    vld1.8    {d0-d1}, [r1]!
+    veor.8    q0, q0, q1
+    pld       [r1, #16]
+    // aes kernel
+    aese.8    q0, q2
+    aesmc.8   q0, q0
+    aese.8    q0, q3
+    aesmc.8   q0, q0
+    aese.8    q0, q4
+    aesmc.8   q0, q0
+    aese.8    q0, q5
+    aesmc.8   q0, q0
+    aese.8    q0, q6
+    aesmc.8   q0, q0
+    aese.8    q0, q7
+    aesmc.8   q0, q0
+    aese.8    q0, q8
+    aesmc.8   q0, q0
+    aese.8    q0, q9
+    aesmc.8   q0, q0
+    aese.8    q0, q10
+    aesmc.8   q0, q0
+    aese.8    q0, q11
+    veor.8    q1, q0, q12
+    // store output 16 bytes, and continue next round
+    vst1.8    {d2-d3}, [r0]!
+    subs      r4, r4, #1
+    bne       .Lcbcenc128Loop
+.Lcbcenc128Done:
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+
+.Lcbcenc192:
+    // Load round keys
+    vld1.8    {d4-d5}, [r2]!
+    vld1.8    {d6-d7}, [r2]!
+    vld1.8    {d8-d9}, [r2]!
+    vld1.8    {d10-d11}, [r2]!
+    vld1.8    {d12-d13}, [r2]!
+    vld1.8    {d14-d15}, [r2]!
+    vld1.8    {d16-d17}, [r2]!
+    vld1.8    {d18-d19}, [r2]!
+    vld1.8    {d20-d21}, [r2]!
+    vld1.8    {d22-d23}, [r2]!
+    vld1.8    {d24-d25}, [r2]!
+    vld1.8    {d26-d27}, [r2]!
+    vld1.8    {d28-d29}, [r2]
+.Lcbcenc192Loop:
+    // load input 16 bytes, and eor with iv
+    vld1.8    {d0-d1}, [r1]!
+    veor.8    q0, q0, q1
+    pld       [r1, #16]
+    // aes kernel
+    aese.8    q0, q2
+    aesmc.8   q0, q0
+    aese.8    q0, q3
+    aesmc.8   q0, q0
+    aese.8    q0, q4
+    aesmc.8   q0, q0
+    aese.8    q0, q5
+    aesmc.8   q0, q0
+    aese.8    q0, q6
+    aesmc.8   q0, q0
+    aese.8    q0, q7
+    aesmc.8   q0, q0
+    aese.8    q0, q8
+    aesmc.8   q0, q0
+    aese.8    q0, q9
+    aesmc.8   q0, q0
+    aese.8    q0, q10
+    aesmc.8   q0, q0
+    aese.8    q0, q11
+    aesmc.8   q0, q0
+    aese.8    q0, q12
+    aesmc.8   q0, q0
+    aese.8    q0, q13
+    veor.8    q1, q0, q14
+    // store output 16 bytes, and continue next round
+    vst1.8    {d2-d3}, [r0]!
+    subs      r4, r4, #1
+    bne       .Lcbcenc192Loop
+.Lcbcenc192Done:
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+
+.Lcbcenc256:
+    // Load round keys
+    vld1.8    {d4-d5}, [r2]!
+    vld1.8    {d6-d7}, [r2]!
+    vld1.8    {d8-d9}, [r2]!
+    vld1.8    {d10-d11}, [r2]!
+    vld1.8    {d12-d13}, [r2]!
+    vld1.8    {d14-d15}, [r2]!
+    vld1.8    {d16-d17}, [r2]!
+    vld1.8    {d18-d19}, [r2]!
+    vld1.8    {d20-d21}, [r2]!
+    vld1.8    {d22-d23}, [r2]!
+    vld1.8    {d24-d25}, [r2]!
+    vld1.8    {d26-d27}, [r2]!
+    vld1.8    {d28-d29}, [r2]!
+    mov       r3, r2
+.Lcbcenc256Loop:
+    // load input 16 bytes, and eor with iv
+    vld1.8    {d0-d1}, [r1]!
+    veor.8    q0, q0, q1
+    pld       [r1, #16]
+    // aes kernel
+    aese.8    q0, q2
+    aesmc.8   q0, q0
+    aese.8    q0, q3
+    aesmc.8   q0, q0
+    vld1.8    {d30}, [r2]!
+    aese.8    q0, q4
+    aesmc.8   q0, q0
+    aese.8    q0, q5
+    aesmc.8   q0, q0
+    vld1.8    {d31}, [r2]!
+    aese.8    q0, q6
+    aesmc.8   q0, q0
+    aese.8    q0, q7
+    aesmc.8   q0, q0
+    aese.8    q0, q8
+    aesmc.8   q0, q0
+    aese.8    q0, q9
+    aesmc.8   q0, q0
+    aese.8    q0, q10
+    aesmc.8   q0, q0
+    aese.8    q0, q11
+    aesmc.8   q0, q0
+    aese.8    q0, q12
+    aesmc.8   q0, q0
+    aese.8    q0, q13
+    aesmc.8   q0, q0
+    aese.8    q0, q14
+    aesmc.8   q0, q0
+    aese.8    q0, q15
+    vld1.8    {d30-d31}, [r2]
+    veor.8    q1, q0, q15
+    mov       r2, r3
+    // store output 16 bytes, and continue next round
+    vst1.8    {d2-d3}, [r0]!
+    subs      r4, r4, #1
+    bne       .Lcbcenc256Loop
+.Lcbcenc256Done:
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+ENDPROC(aes_v8_cbc_encrypt)
+
+
+# void aes_v8_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+#      int rounds, int blocks, u8 iv[], int first);
+.align  5
+ENTRY(aes_v8_cbc_decrypt)
+    push      {r4, r5, r6}
+    vpush     {q4, q5, q6, q7}
+    ldr       r4, [sp, #76]     // blocks
+    ldr       r5, [sp, #80]     // iv
+    ldr       r6, [sp, #84]     // first
+
+    cmp       r6, #0
+    beq       .LcbcdecDispatch
+    vld1.8    {d4-d5}, [r5]
+.LcbcdecDispatch:
+    cmp       r3, #12
+    bhi       .Lcbcdec256
+    beq       .Lcbcdec192
+
+.Lcbcdec128:
+    // Load round keys
+    vld1.8    {d10-d11}, [r2]!
+    vld1.8    {d12-d13}, [r2]!
+    vld1.8    {d14-d15}, [r2]!
+    vld1.8    {d16-d17}, [r2]!
+    vld1.8    {d18-d19}, [r2]!
+    vld1.8    {d20-d21}, [r2]!
+    vld1.8    {d22-d23}, [r2]!
+    vld1.8    {d24-d25}, [r2]!
+    vld1.8    {d26-d27}, [r2]!
+    vld1.8    {d28-d29}, [r2]!
+    vld1.8    {d30-d31}, [r2]!
+    // Sub by 2
+    subs      r4, r4, #2
+    bmi       .Lcbcdec128_1X
+.Lcbcdec128_2XLoop:
+    // Load input 32 bytes
+    vld1.8    {d0-d3}, [r1]!
+    pld       [r1, #32]
+    vmov      q3, q0
+    vmov      q4, q1
+    // aes kernel
+    aesd.8    q0, q5
+    aesimc.8  q0, q0
+    aesd.8    q1, q5
+    aesimc.8  q1, q1
+    aesd.8    q0, q6
+    aesimc.8  q0, q0
+    aesd.8    q1, q6
+    aesimc.8  q1, q1
+    aesd.8    q0, q7
+    aesimc.8  q0, q0
+    aesd.8    q1, q7
+    aesimc.8  q1, q1
+    aesd.8    q0, q8
+    aesimc.8  q0, q0
+    aesd.8    q1, q8
+    aesimc.8  q1, q1
+    aesd.8    q0, q9
+    aesimc.8  q0, q0
+    aesd.8    q1, q9
+    aesimc.8  q1, q1
+    aesd.8    q0, q10
+    aesimc.8  q0, q0
+    aesd.8    q1, q10
+    aesimc.8  q1, q1
+    aesd.8    q0, q11
+    aesimc.8  q0, q0
+    aesd.8    q1, q11
+    aesimc.8  q1, q1
+    aesd.8    q0, q12
+    aesimc.8  q0, q0
+    aesd.8    q1, q12
+    aesimc.8  q1, q1
+    aesd.8    q0, q13
+    aesimc.8  q0, q0
+    aesd.8    q1, q13
+    aesimc.8  q1, q1
+    aesd.8    q0, q14
+    aesd.8    q1, q14
+    veor.8    q0, q0, q15
+    veor.8    q1, q1, q15
+    veor.8    q0, q0, q2
+    veor.8    q1, q1, q3
+    vmov      q2, q4
+    vst1.8    {d0-d1}, [r0]!
+    vst1.8    {d2-d3}, [r0]!
+    subs      r4, r4, #2
+    bpl       .Lcbcdec128_2XLoop
+.Lcbcdec128_1X:
+    adds      r4, r4, #2
+    bne       .Lcbcdec128_FinalRound
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+.Lcbcdec128_FinalRound:
+    // load input 16 bytes
+    vld1.8    {d0-d1}, [r1]!
+    // aes kernel
+    aesd.8    q0, q5
+    aesimc.8  q0, q0
+    aesd.8    q0, q6
+    aesimc.8  q0, q0
+    aesd.8    q0, q7
+    aesimc.8  q0, q0
+    aesd.8    q0, q8
+    aesimc.8  q0, q0
+    aesd.8    q0, q9
+    aesimc.8  q0, q0
+    aesd.8    q0, q10
+    aesimc.8  q0, q0
+    aesd.8    q0, q11
+    aesimc.8  q0, q0
+    aesd.8    q0, q12
+    aesimc.8  q0, q0
+    aesd.8    q0, q13
+    aesimc.8  q0, q0
+    aesd.8    q0, q14
+    veor.8    q0, q0, q15
+    veor.8    q0, q0, q2
+    vst1.8    {d0-d1}, [r0]!
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+
+.Lcbcdec192:
+    // Load round keys
+    vld1.8    {d10-d11}, [r2]!
+    vld1.8    {d12-d13}, [r2]!
+    vld1.8    {d14-d15}, [r2]!
+    vld1.8    {d16-d17}, [r2]!
+    vld1.8    {d18-d19}, [r2]!
+    vld1.8    {d20-d21}, [r2]!
+    vld1.8    {d22-d23}, [r2]!
+    vld1.8    {d24-d25}, [r2]!
+    vld1.8    {d26-d27}, [r2]!
+    vld1.8    {d28-d29}, [r2]!
+    mov       r3, r2
+    // Sub by 2
+    subs      r4, r4, #2
+    bmi       .Lcbcdec192_1X
+.Lcbcdec192_2XLoop:
+    // Load input 32 bytes
+    vld1.8    {d0-d3}, [r1]!
+    pld       [r1, #32]
+    vmov      q3, q0
+    vmov      q4, q1
+    // aes kernel
+    aesd.8    q0, q5
+    aesimc.8  q0, q0
+    aesd.8    q1, q5
+    aesimc.8  q1, q1
+    aesd.8    q0, q6
+    aesimc.8  q0, q0
+    aesd.8    q1, q6
+    aesimc.8  q1, q1
+    aesd.8    q0, q7
+    aesimc.8  q0, q0
+    aesd.8    q1, q7
+    aesimc.8  q1, q1
+    aesd.8    q0, q8
+    aesimc.8  q0, q0
+    aesd.8    q1, q8
+    aesimc.8  q1, q1
+    aesd.8    q0, q9
+    aesimc.8  q0, q0
+    aesd.8    q1, q9
+    aesimc.8  q1, q1
+    aesd.8    q0, q10
+    aesimc.8  q0, q0
+    aesd.8    q1, q10
+    aesimc.8  q1, q1
+    aesd.8    q0, q11
+    aesimc.8  q0, q0
+    aesd.8    q1, q11
+    aesimc.8  q1, q1
+    aesd.8    q0, q12
+    aesimc.8  q0, q0
+    aesd.8    q1, q12
+    aesimc.8  q1, q1
+    aesd.8    q0, q13
+    aesimc.8  q0, q0
+    aesd.8    q1, q13
+    aesimc.8  q1, q1
+    aesd.8    q0, q14
+    aesimc.8  q0, q0
+    aesd.8    q1, q14
+    aesimc.8  q1, q1
+    vld1.8    {d30-d31}, [r2]! 
+    aesd.8    q0, q15
+    aesimc.8  q0, q0
+    aesd.8    q1, q15
+    aesimc.8  q1, q1
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    aesd.8    q1, q15
+    vld1.8    {d30-d31}, [r2]!
+    veor.8    q0, q0, q15
+    veor.8    q1, q1, q15
+    mov       r2, r3
+    veor.8    q0, q0, q2
+    veor.8    q1, q1, q3
+    vmov      q2, q4
+    vst1.8    {d0-d1}, [r0]!
+    vst1.8    {d2-d3}, [r0]!
+    subs      r4, r4, #2
+    bpl       .Lcbcdec192_2XLoop
+.Lcbcdec192_1X:
+    adds      r4, r4, #2
+    bne       .Lcbcdec192_FinalRound
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+.Lcbcdec192_FinalRound:
+    // load input 16 bytes
+    vld1.8    {d0-d1}, [r1]!
+    // aes kernel
+    aesd.8    q0, q5
+    aesimc.8  q0, q0
+    vld1.8    {d30}, [r2]!
+    aesd.8    q0, q6
+    aesimc.8  q0, q0
+    vld1.8    {d31}, [r2]!
+    aesd.8    q0, q7
+    aesimc.8  q0, q0
+    aesd.8    q0, q8
+    aesimc.8  q0, q0
+    aesd.8    q0, q9
+    aesimc.8  q0, q0
+    aesd.8    q0, q10
+    aesimc.8  q0, q0
+    aesd.8    q0, q11
+    aesimc.8  q0, q0
+    aesd.8    q0, q12
+    aesimc.8  q0, q0
+    aesd.8    q0, q13
+    aesimc.8  q0, q0
+    aesd.8    q0, q14
+    aesimc.8  q0, q0
+    aesd.8    q0, q15
+    aesimc.8  q0, q0
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    vld1.8    {d30-d31}, [r2]!
+    veor.8    q0, q0, q15
+    veor.8    q0, q0, q2
+    vst1.8    {d0-d1}, [r0]!
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+
+.Lcbcdec256:
+    // Load round keys
+    vld1.8    {d10-d11}, [r2]!
+    vld1.8    {d12-d13}, [r2]!
+    vld1.8    {d14-d15}, [r2]!
+    vld1.8    {d16-d17}, [r2]!
+    vld1.8    {d18-d19}, [r2]!
+    vld1.8    {d20-d21}, [r2]!
+    vld1.8    {d22-d23}, [r2]!
+    vld1.8    {d24-d25}, [r2]!
+    vld1.8    {d26-d27}, [r2]!
+    vld1.8    {d28-d29}, [r2]!
+    mov       r3, r2
+    // Sub by 2
+    subs      r4, r4, #2
+    bmi       .Lcbcdec256_1X
+.Lcbcdec256_2XLoop:
+    // Load input 32 bytes
+    vld1.8    {d0-d3}, [r1]!
+    pld       [r1, #32]
+    vmov      q3, q0
+    vmov      q4, q1
+    // aes kernel
+    aesd.8    q0, q5
+    aesimc.8  q0, q0
+    aesd.8    q1, q5
+    aesimc.8  q1, q1
+    aesd.8    q0, q6
+    aesimc.8  q0, q0
+    aesd.8    q1, q6
+    aesimc.8  q1, q1
+    aesd.8    q0, q7
+    aesimc.8  q0, q0
+    aesd.8    q1, q7
+    aesimc.8  q1, q1
+    aesd.8    q0, q8
+    aesimc.8  q0, q0
+    aesd.8    q1, q8
+    aesimc.8  q1, q1
+    aesd.8    q0, q9
+    aesimc.8  q0, q0
+    aesd.8    q1, q9
+    aesimc.8  q1, q1
+    aesd.8    q0, q10
+    aesimc.8  q0, q0
+    aesd.8    q1, q10
+    aesimc.8  q1, q1
+    aesd.8    q0, q11
+    aesimc.8  q0, q0
+    aesd.8    q1, q11
+    aesimc.8  q1, q1
+    aesd.8    q0, q12
+    aesimc.8  q0, q0
+    aesd.8    q1, q12
+    aesimc.8  q1, q1
+    aesd.8    q0, q13
+    aesimc.8  q0, q0
+    aesd.8    q1, q13
+    aesimc.8  q1, q1
+    aesd.8    q0, q14
+    aesimc.8  q0, q0
+    aesd.8    q1, q14
+    aesimc.8  q1, q1
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    aesimc.8  q0, q0
+    aesd.8    q1, q15
+    aesimc.8  q1, q1
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    aesimc.8  q0, q0
+    aesd.8    q1, q15
+    aesimc.8  q1, q1
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    aesimc.8  q0, q0
+    aesd.8    q1, q15
+    aesimc.8  q1, q1
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    aesd.8    q1, q15
+    vld1.8    {d30-d31}, [r2]!
+    veor.8    q0, q0, q15
+    veor.8    q1, q1, q15
+    mov       r2, r3
+    veor.8    q0, q0, q2
+    veor.8    q1, q1, q3
+    vmov      q2, q4
+    vst1.8    {d0-d1}, [r0]!
+    vst1.8    {d2-d3}, [r0]!
+    subs      r4, r4, #2
+    bpl       .Lcbcdec256_2XLoop
+.Lcbcdec256_1X:
+    adds      r4, r4, #2
+    bne       .Lcbcdec256_FinalRound
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+.Lcbcdec256_FinalRound:
+    // load input 16 bytes
+    vld1.8    {d0-d1}, [r1]!
+    // aes kernel
+    aesd.8    q0, q5
+    aesimc.8  q0, q0
+    vld1.8    {d30}, [r2]!
+    aesd.8    q0, q6
+    aesimc.8  q0, q0
+    vld1.8    {d31}, [r2]!
+    aesd.8    q0, q7
+    aesimc.8  q0, q0
+    aesd.8    q0, q8
+    aesimc.8  q0, q0
+    aesd.8    q0, q9
+    aesimc.8  q0, q0
+    aesd.8    q0, q10
+    aesimc.8  q0, q0
+    aesd.8    q0, q11
+    aesimc.8  q0, q0
+    aesd.8    q0, q12
+    aesimc.8  q0, q0
+    aesd.8    q0, q13
+    aesimc.8  q0, q0
+    aesd.8    q0, q14
+    aesimc.8  q0, q0
+    aesd.8    q0, q15
+    aesimc.8  q0, q0
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    aesimc.8  q0, q0
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    aesimc.8  q0, q0
+    vld1.8    {d30-d31}, [r2]!
+    aesd.8    q0, q15
+    vld1.8    {d30-d31}, [r2]!
+    veor.8    q0, q0, q15
+    veor.8    q0, q0, q2
+    vst1.8    {d0-d1}, [r0]!
+    vpop      {q4, q5, q6, q7}
+    pop       {r4, r5, r6}
+    bx        lr
+
+ENDPROC(aes_v8_cbc_decrypt)
+
+#endif
@@ -59,8 +59,10 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
+	#ifndef CONFIG_CRYPTO_AES_ARM32_CE
 	/* private_AES_set_decrypt_key expects an encryption key as input */
 	ctx->dec_key = ctx->enc_key;
+	#endif
 	if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
@@ -23,6 +23,7 @@ generic-y += segment.h
 generic-y += sembuf.h
 generic-y += serial.h
 generic-y += shmbuf.h
+generic-y += simd.h
 generic-y += siginfo.h
 generic-y += sizes.h
 generic-y += socket.h
@@ -80,6 +80,15 @@ static inline u32 arch_timer_get_cntfrq(void)
 	return val;
 }

+static inline u64 arch_counter_get_cntpct(void)
+{
+	u64 cval;
+
+	isb();
+	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
+	return cval;
+}
+
 static inline u64 arch_counter_get_cntvct(void)
 {
 	u64 cval;
@@ -59,6 +59,21 @@
 #define smp_wmb()	dmb()
 #endif

+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)

@@ -16,6 +16,7 @@
 #include <asm/shmparam.h>
 #include <asm/cachetype.h>
 #include <asm/outercache.h>
+#include <asm/rodata.h>

 #define CACHE_COLOUR(vaddr)	((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)

@@ -184,15 +185,34 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 */

 /* Invalidate I-cache */
+#ifdef CONFIG_ARM_ERRATA_831171                                         
+#define __flush_icache_all_generic()					\
+	do {								\
+	asm("mcr	p15, 0, %0, c7, c5, 0"				\
+	    : : "r" (0));						\
+	asm("mcr        p15, 0, %0, c7, c5, 0"				\
+	    : : "r" (0));						\
+	} while (0)
+#else
 #define __flush_icache_all_generic()					\
 	asm("mcr	p15, 0, %0, c7, c5, 0"				\
 	    : : "r" (0));
+#endif

 /* Invalidate I-cache inner shareable */
+#ifdef CONFIG_ARM_ERRATA_831171
+#define __flush_icache_all_v7_smp()					\
+	do {								\
+	asm("mcr        p15, 0, %0, c7, c1, 0"				\
+	    : : "r" (0));						\
+	asm("mcr	p15, 0, %0, c7, c1, 0"				\
+	    : : "r" (0));						\
+	} while (0)
+#else
 #define __flush_icache_all_v7_smp()					\
 	asm("mcr	p15, 0, %0, c7, c1, 0"				\
 	    : : "r" (0));
-
+#endif
 /*
 * Optimized __flush_icache_all for the common cases. Note that UP ARMv7
 * will fall through to use __flush_icache_all_generic.
@@ -9,6 +9,7 @@
 #define CPUID_TCM	2
 #define CPUID_TLBTYPE	3
 #define CPUID_MPIDR	5
+#define CPUID_REVIDR	6

 #define CPUID_EXT_PFR0	"c1, 0"
 #define CPUID_EXT_PFR1	"c1, 1"
@@ -52,6 +53,9 @@
 #define ARM_CPU_PART_CORTEX_A5		0xC050
 #define ARM_CPU_PART_CORTEX_A15		0xC0F0
 #define ARM_CPU_PART_CORTEX_A7		0xC070
+#define ARM_CPU_PART_CORTEX_A12		0xC0D0
+#define ARM_CPU_PART_CORTEX_A17		0xC0E0
+#define ARM_CPU_PART_CORTEX_A53		0xD030

 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
@@ -19,8 +19,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG];

 typedef struct user_fp elf_fpregset_t;

-#define EM_ARM	40
-
 #define EF_ARM_EABI_MASK	0xff000000
 #define EF_ARM_EABI_UNKNOWN	0x00000000
 #define EF_ARM_EABI_VER1	0x01000000
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __ASM_FIQ_GLUE_H
+#define __ASM_FIQ_GLUE_H
+
+struct fiq_glue_handler {
+	void (*fiq)(struct fiq_glue_handler *h, void *regs, void *svc_sp);
+	void (*resume)(struct fiq_glue_handler *h);
+};
+typedef void (*fiq_return_handler_t)(void);
+
+int fiq_glue_register_handler(struct fiq_glue_handler *handler);
+int fiq_glue_set_return_handler(fiq_return_handler_t fiq_return);
+int fiq_glue_clear_return_handler(fiq_return_handler_t fiq_return);
+
+#ifdef CONFIG_FIQ_GLUE
+void fiq_glue_resume(void);
+#else
+static inline void fiq_glue_resume(void) {}
+#endif
+
+#endif
@@ -5,7 +5,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>

-#define NR_IPI	6
+#define NR_IPI	7

 typedef struct {
 	unsigned int __softirq_pending;
@@ -66,6 +66,7 @@
 #define   L2X0_STNDBY_MODE_EN		(1 << 0)

 /* Registers shifts and masks */
+#define L2X0_CACHE_ID_REV_MASK		(0x3f)
 #define L2X0_CACHE_ID_PART_MASK		(0xf << 6)
 #define L2X0_CACHE_ID_PART_L210		(1 << 6)
 #define L2X0_CACHE_ID_PART_L310		(3 << 6)
@@ -106,6 +107,8 @@

 #define L2X0_WAY_SIZE_SHIFT		3

+#define REV_PL310_R2P0				4
+
 #ifndef __ASSEMBLY__
 extern void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask);
 #if defined(CONFIG_CACHE_L2X0) && defined(CONFIG_OF)
@@ -17,15 +17,23 @@
 #define TRACER_ACCESSED_BIT	0
 #define TRACER_RUNNING_BIT	1
 #define TRACER_CYCLE_ACC_BIT	2
+#define TRACER_TRACE_DATA_BIT	3
+#define TRACER_TIMESTAMP_BIT	4
+#define TRACER_BRANCHOUTPUT_BIT	5
+#define TRACER_RETURN_STACK_BIT	6
 #define TRACER_ACCESSED		BIT(TRACER_ACCESSED_BIT)
 #define TRACER_RUNNING		BIT(TRACER_RUNNING_BIT)
 #define TRACER_CYCLE_ACC	BIT(TRACER_CYCLE_ACC_BIT)
+#define TRACER_TRACE_DATA	BIT(TRACER_TRACE_DATA_BIT)
+#define TRACER_TIMESTAMP	BIT(TRACER_TIMESTAMP_BIT)
+#define TRACER_BRANCHOUTPUT	BIT(TRACER_BRANCHOUTPUT_BIT)
+#define TRACER_RETURN_STACK	BIT(TRACER_RETURN_STACK_BIT)

 #define TRACER_TIMEOUT 10000

-#define etm_writel(t, v, x) \
-	(__raw_writel((v), (t)->etm_regs + (x)))
-#define etm_readl(t, x) (__raw_readl((t)->etm_regs + (x)))
+#define etm_writel(t, id, v, x) \
+	(__raw_writel((v), (t)->etm_regs[(id)] + (x)))
+#define etm_readl(t, id, x) (__raw_readl((t)->etm_regs[(id)] + (x)))

 /* CoreSight Management Registers */
 #define CSMR_LOCKACCESS 0xfb0
@@ -43,7 +51,7 @@
 #define ETMCTRL_POWERDOWN	1
 #define ETMCTRL_PROGRAM		(1 << 10)
 #define ETMCTRL_PORTSEL		(1 << 11)
-#define ETMCTRL_DO_CONTEXTID	(3 << 14)
+#define ETMCTRL_CONTEXTIDSIZE(x) (((x) & 3) << 14)
 #define ETMCTRL_PORTMASK1	(7 << 4)
 #define ETMCTRL_PORTMASK2	(1 << 21)
 #define ETMCTRL_PORTMASK	(ETMCTRL_PORTMASK1 | ETMCTRL_PORTMASK2)
@@ -55,9 +63,12 @@
 #define ETMCTRL_DATA_DO_BOTH	(ETMCTRL_DATA_DO_DATA | ETMCTRL_DATA_DO_ADDR)
 #define ETMCTRL_BRANCH_OUTPUT	(1 << 8)
 #define ETMCTRL_CYCLEACCURATE	(1 << 12)
+#define ETMCTRL_TIMESTAMP_EN	(1 << 28)
+#define ETMCTRL_RETURN_STACK_EN	(1 << 29)

 /* ETM configuration code register */
 #define ETMR_CONFCODE		(0x04)
+#define ETMCCR_ETMIDR_PRESENT	BIT(31)

 /* ETM trace start/stop resource control register */
 #define ETMR_TRACESSCTRL	(0x18)
@@ -113,10 +124,25 @@
 #define ETMR_TRACEENCTRL	0x24
 #define ETMTE_INCLEXCL		BIT(24)
 #define ETMR_TRACEENEVT		0x20
-#define ETMCTRL_OPTS		(ETMCTRL_DO_CPRT | \
-				ETMCTRL_DATA_DO_ADDR | \
-				ETMCTRL_BRANCH_OUTPUT | \
-				ETMCTRL_DO_CONTEXTID)
+
+#define ETMR_VIEWDATAEVT	0x30
+#define ETMR_VIEWDATACTRL1	0x34
+#define ETMR_VIEWDATACTRL2	0x38
+#define ETMR_VIEWDATACTRL3	0x3c
+#define ETMVDC3_EXCLONLY	BIT(16)
+
+#define ETMCTRL_OPTS		(ETMCTRL_DO_CPRT)
+
+#define ETMR_ID			0x1e4
+#define ETMIDR_VERSION(x)	(((x) >> 4) & 0xff)
+#define ETMIDR_VERSION_3_1	0x21
+#define ETMIDR_VERSION_PFT_1_0	0x30
+
+#define ETMR_CCE		0x1e8
+#define ETMCCER_RETURN_STACK_IMPLEMENTED	BIT(23)
+#define ETMCCER_TIMESTAMPING_IMPLEMENTED	BIT(22)
+
+#define ETMR_TRACEIDR		0x200

 /* ETM management registers, "ETM Architecture", 3.5.24 */
 #define ETMMR_OSLAR	0x300
@@ -140,14 +166,16 @@
 #define ETBFF_TRIGIN		BIT(8)
 #define ETBFF_TRIGEVT		BIT(9)
 #define ETBFF_TRIGFL		BIT(10)
+#define ETBFF_STOPFL		BIT(12)

 #define etb_writel(t, v, x) \
 	(__raw_writel((v), (t)->etb_regs + (x)))
 #define etb_readl(t, x) (__raw_readl((t)->etb_regs + (x)))

-#define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0)
-#define etm_unlock(t) \
-	do { etm_writel((t), CS_LAR_KEY, CSMR_LOCKACCESS); } while (0)
+#define etm_lock(t, id) \
+	do { etm_writel((t), (id), 0, CSMR_LOCKACCESS); } while (0)
+#define etm_unlock(t, id) \
+	do { etm_writel((t), (id), CS_LAR_KEY, CSMR_LOCKACCESS); } while (0)

 #define etb_lock(t) do { etb_writel((t), 0, CSMR_LOCKACCESS); } while (0)
 #define etb_unlock(t) \
@@ -35,6 +35,9 @@ extern void (*handle_arch_irq)(struct pt_regs *);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 #endif

+void arch_trigger_all_cpu_backtrace(void);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+
 #endif

 #endif
@@ -30,7 +30,7 @@ struct machine_desc {
 	unsigned int		nr_irqs;	/* number of IRQs */

 #ifdef CONFIG_ZONE_DMA
-	unsigned long		dma_zone_size;	/* size of DMA-able area */
+	phys_addr_t		dma_zone_size;	/* size of DMA-able area */
 #endif

 	unsigned int		video_start;	/* start of video RAM	*/
@@ -0,0 +1,28 @@
+/*
+ *  arch/arm/include/asm/mach/mmc.h
+ */
+#ifndef ASMARM_MACH_MMC_H
+#define ASMARM_MACH_MMC_H
+
+#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/sdio_func.h>
+
+struct embedded_sdio_data {
+        struct sdio_cis cis;
+        struct sdio_cccr cccr;
+        struct sdio_embedded_func *funcs;
+        int num_funcs;
+};
+
+struct mmc_platform_data {
+	unsigned int ocr_mask;			/* available voltages */
+	int built_in;				/* built-in device flag */
+	int card_present;			/* card detect state */
+	u32 (*translate_vdd)(struct device *, unsigned int);
+	unsigned int (*status)(struct device *);
+	struct embedded_sdio_data *embedded_sdio;
+	int (*register_status_notify)(void (*callback)(int card_present, void *dev_id), void *dev_id);
+};
+
+#endif
@@ -28,6 +28,7 @@ struct outer_cache_fns {
 	void (*clean_range)(unsigned long, unsigned long);
 	void (*flush_range)(unsigned long, unsigned long);
 	void (*flush_all)(void);
+	void (*clean_all)(void);
 	void (*inv_all)(void);
 	void (*disable)(void);
 #ifdef CONFIG_OUTER_CACHE_SYNC
@@ -56,7 +57,11 @@ static inline void outer_flush_range(phys_addr_t start, phys_addr_t end)
 	if (outer_cache.flush_range)
 		outer_cache.flush_range(start, end);
 }
-
+static inline void outer_clean_all(void)
+{
+	if (outer_cache.clean_all)
+		outer_cache.clean_all();
+}
 static inline void outer_flush_all(void)
 {
 	if (outer_cache.flush_all)
@@ -89,6 +94,8 @@ static inline void outer_clean_range(phys_addr_t start, phys_addr_t end)
 { }
 static inline void outer_flush_range(phys_addr_t start, phys_addr_t end)
 { }
+static inline void outer_clean_all(void)
+{ }
 static inline void outer_flush_all(void) { }
 static inline void outer_inv_all(void) { }
 static inline void outer_disable(void) { }
@@ -62,9 +62,19 @@ struct pmu_hw_events {
 	raw_spinlock_t		pmu_lock;
 };

+struct cpupmu_regs {
+	u32 pmc;
+	u32 pmcntenset;
+	u32 pmuseren;
+	u32 pmintenset;
+	u32 pmxevttype[8];
+	u32 pmxevtcnt[8];
+};
+
 struct arm_pmu {
 	struct pmu	pmu;
 	cpumask_t	active_irqs;
+	cpumask_t	valid_cpus;
 	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct perf_event *event);
@@ -81,6 +91,8 @@ struct arm_pmu {
 	int		(*request_irq)(struct arm_pmu *, irq_handler_t handler);
 	void		(*free_irq)(struct arm_pmu *);
 	int		(*map_event)(struct perf_event *event);
+	void		(*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
+	void		(*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
 	int		num_events;
 	atomic_t	active_events;
 	struct mutex	reserve_mutex;
@@ -29,8 +29,20 @@ struct psci_operations {
 	int (*cpu_off)(struct psci_power_state state);
 	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
 	int (*migrate)(unsigned long cpuid);
+	int (*affinity_info)(unsigned long target_affinity,
+			unsigned long lowest_affinity_level);
+	int (*migrate_info_type)(void);
 };

 extern struct psci_operations psci_ops;
+extern struct smp_operations psci_smp_ops;
+
+#ifdef CONFIG_ARM_PSCI
+int psci_init(void);
+bool psci_smp_available(void);
+#else
+static inline int psci_init(void) { return 0; }
+static inline bool psci_smp_available(void) { return false; }
+#endif

 #endif /* __ASM_ARM_PSCI_H */
@@ -0,0 +1,32 @@
+/*
+ *  arch/arm/include/asm/rodata.h
+ *
+ *  Copyright (C) 2011 Google, Inc.
+ *
+ *  Author: Colin Cross <ccross@android.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASMARM_RODATA_H
+#define _ASMARM_RODATA_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_DEBUG_RODATA
+
+int set_memory_rw(unsigned long virt, int numpages);
+int set_memory_ro(unsigned long virt, int numpages);
+
+void mark_rodata_ro(void);
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+#else
+static inline void set_kernel_text_rw(void) { }
+static inline void set_kernel_text_ro(void) { }
+#endif
+
+#endif
+
+#endif
@@ -81,6 +81,8 @@ extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);

+extern void smp_send_all_cpu_backtrace(void);
+
 struct smp_operations {
 #ifdef CONFIG_SMP
 	/*
@@ -103,8 +103,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
 	memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
 }

-static inline int syscall_get_arch(struct task_struct *task,
-				   struct pt_regs *regs)
+static inline int syscall_get_arch(void)
 {
 	/* ARM tasks don't change audit architectures on the fly. */
 	return AUDIT_ARCH_ARM;
@@ -68,6 +68,8 @@ struct thread_info {
 	unsigned long		thumbee_state;	/* ThumbEE Handler Base register */
 #endif
 	struct restart_block	restart_block;
+	int			cpu_excp;
+	void			*regs_on_excp;
 };

 #define INIT_THREAD_INFO(tsk)						\
@@ -156,6 +158,9 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
+#if defined(CONFIG_MT_RT_SCHED) || defined(CONFIG_MT_RT_SCHED_LOG)
+#define TIF_NEED_RELEASED	31
+#endif

 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
@@ -331,7 +331,9 @@ static inline void local_flush_tlb_all(void)
 	tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero);
 	tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero);
 	tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero);
-
+#ifdef CONFIG_ARM_ERRATA_831171
+	tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero);
+#endif 
 	if (tlb_flag(TLB_BARRIER)) {
 		dsb();
 		isb();
@@ -361,8 +363,14 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 	tlb_op(TLB_V6_I_ASID, "c8, c5, 2", asid);
 #ifdef CONFIG_ARM_ERRATA_720789
 	tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", zero);
+#ifdef CONFIG_ARM_ERRATA_831171
+	tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", zero);
+#endif
 #else
 	tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", asid);
+#ifdef CONFIG_ARM_ERRATA_831171
+	tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", asid);
+#endif
 #endif

 	if (tlb_flag(TLB_BARRIER))
@@ -394,8 +402,14 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 	tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", uaddr);
 #ifdef CONFIG_ARM_ERRATA_720789
 	tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK);
+#ifdef CONFIG_ARM_ERRATA_831171
+	tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK);
+#endif
 #else
 	tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", uaddr);
+#ifdef CONFIG_ARM_ERRATA_831171
+	tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", uaddr);
+#endif
 #endif

 	if (tlb_flag(TLB_BARRIER))
@@ -422,7 +436,9 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 	tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", kaddr);
 	tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", kaddr);
 	tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr);
-
+#ifdef CONFIG_ARM_ERRATA_831171
+	tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr);
+#endif
 	if (tlb_flag(TLB_BARRIER)) {
 		dsb();
 		isb();
@@ -434,30 +450,22 @@ static inline void local_flush_bp_all(void)
 	const int zero = 0;
 	const unsigned int __tlb_flag = __cpu_tlb_flags;

-	if (tlb_flag(TLB_V7_UIS_BP))
+	if (tlb_flag(TLB_V7_UIS_BP)){
 		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero));
-	else if (tlb_flag(TLB_V6_BP))
+#ifdef CONFIG_ARM_ERRATA_831171
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero));
+#endif
+	}
+	else if (tlb_flag(TLB_V6_BP)){
 		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero));
-
+#ifdef CONFIG_ARM_ERRATA_831171
+		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero));
+#endif
+	}
 	if (tlb_flag(TLB_BARRIER))
 		isb();
 }

-#ifdef CONFIG_ARM_ERRATA_798181
-static inline void dummy_flush_tlb_a15_erratum(void)
-{
-	/*
-	 * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0.
-	 */
-	asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0));
-	dsb();
-}
-#else
-static inline void dummy_flush_tlb_a15_erratum(void)
-{
-}
-#endif
-
 /*
 *	flush_pmd_entry
 *
@@ -477,7 +485,9 @@ static inline void flush_pmd_entry(void *pmd)

 	tlb_op(TLB_DCLEAN, "c7, c10, 1	@ flush_pmd", pmd);
 	tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1  @ L2 flush_pmd", pmd);
-
+#ifdef CONFIG_ARM_ERRATA_831171
+	tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1  @ L2 flush_pmd", pmd);
+#endif
 	if (tlb_flag(TLB_WB))
 		dsb();
 }
@@ -488,6 +498,9 @@ static inline void clean_pmd_entry(void *pmd)

 	tlb_op(TLB_DCLEAN, "c7, c10, 1	@ flush_pmd", pmd);
 	tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1  @ L2 flush_pmd", pmd);
+#ifdef CONFIG_ARM_ERRATA_831171
+	tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1  @ L2 flush_pmd", pmd);
+#endif
 }

 #undef tlb_op
@@ -539,4 +552,21 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,

 #endif /* CONFIG_MMU */

+#ifndef __ASSEMBLY__
+#ifdef CONFIG_ARM_ERRATA_798181
+extern void erratum_a15_798181_init(void);
+#else
+static inline void erratum_a15_798181_init(void) {}
+#endif
+extern bool (*erratum_a15_798181_handler)(void);
+
+static inline bool erratum_a15_798181(void)
+{
+	if (unlikely(IS_ENABLED(CONFIG_ARM_ERRATA_798181) &&
+		erratum_a15_798181_handler))
+		return erratum_a15_798181_handler();
+	return false;
+}
+#endif
+
 #endif
@@ -19,6 +19,19 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
 #define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
 #define topology_thread_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
+#ifdef CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY
+#define CPUPOWER_FREQSCALE_SHIFT 10
+#define CPUPOWER_FREQSCALE_DEFAULT (1L << CPUPOWER_FREQSCALE_SHIFT)
+extern unsigned long arch_get_max_cpu_capacity(int);
+extern unsigned long arch_get_cpu_capacity(int);
+extern int arch_get_invariant_power_enabled(void);
+extern int arch_get_cpu_throttling(int);
+
+#define topology_max_cpu_capacity(cpu)	(arch_get_max_cpu_capacity(cpu))
+#define topology_cpu_capacity(cpu)	(arch_get_cpu_capacity(cpu))
+#define topology_cpu_throttling(cpu)	(arch_get_cpu_capacity(cpu))
+#define topology_cpu_inv_power_en(void) (arch_get_invariant_power_enabled())
+#endif /* CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY */

 #define mc_capable()	(cpu_topology[0].socket_id != -1)
 #define smt_capable()	(cpu_topology[0].thread_id != -1)
@@ -26,13 +39,88 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
 void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);

-#else
+#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
+
+#if defined (CONFIG_HMP_PACK_SMALL_TASK) && !defined(CONFIG_MTK_SCHED_CMP)
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) {				\
+	.min_interval		= 1,					\
+	.max_interval		= 4,					\
+	.busy_factor		= 64,					\
+	.imbalance_pct		= 125,					\
+	.cache_nice_tries	= 1,					\
+	.busy_idx		= 2,					\
+	.idle_idx		= 1,					\
+	.newidle_idx		= 0,					\
+	.wake_idx		= 0,					\
+	.forkexec_idx		= 0,					\
+									\
+	.flags			= 0*SD_LOAD_BALANCE			\
+				| 1*SD_BALANCE_NEWIDLE			\
+				| 1*SD_BALANCE_EXEC			\
+				| 1*SD_BALANCE_FORK			\
+				| 0*SD_BALANCE_WAKE			\
+				| 1*SD_WAKE_AFFINE			\
+				| 0*SD_SHARE_CPUPOWER			\
+				| 0*SD_SHARE_PKG_RESOURCES		\
+				| arch_sd_share_power_line()		\
+				| 0*SD_SERIALIZE			\
+				,					\
+	.last_balance		 = jiffies,				\
+	.balance_interval	= 1,					\
+}
+#endif
+#endif /* CONFIG_HMP_PACK_SMALL_TASK */
+
+#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
+
+/* CPU cluster functions */
+extern void arch_build_cpu_topology_domain(void);
+extern int arch_cpu_is_big(unsigned int cpu);
+extern int arch_cpu_is_little(unsigned int cpu);
+extern int arch_is_multi_cluster(void);
+extern int arch_is_big_little(void);
+extern int arch_get_nr_clusters(void);
+extern int arch_get_cluster_id(unsigned int cpu);
+extern void arch_get_cluster_cpus(struct cpumask *cpus, int cluster_id);
+extern void arch_get_big_little_cpus(struct cpumask *big, struct cpumask *little);
+
+#else /* !CONFIG_ARM_CPU_TOPOLOGY */

 static inline void init_cpu_topology(void) { }
 static inline void store_cpu_topology(unsigned int cpuid) { }
+static inline int cluster_to_logical_mask(unsigned int socket_id,
+	cpumask_t *cluster_mask) { return -EINVAL; }

-#endif
+static inline void arch_build_cpu_topology_domain(void) {}
+static inline int arch_cpu_is_big(unsigned int cpu) { return 0; }
+static inline int arch_cpu_is_little(unsigned int cpu) { return 1; }
+static inline int arch_is_multi_cluster(void) { return 0; }
+static inline int arch_is_big_little(void) { return 0; }
+static inline int arch_get_nr_clusters(void) { return 1; }
+static inline int arch_get_cluster_id(unsigned int cpu) { return 0; }
+static inline void arch_get_cluster_cpus(struct cpumask *cpus, int cluster_id)
+{
+    cpumask_clear(cpus);
+	if (0 == cluster_id) {
+        unsigned int cpu;
+		for_each_possible_cpu(cpu)
+			cpumask_set_cpu(cpu, cpus);
+	}
+}
+static inline void arch_get_big_little_cpus(struct cpumask *big,struct cpumask *little)
+{
+    unsigned int cpu;
+    cpumask_clear(big);
+    cpumask_clear(little);
+    for_each_possible_cpu(cpu)
+        cpumask_set_cpu(cpu, little);
+}
+
+#endif /* CONFIG_ARM_CPU_TOPOLOGY */

 #include <asm-generic/topology.h>

@@ -15,6 +15,8 @@
 #define _UAPI__ASMARM_SETUP_H

 #include <linux/types.h>
+#include <mach/dfo_boot.h>
+#include <mach/mt_devinfo.h>

 #define COMMAND_LINE_SIZE 1024

@@ -43,6 +45,14 @@ struct tag_mem32 {
 	__u32	start;	/* physical start address */
 };

+/* it is allowed to have multiple ATAG_MEM nodes */
+#define ATAG_MEM64	0x54420002
+
+struct tag_mem64 {
+	__u64	size;
+	__u64	start;	/* physical start address */
+};
+
 /* VGA text type displays */
 #define ATAG_VIDEOTEXT	0x54410003

@@ -143,11 +153,59 @@ struct tag_memclk {
 	__u32 fmemclk;
 };

+/* boot information */
+#define ATAG_BOOT       0x41000802
+
+struct tag_boot {
+	u32 bootmode;
+};
+
+/*META com port information*/
+#define ATAG_META_COM 0x41000803
+
+struct tag_meta_com {
+	u32 meta_com_type; /* identify meta via uart or usb */
+	u32 meta_com_id;  /* multiple meta need to know com port id */
+};
+
+
+/* MDINFO */
+#define ATAG_MDINFO_DATA 0x41000806
+struct tag_mdinfo_data{
+	u8 md_type[4];
+};
+
+#define ATAG_TEE_DATA 0x41000808
+
+/* general memory descriptor */
+typedef struct {
+    u64 start;
+    u64 size;
+} mem_desc_t;
+
+/* mblock is used by CPU */
+typedef struct {
+	u64 start;
+	u64 size;
+	u32 rank;	/* rank the mblock belongs to */
+} mblock_t;
+
+typedef struct {
+	u32 mblock_num;
+	mblock_t mblock[4];
+} mblock_info_t;
+
+typedef struct {
+	u32 rank_num;
+	mem_desc_t rank_info[4];
+} dram_info_t;
+
 struct tag {
 	struct tag_header hdr;
 	union {
 		struct tag_core		core;
 		struct tag_mem32	mem;
+		struct tag_mem64	mem64;
 		struct tag_videotext	videotext;
 		struct tag_ramdisk	ramdisk;
 		struct tag_initrd	initrd;
@@ -165,6 +223,19 @@ struct tag {
 		 * DC21285 specific
 		 */
 		struct tag_memclk	memclk;
+		struct tag_boot		boot;
+		struct tag_meta_com	meta_com;
+		struct tag_devinfo_data	devinfo_data;
+                tag_dfo_boot     dfo_data;
+                struct tag_mdinfo_data mdinfo_data;
+		mem_desc_t tee_reserved_mem;
+#ifdef PT_ABTC_ATAG
+		struct tag_pt_info tag_pt_info;
+#endif
+#ifdef NAND_ABTC_ATAG
+		struct tag_nand_number tag_nand_number;
+		flashdev_info_t gen_FlashTable_p;
+#endif
 	} u;
 };

@@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 # Object file lists.

 obj-y		:= elf.o entry-armv.o entry-common.o irq.o opcodes.o \
-		   process.o ptrace.o return_address.o sched_clock.o \
+		   process.o ptrace.o return_address.o\
 		   setup.o signal.o stacktrace.o sys_arm.o time.o traps.o

 obj-$(CONFIG_ATAGS)		+= atags_parse.o
@@ -82,6 +82,6 @@ obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o

 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
-obj-$(CONFIG_ARM_PSCI)		+= psci.o
+obj-$(CONFIG_ARM_PSCI)		+= psci.o psci_smp.o

 extra-y := $(head-y) vmlinux.lds
@@ -13,7 +13,6 @@
 #include <linux/errno.h>

 #include <asm/delay.h>
-#include <asm/sched_clock.h>

 #include <clocksource/arm_arch_timer.h>

@@ -22,13 +21,6 @@ static unsigned long arch_timer_read_counter_long(void)
 	return arch_timer_read_counter();
 }

-static u32 sched_clock_mult __read_mostly;
-
-static unsigned long long notrace arch_timer_sched_clock(void)
-{
-	return arch_timer_read_counter() * sched_clock_mult;
-}
-
 static struct delay_timer arch_delay_timer;

 static void __init arch_timer_delay_timer_register(void)
@@ -48,11 +40,5 @@ int __init arch_timer_arch_init(void)

 	arch_timer_delay_timer_register();

-	/* Cache the sched_clock multiplier to save a divide in the hot path. */
-	sched_clock_mult = NSEC_PER_SEC / arch_timer_rate;
-	sched_clock_func = arch_timer_sched_clock;
-	pr_info("sched_clock: ARM arch timer >56 bits at %ukHz, resolution %uns\n",
-		arch_timer_rate / 1000, sched_clock_mult);
-
 	return 0;
 }
@@ -12,6 +12,7 @@
 */
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/suspend.h>
 #include <linux/dma-mapping.h>
 #ifdef CONFIG_KVM_ARM_HOST
 #include <linux/kvm_host.h>
@@ -193,5 +194,8 @@ int main(void)
 #endif
  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
 #endif
+  DEFINE(PBE_ADDRESS,     offsetof(struct pbe, address));
+  DEFINE(PBE_ORIG_ADDRESS,    offsetof(struct pbe, orig_address));
+  DEFINE(PBE_NEXT,        offsetof(struct pbe, next));
  return 0; 
 }
@@ -30,7 +30,7 @@

 #include "atags.h"

-static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
+char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;

 #ifndef MEM_SIZE
 #define MEM_SIZE	(16*1024*1024)
@@ -26,6 +26,8 @@
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>

+extern char default_command_line[COMMAND_LINE_SIZE];
+
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
 	arm_add_memory(base, size);
@@ -183,6 +185,7 @@ struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
 	unsigned int score, mdesc_score = ~1;
 	unsigned long dt_root;
 	const char *model;
+	char *from = default_command_line;

 #ifdef CONFIG_ARCH_MULTIPLATFORM
 	DT_MACHINE_START(GENERIC_DT, "Generic DT based system")
@@ -245,5 +248,10 @@ struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
 	/* Change machine number to match the mdesc we're using */
 	__machine_arch_type = mdesc_best->nr;

+	if (mdesc_best->fixup) {
+        mdesc_best->fixup((void *)dt_root, &from, &meminfo);
+        strlcpy(boot_command_line, from, COMMAND_LINE_SIZE);
+    }
+
 	return mdesc_best;
 }
@@ -685,6 +685,28 @@ ENDPROC(ret_from_exception)
 ENTRY(__switch_to)
 UNWIND(.fnstart	)
 UNWIND(.cantunwind	)
+#ifdef CONFIG_VFP_OPT
+	add	ip, r1, #TI_CPU_SAVE
+	stmfa	ip!, {r0, r1, r2, r5, r6, r8, lr}
+
+	@1. save vfp state for previous thread_info
+	mov	r0, r1
+	add	r0, r0, #TI_VFPSTATE	@ r0 = workspace
+	VFPFMRX	r1, FPEXC
+	mov	r5, ip			@ save ip to r5, because vfp_save_state may change ip
+	mov	r6, r2			@ save r2 to r6, because vfp_save_state may change r2
+	bl	vfp_save_state
+	mov	ip, r5
+	mov	r2, r6
+
+	@ 2. restore vfp state from next thread_info
+	add	r2, r2, #TI_VFPSTATE	@ r2 = workspace
+	VFPFLDMIA	r2, r0		@ reload the working registers while
+					@ FPEXC is in a safe state
+	ldmia	r2, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
+	VFPFMXR	FPSCR, r5		@ restore status
+	ldmfa	ip!, {r0, r1, r2, r5, r6, r8, lr}
+#endif
 	add	ip, r1, #TI_CPU_SAVE
 	ldr	r3, [r2, #TI_TP_VALUE]
 ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/io.h>
+#include <linux/slab.h>
 #include <linux/sysrq.h>
 #include <linux/device.h>
 #include <linux/clk.h>
@@ -37,26 +38,37 @@ MODULE_AUTHOR("Alexander Shishkin");
 struct tracectx {
 	unsigned int	etb_bufsz;
 	void __iomem	*etb_regs;
-	void __iomem	*etm_regs;
+	void __iomem	**etm_regs;
+	int		etm_regs_count;
 	unsigned long	flags;
 	int		ncmppairs;
 	int		etm_portsz;
+	int		etm_contextid_size;
+	u32		etb_fc;
+	unsigned long	range_start;
+	unsigned long	range_end;
+	unsigned long	data_range_start;
+	unsigned long	data_range_end;
+	bool		dump_initial_etb;
 	struct device	*dev;
 	struct clk	*emu_clk;
 	struct mutex	mutex;
 };

-static struct tracectx tracer;
+static struct tracectx tracer = {
+	.range_start = (unsigned long)_stext,
+	.range_end = (unsigned long)_etext,
+};

 static inline bool trace_isrunning(struct tracectx *t)
 {
 	return !!(t->flags & TRACER_RUNNING);
 }

-static int etm_setup_address_range(struct tracectx *t, int n,
+static int etm_setup_address_range(struct tracectx *t, int id, int n,
 		unsigned long start, unsigned long end, int exclude, int data)
 {
-	u32 flags = ETMAAT_ARM | ETMAAT_IGNCONTEXTID | ETMAAT_NSONLY | \
+	u32 flags = ETMAAT_ARM | ETMAAT_IGNCONTEXTID | ETMAAT_IGNSECURITY |
 		    ETMAAT_NOVALCMP;

 	if (n < 1 || n > t->ncmppairs)
@@ -72,95 +84,185 @@ static int etm_setup_address_range(struct tracectx *t, int n,
 		flags |= ETMAAT_IEXEC;

 	/* first comparator for the range */
-	etm_writel(t, flags, ETMR_COMP_ACC_TYPE(n * 2));
-	etm_writel(t, start, ETMR_COMP_VAL(n * 2));
+	etm_writel(t, id, flags, ETMR_COMP_ACC_TYPE(n * 2));
+	etm_writel(t, id, start, ETMR_COMP_VAL(n * 2));

 	/* second comparator is right next to it */
-	etm_writel(t, flags, ETMR_COMP_ACC_TYPE(n * 2 + 1));
-	etm_writel(t, end, ETMR_COMP_VAL(n * 2 + 1));
+	etm_writel(t, id, flags, ETMR_COMP_ACC_TYPE(n * 2 + 1));
+	etm_writel(t, id, end, ETMR_COMP_VAL(n * 2 + 1));

-	flags = exclude ? ETMTE_INCLEXCL : 0;
-	etm_writel(t, flags | (1 << n), ETMR_TRACEENCTRL);
+	if (data) {
+		flags = exclude ? ETMVDC3_EXCLONLY : 0;
+		if (exclude)
+			n += 8;
+		etm_writel(t, id, flags | BIT(n), ETMR_VIEWDATACTRL3);
+	} else {
+		flags = exclude ? ETMTE_INCLEXCL : 0;
+		etm_writel(t, id, flags | (1 << n), ETMR_TRACEENCTRL);
+	}

 	return 0;
 }

+static int trace_start_etm(struct tracectx *t, int id)
+{
+	u32 v;
+	unsigned long timeout = TRACER_TIMEOUT;
+
+	v = ETMCTRL_OPTS | ETMCTRL_PROGRAM | ETMCTRL_PORTSIZE(t->etm_portsz);
+	v |= ETMCTRL_CONTEXTIDSIZE(t->etm_contextid_size);
+
+	if (t->flags & TRACER_CYCLE_ACC)
+		v |= ETMCTRL_CYCLEACCURATE;
+
+	if (t->flags & TRACER_BRANCHOUTPUT)
+		v |= ETMCTRL_BRANCH_OUTPUT;
+
+	if (t->flags & TRACER_TRACE_DATA)
+		v |= ETMCTRL_DATA_DO_ADDR;
+
+	if (t->flags & TRACER_TIMESTAMP)
+		v |= ETMCTRL_TIMESTAMP_EN;
+
+	if (t->flags & TRACER_RETURN_STACK)
+		v |= ETMCTRL_RETURN_STACK_EN;
+
+	etm_unlock(t, id);
+
+	etm_writel(t, id, v, ETMR_CTRL);
+
+	while (!(etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout)
+		;
+	if (!timeout) {
+		dev_dbg(t->dev, "Waiting for progbit to assert timed out\n");
+		etm_lock(t, id);
+		return -EFAULT;
+	}
+
+	if (t->range_start || t->range_end)
+		etm_setup_address_range(t, id, 1,
+					t->range_start, t->range_end, 0, 0);
+	else
+		etm_writel(t, id, ETMTE_INCLEXCL, ETMR_TRACEENCTRL);
+
+	etm_writel(t, id, 0, ETMR_TRACEENCTRL2);
+	etm_writel(t, id, 0, ETMR_TRACESSCTRL);
+	etm_writel(t, id, 0x6f, ETMR_TRACEENEVT);
+
+	etm_writel(t, id, 0, ETMR_VIEWDATACTRL1);
+	etm_writel(t, id, 0, ETMR_VIEWDATACTRL2);
+
+	if (t->data_range_start || t->data_range_end)
+		etm_setup_address_range(t, id, 2, t->data_range_start,
+					t->data_range_end, 0, 1);
+	else
+		etm_writel(t, id, ETMVDC3_EXCLONLY, ETMR_VIEWDATACTRL3);
+
+	etm_writel(t, id, 0x6f, ETMR_VIEWDATAEVT);
+
+	v &= ~ETMCTRL_PROGRAM;
+	v |= ETMCTRL_PORTSEL;
+
+	etm_writel(t, id, v, ETMR_CTRL);
+
+	timeout = TRACER_TIMEOUT;
+	while (etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM && --timeout)
+		;
+	if (!timeout) {
+		dev_dbg(t->dev, "Waiting for progbit to deassert timed out\n");
+		etm_lock(t, id);
+		return -EFAULT;
+	}
+
+	etm_lock(t, id);
+	return 0;
+}
+
 static int trace_start(struct tracectx *t)
 {
-	u32 v;
-	unsigned long timeout = TRACER_TIMEOUT;
+	int ret;
+	int id;
+	u32 etb_fc = t->etb_fc;

 	etb_unlock(t);

-	etb_writel(t, 0, ETBR_FORMATTERCTRL);
+	t->dump_initial_etb = false;
+	etb_writel(t, 0, ETBR_WRITEADDR);
+	etb_writel(t, etb_fc, ETBR_FORMATTERCTRL);
 	etb_writel(t, 1, ETBR_CTRL);

 	etb_lock(t);

-	/* configure etm */
-	v = ETMCTRL_OPTS | ETMCTRL_PROGRAM | ETMCTRL_PORTSIZE(t->etm_portsz);
-
-	if (t->flags & TRACER_CYCLE_ACC)
-		v |= ETMCTRL_CYCLEACCURATE;
-
-	etm_unlock(t);
-
-	etm_writel(t, v, ETMR_CTRL);
-
-	while (!(etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout)
-		;
-	if (!timeout) {
-		dev_dbg(t->dev, "Waiting for progbit to assert timed out\n");
-		etm_lock(t);
-		return -EFAULT;
+	/* configure etm(s) */
+	for (id = 0; id < t->etm_regs_count; id++) {
+		ret = trace_start_etm(t, id);
+		if (ret)
+			return ret;
 	}

-	etm_setup_address_range(t, 1, (unsigned long)_stext,
-			(unsigned long)_etext, 0, 0);
-	etm_writel(t, 0, ETMR_TRACEENCTRL2);
-	etm_writel(t, 0, ETMR_TRACESSCTRL);
-	etm_writel(t, 0x6f, ETMR_TRACEENEVT);
-
-	v &= ~ETMCTRL_PROGRAM;
-	v |= ETMCTRL_PORTSEL;
-
-	etm_writel(t, v, ETMR_CTRL);
-
-	timeout = TRACER_TIMEOUT;
-	while (etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM && --timeout)
-		;
-	if (!timeout) {
-		dev_dbg(t->dev, "Waiting for progbit to deassert timed out\n");
-		etm_lock(t);
-		return -EFAULT;
-	}
-
-	etm_lock(t);
-
 	t->flags |= TRACER_RUNNING;

 	return 0;
 }

-static int trace_stop(struct tracectx *t)
+static int trace_stop_etm(struct tracectx *t, int id)
 {
 	unsigned long timeout = TRACER_TIMEOUT;

-	etm_unlock(t);
+	etm_unlock(t, id);

-	etm_writel(t, 0x440, ETMR_CTRL);
-	while (!(etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout)
+	etm_writel(t, id, 0x440, ETMR_CTRL);
+	while (!(etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout)
 		;
 	if (!timeout) {
-		dev_dbg(t->dev, "Waiting for progbit to assert timed out\n");
-		etm_lock(t);
+		dev_err(t->dev,
+			"etm%d: Waiting for progbit to assert timed out\n",
+			id);
+		etm_lock(t, id);
 		return -EFAULT;
 	}

-	etm_lock(t);
+	etm_lock(t, id);
+	return 0;
+}
+
+static int trace_power_down_etm(struct tracectx *t, int id)
+{
+	unsigned long timeout = TRACER_TIMEOUT;
+	etm_unlock(t, id);
+	while (!(etm_readl(t, id, ETMR_STATUS) & ETMST_PROGBIT) && --timeout)
+		;
+	if (!timeout) {
+		dev_err(t->dev, "etm%d: Waiting for status progbit to assert timed out\n",
+			id);
+		etm_lock(t, id);
+		return -EFAULT;
+	}
+
+	etm_writel(t, id, 0x441, ETMR_CTRL);
+
+	etm_lock(t, id);
+	return 0;
+}
+
+static int trace_stop(struct tracectx *t)
+{
+	int id;
+	unsigned long timeout = TRACER_TIMEOUT;
+	u32 etb_fc = t->etb_fc;
+
+	for (id = 0; id < t->etm_regs_count; id++)
+		trace_stop_etm(t, id);
+
+	for (id = 0; id < t->etm_regs_count; id++)
+		trace_power_down_etm(t, id);

 	etb_unlock(t);
-	etb_writel(t, ETBFF_MANUAL_FLUSH, ETBR_FORMATTERCTRL);
+	if (etb_fc) {
+		etb_fc |= ETBFF_STOPFL;
+		etb_writel(t, t->etb_fc, ETBR_FORMATTERCTRL);
+	}
+	etb_writel(t, etb_fc | ETBFF_MANUAL_FLUSH, ETBR_FORMATTERCTRL);

 	timeout = TRACER_TIMEOUT;
 	while (etb_readl(t, ETBR_FORMATTERCTRL) &
@@ -185,24 +287,15 @@ static int trace_stop(struct tracectx *t)
 static int etb_getdatalen(struct tracectx *t)
 {
 	u32 v;
-	int rp, wp;
+	int wp;

 	v = etb_readl(t, ETBR_STATUS);

 	if (v & 1)
 		return t->etb_bufsz;

-	rp = etb_readl(t, ETBR_READADDR);
 	wp = etb_readl(t, ETBR_WRITEADDR);
-
-	if (rp > wp) {
-		etb_writel(t, 0, ETBR_READADDR);
-		etb_writel(t, 0, ETBR_WRITEADDR);
-
-		return 0;
-	}
-
-	return wp - rp;
+	return wp;
 }

 /* sysrq+v will always stop the running trace and leave it at that */
@@ -235,21 +328,18 @@ static void etm_dump(void)
 		printk("%08x", cpu_to_be32(etb_readl(t, ETBR_READMEM)));
 	printk(KERN_INFO "\n--- ETB buffer end ---\n");

-	/* deassert the overflow bit */
-	etb_writel(t, 1, ETBR_CTRL);
-	etb_writel(t, 0, ETBR_CTRL);
-
-	etb_writel(t, 0, ETBR_TRIGGERCOUNT);
-	etb_writel(t, 0, ETBR_READADDR);
-	etb_writel(t, 0, ETBR_WRITEADDR);
-
 	etb_lock(t);
 }

 static void sysrq_etm_dump(int key)
 {
+	if (!mutex_trylock(&tracer.mutex)) {
+		printk(KERN_INFO "Tracing hardware busy\n");
+		return;
+	}
 	dev_dbg(tracer.dev, "Dumping ETB buffer\n");
 	etm_dump();
+	mutex_unlock(&tracer.mutex);
 }

 static struct sysrq_key_op sysrq_etm_op = {
@@ -276,6 +366,10 @@ static ssize_t etb_read(struct file *file, char __user *data,
 	struct tracectx *t = file->private_data;
 	u32 first = 0;
 	u32 *buf;
+	int wpos;
+	int skip;
+	long wlength;
+	loff_t pos = *ppos;

 	mutex_lock(&t->mutex);

@@ -287,31 +381,39 @@ static ssize_t etb_read(struct file *file, char __user *data,
 	etb_unlock(t);

 	total = etb_getdatalen(t);
+	if (total == 0 && t->dump_initial_etb)
+		total = t->etb_bufsz;
 	if (total == t->etb_bufsz)
 		first = etb_readl(t, ETBR_WRITEADDR);

+	if (pos > total * 4) {
+		skip = 0;
+		wpos = total;
+	} else {
+		skip = (int)pos % 4;
+		wpos = (int)pos / 4;
+	}
+	total -= wpos;
+	first = (first + wpos) % t->etb_bufsz;
+
 	etb_writel(t, first, ETBR_READADDR);

-	length = min(total * 4, (int)len);
-	buf = vmalloc(length);
+	wlength = min(total, DIV_ROUND_UP(skip + (int)len, 4));
+	length = min(total * 4 - skip, (int)len);
+	buf = vmalloc(wlength * 4);

-	dev_dbg(t->dev, "ETB buffer length: %d\n", total);
+	dev_dbg(t->dev, "ETB read %ld bytes to %lld from %ld words at %d\n",
+		length, pos, wlength, first);
+	dev_dbg(t->dev, "ETB buffer length: %d\n", total + wpos);
 	dev_dbg(t->dev, "ETB status reg: %x\n", etb_readl(t, ETBR_STATUS));
-	for (i = 0; i < length / 4; i++)
+	for (i = 0; i < wlength; i++)
 		buf[i] = etb_readl(t, ETBR_READMEM);

-	/* the only way to deassert overflow bit in ETB status is this */
-	etb_writel(t, 1, ETBR_CTRL);
-	etb_writel(t, 0, ETBR_CTRL);
-
-	etb_writel(t, 0, ETBR_WRITEADDR);
-	etb_writel(t, 0, ETBR_READADDR);
-	etb_writel(t, 0, ETBR_TRIGGERCOUNT);
-
 	etb_lock(t);

-	length -= copy_to_user(data, buf, length);
+	length -= copy_to_user(data, (u8 *)buf + skip, length);
 	vfree(buf);
+	*ppos = pos + length;

 out:
 	mutex_unlock(&t->mutex);
@@ -348,28 +450,17 @@ static int etb_probe(struct amba_device *dev, const struct amba_id *id)
 	if (ret)
 		goto out;

+	mutex_lock(&t->mutex);
 	t->etb_regs = ioremap_nocache(dev->res.start, resource_size(&dev->res));
 	if (!t->etb_regs) {
 		ret = -ENOMEM;
 		goto out_release;
 	}

+	t->dev = &dev->dev;
+	t->dump_initial_etb = true;
 	amba_set_drvdata(dev, t);

-	etb_miscdev.parent = &dev->dev;
-
-	ret = misc_register(&etb_miscdev);
-	if (ret)
-		goto out_unmap;
-
-	t->emu_clk = clk_get(&dev->dev, "emu_src_ck");
-	if (IS_ERR(t->emu_clk)) {
-		dev_dbg(&dev->dev, "Failed to obtain emu_src_ck.\n");
-		return -EFAULT;
-	}
-
-	clk_enable(t->emu_clk);
-
 	etb_unlock(t);
 	t->etb_bufsz = etb_readl(t, ETBR_DEPTH);
 	dev_dbg(&dev->dev, "Size: %x\n", t->etb_bufsz);
@@ -378,6 +469,20 @@ static int etb_probe(struct amba_device *dev, const struct amba_id *id)
 	etb_writel(t, 0, ETBR_CTRL);
 	etb_writel(t, 0x1000, ETBR_FORMATTERCTRL);
 	etb_lock(t);
+	mutex_unlock(&t->mutex);
+
+	etb_miscdev.parent = &dev->dev;
+
+	ret = misc_register(&etb_miscdev);
+	if (ret)
+		goto out_unmap;
+
+	/* Get optional clock. Currently used to select clock source on omap3 */
+	t->emu_clk = clk_get(&dev->dev, "emu_src_ck");
+	if (IS_ERR(t->emu_clk))
+		dev_dbg(&dev->dev, "Failed to obtain emu_src_ck.\n");
+	else
+		clk_enable(t->emu_clk);

 	dev_dbg(&dev->dev, "ETB AMBA driver initialized.\n");

@@ -385,10 +490,13 @@ out:
 	return ret;

 out_unmap:
+	mutex_lock(&t->mutex);
 	amba_set_drvdata(dev, NULL);
 	iounmap(t->etb_regs);
+	t->etb_regs = NULL;

 out_release:
+	mutex_unlock(&t->mutex);
 	amba_release_regions(dev);

 	return ret;
@@ -403,8 +511,10 @@ static int etb_remove(struct amba_device *dev)
 	iounmap(t->etb_regs);
 	t->etb_regs = NULL;

-	clk_disable(t->emu_clk);
-	clk_put(t->emu_clk);
+	if (!IS_ERR(t->emu_clk)) {
+		clk_disable(t->emu_clk);
+		clk_put(t->emu_clk);
+	}

 	amba_release_regions(dev);

@@ -448,7 +558,10 @@ static ssize_t trace_running_store(struct kobject *kobj,
 		return -EINVAL;

 	mutex_lock(&tracer.mutex);
-	ret = value ? trace_start(&tracer) : trace_stop(&tracer);
+	if (!tracer.etb_regs)
+		ret = -ENODEV;
+	else
+		ret = value ? trace_start(&tracer) : trace_stop(&tracer);
 	mutex_unlock(&tracer.mutex);

 	return ret ? : n;
@@ -463,36 +576,50 @@ static ssize_t trace_info_show(struct kobject *kobj,
 {
 	u32 etb_wa, etb_ra, etb_st, etb_fc, etm_ctrl, etm_st;
 	int datalen;
+	int id;
+	int ret;

-	etb_unlock(&tracer);
-	datalen = etb_getdatalen(&tracer);
-	etb_wa = etb_readl(&tracer, ETBR_WRITEADDR);
-	etb_ra = etb_readl(&tracer, ETBR_READADDR);
-	etb_st = etb_readl(&tracer, ETBR_STATUS);
-	etb_fc = etb_readl(&tracer, ETBR_FORMATTERCTRL);
-	etb_lock(&tracer);
+	mutex_lock(&tracer.mutex);
+	if (tracer.etb_regs) {
+		etb_unlock(&tracer);
+		datalen = etb_getdatalen(&tracer);
+		etb_wa = etb_readl(&tracer, ETBR_WRITEADDR);
+		etb_ra = etb_readl(&tracer, ETBR_READADDR);
+		etb_st = etb_readl(&tracer, ETBR_STATUS);
+		etb_fc = etb_readl(&tracer, ETBR_FORMATTERCTRL);
+		etb_lock(&tracer);
+	} else {
+		etb_wa = etb_ra = etb_st = etb_fc = ~0;
+		datalen = -1;
+	}

-	etm_unlock(&tracer);
-	etm_ctrl = etm_readl(&tracer, ETMR_CTRL);
-	etm_st = etm_readl(&tracer, ETMR_STATUS);
-	etm_lock(&tracer);
-
-	return sprintf(buf, "Trace buffer len: %d\nComparator pairs: %d\n"
+	ret = sprintf(buf, "Trace buffer len: %d\nComparator pairs: %d\n"
 			"ETBR_WRITEADDR:\t%08x\n"
 			"ETBR_READADDR:\t%08x\n"
 			"ETBR_STATUS:\t%08x\n"
-			"ETBR_FORMATTERCTRL:\t%08x\n"
-			"ETMR_CTRL:\t%08x\n"
-			"ETMR_STATUS:\t%08x\n",
+			"ETBR_FORMATTERCTRL:\t%08x\n",
 			datalen,
 			tracer.ncmppairs,
 			etb_wa,
 			etb_ra,
 			etb_st,
-			etb_fc,
+			etb_fc
+			);
+
+	for (id = 0; id < tracer.etm_regs_count; id++) {
+		etm_unlock(&tracer, id);
+		etm_ctrl = etm_readl(&tracer, id, ETMR_CTRL);
+		etm_st = etm_readl(&tracer, id, ETMR_STATUS);
+		etm_lock(&tracer, id);
+		ret += sprintf(buf + ret, "ETMR_CTRL:\t%08x\n"
+			"ETMR_STATUS:\t%08x\n",
 			etm_ctrl,
 			etm_st
 			);
+	}
+	mutex_unlock(&tracer.mutex);
+
+	return ret;
 }

 static struct kobj_attribute trace_info_attr =
@@ -531,42 +658,260 @@ static ssize_t trace_mode_store(struct kobject *kobj,
 static struct kobj_attribute trace_mode_attr =
 	__ATTR(trace_mode, 0644, trace_mode_show, trace_mode_store);

+static ssize_t trace_contextid_size_show(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 char *buf)
+{
+	/* 0: No context id tracing, 1: One byte, 2: Two bytes, 3: Four bytes */
+	return sprintf(buf, "%d\n", (1 << tracer.etm_contextid_size) >> 1);
+}
+
+static ssize_t trace_contextid_size_store(struct kobject *kobj,
+					  struct kobj_attribute *attr,
+					  const char *buf, size_t n)
+{
+	unsigned int contextid_size;
+
+	if (sscanf(buf, "%u", &contextid_size) != 1)
+		return -EINVAL;
+
+	if (contextid_size == 3 || contextid_size > 4)
+		return -EINVAL;
+
+	mutex_lock(&tracer.mutex);
+	tracer.etm_contextid_size = fls(contextid_size);
+	mutex_unlock(&tracer.mutex);
+
+	return n;
+}
+
+static struct kobj_attribute trace_contextid_size_attr =
+	__ATTR(trace_contextid_size, 0644,
+		trace_contextid_size_show, trace_contextid_size_store);
+
+static ssize_t trace_branch_output_show(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					char *buf)
+{
+	return sprintf(buf, "%d\n", !!(tracer.flags & TRACER_BRANCHOUTPUT));
+}
+
+static ssize_t trace_branch_output_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t n)
+{
+	unsigned int branch_output;
+
+	if (sscanf(buf, "%u", &branch_output) != 1)
+		return -EINVAL;
+
+	mutex_lock(&tracer.mutex);
+	if (branch_output) {
+		tracer.flags |= TRACER_BRANCHOUTPUT;
+		/* Branch broadcasting is incompatible with the return stack */
+		tracer.flags &= ~TRACER_RETURN_STACK;
+	} else {
+		tracer.flags &= ~TRACER_BRANCHOUTPUT;
+	}
+	mutex_unlock(&tracer.mutex);
+
+	return n;
+}
+
+static struct kobj_attribute trace_branch_output_attr =
+	__ATTR(trace_branch_output, 0644,
+		trace_branch_output_show, trace_branch_output_store);
+
+static ssize_t trace_return_stack_show(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  char *buf)
+{
+	return sprintf(buf, "%d\n", !!(tracer.flags & TRACER_RETURN_STACK));
+}
+
+static ssize_t trace_return_stack_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t n)
+{
+	unsigned int return_stack;
+
+	if (sscanf(buf, "%u", &return_stack) != 1)
+		return -EINVAL;
+
+	mutex_lock(&tracer.mutex);
+	if (return_stack) {
+		tracer.flags |= TRACER_RETURN_STACK;
+		/* Return stack is incompatible with branch broadcasting */
+		tracer.flags &= ~TRACER_BRANCHOUTPUT;
+	} else {
+		tracer.flags &= ~TRACER_RETURN_STACK;
+	}
+	mutex_unlock(&tracer.mutex);
+
+	return n;
+}
+
+static struct kobj_attribute trace_return_stack_attr =
+	__ATTR(trace_return_stack, 0644,
+		trace_return_stack_show, trace_return_stack_store);
+
+static ssize_t trace_timestamp_show(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  char *buf)
+{
+	return sprintf(buf, "%d\n", !!(tracer.flags & TRACER_TIMESTAMP));
+}
+
+static ssize_t trace_timestamp_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t n)
+{
+	unsigned int timestamp;
+
+	if (sscanf(buf, "%u", &timestamp) != 1)
+		return -EINVAL;
+
+	mutex_lock(&tracer.mutex);
+	if (timestamp)
+		tracer.flags |= TRACER_TIMESTAMP;
+	else
+		tracer.flags &= ~TRACER_TIMESTAMP;
+	mutex_unlock(&tracer.mutex);
+
+	return n;
+}
+
+static struct kobj_attribute trace_timestamp_attr =
+	__ATTR(trace_timestamp, 0644,
+		trace_timestamp_show, trace_timestamp_store);
+
+static ssize_t trace_range_show(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  char *buf)
+{
+	return sprintf(buf, "%08lx %08lx\n",
+			tracer.range_start, tracer.range_end);
+}
+
+static ssize_t trace_range_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t n)
+{
+	unsigned long range_start, range_end;
+
+	if (sscanf(buf, "%lx %lx", &range_start, &range_end) != 2)
+		return -EINVAL;
+
+	mutex_lock(&tracer.mutex);
+	tracer.range_start = range_start;
+	tracer.range_end = range_end;
+	mutex_unlock(&tracer.mutex);
+
+	return n;
+}
+
+
+static struct kobj_attribute trace_range_attr =
+	__ATTR(trace_range, 0644, trace_range_show, trace_range_store);
+
+static ssize_t trace_data_range_show(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  char *buf)
+{
+	unsigned long range_start;
+	u64 range_end;
+	mutex_lock(&tracer.mutex);
+	range_start = tracer.data_range_start;
+	range_end = tracer.data_range_end;
+	if (!range_end && (tracer.flags & TRACER_TRACE_DATA))
+		range_end = 0x100000000ULL;
+	mutex_unlock(&tracer.mutex);
+	return sprintf(buf, "%08lx %08llx\n", range_start, range_end);
+}
+
+static ssize_t trace_data_range_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t n)
+{
+	unsigned long range_start;
+	u64 range_end;
+
+	if (sscanf(buf, "%lx %llx", &range_start, &range_end) != 2)
+		return -EINVAL;
+
+	mutex_lock(&tracer.mutex);
+	tracer.data_range_start = range_start;
+	tracer.data_range_end = (unsigned long)range_end;
+	if (range_end)
+		tracer.flags |= TRACER_TRACE_DATA;
+	else
+		tracer.flags &= ~TRACER_TRACE_DATA;
+	mutex_unlock(&tracer.mutex);
+
+	return n;
+}
+
+
+static struct kobj_attribute trace_data_range_attr =
+	__ATTR(trace_data_range, 0644,
+		trace_data_range_show, trace_data_range_store);
+
 static int etm_probe(struct amba_device *dev, const struct amba_id *id)
 {
 	struct tracectx *t = &tracer;
 	int ret = 0;
+	void __iomem **new_regs;
+	int new_count;
+	u32 etmccr;
+	u32 etmidr;
+	u32 etmccer = 0;
+	u8 etm_version = 0;

-	if (t->etm_regs) {
-		dev_dbg(&dev->dev, "ETM already initialized\n");
-		ret = -EBUSY;
+	mutex_lock(&t->mutex);
+	new_count = t->etm_regs_count + 1;
+	new_regs = krealloc(t->etm_regs,
+				sizeof(t->etm_regs[0]) * new_count, GFP_KERNEL);
+
+	if (!new_regs) {
+		dev_dbg(&dev->dev, "Failed to allocate ETM register array\n");
+		ret = -ENOMEM;
 		goto out;
 	}
+	t->etm_regs = new_regs;

 	ret = amba_request_regions(dev, NULL);
 	if (ret)
 		goto out;

-	t->etm_regs = ioremap_nocache(dev->res.start, resource_size(&dev->res));
-	if (!t->etm_regs) {
+	t->etm_regs[t->etm_regs_count] =
+		ioremap_nocache(dev->res.start, resource_size(&dev->res));
+	if (!t->etm_regs[t->etm_regs_count]) {
 		ret = -ENOMEM;
 		goto out_release;
 	}

-	amba_set_drvdata(dev, t);
+	amba_set_drvdata(dev, t->etm_regs[t->etm_regs_count]);

-	mutex_init(&t->mutex);
-	t->dev = &dev->dev;
-	t->flags = TRACER_CYCLE_ACC;
+	t->flags = TRACER_CYCLE_ACC | TRACER_TRACE_DATA | TRACER_BRANCHOUTPUT;
 	t->etm_portsz = 1;
+	t->etm_contextid_size = 3;

-	etm_unlock(t);
-	(void)etm_readl(t, ETMMR_PDSR);
+	etm_unlock(t, t->etm_regs_count);
+	(void)etm_readl(t, t->etm_regs_count, ETMMR_PDSR);
 	/* dummy first read */
-	(void)etm_readl(&tracer, ETMMR_OSSRR);
+	(void)etm_readl(&tracer, t->etm_regs_count, ETMMR_OSSRR);

-	t->ncmppairs = etm_readl(t, ETMR_CONFCODE) & 0xf;
-	etm_writel(t, 0x440, ETMR_CTRL);
-	etm_lock(t);
+	etmccr = etm_readl(t, t->etm_regs_count, ETMR_CONFCODE);
+	t->ncmppairs = etmccr & 0xf;
+	if (etmccr & ETMCCR_ETMIDR_PRESENT) {
+		etmidr = etm_readl(t, t->etm_regs_count, ETMR_ID);
+		etm_version = ETMIDR_VERSION(etmidr);
+		if (etm_version >= ETMIDR_VERSION_3_1)
+			etmccer = etm_readl(t, t->etm_regs_count, ETMR_CCE);
+	}
+	etm_writel(t, t->etm_regs_count, 0x441, ETMR_CTRL);
+	etm_writel(t, t->etm_regs_count, new_count, ETMR_TRACEIDR);
+	etm_lock(t, t->etm_regs_count);

 	ret = sysfs_create_file(&dev->dev.kobj,
 			&trace_running_attr.attr);
@@ -582,35 +927,100 @@ static int etm_probe(struct amba_device *dev, const struct amba_id *id)
 	if (ret)
 		dev_dbg(&dev->dev, "Failed to create trace_mode in sysfs\n");

-	dev_dbg(t->dev, "ETM AMBA driver initialized.\n");
+	ret = sysfs_create_file(&dev->dev.kobj,
+				&trace_contextid_size_attr.attr);
+	if (ret)
+		dev_dbg(&dev->dev,
+			"Failed to create trace_contextid_size in sysfs\n");
+
+	ret = sysfs_create_file(&dev->dev.kobj,
+				&trace_branch_output_attr.attr);
+	if (ret)
+		dev_dbg(&dev->dev,
+			"Failed to create trace_branch_output in sysfs\n");
+
+	if (etmccer & ETMCCER_RETURN_STACK_IMPLEMENTED) {
+		ret = sysfs_create_file(&dev->dev.kobj,
+					&trace_return_stack_attr.attr);
+		if (ret)
+			dev_dbg(&dev->dev,
+			      "Failed to create trace_return_stack in sysfs\n");
+	}
+
+	if (etmccer & ETMCCER_TIMESTAMPING_IMPLEMENTED) {
+		ret = sysfs_create_file(&dev->dev.kobj,
+					&trace_timestamp_attr.attr);
+		if (ret)
+			dev_dbg(&dev->dev,
+				"Failed to create trace_timestamp in sysfs\n");
+	}
+
+	ret = sysfs_create_file(&dev->dev.kobj, &trace_range_attr.attr);
+	if (ret)
+		dev_dbg(&dev->dev, "Failed to create trace_range in sysfs\n");
+
+	if (etm_version < ETMIDR_VERSION_PFT_1_0) {
+		ret = sysfs_create_file(&dev->dev.kobj,
+					&trace_data_range_attr.attr);
+		if (ret)
+			dev_dbg(&dev->dev,
+				"Failed to create trace_data_range in sysfs\n");
+	} else {
+		tracer.flags &= ~TRACER_TRACE_DATA;
+	}
+
+	dev_dbg(&dev->dev, "ETM AMBA driver initialized.\n");
+
+	/* Enable formatter if there are multiple trace sources */
+	if (new_count > 1)
+		t->etb_fc = ETBFF_ENFCONT | ETBFF_ENFTC;
+
+	t->etm_regs_count = new_count;

 out:
+	mutex_unlock(&t->mutex);
 	return ret;

 out_unmap:
 	amba_set_drvdata(dev, NULL);
-	iounmap(t->etm_regs);
+	iounmap(t->etm_regs[t->etm_regs_count]);

 out_release:
 	amba_release_regions(dev);

+	mutex_unlock(&t->mutex);
 	return ret;
 }

 static int etm_remove(struct amba_device *dev)
 {
-	struct tracectx *t = amba_get_drvdata(dev);
-
-	amba_set_drvdata(dev, NULL);
-
-	iounmap(t->etm_regs);
-	t->etm_regs = NULL;
-
-	amba_release_regions(dev);
+	int i;
+	struct tracectx *t = &tracer;
+	void __iomem	*etm_regs = amba_get_drvdata(dev);

 	sysfs_remove_file(&dev->dev.kobj, &trace_running_attr.attr);
 	sysfs_remove_file(&dev->dev.kobj, &trace_info_attr.attr);
 	sysfs_remove_file(&dev->dev.kobj, &trace_mode_attr.attr);
+	sysfs_remove_file(&dev->dev.kobj, &trace_range_attr.attr);
+	sysfs_remove_file(&dev->dev.kobj, &trace_data_range_attr.attr);
+
+	amba_set_drvdata(dev, NULL);
+
+	mutex_lock(&t->mutex);
+	for (i = 0; i < t->etm_regs_count; i++)
+		if (t->etm_regs[i] == etm_regs)
+			break;
+	for (; i < t->etm_regs_count - 1; i++)
+		t->etm_regs[i] = t->etm_regs[i + 1];
+	t->etm_regs_count--;
+	if (!t->etm_regs_count) {
+		kfree(t->etm_regs);
+		t->etm_regs = NULL;
+	}
+	mutex_unlock(&t->mutex);
+
+	iounmap(etm_regs);
+	amba_release_regions(dev);

 	return 0;
 }
@@ -620,6 +1030,10 @@ static struct amba_id etm_ids[] = {
 		.id	= 0x0003b921,
 		.mask	= 0x0007ffff,
 	},
+	{
+		.id	= 0x0003b950,
+		.mask	= 0x0007ffff,
+	},
 	{ 0, 0 },
 };

@@ -637,6 +1051,8 @@ static int __init etm_init(void)
 {
 	int retval;

+	mutex_init(&tracer.mutex);
+
 	retval = amba_driver_register(&etb_driver);
 	if (retval) {
 		printk(KERN_ERR "Failed to register etb\n");
@@ -13,6 +13,7 @@
 */

 #include <linux/ftrace.h>
+#include <linux/module.h>
 #include <linux/uaccess.h>

 #include <asm/cacheflush.h>
@@ -63,6 +64,20 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
 }
 #endif

+int ftrace_arch_code_modify_prepare(void)
+{
+	set_kernel_text_rw();
+	set_all_modules_text_rw();
+	return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+	set_all_modules_text_ro();
+	set_kernel_text_ro();
+	return 0;
+}
+
 static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 {
 	return arm_gen_branch_link(pc, addr);
@@ -156,10 +171,8 @@ int ftrace_make_nop(struct module *mod,
 	return ret;
 }

-int __init ftrace_dyn_arch_init(void *data)
+int __init ftrace_dyn_arch_init(void)
 {
-	*(unsigned long *)data = 0;
-
 	return 0;
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
@@ -22,6 +22,7 @@
 #include <asm/memory.h>
 #include <asm/thread_info.h>
 #include <asm/pgtable.h>
+#include <asm/vfpmacros.h>

 #if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_SEMIHOSTING)
 #include CONFIG_DEBUG_LL_INCLUDE
@@ -344,6 +345,17 @@ __turn_mmu_on_loc:
 #if defined(CONFIG_SMP)
 	__CPUINIT
 ENTRY(secondary_startup)
+#ifdef CONFIG_VFP_OPT
+	ldr	r10, =(0xF << 20)
+	mcr	p15, 0, r10, c1, c0, 2
+	VFPFMRX	r10, FPEXC			@ Is the VFP enabled?
+	orr 	r10, r10, #FPEXC_EN		@ user FPEXC has the enable bit set
+	bic 	r10, r10, #FPEXC_EX		@ make sure exceptions are disabled
+	VFPFMXR	FPEXC, r10			@ enable VFP, disable any pending
+						@ exceptions, so we can get at the
+						@ rest of it
+#endif
+
 	/*
 	 * Common entry point for secondary CPUs.
 	 *
@@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = {

 static void __init pm_init(void)
 {
-	cpu_pm_register_notifier(&dbg_cpu_pm_nb);
+	if (has_ossr)
+		cpu_pm_register_notifier(&dbg_cpu_pm_nb);
 }
 #else
 static inline void pm_init(void)
@@ -42,6 +42,7 @@
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>

+#include <linux/mt_sched_mon.h>
 unsigned long irq_err_count;

 int arch_show_interrupts(struct seq_file *p, int prec)
@@ -56,6 +57,10 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	return 0;
 }

+#ifdef CONFIG_MTK_SCHED_TRACERS
+#include <trace/events/mtk_events.h>
+#endif
+
 /*
 * handle_IRQ handles all hardware IRQ's.  Decoded IRQs should
 * not come via this function.  Instead, they should provide their
@@ -65,8 +70,17 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 void handle_IRQ(unsigned int irq, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
+#ifdef CONFIG_MTK_SCHED_TRACERS
+    struct irq_desc *desc;
+#endif

 	irq_enter();
+    mt_trace_ISR_start(irq);
+#ifdef CONFIG_MTK_SCHED_TRACERS
+    desc = irq_to_desc(irq);
+    trace_irq_entry(irq,
+            (desc && desc->action && desc->action->name) ? desc->action->name : "-");
+#endif

 	/*
 	 * Some hardware gives randomly wrong interrupts.  Rather
@@ -79,7 +93,10 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
 	} else {
 		generic_handle_irq(irq);
 	}
-
+#ifdef CONFIG_MTK_SCHED_TRACERS
+    trace_irq_exit(irq);
+#endif
+    mt_trace_ISR_end(irq);
 	irq_exit();
 	set_irq_regs(old_regs);
 }
@@ -144,6 +144,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,

 static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)
 {
+	if (user_mode(regs))
+		return -1;
 	kgdb_handle_exception(1, SIGTRAP, 0, regs);

 	return 0;
@@ -151,6 +153,8 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)

 static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
 {
+	if (user_mode(regs))
+		return -1;
 	compiled_break = 1;
 	kgdb_handle_exception(1, SIGTRAP, 0, regs);

@@ -39,7 +39,7 @@
 #ifdef CONFIG_MMU
 void *module_alloc(unsigned long size)
 {
-	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+	return size == 0 ? NULL : __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
 				GFP_KERNEL, PAGE_KERNEL_EXEC, -1,
 				__builtin_return_address(0));
 }
@@ -12,6 +12,7 @@
 */
 #define pr_fmt(fmt) "hw perfevents: " fmt

+#include <linux/cpumask.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -86,6 +87,9 @@ armpmu_map_event(struct perf_event *event,
 		return armpmu_map_cache_event(cache_map, config);
 	case PERF_TYPE_RAW:
 		return armpmu_map_raw_event(raw_event_mask, config);
+	default:
+		if (event->attr.type >= PERF_TYPE_MAX)
+			return armpmu_map_raw_event(raw_event_mask, config);
 	}

 	return -ENOENT;
@@ -163,6 +167,8 @@ armpmu_stop(struct perf_event *event, int flags)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;

+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+		return;
 	/*
 	 * ARM pmu always has to update the counter, so ignore
 	 * PERF_EF_UPDATE, see comments in armpmu_start().
@@ -179,6 +185,8 @@ static void armpmu_start(struct perf_event *event, int flags)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;

+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+		return;
 	/*
 	 * ARM pmu always has to reprogram the period, so ignore
 	 * PERF_EF_RELOAD, see the comment below.
@@ -206,6 +214,9 @@ armpmu_del(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;

+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+		return;
+
 	armpmu_stop(event, PERF_EF_UPDATE);
 	hw_events->events[idx] = NULL;
 	clear_bit(idx, hw_events->used_mask);
@@ -222,6 +233,10 @@ armpmu_add(struct perf_event *event, int flags)
 	int idx;
 	int err = 0;

+	/* An event following a process won't be stopped earlier */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+		return 0;
+
 	perf_pmu_disable(event->pmu);

 	/* If we don't have a space for the counter then finish early. */
@@ -431,6 +446,10 @@ static int armpmu_event_init(struct perf_event *event)
 	int err = 0;
 	atomic_t *active_events = &armpmu->active_events;

+	if (event->cpu != -1 &&
+		!cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
+		return -ENOENT;
+
 	/* does not support taken branch sampling */
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
@@ -19,6 +19,7 @@
 #define pr_fmt(fmt) "CPU PMU: " fmt

 #include <linux/bitmap.h>
+#include <linux/cpu_pm.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
@@ -31,33 +32,36 @@
 #include <asm/pmu.h>

 /* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);

 static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
 static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);

+static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+
 /*
 * Despite the names, these two functions are CPU-specific and are used
 * by the OProfile/perf code.
 */
 const char *perf_pmu_name(void)
 {
-	if (!cpu_pmu)
+	struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+	if (!pmu)
 		return NULL;

-	return cpu_pmu->name;
+	return pmu->name;
 }
 EXPORT_SYMBOL_GPL(perf_pmu_name);

 int perf_num_counters(void)
 {
-	int max_events = 0;
+	struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);

-	if (cpu_pmu != NULL)
-		max_events = cpu_pmu->num_events;
+	if (!pmu)
+		return 0;

-	return max_events;
+	return pmu->num_events;
 }
 EXPORT_SYMBOL_GPL(perf_num_counters);

@@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 {
 	int i, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	int cpu = -1;

 	irqs = min(pmu_device->num_resources, num_possible_cpus());

 	for (i = 0; i < irqs; ++i) {
-		if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+		cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
+		if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
 			continue;
 		irq = platform_get_irq(pmu_device, i);
 		if (irq >= 0)
@@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 {
 	int i, err, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	int cpu = -1;

 	if (!pmu_device)
 		return -ENODEV;
@@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)

 	for (i = 0; i < irqs; ++i) {
 		err = 0;
+		cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
 		irq = platform_get_irq(pmu_device, i);
 		if (irq < 0)
 			continue;
@@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 		 * assume that we're running on a uniprocessor machine and
 		 * continue. Otherwise, continue without this interrupt.
 		 */
-		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+		if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
 			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
 				    irq, i);
 			continue;
@@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 			return err;
 		}

-		cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+		cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
 	}

 	return 0;
@@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	int cpu;
-	for_each_possible_cpu(cpu) {
+	for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
 		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
 		events->events = per_cpu(hw_events, cpu);
 		events->used_mask = per_cpu(used_mask, cpu);
@@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)

 	/* Ensure the PMU has sane values out of reset. */
 	if (cpu_pmu->reset)
-		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+		on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
 }

 /*
@@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
 				    unsigned long action, void *hcpu)
 {
+	struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+
 	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
 		return NOTIFY_DONE;

-	if (cpu_pmu && cpu_pmu->reset)
-		cpu_pmu->reset(cpu_pmu);
+	if (pmu && pmu->reset)
+		pmu->reset(pmu);
 	else
 		return NOTIFY_DONE;

 	return NOTIFY_OK;
 }

+static int cpu_pmu_pm_notify(struct notifier_block *b,
+				    unsigned long action, void *hcpu)
+{
+	int cpu = smp_processor_id();
+	struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
+	struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+
+	if (!pmu)
+		return NOTIFY_DONE;
+
+	if (action == CPU_PM_ENTER && pmu->save_regs) {
+		pmu->save_regs(pmu, pmuregs);
+	} else if (action == CPU_PM_EXIT && pmu->restore_regs) {
+		pmu->restore_regs(pmu, pmuregs);
+	}
+
+	return NOTIFY_OK;
+}
+
 static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
 	.notifier_call = cpu_pmu_notify,
 };

+static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = {
+	.notifier_call = cpu_pmu_pm_notify,
+};
+
 /*
 * PMU platform driver and devicetree bindings.
 */
@@ -246,6 +279,9 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 		}
 	}

+	/* assume PMU support all the CPUs in this case */
+	cpumask_setall(&pmu->valid_cpus);
+
 	put_cpu();
 	return ret;
 }
@@ -253,15 +289,10 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 static int cpu_pmu_device_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id;
-	int (*init_fn)(struct arm_pmu *);
 	struct device_node *node = pdev->dev.of_node;
 	struct arm_pmu *pmu;
-	int ret = -ENODEV;
-
-	if (cpu_pmu) {
-		pr_info("attempt to register multiple PMU devices!");
-		return -ENOSPC;
-	}
+	int ret = 0;
+	int cpu;

 	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
 	if (!pmu) {
@@ -270,8 +301,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 	}

 	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
-		init_fn = of_id->data;
-		ret = init_fn(pmu);
+		smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
+		struct device_node *ncluster;
+		int cluster = -1;
+		cpumask_t sibling_mask;
+
+		ncluster = of_parse_phandle(node, "cluster", 0);
+		if (ncluster) {
+			int len;
+			const u32 *hwid;
+			hwid = of_get_property(ncluster, "reg", &len);
+			if (hwid && len == 4)
+				cluster = be32_to_cpup(hwid);
+		}
+		/* set sibling mask to all cpu mask if socket is not specified */
+		if (cluster == -1 ||
+			cluster_to_logical_mask(cluster, &sibling_mask))
+			cpumask_setall(&sibling_mask);
+
+		smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+
+		/* now set the valid_cpus after init */
+		cpumask_copy(&pmu->valid_cpus, &sibling_mask);
 	} else {
 		ret = probe_current_pmu(pmu);
 	}
@@ -281,10 +332,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 		goto out_free;
 	}

-	cpu_pmu = pmu;
-	cpu_pmu->plat_device = pdev;
-	cpu_pmu_init(cpu_pmu);
-	ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
+	for_each_cpu_mask(cpu, pmu->valid_cpus)
+		per_cpu(cpu_pmu, cpu) = pmu;
+
+	pmu->plat_device = pdev;
+	cpu_pmu_init(pmu);
+	ret = armpmu_register(pmu, -1);

 	if (!ret)
 		return 0;
@@ -313,9 +366,17 @@ static int __init register_pmu_driver(void)
 	if (err)
 		return err;

-	err = platform_driver_register(&cpu_pmu_driver);
-	if (err)
+	err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier);
+	if (err) {
 		unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+		return err;
+	}
+
+	err = platform_driver_register(&cpu_pmu_driver);
+	if (err) {
+		cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier);
+		unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+	}

 	return err;
 }
@@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
 }
 #endif

+static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
+					struct cpupmu_regs *regs)
+{
+	unsigned int cnt;
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
+	if (!(regs->pmc & ARMV7_PMNC_E))
+		return;
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
+	asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
+	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
+	for (cnt = ARMV7_IDX_COUNTER0;
+			cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+		armv7_pmnc_select_counter(cnt);
+		asm volatile("mrc p15, 0, %0, c9, c13, 1"
+					: "=r"(regs->pmxevttype[cnt]));
+		asm volatile("mrc p15, 0, %0, c9, c13, 2"
+					: "=r"(regs->pmxevtcnt[cnt]));
+	}
+	return;
+}
+
+static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
+					struct cpupmu_regs *regs)
+{
+	unsigned int cnt;
+	if (!(regs->pmc & ARMV7_PMNC_E))
+		return;
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
+	asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
+	asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
+	for (cnt = ARMV7_IDX_COUNTER0;
+			cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+		armv7_pmnc_select_counter(cnt);
+		asm volatile("mcr p15, 0, %0, c9, c13, 1"
+					: : "r"(regs->pmxevttype[cnt]));
+		asm volatile("mcr p15, 0, %0, c9, c13, 2"
+					: : "r"(regs->pmxevtcnt[cnt]));
+	}
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
+}
+
 static void armv7pmu_enable_event(struct perf_event *event)
 {
 	unsigned long flags;
@@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->start		= armv7pmu_start;
 	cpu_pmu->stop		= armv7pmu_stop;
 	cpu_pmu->reset		= armv7pmu_reset;
+	cpu_pmu->save_regs	= armv7pmu_save_regs;
+	cpu_pmu->restore_regs	= armv7pmu_restore_regs;
 	cpu_pmu->max_period	= (1LLU << 32) - 1;
 };

@@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void)
 static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A8";
+	cpu_pmu->name		= "ARMv7_Cortex_A8";
 	cpu_pmu->map_event	= armv7_a8_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	return 0;
@@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A9";
+	cpu_pmu->name		= "ARMv7_Cortex_A9";
 	cpu_pmu->map_event	= armv7_a9_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	return 0;
@@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A5";
+	cpu_pmu->name		= "ARMv7_Cortex_A5";
 	cpu_pmu->map_event	= armv7_a5_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	return 0;
@@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A15";
+	cpu_pmu->name		= "ARMv7_Cortex_A15";
 	cpu_pmu->map_event	= armv7_a15_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A7";
+	cpu_pmu->name		= "ARMv7_Cortex_A7";
 	cpu_pmu->map_event	= armv7_a7_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
--- a/Show More
+++ b/Show More