From a29ccf6f823a84d89e1c7aaaf221cf7282022024 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Tue, 3 Jun 2008 14:59:40 +0100
Subject: [PATCH 001/853] Make console charset translation optional

By turning off the new CONSOLE_TRANSLATIONS option and dropping the
associated code and tables from the kernel, we can save about 7KiB.

Taken from linux-tiny project by Tim Bird and mangled further by dwmw2.

Signed-off-by: Tim Bird <tim.bird@am.sony.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/char/Kconfig       |  8 ++++++++
 drivers/char/Makefile      |  4 ++--
 drivers/char/vt.c          |  2 +-
 include/linux/consolemap.h | 14 ++++++++++++++
 include/linux/vt_kern.h    | 19 +++++++++++++++++++
 5 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 595a925c62a..b7f7371dee7 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -36,6 +36,14 @@ config VT
 	  If unsure, say Y, or else you won't be able to do much with your new
 	  shiny Linux system :-)
 
+config CONSOLE_TRANSLATIONS
+	depends on VT
+	default y
+	bool "Enable character translations in console" if EMBEDDED
+	---help---
+	  This enables support for font mapping and Unicode translation
+	  on virtual consoles.
+
 config VT_CONSOLE
 	bool "Support for console on virtual terminal" if EMBEDDED
 	depends on VT
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 4c1c584e9eb..6ef173cab14 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -12,8 +12,8 @@ obj-y	 += mem.o random.o tty_io.o n_tty.o tty_ioctl.o
 obj-$(CONFIG_LEGACY_PTYS)	+= pty.o
 obj-$(CONFIG_UNIX98_PTYS)	+= pty.o
 obj-y				+= misc.o
-obj-$(CONFIG_VT)		+= vt_ioctl.o vc_screen.o consolemap.o \
-				   consolemap_deftbl.o selection.o keyboard.o
+obj-$(CONFIG_VT)		+= vt_ioctl.o vc_screen.o selection.o keyboard.o
+obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
 obj-$(CONFIG_HW_CONSOLE)	+= vt.o defkeymap.o
 obj-$(CONFIG_AUDIT)		+= tty_audit.o
 obj-$(CONFIG_MAGIC_SYSRQ)	+= sysrq.o
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index fa1ffbf2c62..18b7fb06dac 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -2208,7 +2208,7 @@ rescan_last_byte:
 			c = 0xfffd;
 		    tc = c;
 		} else {	/* no utf or alternate charset mode */
-		    tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
+		    tc = vc_translate(vc, c);
 		}
 
 		param.c = tc;
diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index e2bf7e5db39..c4811da1338 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -3,6 +3,9 @@
  *
  * Interface between console.c, selection.c  and consolemap.c
  */
+#ifndef __LINUX_CONSOLEMAP_H__
+#define __LINUX_CONSOLEMAP_H__
+
 #define LAT1_MAP 0
 #define GRAF_MAP 1
 #define IBMPC_MAP 2
@@ -10,6 +13,7 @@
 
 #include <linux/types.h>
 
+#ifdef CONFIG_CONSOLE_TRANSLATIONS
 struct vc_data;
 
 extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
@@ -18,3 +22,13 @@ extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
 extern u32 conv_8bit_to_uni(unsigned char c);
 extern int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
+#else
+#define inverse_translate(conp, glyph, uni) ((uint16_t)(glyph))
+#define set_translate(m, vc) ((unsigned short *)NULL)
+#define conv_uni_to_pc(conp, ucs) ((int) ((ucs) > 0xff ? -1 : (ucs)))
+#define conv_8bit_to_uni(c) ((uint32_t)(c))
+#define conv_uni_to_8bit(c) ((int) ((c) & 0xff))
+#define console_map_init(c) do { ; } while (0)
+#endif /* CONFIG_CONSOLE_TRANSLATIONS */
+
+#endif /* __LINUX_CONSOLEMAP_H__ */
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 9448ffbdcbf..14c0e91be9b 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -12,6 +12,7 @@
 #include <linux/mutex.h>
 #include <linux/console_struct.h>
 #include <linux/mm.h>
+#include <linux/consolemap.h>
 
 /*
  * Presently, a lot of graphics programs do not restore the contents of
@@ -54,6 +55,7 @@ void redraw_screen(struct vc_data *vc, int is_switch);
 struct tty_struct;
 int tioclinux(struct tty_struct *tty, unsigned long arg);
 
+#ifdef CONFIG_CONSOLE_TRANSLATIONS
 /* consolemap.c */
 
 struct unimapinit;
@@ -71,6 +73,23 @@ void con_free_unimap(struct vc_data *vc);
 void con_protect_unimap(struct vc_data *vc, int rdonly);
 int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc);
 
+#define vc_translate(vc, c) ((vc)->vc_translate[(c) |			\
+					((vc)->vc_toggle_meta ? 0x80 : 0)])
+#else
+#define con_set_trans_old(arg) (0)
+#define con_get_trans_old(arg) (-EINVAL)
+#define con_set_trans_new(arg) (0)
+#define con_get_trans_new(arg) (-EINVAL)
+#define con_clear_unimap(vc, ui) (0)
+#define con_set_unimap(vc, ct, list) (0)
+#define con_set_default_unimap(vc) (0)
+#define con_copy_unimap(d, s) (0)
+#define con_get_unimap(vc, ct, uct, list) (-EINVAL)
+#define con_free_unimap(vc) do { ; } while (0)
+
+#define vc_translate(vc, c) (c)
+#endif
+
 /* vt.c */
 int vt_waitactive(int vt);
 void change_console(struct vc_data *new_vc);
-- 
GitLab


From 4a5e3638b11978262ab76bbb2062e57fefaaedba Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 15 Jul 2008 09:42:57 -0600
Subject: [PATCH 002/853] ACPI: stop complaints about interrupt link End Tags
 and blank IRQ descriptors

Silently ignore _PRS End Tags.  We already ignore Start Dependent Functions in
_PRS, and we already ignore End Tags in _CRS, so we might as well ignore End
Tags in _PRS as well.

Silently ignore _PRS IRQ descriptors that mention no interrupts.  The spec
allows this (section 6.4.2.1 in ACPI 3.0b spec), and it probably means the
interrupt link can't be configured at all.

This patch doesn't change any functional behavior; it just removes confusing
complaints like these:

    ACPI: Blank IRQ resource
    ACPI: Resource is not an IRQ entry

when parsing _PRS data "23 00 00 18 79 00" from an IBM xSeries 335 dual
Pentium IV Xeon 2.40 GHz machine.  For more details, see
    http://bugzilla.kernel.org/show_bug.cgi?id=11049

The "23 00 00 18" part is a three-byte-long small IRQ resource with no bits set
in the IRQ mask ("00 00"), and level-triggered, active low, shareable ("18").

The "79 00" is an End Tag (type 0x7).  It is superfluous since there is no
Start Dependent Function tag and there are no resources after it, but it is
harmless.

Thanks to Gabriele Trombetti <g.trombetti.lkrnl1213@logicschema.com>
(aka Kurk) for reporting this and testing the patch.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/acpi/pci_link.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 233c40c5168..89f3b2abfdc 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -113,20 +113,23 @@ acpi_pci_link_check_possible(struct acpi_resource *resource, void *context)
 
 	switch (resource->type) {
 	case ACPI_RESOURCE_TYPE_START_DEPENDENT:
+	case ACPI_RESOURCE_TYPE_END_TAG:
 		return AE_OK;
 	case ACPI_RESOURCE_TYPE_IRQ:
 		{
 			struct acpi_resource_irq *p = &resource->data.irq;
 			if (!p || !p->interrupt_count) {
-				printk(KERN_WARNING PREFIX "Blank IRQ resource\n");
+				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+						  "Blank _PRS IRQ resource\n"));
 				return AE_OK;
 			}
 			for (i = 0;
 			     (i < p->interrupt_count
 			      && i < ACPI_PCI_LINK_MAX_POSSIBLE); i++) {
 				if (!p->interrupts[i]) {
-					printk(KERN_WARNING PREFIX "Invalid IRQ %d\n",
-						      p->interrupts[i]);
+					printk(KERN_WARNING PREFIX
+					       "Invalid _PRS IRQ %d\n",
+					       p->interrupts[i]);
 					continue;
 				}
 				link->irq.possible[i] = p->interrupts[i];
@@ -143,15 +146,16 @@ acpi_pci_link_check_possible(struct acpi_resource *resource, void *context)
 			    &resource->data.extended_irq;
 			if (!p || !p->interrupt_count) {
 				printk(KERN_WARNING PREFIX
-					      "Blank EXT IRQ resource\n");
+					      "Blank _PRS EXT IRQ resource\n");
 				return AE_OK;
 			}
 			for (i = 0;
 			     (i < p->interrupt_count
 			      && i < ACPI_PCI_LINK_MAX_POSSIBLE); i++) {
 				if (!p->interrupts[i]) {
-					printk(KERN_WARNING PREFIX "Invalid IRQ %d\n",
-						      p->interrupts[i]);
+					printk(KERN_WARNING PREFIX
+					       "Invalid _PRS IRQ %d\n",
+					       p->interrupts[i]);
 					continue;
 				}
 				link->irq.possible[i] = p->interrupts[i];
@@ -163,7 +167,8 @@ acpi_pci_link_check_possible(struct acpi_resource *resource, void *context)
 			break;
 		}
 	default:
-		printk(KERN_ERR PREFIX "Resource is not an IRQ entry\n");
+		printk(KERN_ERR PREFIX "_PRS resource type 0x%x isn't an IRQ\n",
+		       resource->type);
 		return AE_OK;
 	}
 
@@ -199,6 +204,9 @@ acpi_pci_link_check_current(struct acpi_resource *resource, void *context)
 
 
 	switch (resource->type) {
+	case ACPI_RESOURCE_TYPE_START_DEPENDENT:
+	case ACPI_RESOURCE_TYPE_END_TAG:
+		return AE_OK;
 	case ACPI_RESOURCE_TYPE_IRQ:
 		{
 			struct acpi_resource_irq *p = &resource->data.irq;
@@ -208,7 +216,7 @@ acpi_pci_link_check_current(struct acpi_resource *resource, void *context)
 				 * particularly those those w/ _STA disabled
 				 */
 				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-						  "Blank IRQ resource\n"));
+						  "Blank _CRS IRQ resource\n"));
 				return AE_OK;
 			}
 			*irq = p->interrupts[0];
@@ -224,7 +232,7 @@ acpi_pci_link_check_current(struct acpi_resource *resource, void *context)
 				 * return at least 1 IRQ
 				 */
 				printk(KERN_WARNING PREFIX
-					      "Blank EXT IRQ resource\n");
+					      "Blank _CRS EXT IRQ resource\n");
 				return AE_OK;
 			}
 			*irq = p->interrupts[0];
@@ -232,10 +240,11 @@ acpi_pci_link_check_current(struct acpi_resource *resource, void *context)
 		}
 		break;
 	default:
-		printk(KERN_ERR PREFIX "Resource %d isn't an IRQ\n", resource->type);
-	case ACPI_RESOURCE_TYPE_END_TAG:
+		printk(KERN_ERR PREFIX "_CRS resource type 0x%x isn't an IRQ\n",
+		       resource->type);
 		return AE_OK;
 	}
+
 	return AE_CTRL_TERMINATE;
 }
 
-- 
GitLab


From c2c789057f075022658b38b498755c29c1ba8055 Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Thu, 17 Jul 2008 10:46:05 +0800
Subject: [PATCH 003/853] ACPI: Ignore _BQC object when registering backlight
 device

According to the ACPI spec, the _BCL and _BCM objects are required if an
integrated LCD is present and supports brightness levels, while _BQC is
optional. So _BQC is no longer required in order to register the backlight
device.
When there is no _BQC object, the current brightness will be set to the
maximum.

http://bugzilla.kernel.org/show_bug.cgi?id=10206

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Zhang Rui  <rui.zhang@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/acpi/video.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 64c889331f3..e32b6c14d92 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -741,7 +741,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 
 	max_level = acpi_video_init_brightness(device);
 
-	if (device->cap._BCL && device->cap._BCM && device->cap._BQC && max_level > 0){
+	if (device->cap._BCL && device->cap._BCM && max_level > 0) {
 		int result;
 		static int count = 0;
 		char *name;
@@ -753,7 +753,17 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		device->backlight = backlight_device_register(name,
 			NULL, device, &acpi_backlight_ops);
 		device->backlight->props.max_brightness = device->brightness->count-3;
-		device->backlight->props.brightness = acpi_video_get_brightness(device->backlight);
+		/*
+		 * If there exists the _BQC object, the _BQC object will be
+		 * called to get the current backlight brightness. Otherwise
+		 * the brightness will be set to the maximum.
+		 */
+		if (device->cap._BQC)
+			device->backlight->props.brightness =
+				acpi_video_get_brightness(device->backlight);
+		else
+			device->backlight->props.brightness =
+				device->backlight->props.max_brightness;
 		backlight_update_status(device->backlight);
 		kfree(name);
 
-- 
GitLab


From ea51011a27db48ea0a80a5e20de3969b292d5d4d Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Mon, 14 Jul 2008 15:14:03 +0800
Subject: [PATCH 004/853] ACPI : Set FAN device to correct state in boot phase

Subject:ACPI: Set FAN device to correct state in boot phase
From: Zhao Yakui <yakui.zhao@intel.com>

On some laptops the FAN device may be turned on when the ACPI FAN driver is
loaded. But if the temperature is below the threshold, the corresponding
FAN device should be turned off while the thermal driver is being loaded.

So it is necessary to set the FAN device to the correct state while loading
the thermal driver.

http://bugzilla.kernel.org/show_bug.cgi?id=8049

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/acpi/thermal.c | 43 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 84c795fb9b1..9adfd180df6 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -769,6 +769,47 @@ static void acpi_thermal_run(unsigned long data)
 		acpi_os_execute(OSL_GPE_HANDLER, acpi_thermal_check, (void *)data);
 }
 
+static void acpi_thermal_active_off(void *data)
+{
+	int result = 0;
+	struct acpi_thermal *tz = data;
+	int i = 0;
+	int j = 0;
+	struct acpi_thermal_active *active = NULL;
+
+	if (!tz) {
+		printk(KERN_ERR PREFIX "Invalid (NULL) context\n");
+		return;
+	}
+
+	result = acpi_thermal_get_temperature(tz);
+	if (result)
+		return;
+
+	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
+		active = &(tz->trips.active[i]);
+		if (!active || !active->flags.valid)
+			break;
+		if (tz->temperature >= active->temperature) {
+			/*
+			 * The zone temperature is at or above this active
+			 * threshold, so there is no need to turn off the
+			 * associated active cooling devices.
+			 */
+			continue;
+		}
+		/*
+		 * Below Threshold?
+		 * ----------------
+		 * Turn OFF all cooling devices associated with this
+		 * threshold.
+		 */
+		for (j = 0; j < active->devices.count; j++)
+			result = acpi_bus_set_power(active->devices.handles[j],
+						    ACPI_STATE_D3);
+	}
+}
+
 static void acpi_thermal_check(void *data)
 {
 	int result = 0;
@@ -1624,6 +1665,8 @@ static int acpi_thermal_add(struct acpi_device *device)
 
 	init_timer(&tz->timer);
 
+	acpi_thermal_active_off(tz);
+
 	acpi_thermal_check(tz);
 
 	status = acpi_install_notify_handler(device->handle,
-- 
GitLab


From b1d77fae0c429d1be84ca0c9e627d9ab0e2a6d0b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 18 Jul 2008 01:42:20 +0200
Subject: [PATCH 005/853] Revert "Fix FADT parsing"

This reverts commit 01a5bba576b9364b33f61f0cd9fa70c2cf5535e2.

There seem to be some FADTs around with bogus information
in the v2 fields. Revert this patch for now until
this can be properly resolved.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/acpi/tables/tbfadt.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/tables/tbfadt.c b/drivers/acpi/tables/tbfadt.c
index ccb5b64bbef..a4a41ba2484 100644
--- a/drivers/acpi/tables/tbfadt.c
+++ b/drivers/acpi/tables/tbfadt.c
@@ -124,7 +124,7 @@ static struct acpi_fadt_info fadt_info_table[] = {
 
 static void inline
 acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
-			     u8 byte_width, u64 address)
+			     u8 bit_width, u64 address)
 {
 
 	/*
@@ -136,7 +136,7 @@ acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
 	/* All other fields are byte-wide */
 
 	generic_address->space_id = ACPI_ADR_SPACE_SYSTEM_IO;
-	generic_address->bit_width = byte_width << 3;
+	generic_address->bit_width = bit_width;
 	generic_address->bit_offset = 0;
 	generic_address->access_width = 0;
 }
@@ -343,11 +343,9 @@ static void acpi_tb_convert_fadt(void)
 	 *
 	 * The PM event blocks are split into two register blocks, first is the
 	 * PM Status Register block, followed immediately by the PM Enable Register
-	 * block. Each is of length (xpm1x_event_block.bit_width/2)
+	 * block. Each is of length (pm1_event_length/2)
 	 */
-	WARN_ON(ACPI_MOD_16(acpi_gbl_FADT.xpm1a_event_block.bit_width));
-	pm1_register_length = (u8) ACPI_DIV_16(acpi_gbl_FADT
-					       .xpm1a_event_block.bit_width);
+	pm1_register_length = (u8) ACPI_DIV_2(acpi_gbl_FADT.pm1_event_length);
 
 	/* The PM1A register block is required */
 
@@ -362,17 +360,14 @@ static void acpi_tb_convert_fadt(void)
 	/* The PM1B register block is optional, ignore if not present */
 
 	if (acpi_gbl_FADT.xpm1b_event_block.address) {
-		WARN_ON(ACPI_MOD_16(acpi_gbl_FADT.xpm1b_event_block.bit_width));
-		pm1_register_length = (u8) ACPI_DIV_16(acpi_gbl_FADT
-						       .xpm1b_event_block
-						       .bit_width);
 		acpi_tb_init_generic_address(&acpi_gbl_xpm1b_enable,
 					     pm1_register_length,
 					     (acpi_gbl_FADT.xpm1b_event_block.
 					      address + pm1_register_length));
 		/* Don't forget to copy space_id of the GAS */
 		acpi_gbl_xpm1b_enable.space_id =
-		    acpi_gbl_FADT.xpm1b_event_block.space_id;
+		    acpi_gbl_FADT.xpm1a_event_block.space_id;
+
 	}
 }
 
-- 
GitLab


From e1469c34eb623cd1945ef09bfd7de7bc2f9ff6b3 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 18 Jul 2008 01:43:08 +0200
Subject: [PATCH 006/853] Revert "dock: bay: Don't call acpi_walk_namespace()
 when ACPI is disabled."

Revert double commit by mistake. Noticed by Thomas Gleixner.

This reverts commit cc7e51666d82aedfd6b9a033ca1a10d71c21f1ca.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/acpi/bay.c  | 3 ---
 drivers/acpi/dock.c | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/drivers/acpi/bay.c b/drivers/acpi/bay.c
index e6caf5d42e0..61b6c5beb2d 100644
--- a/drivers/acpi/bay.c
+++ b/drivers/acpi/bay.c
@@ -377,9 +377,6 @@ static int __init bay_init(void)
 
 	INIT_LIST_HEAD(&drive_bays);
 
-	if (acpi_disabled)
-		return -ENODEV;
-
 	if (acpi_disabled)
 		return -ENODEV;
 
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index 1e872e79db3..bb7c51f712b 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -917,9 +917,6 @@ static int __init dock_init(void)
 
 	dock_station = NULL;
 
-	if (acpi_disabled)
-		return 0;
-
 	if (acpi_disabled)
 		return 0;
 
-- 
GitLab


From 725c3a2d70f958adee807c178178819a50f68a56 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 18 Jul 2008 09:12:49 +0200
Subject: [PATCH 007/853] Revert "ACPI: don't walk tables if ACPI was disabled"

This reverts commit d1857056904d5f313f11184fcfa624652ff9620a.

Double commit, noticed by Thomas Gleixner.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/acpi/glue.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 0f2dd81736b..2f173e83f8a 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -335,9 +335,6 @@ static int __init acpi_rtc_init(void)
 {
 	struct device *dev = get_rtc_dev();
 
-	if (acpi_disabled)
-		return 0;
-
 	if (acpi_disabled)
 		return 0;
 
-- 
GitLab


From 8b95d9172be7146c87e7a998310ce2919c851adc Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 14 Jul 2008 23:32:32 +0200
Subject: [PATCH 008/853] fix core/stacktrace changes on avr32, mips, sh

Fixes this type of problem:

  CC      arch/s390/kernel/stacktrace.o
arch/s390/kernel/stacktrace.c:84: warning: data definition has no type or storage class
arch/s390/kernel/stacktrace.c:84: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL_GPL'
arch/s390/kernel/stacktrace.c:84: warning: parameter names (without types) in function declaration
arch/s390/kernel/stacktrace.c:97: warning: data definition has no type or storage class
arch/s390/kernel/stacktrace.c:97: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL_GPL'
arch/s390/kernel/stacktrace.c:97: warning: parameter names (without types) in function declaration

caused by "stacktrace: export save_stack_trace[_tsk]"

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/avr32/kernel/stacktrace.c | 1 +
 arch/mips/kernel/stacktrace.c  | 1 +
 arch/sh/kernel/stacktrace.c    | 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/avr32/kernel/stacktrace.c b/arch/avr32/kernel/stacktrace.c
index f4bdb448049..c09f0d8dd67 100644
--- a/arch/avr32/kernel/stacktrace.c
+++ b/arch/avr32/kernel/stacktrace.c
@@ -10,6 +10,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/thread_info.h>
+#include <linux/module.h>
 
 register unsigned long current_frame_pointer asm("r7");
 
diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c
index 5eb4681a73d..702e2e92a1c 100644
--- a/arch/mips/kernel/stacktrace.c
+++ b/arch/mips/kernel/stacktrace.c
@@ -7,6 +7,7 @@
  */
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
+#include <linux/module.h>
 #include <asm/stacktrace.h>
 
 /*
diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c
index 1b2ae35c4a7..54d1f61aa00 100644
--- a/arch/sh/kernel/stacktrace.c
+++ b/arch/sh/kernel/stacktrace.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/thread_info.h>
+#include <linux/module.h>
 #include <asm/ptrace.h>
 
 /*
-- 
GitLab


From b8f8c3cf0a4ac0632ec3f0e15e9dc0c29de917af Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Jul 2008 17:27:28 +0200
Subject: [PATCH 009/853] nohz: prevent tick stop outside of the idle loop

Jack Ren and Eric Miao tracked down the following long standing
problem in the NOHZ code:

	scheduler switch to idle task
	enable interrupts

Window starts here

	----> interrupt happens (does not set NEED_RESCHED)
	      	irq_exit() stops the tick

	----> interrupt happens (does set NEED_RESCHED)

	return from schedule()

	cpu_idle(): preempt_disable();

Window ends here

The interrupts can happen at any point inside the race window. The
first interrupt stops the tick, the second one causes the scheduler to
rerun and switch away from idle again and we end up with the tick
disabled.

The fact that it needs two interrupts where the first one does not set
NEED_RESCHED and the second one does made the bug obscure and extremely
hard to reproduce and analyse. Kudos to Jack and Eric.

Solution: Limit the NOHZ functionality to the idle loop to make sure
that we can not run into such a situation ever again.

cpu_idle()
{
	preempt_disable();

	while(1) {
		 tick_nohz_stop_sched_tick(1); <- tell NOHZ code that we
		 			          are in the idle loop

		 while (!need_resched())
		       halt();

		 tick_nohz_restart_sched_tick(); <- disables NOHZ mode
		 preempt_enable_no_resched();
		 schedule();
		 preempt_disable();
	}
}

In hindsight we should have done this forever, but ...

/me grabs a large brown paperbag.

Debugged-by: Jack Ren <jack.ren@marvell.com>,
Debugged-by: eric miao <eric.y.miao@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/kernel/process.c              |  2 +-
 arch/avr32/kernel/process.c            |  2 +-
 arch/blackfin/kernel/process.c         |  2 +-
 arch/mips/kernel/process.c             |  2 +-
 arch/powerpc/kernel/idle.c             |  2 +-
 arch/powerpc/platforms/iseries/setup.c |  4 ++--
 arch/sh/kernel/process_32.c            |  2 +-
 arch/sparc64/kernel/process.c          |  2 +-
 arch/um/kernel/process.c               |  2 +-
 arch/x86/kernel/process_32.c           |  2 +-
 arch/x86/kernel/process_64.c           |  2 +-
 include/linux/tick.h                   |  5 +++--
 kernel/softirq.c                       |  2 +-
 kernel/time/tick-sched.c               | 12 ++++++++++--
 14 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 46bf2ede612..84f5a4c778f 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -164,7 +164,7 @@ void cpu_idle(void)
 		if (!idle)
 			idle = default_idle;
 		leds_event(led_idle_start);
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			idle();
 		leds_event(led_idle_end);
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 6cf9df17627..ff820a9e743 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -31,7 +31,7 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			cpu_idle_sleep();
 		tick_nohz_restart_sched_tick();
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 53c2cd25544..77800dd83e5 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -105,7 +105,7 @@ void cpu_idle(void)
 #endif
 		if (!idle)
 			idle = default_idle;
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			idle();
 		tick_nohz_restart_sched_tick();
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 2c09a442e5e..bdead3aad25 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -53,7 +53,7 @@ void __noreturn cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
 #ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
 			extern void smtc_idle_loop_hook(void);
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index c3cf0e8f3ac..d308a9f70f1 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -60,7 +60,7 @@ void cpu_idle(void)
 
 	set_thread_flag(TIF_POLLING_NRFLAG);
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched() && !cpu_should_die()) {
 			ppc64_runlatch_off();
 
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b72120751bb..70b688c1aef 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -561,7 +561,7 @@ static void yield_shared_processor(void)
 static void iseries_shared_idle(void)
 {
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched() && !hvlpevent_is_pending()) {
 			local_irq_disable();
 			ppc64_runlatch_off();
@@ -591,7 +591,7 @@ static void iseries_dedicated_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		if (!need_resched()) {
 			while (!need_resched()) {
 				ppc64_runlatch_off();
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index b98e37a1f54..921892c351d 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -86,7 +86,7 @@ void cpu_idle(void)
 		if (!idle)
 			idle = default_idle;
 
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			idle();
 		tick_nohz_restart_sched_tick();
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 2084f81a76e..0798928ba36 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -97,7 +97,7 @@ void cpu_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while(1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 
 		while (!need_resched() && !cpu_is_offline(cpu))
 			sparc64_yield(cpu);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 83603cfbde8..a1c6d07cac3 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -243,7 +243,7 @@ void default_idle(void)
 		if (need_resched())
 			schedule();
 
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		nsecs = disable_timer();
 		idle_sleep(nsecs);
 		tick_nohz_restart_sched_tick();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8476dfbb60..1f5fa1cf16d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -166,7 +166,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
 			void (*idle)(void);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2319f39988..c0a5c2a687e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -148,7 +148,7 @@ void cpu_idle(void)
 	current_thread_info()->status |= TS_POLLING;
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
 			void (*idle)(void);
 
diff --git a/include/linux/tick.h b/include/linux/tick.h
index a881c652f7e..d3c02695dc5 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -49,6 +49,7 @@ struct tick_sched {
 	unsigned long			check_clocks;
 	enum tick_nohz_mode		nohz_mode;
 	ktime_t				idle_tick;
+	int				inidle;
 	int				tick_stopped;
 	unsigned long			idle_jiffies;
 	unsigned long			idle_calls;
@@ -105,14 +106,14 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
-extern void tick_nohz_stop_sched_tick(void);
+extern void tick_nohz_stop_sched_tick(int inidle);
 extern void tick_nohz_restart_sched_tick(void);
 extern void tick_nohz_update_jiffies(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern void tick_nohz_stop_idle(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 # else
-static inline void tick_nohz_stop_sched_tick(void) { }
+static inline void tick_nohz_stop_sched_tick(int inidle) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
 static inline void tick_nohz_update_jiffies(void) { }
 static inline ktime_t tick_nohz_get_sleep_length(void)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 36e06174004..05f248039d7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -312,7 +312,7 @@ void irq_exit(void)
 #ifdef CONFIG_NO_HZ
 	/* Make sure that timer wheel updates are propagated */
 	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(0);
 	rcu_irq_exit();
 #endif
 	preempt_enable_no_resched();
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 86baa4f0dfe..ee962d11107 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -195,7 +195,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
  * Called either from the idle loop or from irq_exit() when an idle period was
  * just interrupted by an interrupt which did not cause a reschedule.
  */
-void tick_nohz_stop_sched_tick(void)
+void tick_nohz_stop_sched_tick(int inidle)
 {
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
 	struct tick_sched *ts;
@@ -224,6 +224,11 @@ void tick_nohz_stop_sched_tick(void)
 	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
 		goto end;
 
+	if (!inidle && !ts->inidle)
+		goto end;
+
+	ts->inidle = 1;
+
 	if (need_resched())
 		goto end;
 
@@ -372,11 +377,14 @@ void tick_nohz_restart_sched_tick(void)
 	local_irq_disable();
 	tick_nohz_stop_idle(cpu);
 
-	if (!ts->tick_stopped) {
+	if (!ts->inidle || !ts->tick_stopped) {
+		ts->inidle = 0;
 		local_irq_enable();
 		return;
 	}
 
+	ts->inidle = 0;
+
 	rcu_exit_nohz();
 
 	/* Update jiffies first */
-- 
GitLab


From 8df185a95c9b84fc0c3c02224e64fdc5b83bae34 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 8 Jul 2008 15:55:48 -0700
Subject: [PATCH 010/853] kthread: reduce stack pressure in create_kthread and
 kthreadd

  * Replace:

  	set_cpus_allowed(..., CPU_MASK_ALL)

    with:

  	set_cpus_allowed_ptr(..., CPU_MASK_ALL_PTR)

    to remove excessive stack requirements when NR_CPUS=4096.

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/kthread.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index ac3fb732664..6111c27491b 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -106,7 +106,7 @@ static void create_kthread(struct kthread_create_info *create)
 		 */
 		sched_setscheduler(create->result, SCHED_NORMAL, &param);
 		set_user_nice(create->result, KTHREAD_NICE_LEVEL);
-		set_cpus_allowed(create->result, CPU_MASK_ALL);
+		set_cpus_allowed_ptr(create->result, CPU_MASK_ALL_PTR);
 	}
 	complete(&create->done);
 }
@@ -233,7 +233,7 @@ int kthreadd(void *unused)
 	set_task_comm(tsk, "kthreadd");
 	ignore_signals(tsk);
 	set_user_nice(tsk, KTHREAD_NICE_LEVEL);
-	set_cpus_allowed(tsk, CPU_MASK_ALL);
+	set_cpus_allowed_ptr(tsk, CPU_MASK_ALL_PTR);
 
 	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
 
-- 
GitLab


From e338125b8a886923ba8367207c144764dc352584 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 19 Jul 2008 09:33:21 +0200
Subject: [PATCH 011/853] nohz: adjust tick_nohz_stop_sched_tick() call of s390
 as well

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/s390/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 85defd01d29..9839767d084 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -142,7 +142,7 @@ static void default_idle(void)
 void cpu_idle(void)
 {
 	for (;;) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			default_idle();
 		tick_nohz_restart_sched_tick();
-- 
GitLab


From 3a87208028ef59215a88a143c723ac0b83c11df0 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 09:15:49 -0300
Subject: [PATCH 012/853] ACPI: thinkpad-acpi: minor refactor on radio switch
 init

Change the code of hotkey_init, wan_init and bluetooth_init a bit to make it
much easier to add some Kconfig-selected debugging code later.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
---
 drivers/misc/thinkpad_acpi.c | 49 +++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index b5969298f3d..c800855be27 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -2167,9 +2167,10 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 		printk(TPACPI_INFO
 			"radio switch found; radios are %s\n",
 			enabled(status, 0));
+	}
+	if (tp_features.hotkey_wlsw)
 		res = add_to_attr_set(hotkey_dev_attributes,
 				&dev_attr_hotkey_radio_sw.attr);
-	}
 
 	/* For X41t, X60t, X61t Tablets... */
 	if (!res && acpi_evalf(hkey_handle, &status, "MHKG", "qd")) {
@@ -2646,18 +2647,19 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm)
 		str_supported(tp_features.bluetooth),
 		status);
 
+	if (tp_features.bluetooth &&
+	    !(status & TP_ACPI_BLUETOOTH_HWPRESENT)) {
+		/* no bluetooth hardware present in system */
+		tp_features.bluetooth = 0;
+		dbg_printk(TPACPI_DBG_INIT,
+			   "bluetooth hardware not installed\n");
+	}
+
 	if (tp_features.bluetooth) {
-		if (!(status & TP_ACPI_BLUETOOTH_HWPRESENT)) {
-			/* no bluetooth hardware present in system */
-			tp_features.bluetooth = 0;
-			dbg_printk(TPACPI_DBG_INIT,
-				   "bluetooth hardware not installed\n");
-		} else {
-			res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
-					&bluetooth_attr_group);
-			if (res)
-				return res;
-		}
+		res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+				&bluetooth_attr_group);
+		if (res)
+			return res;
 	}
 
 	return (tp_features.bluetooth)? 0 : 1;
@@ -2818,18 +2820,19 @@ static int __init wan_init(struct ibm_init_struct *iibm)
 		str_supported(tp_features.wan),
 		status);
 
+	if (tp_features.wan &&
+	    !(status & TP_ACPI_WANCARD_HWPRESENT)) {
+		/* no wan hardware present in system */
+		tp_features.wan = 0;
+		dbg_printk(TPACPI_DBG_INIT,
+			   "wan hardware not installed\n");
+	}
+
 	if (tp_features.wan) {
-		if (!(status & TP_ACPI_WANCARD_HWPRESENT)) {
-			/* no wan hardware present in system */
-			tp_features.wan = 0;
-			dbg_printk(TPACPI_DBG_INIT,
-				   "wan hardware not installed\n");
-		} else {
-			res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
-					&wan_attr_group);
-			if (res)
-				return res;
-		}
+		res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+				&wan_attr_group);
+		if (res)
+			return res;
 	}
 
 	return (tp_features.wan)? 0 : 1;
-- 
GitLab


From 733e27c1cc86afae2d9481838693661b3d839950 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 09:15:49 -0300
Subject: [PATCH 013/853] ACPI: thinkpad-acpi: consolidate wlsw notification
 function

Rename tpacpi_input_send_radiosw() to tpacpi_send_radiosw_update(), and
make it a central point to issue "radio switch changed state" notifications
by consolidating also the poll() notification in the same function.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
---
 drivers/misc/thinkpad_acpi.c | 39 ++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index c800855be27..9179f2367d4 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -1285,21 +1285,6 @@ static int hotkey_status_set(int status)
 	return 0;
 }
 
-static void tpacpi_input_send_radiosw(void)
-{
-	int wlsw;
-
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) {
-		mutex_lock(&tpacpi_inputdev_send_mutex);
-
-		input_report_switch(tpacpi_inputdev,
-				    SW_RFKILL_ALL, !!wlsw);
-		input_sync(tpacpi_inputdev);
-
-		mutex_unlock(&tpacpi_inputdev_send_mutex);
-	}
-}
-
 static void tpacpi_input_send_tabletsw(void)
 {
 	int state;
@@ -1921,6 +1906,22 @@ static struct attribute *hotkey_mask_attributes[] __initdata = {
 	&dev_attr_hotkey_wakeup_hotunplug_complete.attr,
 };
 
+static void tpacpi_send_radiosw_update(void)
+{
+	int wlsw;
+
+	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) {
+		mutex_lock(&tpacpi_inputdev_send_mutex);
+
+		input_report_switch(tpacpi_inputdev,
+				    SW_RFKILL_ALL, !!wlsw);
+		input_sync(tpacpi_inputdev);
+
+		mutex_unlock(&tpacpi_inputdev_send_mutex);
+	}
+	hotkey_radio_sw_notify_change();
+}
+
 static void hotkey_exit(void)
 {
 #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
@@ -2288,7 +2289,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	tpacpi_inputdev->close = &hotkey_inputdev_close;
 
 	hotkey_poll_setup_safe(1);
-	tpacpi_input_send_radiosw();
+	tpacpi_send_radiosw_update();
 	tpacpi_input_send_tabletsw();
 
 	return 0;
@@ -2420,8 +2421,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event)
 		case 7:
 			/* 0x7000-0x7FFF: misc */
 			if (tp_features.hotkey_wlsw && hkey == 0x7000) {
-				tpacpi_input_send_radiosw();
-				hotkey_radio_sw_notify_change();
+				tpacpi_send_radiosw_update();
 				send_acpi_ev = 0;
 				break;
 			}
@@ -2464,8 +2464,7 @@ static void hotkey_resume(void)
 		printk(TPACPI_ERR
 		       "error while trying to read hot key mask "
 		       "from firmware\n");
-	tpacpi_input_send_radiosw();
-	hotkey_radio_sw_notify_change();
+	tpacpi_send_radiosw_update();
 	hotkey_tablet_mode_notify_change();
 	hotkey_wakeup_reason_notify_change();
 	hotkey_wakeup_hotunplug_complete_notify_change();
-- 
GitLab


From 07431ec82bf9dc74b470a1d820b41c92c4d86e6f Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 09:15:50 -0300
Subject: [PATCH 014/853] ACPI: thinkpad-acpi: prepare for bluetooth and wwan
 rfkill support

Get rid of some forward declarations by moving code around; this will make
the rfkill conversion of wwan and bluetooth a bit cleaner.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
---
 drivers/misc/thinkpad_acpi.c | 154 +++++++++++++++++------------------
 1 file changed, 74 insertions(+), 80 deletions(-)

diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 9179f2367d4..743a4d6098e 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -2581,8 +2581,37 @@ enum {
 	TP_ACPI_BLUETOOTH_UNK		= 0x04,	/* unknown function */
 };
 
-static int bluetooth_get_radiosw(void);
-static int bluetooth_set_radiosw(int radio_on);
+static int bluetooth_get_radiosw(void)
+{
+	int status;
+
+	if (!tp_features.bluetooth)
+		return -ENODEV;
+
+	if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
+		return -EIO;
+
+	return (status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0;
+}
+
+static int bluetooth_set_radiosw(int radio_on)
+{
+	int status;
+
+	if (!tp_features.bluetooth)
+		return -ENODEV;
+
+	if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
+		return -EIO;
+	if (radio_on)
+		status |= TP_ACPI_BLUETOOTH_RADIOSSW;
+	else
+		status &= ~TP_ACPI_BLUETOOTH_RADIOSSW;
+	if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status))
+		return -EIO;
+
+	return 0;
+}
 
 /* sysfs bluetooth enable ---------------------------------------------- */
 static ssize_t bluetooth_enable_show(struct device *dev,
@@ -2628,6 +2657,12 @@ static const struct attribute_group bluetooth_attr_group = {
 	.attrs = bluetooth_attributes,
 };
 
+static void bluetooth_exit(void)
+{
+	sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+			&bluetooth_attr_group);
+}
+
 static int __init bluetooth_init(struct ibm_init_struct *iibm)
 {
 	int res;
@@ -2664,44 +2699,6 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm)
 	return (tp_features.bluetooth)? 0 : 1;
 }
 
-static void bluetooth_exit(void)
-{
-	sysfs_remove_group(&tpacpi_pdev->dev.kobj,
-			&bluetooth_attr_group);
-}
-
-static int bluetooth_get_radiosw(void)
-{
-	int status;
-
-	if (!tp_features.bluetooth)
-		return -ENODEV;
-
-	if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
-		return -EIO;
-
-	return ((status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0);
-}
-
-static int bluetooth_set_radiosw(int radio_on)
-{
-	int status;
-
-	if (!tp_features.bluetooth)
-		return -ENODEV;
-
-	if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
-		return -EIO;
-	if (radio_on)
-		status |= TP_ACPI_BLUETOOTH_RADIOSSW;
-	else
-		status &= ~TP_ACPI_BLUETOOTH_RADIOSSW;
-	if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status))
-		return -EIO;
-
-	return 0;
-}
-
 /* procfs -------------------------------------------------------------- */
 static int bluetooth_read(char *p)
 {
@@ -2756,8 +2753,37 @@ enum {
 	TP_ACPI_WANCARD_UNK		= 0x04,	/* unknown function */
 };
 
-static int wan_get_radiosw(void);
-static int wan_set_radiosw(int radio_on);
+static int wan_get_radiosw(void)
+{
+	int status;
+
+	if (!tp_features.wan)
+		return -ENODEV;
+
+	if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
+		return -EIO;
+
+	return (status & TP_ACPI_WANCARD_RADIOSSW) != 0;
+}
+
+static int wan_set_radiosw(int radio_on)
+{
+	int status;
+
+	if (!tp_features.wan)
+		return -ENODEV;
+
+	if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
+		return -EIO;
+	if (radio_on)
+		status |= TP_ACPI_WANCARD_RADIOSSW;
+	else
+		status &= ~TP_ACPI_WANCARD_RADIOSSW;
+	if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status))
+		return -EIO;
+
+	return 0;
+}
 
 /* sysfs wan enable ---------------------------------------------------- */
 static ssize_t wan_enable_show(struct device *dev,
@@ -2803,6 +2829,12 @@ static const struct attribute_group wan_attr_group = {
 	.attrs = wan_attributes,
 };
 
+static void wan_exit(void)
+{
+	sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+		&wan_attr_group);
+}
+
 static int __init wan_init(struct ibm_init_struct *iibm)
 {
 	int res;
@@ -2837,44 +2869,6 @@ static int __init wan_init(struct ibm_init_struct *iibm)
 	return (tp_features.wan)? 0 : 1;
 }
 
-static void wan_exit(void)
-{
-	sysfs_remove_group(&tpacpi_pdev->dev.kobj,
-		&wan_attr_group);
-}
-
-static int wan_get_radiosw(void)
-{
-	int status;
-
-	if (!tp_features.wan)
-		return -ENODEV;
-
-	if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
-		return -EIO;
-
-	return ((status & TP_ACPI_WANCARD_RADIOSSW) != 0);
-}
-
-static int wan_set_radiosw(int radio_on)
-{
-	int status;
-
-	if (!tp_features.wan)
-		return -ENODEV;
-
-	if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
-		return -EIO;
-	if (radio_on)
-		status |= TP_ACPI_WANCARD_RADIOSSW;
-	else
-		status &= ~TP_ACPI_WANCARD_RADIOSSW;
-	if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status))
-		return -EIO;
-
-	return 0;
-}
-
 /* procfs -------------------------------------------------------------- */
 static int wan_read(char *p)
 {
-- 
GitLab


From 133ec3bd3ae409895eacdce326cdc8d73c249e8a Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 09:15:50 -0300
Subject: [PATCH 015/853] ACPI: thinkpad-acpi: WLSW overrides other rfkill
 switches

On ThinkPads where the WLSW switch exists, the firmware or the hardware
ANDs the WLSW state with the device-specific switches (WWAN, Bluetooth).
It is downright impossible to enable WWAN or Bluetooth when WLSW is
blocking the radios.

This reality does not necessarily carry over to the WWAN and Bluetooth
firmware interfaces, though... so the state thinkpad-acpi was reporting
could be incorrect.

Tie the three switches in the driver so that we keep their state sane.
When WLSW is off, force the other switches to off as well.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
---
 drivers/misc/thinkpad_acpi.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 743a4d6098e..202d63e1b39 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -2588,6 +2588,10 @@ static int bluetooth_get_radiosw(void)
 	if (!tp_features.bluetooth)
 		return -ENODEV;
 
+	/* WLSW overrides bluetooth in firmware/hardware, reflect that */
+	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
+		return 0;
+
 	if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
 		return -EIO;
 
@@ -2601,6 +2605,12 @@ static int bluetooth_set_radiosw(int radio_on)
 	if (!tp_features.bluetooth)
 		return -ENODEV;
 
+	/* WLSW overrides bluetooth in firmware/hardware, but there is no
+	 * reason to risk weird behaviour. */
+	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status
+	    && radio_on)
+		return -EPERM;
+
 	if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
 		return -EIO;
 	if (radio_on)
@@ -2760,6 +2770,10 @@ static int wan_get_radiosw(void)
 	if (!tp_features.wan)
 		return -ENODEV;
 
+	/* WLSW overrides WWAN in firmware/hardware, reflect that */
+	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
+		return 0;
+
 	if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
 		return -EIO;
 
@@ -2773,6 +2787,12 @@ static int wan_set_radiosw(int radio_on)
 	if (!tp_features.wan)
 		return -ENODEV;
 
+	/* WLSW overrides bluetooth in firmware/hardware, but there is no
+	 * reason to risk weird behaviour. */
+	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status
+	    && radio_on)
+		return -EPERM;
+
 	if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
 		return -EIO;
 	if (radio_on)
-- 
GitLab


From 0e74dc2646db04b644faa8ea10ff4f408d55cf90 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 09:15:51 -0300
Subject: [PATCH 016/853] ACPI: thinkpad-acpi: add bluetooth and WWAN rfkill
 support

Add a read/write rfkill interface to the bluetooth radio switch on the
bluetooth submodule, and one for the wireless wan radio switch to the wan
submodule.

Since rfkill cares about when a switch changes state, also use WLSW
notifications to check whether the WWAN or Bluetooth switches changed
state (they are slaves of WLSW in firmware/hardware, but the thinkpad
firmware does not always export that reality properly).

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: Ivo van Doorn <IvDoorn@gmail.com>
Cc: John W. Linville <linville@tuxdriver.com>
---
 Documentation/laptops/thinkpad-acpi.txt |  22 ++-
 drivers/misc/Kconfig                    |   2 +
 drivers/misc/thinkpad_acpi.c            | 208 +++++++++++++++++++++---
 3 files changed, 200 insertions(+), 32 deletions(-)

diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 64b3f146e4b..1c1c0217ebd 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -621,7 +621,8 @@ Bluetooth
 ---------
 
 procfs: /proc/acpi/ibm/bluetooth
-sysfs device attribute: bluetooth_enable
+sysfs device attribute: bluetooth_enable (deprecated)
+sysfs rfkill class: switch "tpacpi_bluetooth_sw"
 
 This feature shows the presence and current state of a ThinkPad
 Bluetooth device in the internal ThinkPad CDC slot.
@@ -643,8 +644,12 @@ Sysfs notes:
 		0: disables Bluetooth / Bluetooth is disabled
 		1: enables Bluetooth / Bluetooth is enabled.
 
-	Note: this interface will be probably be superseded by the
-	generic rfkill class, so it is NOT to be considered stable yet.
+	Note: this interface has been superseded by the	generic rfkill
+	class.  It has been deprecated, and it will be removed in year
+	2010.
+
+	rfkill controller switch "tpacpi_bluetooth_sw": refer to
+	Documentation/rfkill.txt for details.
 
 Video output control -- /proc/acpi/ibm/video
 --------------------------------------------
@@ -1374,7 +1379,8 @@ EXPERIMENTAL: WAN
 -----------------
 
 procfs: /proc/acpi/ibm/wan
-sysfs device attribute: wwan_enable
+sysfs device attribute: wwan_enable (deprecated)
+sysfs rfkill class: switch "tpacpi_wwan_sw"
 
 This feature is marked EXPERIMENTAL because the implementation
 directly accesses hardware registers and may not work as expected. USE
@@ -1404,8 +1410,12 @@ Sysfs notes:
 		0: disables WWAN card / WWAN card is disabled
 		1: enables WWAN card / WWAN card is enabled.
 
-	Note: this interface will be probably be superseded by the
-	generic rfkill class, so it is NOT to be considered stable yet.
+	Note: this interface has been superseded by the	generic rfkill
+	class.  It has been deprecated, and it will be removed in year
+	2010.
+
+	rfkill controller switch "tpacpi_wwan_sw": refer to
+	Documentation/rfkill.txt for details.
 
 Multiple Commands, Module Parameters
 ------------------------------------
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 1921b8dbb24..b27ca91fd15 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -279,6 +279,8 @@ config THINKPAD_ACPI
 	select INPUT
 	select NEW_LEDS
 	select LEDS_CLASS
+	select NET
+	select RFKILL
 	---help---
 	  This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
 	  support for Fn-Fx key combinations, Bluetooth control, video
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 202d63e1b39..dc8d00a4570 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -68,6 +68,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/input.h>
 #include <linux/leds.h>
+#include <linux/rfkill.h>
 #include <asm/uaccess.h>
 
 #include <linux/dmi.h>
@@ -144,6 +145,12 @@ enum {
 
 #define TPACPI_MAX_ACPI_ARGS 3
 
+/* rfkill switches */
+enum {
+	TPACPI_RFK_BLUETOOTH_SW_ID = 0,
+	TPACPI_RFK_WWAN_SW_ID,
+};
+
 /* Debugging */
 #define TPACPI_LOG TPACPI_FILE ": "
 #define TPACPI_ERR	   KERN_ERR    TPACPI_LOG
@@ -905,6 +912,43 @@ static int __init tpacpi_check_std_acpi_brightness_support(void)
 	return 0;
 }
 
+static int __init tpacpi_new_rfkill(const unsigned int id,
+			struct rfkill **rfk,
+			const enum rfkill_type rfktype,
+			const char *name,
+			int (*toggle_radio)(void *, enum rfkill_state),
+			int (*get_state)(void *, enum rfkill_state *))
+{
+	int res;
+	enum rfkill_state initial_state;
+
+	*rfk = rfkill_allocate(&tpacpi_pdev->dev, rfktype);
+	if (!*rfk) {
+		printk(TPACPI_ERR
+			"failed to allocate memory for rfkill class\n");
+		return -ENOMEM;
+	}
+
+	(*rfk)->name = name;
+	(*rfk)->get_state = get_state;
+	(*rfk)->toggle_radio = toggle_radio;
+
+	if (!get_state(NULL, &initial_state))
+		(*rfk)->state = initial_state;
+
+	res = rfkill_register(*rfk);
+	if (res < 0) {
+		printk(TPACPI_ERR
+			"failed to register %s rfkill switch: %d\n",
+			name, res);
+		rfkill_free(*rfk);
+		*rfk = NULL;
+		return res;
+	}
+
+	return 0;
+}
+
 /*************************************************************************
  * thinkpad-acpi driver attributes
  */
@@ -1906,10 +1950,18 @@ static struct attribute *hotkey_mask_attributes[] __initdata = {
 	&dev_attr_hotkey_wakeup_hotunplug_complete.attr,
 };
 
+static void bluetooth_update_rfk(void);
+static void wan_update_rfk(void);
 static void tpacpi_send_radiosw_update(void)
 {
 	int wlsw;
 
+	/* Sync these BEFORE sending any rfkill events */
+	if (tp_features.bluetooth)
+		bluetooth_update_rfk();
+	if (tp_features.wan)
+		wan_update_rfk();
+
 	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) {
 		mutex_lock(&tpacpi_inputdev_send_mutex);
 
@@ -2581,6 +2633,8 @@ enum {
 	TP_ACPI_BLUETOOTH_UNK		= 0x04,	/* unknown function */
 };
 
+static struct rfkill *tpacpi_bluetooth_rfkill;
+
 static int bluetooth_get_radiosw(void)
 {
 	int status;
@@ -2590,15 +2644,29 @@ static int bluetooth_get_radiosw(void)
 
 	/* WLSW overrides bluetooth in firmware/hardware, reflect that */
 	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
-		return 0;
+		return RFKILL_STATE_HARD_BLOCKED;
 
 	if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
 		return -EIO;
 
-	return (status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0;
+	return ((status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0) ?
+		RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
 }
 
-static int bluetooth_set_radiosw(int radio_on)
+static void bluetooth_update_rfk(void)
+{
+	int status;
+
+	if (!tpacpi_bluetooth_rfkill)
+		return;
+
+	status = bluetooth_get_radiosw();
+	if (status < 0)
+		return;
+	rfkill_force_state(tpacpi_bluetooth_rfkill, status);
+}
+
+static int bluetooth_set_radiosw(int radio_on, int update_rfk)
 {
 	int status;
 
@@ -2620,6 +2688,9 @@ static int bluetooth_set_radiosw(int radio_on)
 	if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status))
 		return -EIO;
 
+	if (update_rfk)
+		bluetooth_update_rfk();
+
 	return 0;
 }
 
@@ -2634,7 +2705,8 @@ static ssize_t bluetooth_enable_show(struct device *dev,
 	if (status < 0)
 		return status;
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", status ? 1 : 0);
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			(status == RFKILL_STATE_UNBLOCKED) ? 1 : 0);
 }
 
 static ssize_t bluetooth_enable_store(struct device *dev,
@@ -2647,7 +2719,7 @@ static ssize_t bluetooth_enable_store(struct device *dev,
 	if (parse_strtoul(buf, 1, &t))
 		return -EINVAL;
 
-	res = bluetooth_set_radiosw(t);
+	res = bluetooth_set_radiosw(t, 1);
 
 	return (res) ? res : count;
 }
@@ -2667,8 +2739,27 @@ static const struct attribute_group bluetooth_attr_group = {
 	.attrs = bluetooth_attributes,
 };
 
+static int tpacpi_bluetooth_rfk_get(void *data, enum rfkill_state *state)
+{
+	int bts = bluetooth_get_radiosw();
+
+	if (bts < 0)
+		return bts;
+
+	*state = bts;
+	return 0;
+}
+
+static int tpacpi_bluetooth_rfk_set(void *data, enum rfkill_state state)
+{
+	return bluetooth_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0);
+}
+
 static void bluetooth_exit(void)
 {
+	if (tpacpi_bluetooth_rfkill)
+		rfkill_unregister(tpacpi_bluetooth_rfkill);
+
 	sysfs_remove_group(&tpacpi_pdev->dev.kobj,
 			&bluetooth_attr_group);
 }
@@ -2699,14 +2790,26 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm)
 			   "bluetooth hardware not installed\n");
 	}
 
-	if (tp_features.bluetooth) {
-		res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+	if (!tp_features.bluetooth)
+		return 1;
+
+	res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
 				&bluetooth_attr_group);
-		if (res)
-			return res;
+	if (res)
+		return res;
+
+	res = tpacpi_new_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID,
+				&tpacpi_bluetooth_rfkill,
+				RFKILL_TYPE_BLUETOOTH,
+				"tpacpi_bluetooth_sw",
+				tpacpi_bluetooth_rfk_set,
+				tpacpi_bluetooth_rfk_get);
+	if (res) {
+		bluetooth_exit();
+		return res;
 	}
 
-	return (tp_features.bluetooth)? 0 : 1;
+	return 0;
 }
 
 /* procfs -------------------------------------------------------------- */
@@ -2719,7 +2822,8 @@ static int bluetooth_read(char *p)
 		len += sprintf(p + len, "status:\t\tnot supported\n");
 	else {
 		len += sprintf(p + len, "status:\t\t%s\n",
-				(status)? "enabled" : "disabled");
+				(status == RFKILL_STATE_UNBLOCKED) ?
+					"enabled" : "disabled");
 		len += sprintf(p + len, "commands:\tenable, disable\n");
 	}
 
@@ -2735,9 +2839,9 @@ static int bluetooth_write(char *buf)
 
 	while ((cmd = next_cmd(&buf))) {
 		if (strlencmp(cmd, "enable") == 0) {
-			bluetooth_set_radiosw(1);
+			bluetooth_set_radiosw(1, 1);
 		} else if (strlencmp(cmd, "disable") == 0) {
-			bluetooth_set_radiosw(0);
+			bluetooth_set_radiosw(0, 1);
 		} else
 			return -EINVAL;
 	}
@@ -2763,6 +2867,8 @@ enum {
 	TP_ACPI_WANCARD_UNK		= 0x04,	/* unknown function */
 };
 
+static struct rfkill *tpacpi_wan_rfkill;
+
 static int wan_get_radiosw(void)
 {
 	int status;
@@ -2772,15 +2878,29 @@ static int wan_get_radiosw(void)
 
 	/* WLSW overrides WWAN in firmware/hardware, reflect that */
 	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
-		return 0;
+		return RFKILL_STATE_HARD_BLOCKED;
 
 	if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
 		return -EIO;
 
-	return (status & TP_ACPI_WANCARD_RADIOSSW) != 0;
+	return ((status & TP_ACPI_WANCARD_RADIOSSW) != 0) ?
+		RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
 }
 
-static int wan_set_radiosw(int radio_on)
+static void wan_update_rfk(void)
+{
+	int status;
+
+	if (!tpacpi_wan_rfkill)
+		return;
+
+	status = wan_get_radiosw();
+	if (status < 0)
+		return;
+	rfkill_force_state(tpacpi_wan_rfkill, status);
+}
+
+static int wan_set_radiosw(int radio_on, int update_rfk)
 {
 	int status;
 
@@ -2802,6 +2922,9 @@ static int wan_set_radiosw(int radio_on)
 	if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status))
 		return -EIO;
 
+	if (update_rfk)
+		wan_update_rfk();
+
 	return 0;
 }
 
@@ -2816,7 +2939,8 @@ static ssize_t wan_enable_show(struct device *dev,
 	if (status < 0)
 		return status;
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", status ? 1 : 0);
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			(status == RFKILL_STATE_UNBLOCKED) ? 1 : 0);
 }
 
 static ssize_t wan_enable_store(struct device *dev,
@@ -2829,7 +2953,7 @@ static ssize_t wan_enable_store(struct device *dev,
 	if (parse_strtoul(buf, 1, &t))
 		return -EINVAL;
 
-	res = wan_set_radiosw(t);
+	res = wan_set_radiosw(t, 1);
 
 	return (res) ? res : count;
 }
@@ -2849,8 +2973,27 @@ static const struct attribute_group wan_attr_group = {
 	.attrs = wan_attributes,
 };
 
+static int tpacpi_wan_rfk_get(void *data, enum rfkill_state *state)
+{
+	int wans = wan_get_radiosw();
+
+	if (wans < 0)
+		return wans;
+
+	*state = wans;
+	return 0;
+}
+
+static int tpacpi_wan_rfk_set(void *data, enum rfkill_state state)
+{
+	return wan_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0);
+}
+
 static void wan_exit(void)
 {
+	if (tpacpi_wan_rfkill)
+		rfkill_unregister(tpacpi_wan_rfkill);
+
 	sysfs_remove_group(&tpacpi_pdev->dev.kobj,
 		&wan_attr_group);
 }
@@ -2879,14 +3022,26 @@ static int __init wan_init(struct ibm_init_struct *iibm)
 			   "wan hardware not installed\n");
 	}
 
-	if (tp_features.wan) {
-		res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+	if (!tp_features.wan)
+		return 1;
+
+	res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
 				&wan_attr_group);
-		if (res)
-			return res;
+	if (res)
+		return res;
+
+	res = tpacpi_new_rfkill(TPACPI_RFK_WWAN_SW_ID,
+				&tpacpi_wan_rfkill,
+				RFKILL_TYPE_WWAN,
+				"tpacpi_wwan_sw",
+				tpacpi_wan_rfk_set,
+				tpacpi_wan_rfk_get);
+	if (res) {
+		wan_exit();
+		return res;
 	}
 
-	return (tp_features.wan)? 0 : 1;
+	return 0;
 }
 
 /* procfs -------------------------------------------------------------- */
@@ -2899,7 +3054,8 @@ static int wan_read(char *p)
 		len += sprintf(p + len, "status:\t\tnot supported\n");
 	else {
 		len += sprintf(p + len, "status:\t\t%s\n",
-				(status)? "enabled" : "disabled");
+				(status == RFKILL_STATE_UNBLOCKED) ?
+					"enabled" : "disabled");
 		len += sprintf(p + len, "commands:\tenable, disable\n");
 	}
 
@@ -2915,9 +3071,9 @@ static int wan_write(char *buf)
 
 	while ((cmd = next_cmd(&buf))) {
 		if (strlencmp(cmd, "enable") == 0) {
-			wan_set_radiosw(1);
+			wan_set_radiosw(1, 1);
 		} else if (strlencmp(cmd, "disable") == 0) {
-			wan_set_radiosw(0);
+			wan_set_radiosw(0, 1);
 		} else
 			return -EINVAL;
 	}
-- 
GitLab


From 490673dc98adfc7de1703cc88508902bd10f446b Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 09:15:51 -0300
Subject: [PATCH 017/853] ACPI: thinkpad-acpi: bump up version to 0.21

rfkill support deserves a new version checkpoint...

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
---
 Documentation/laptops/thinkpad-acpi.txt | 4 ++--
 drivers/misc/thinkpad_acpi.c            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 1c1c0217ebd..02dc748b76c 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -1,7 +1,7 @@
 		     ThinkPad ACPI Extras Driver
 
-                            Version 0.20
-                          April 09th, 2008
+                            Version 0.21
+                           May 29th, 2008
 
                Borislav Deianov <borislav@users.sf.net>
              Henrique de Moraes Holschuh <hmh@hmh.eng.br>
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index dc8d00a4570..3eb01afe430 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -21,7 +21,7 @@
  *  02110-1301, USA.
  */
 
-#define TPACPI_VERSION "0.20"
+#define TPACPI_VERSION "0.21"
 #define TPACPI_SYSFS_VERSION 0x020200
 
 /*
-- 
GitLab


From bf20e740a4bcc686de02e2fd1c1810a58872f46e Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 09:15:51 -0300
Subject: [PATCH 018/853] ACPI: thinkpad-acpi: don't misdetect in
 get_thinkpad_model_data() on -ENOMEM

Explicitly check for memory allocation failures, and return status to
indicate that we could not collect data due to errors.

This lets the driver have a far more predictable failure mode on ENOMEM in
that codepath: it will refuse to load.  This is far better than trying to
proceed with missing data which is used to detect quirks, etc.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
---
 drivers/misc/thinkpad_acpi.c | 47 +++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 3eb01afe430..d3eb7903c34 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -6340,13 +6340,18 @@ err_out:
 
 /* Probing */
 
-static void __init get_thinkpad_model_data(struct thinkpad_id_data *tp)
+/* returns 0 - probe ok, or < 0 - probe error.
+ * Probe ok doesn't mean thinkpad found.
+ * On error, kfree() cleanup on tp->* is not performed, caller must do it */
+static int __must_check __init get_thinkpad_model_data(
+						struct thinkpad_id_data *tp)
 {
 	const struct dmi_device *dev = NULL;
 	char ec_fw_string[18];
+	char const *s;
 
 	if (!tp)
-		return;
+		return -EINVAL;
 
 	memset(tp, 0, sizeof(*tp));
 
@@ -6355,12 +6360,14 @@ static void __init get_thinkpad_model_data(struct thinkpad_id_data *tp)
 	else if (dmi_name_in_vendors("LENOVO"))
 		tp->vendor = PCI_VENDOR_ID_LENOVO;
 	else
-		return;
+		return 0;
 
-	tp->bios_version_str = kstrdup(dmi_get_system_info(DMI_BIOS_VERSION),
-					GFP_KERNEL);
+	s = dmi_get_system_info(DMI_BIOS_VERSION);
+	tp->bios_version_str = kstrdup(s, GFP_KERNEL);
+	if (s && !tp->bios_version_str)
+		return -ENOMEM;
 	if (!tp->bios_version_str)
-		return;
+		return 0;
 	tp->bios_model = tp->bios_version_str[0]
 			 | (tp->bios_version_str[1] << 8);
 
@@ -6379,21 +6386,27 @@ static void __init get_thinkpad_model_data(struct thinkpad_id_data *tp)
 			ec_fw_string[strcspn(ec_fw_string, " ]")] = 0;
 
 			tp->ec_version_str = kstrdup(ec_fw_string, GFP_KERNEL);
+			if (!tp->ec_version_str)
+				return -ENOMEM;
 			tp->ec_model = ec_fw_string[0]
 					| (ec_fw_string[1] << 8);
 			break;
 		}
 	}
 
-	tp->model_str = kstrdup(dmi_get_system_info(DMI_PRODUCT_VERSION),
-					GFP_KERNEL);
-	if (tp->model_str && strnicmp(tp->model_str, "ThinkPad", 8) != 0) {
-		kfree(tp->model_str);
-		tp->model_str = NULL;
+	s = dmi_get_system_info(DMI_PRODUCT_VERSION);
+	if (s && !strnicmp(s, "ThinkPad", 8)) {
+		tp->model_str = kstrdup(s, GFP_KERNEL);
+		if (!tp->model_str)
+			return -ENOMEM;
 	}
 
-	tp->nummodel_str = kstrdup(dmi_get_system_info(DMI_PRODUCT_NAME),
-					GFP_KERNEL);
+	s = dmi_get_system_info(DMI_PRODUCT_NAME);
+	tp->nummodel_str = kstrdup(s, GFP_KERNEL);
+	if (s && !tp->nummodel_str)
+		return -ENOMEM;
+
+	return 0;
 }
 
 static int __init probe_for_thinkpad(void)
@@ -6656,7 +6669,13 @@ static int __init thinkpad_acpi_module_init(void)
 
 	/* Driver-level probe */
 
-	get_thinkpad_model_data(&thinkpad_id);
+	ret = get_thinkpad_model_data(&thinkpad_id);
+	if (ret) {
+		printk(TPACPI_ERR
+			"unable to get DMI data: %d\n", ret);
+		thinkpad_acpi_module_exit();
+		return ret;
+	}
 	ret = probe_for_thinkpad();
 	if (ret) {
 		thinkpad_acpi_module_exit();
-- 
GitLab


From f88133d76ea38761b7379d6233b752ed82250a4a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 21 Jul 2008 15:57:45 +0200
Subject: [PATCH 019/853] acpi: fix crash in core ACPI code, triggered by
 CONFIG_ACPI_PCI_SLOT=y

-tip testing found the following boot crash on 32-bit x86 (Core2Duo
laptop) yesterday:

[    5.606664] scsi4 : ata_piix
[    5.606664] scsi5 : ata_piix
[    5.606664] ACPI Error (psargs-0358): [\_SB_.PCI0.LPC_.EC__.BSTA] Namespace lookup failure, AE_NOT_FOUND
[    5.606664] ACPI Error (psparse-0530): ACPI Error (nsnames-0186): Invalid NS Node (f7c0e960) while traversing path [20080609]
[    5.606664] BUG: unable to handle kernel NULL pointer dereference at 0000000f
[    5.606664] IP: [<80339e2f>] acpi_ns_build_external_path+0x1f/0x80
[    5.609997] *pdpt = 0000000000a03001 *pde = 0000000000000000
[    5.609997] Oops: 0002 [#1] SMP
[    5.609997]
[    5.609997] Pid: 1, comm: swapper Not tainted (2.6.26-tip-03965-gbbfb62e-dirty #3153)
[    5.609997] EIP: 0060:[<80339e2f>] EFLAGS: 00010286 CPU: 0
[    5.609997] EIP is at acpi_ns_build_external_path+0x1f/0x80
[    5.609997] EAX: f7c18c18 EBX: ffffffff ECX: 00000010 EDX: 00000000
[    5.609997] ESI: f7c18c18 EDI: 00000010 EBP: f7c4dc28 ESP: f7c4dc18
[    5.609997]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[    5.609997] Process swapper (pid: 1, ti=f7c4c000 task=f7c50000 task.ti=f7c4c000)
[    5.609997] Stack: 00000000 00000000 f7c18c18 f7c4dc48 f7c4dc40 80339ed0 00000000 f7c18c18
[    5.609997]        8084c1b6 8084c1b6 f7c4dc58 8033a60a 00000000 00000010 00000000 f7c18c18
[    5.609997]        f7c4dc70 8033a68f f7c18c18 00000000 f6de7600 00000005 f7c4dc98 8033c34d
[    5.609997] Call Trace:
[    5.609997]  [<80339ed0>] ? acpi_ns_handle_to_pathname+0x40/0x72
[    5.609997]  [<8033a60a>] ? acpi_ns_print_node_pathname+0x2c/0x61
[    5.609997]  [<8033a68f>] ? acpi_ns_report_method_error+0x50/0x6d
[    5.609997]  [<8033c34d>] ? acpi_ps_parse_aml+0x149/0x2f9
[    5.609997]  [<8033d6dd>] ? acpi_ps_execute_method+0x132/0x201
[    5.609997]  [<80339d19>] ? acpi_ns_evaluate+0x1ad/0x258
[    5.609997]  [<803406c4>] ? acpi_ut_evaluate_object+0x55/0x18f
[    5.609997]  [<803408b7>] ? acpi_ut_execute_STA+0x22/0x7a
[    5.609997]  [<8033a907>] ? acpi_get_object_info+0x131/0x1be
[    5.609997]  [<80344bb2>] ? do_acpi_find_child+0x22/0x4b
[    5.609997]  [<8033b855>] ? acpi_ns_walk_namespace+0xa5/0x124
[    5.609997]  [<803394f3>] ? acpi_walk_namespace+0x54/0x74
[    5.609997]  [<80344b90>] ? do_acpi_find_child+0x0/0x4b
[    5.609997]  [<80344b85>] ? acpi_get_child+0x38/0x43
[    5.609997]  [<80344b90>] ? do_acpi_find_child+0x0/0x4b
[    5.609997]  [<804d0148>] ? ata_acpi_associate+0xb5/0x1b5
[    5.609997]  [<804c6ecb>] ? ata_scsi_add_hosts+0x8e/0xdc
[    5.609997]  [<804c40c8>] ? ata_host_register+0x9f/0x1d6
[    5.609997]  [<804cbc7f>] ? ata_pci_sff_activate_host+0x179/0x19f
[    5.609997]  [<804cdd45>] ? ata_sff_interrupt+0x0/0x1c7
[    5.609997]  [<8069b033>] ? piix_init_one+0x569/0x5b0
[    5.609997]  [<801bd400>] ? sysfs_ilookup_test+0x0/0x11
[    5.609997]  [<801987d7>] ? ilookup5_nowait+0x29/0x30
[    5.609997]  [<802efc7e>] ? pci_match_device+0x99/0xa3
[    5.609997]  [<802efd3c>] ? pci_device_probe+0x39/0x59
[    5.609997]  [<803bc4af>] ? driver_probe_device+0xa0/0x11b
[    5.609997]  [<803bc564>] ? __driver_attach+0x3a/0x59
[    5.609997]  [<803bbde3>] ? bus_for_each_dev+0x36/0x58
[    5.609997]  [<803bc354>] ? driver_attach+0x14/0x16
[    5.609997]  [<803bc52a>] ? __driver_attach+0x0/0x59
[    5.609997]  [<803bc161>] ? bus_add_driver+0x93/0x196
[    5.609997]  [<803bc773>] ? driver_register+0x71/0xcd
[    5.609997]  [<802eff05>] ? __pci_register_driver+0x3f/0x6e
[    5.609997]  [<809af7ff>] ? piix_init+0x14/0x24
[    5.609997]  [<80984568>] ? kernel_init+0x128/0x269
[    5.609997]  [<809af7eb>] ? piix_init+0x0/0x24
[    5.609997]  [<802e2758>] ? trace_hardirqs_on_thunk+0xc/0x10
[    5.609997]  [<80116aef>] ? restore_nocheck_notrace+0x0/0xe
[    5.609997]  [<80984440>] ? kernel_init+0x0/0x269
[    5.609997]  [<80984440>] ? kernel_init+0x0/0x269
[    5.609997]  [<80117d87>] ? kernel_thread_helper+0x7/0x10
[    5.609997]  =======================
[    5.609997] Code: 75 02 b3 01 8d 43 01 8b 5d fc c9 c3 55 89 e5 57 89 cf 56 53 89 d3 4b 83 ec 04 83 fb 03 89 55 f0 77 09 c6 01 5c c6 41 01 00 eb 59 <c6> 04 19 00 8b 55 f0 8d 34 11 89 c2 eb 19 8b 42 08 83 eb 05 89
[    5.609997] EIP: [<80339e2f>] acpi_ns_build_external_path+0x1f/0x80 SS:ESP 0068:f7c4dc18
[    5.613331] Kernel panic - not syncing: Fatal exception
[    5.613331] Rebooting in 1 seconds..[    4.646664] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)

I have bisected it down to:

 # bad:  [5b664cbe] Merge branch 'upstream-linus' of git://git.kernel.
 # good: [bce7f795] Linux 2.6.26
 # good: [e18425ab] Merge branch 'tracing/for-linus' of git://git.kern
 # good: [cadc7236] Merge branch 'bkl-removal' into next
 # good: [4515889a] Merge branch 'merge' of git://git.kernel.org/pub/s
 # good: [42fdd14e] Merge git://git.kernel.org/pub/scm/linux/kernel/gi
 # good: [8a0ca91f] Merge branch 'for-linus' of git://git.kernel.org/p
 # bad:  [0af4b8cb] ACPI: Introduce new device wakeup flag 'prepared'
 # good: [fe997407] PCI: construct one fakephp slot per PCI slot
 # bad:  [531f254a] PCIE: aer: use dev_printk when possible
 # bad:  [15650a20] x86/PCI: fixup early quirk probing
 # good: [0e6859d9] ACPI PM: Remove obsolete Toshiba workaround
 # bad:  [8344b566] PCI: ACPI PCI slot detection driver
 # good: [f46753c9] PCI: introduce pci_slot

 | 8344b568f5bdc7ee1bba909de3294c6348c36056 is first bad commit
 | commit 8344b568f5bdc7ee1bba909de3294c6348c36056
 | Author: Alex Chiang <achiang@hp.com>
 | Date:   Tue Jun 10 15:30:42 2008 -0600
 |
 |     PCI: ACPI PCI slot detection driver
 |
 |     Detect all physical PCI slots as described by ACPI, and create entries in
 |     /sys/bus/pci/slots/.

I.e. the new CONFIG_ACPI_PCI_SLOT=y option was causing this crash.

But the bug is not mainly in this new PCI code - that code was just
hitting the ACPI code in a new way which made ACPI break.

The crash signature shows that we are crashing on this instruction:

   movb $0x0, (%ecx, %ebx, 1)

ECX and EBX are 0x10 and -1. It's this line in
drivers/acpi/namespace/nsnames.c's acpi_ns_build_external_path():

        name_buffer[index] = 0;

I.e. name_buffer is 0x10 and index is -1.

index -1 corresponds to size 0, and name_buffer 0x10 is slab's
ZERO_SIZE_PTR special-case for zero-sized allocations.

I.e. when we called acpi_ns_handle_to_pathname(), we got required_size
of 0 due to an error condition, but this is passed to the ACPI allocator
unconditionally:

        required_size = acpi_ns_get_pathname_length(node);

        /* Validate/Allocate/Clear caller buffer */

        status = acpi_ut_initialize_buffer(buffer, required_size);
        if (ACPI_FAILURE(status)) {
                return_ACPI_STATUS(status);
        }

Where acpi_ut_initialize_buffer(), through many (unnecessary) layers,
ends up calling kzalloc(0). Which returns 0x10 and that then causes the
crash later on.

So fix both callers of acpi_ns_get_pathname_length(), which can return 0
in case of an invalid node.

Also add a WARN_ON() against zero sized allocations in
acpi_ut_initialize_buffer() to make it easier to find similar instances
of this bug.

I have tested this patch for the past 24 hours and the crash has not
reappeared.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/acpi/namespace/nsnames.c | 8 ++++++++
 drivers/acpi/utilities/utalloc.c | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/acpi/namespace/nsnames.c b/drivers/acpi/namespace/nsnames.c
index cffef1bcbdb..549db42f16c 100644
--- a/drivers/acpi/namespace/nsnames.c
+++ b/drivers/acpi/namespace/nsnames.c
@@ -137,6 +137,10 @@ char *acpi_ns_get_external_pathname(struct acpi_namespace_node *node)
 	/* Calculate required buffer size based on depth below root */
 
 	size = acpi_ns_get_pathname_length(node);
+	if (!size) {
+		ACPI_ERROR((AE_INFO, "Invalid node failure"));
+		return_PTR(NULL);
+	}
 
 	/* Allocate a buffer to be returned to caller */
 
@@ -229,6 +233,10 @@ acpi_ns_handle_to_pathname(acpi_handle target_handle,
 	/* Determine size required for the caller buffer */
 
 	required_size = acpi_ns_get_pathname_length(node);
+	if (!required_size) {
+		ACPI_ERROR((AE_INFO, "Invalid node failure"));
+		return_ACPI_STATUS(AE_ERROR);
+	}
 
 	/* Validate/Allocate/Clear caller buffer */
 
diff --git a/drivers/acpi/utilities/utalloc.c b/drivers/acpi/utilities/utalloc.c
index 3dfb8a442b2..e7bf34a7b1d 100644
--- a/drivers/acpi/utilities/utalloc.c
+++ b/drivers/acpi/utilities/utalloc.c
@@ -242,6 +242,10 @@ acpi_ut_initialize_buffer(struct acpi_buffer * buffer,
 {
 	acpi_status status = AE_OK;
 
+	if (!required_length) {
+		WARN_ON(1);
+		return AE_ERROR;
+	}
 	switch (buffer->length) {
 	case ACPI_NO_BUFFER:
 
-- 
GitLab


From 11d579ee0a19052a5a90ebfe0c39e7ed8ce8a9dc Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Sat, 28 Jun 2008 20:31:52 +0200
Subject: [PATCH 020/853] powerpc/mpc5200: Fix wrong 'no interrupt' handling in
 of_i2c

If an I2C device node does not specify an interrupt, the .irq member of the
board_info struct was set to -1. This caused crashes on following
irq_dispose_mappings. Leave it NO_IRQ as returned from irq_of_parse_and_map.
(Suggesting -1 as 'i2c-no-irq' used to be a bug in linux/i2c.h.)

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Sean MacLennan <smaclennan@pikatech.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/of_i2c.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c
index 5c015d310d4..344e1b03dd8 100644
--- a/drivers/of/of_i2c.c
+++ b/drivers/of/of_i2c.c
@@ -91,8 +91,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap,
 		}
 
 		info.irq = irq_of_parse_and_map(node, 0);
-		if (info.irq == NO_IRQ)
-			info.irq = -1;
 
 		if (of_find_i2c_driver(node, &info) < 0) {
 			irq_dispose_mapping(info.irq);
-- 
GitLab


From 6a4a636fad018500c5db7a2b56a00caeb21cbb2c Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Sun, 20 Jul 2008 11:27:22 -0400
Subject: [PATCH 021/853] powerpc/mpc5200: Add AC97 register definitions for
 the MPC52xx PSC

Needed by the PSC AC97 sound driver

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/asm-powerpc/mpc52xx_psc.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/asm-powerpc/mpc52xx_psc.h b/include/asm-powerpc/mpc52xx_psc.h
index 710c5d36efa..5467c2c0faa 100644
--- a/include/asm-powerpc/mpc52xx_psc.h
+++ b/include/asm-powerpc/mpc52xx_psc.h
@@ -132,8 +132,12 @@ struct mpc52xx_psc {
 	u8		reserved5[3];
 	u8		ctlr;		/* PSC + 0x1c */
 	u8		reserved6[3];
-	u16		ccr;		/* PSC + 0x20 */
-	u8		reserved7[14];
+	/* BitClkDiv field of CCR is byte swapped in
+	 * the hardware for mpc5200/b compatibility */
+	u32		ccr;		/* PSC + 0x20 */
+	u32		ac97_slots;	/* PSC + 0x24 */
+	u32		ac97_cmd;	/* PSC + 0x28 */
+	u32		ac97_data;	/* PSC + 0x2c */
 	u8		ivr;		/* PSC + 0x30 */
 	u8		reserved8[3];
 	u8		ip;		/* PSC + 0x34 */
-- 
GitLab


From 78f56bd3d2dbe173bf1a946b353bf72ab9c0b94e Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Sun, 20 Jul 2008 11:30:08 -0400
Subject: [PATCH 022/853] powerpc/mpc5200: Remove fsl-soc.c from mpc5200 build,
 it is not needed.

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/powerpc/platforms/52xx/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index acd2fc8cf49..981b84b7599 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,7 +1,6 @@
 config PPC_MPC52xx
 	bool "52xx-based boards"
 	depends on PPC_MULTIPLATFORM && PPC32
-	select FSL_SOC
 	select PPC_CLOCK
 
 config PPC_MPC5200_SIMPLE
-- 
GitLab


From 6d5509babce654fd9ce0ff6689dbdf6ce56c43ae Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 May 2008 17:04:53 -0600
Subject: [PATCH 023/853] powerpc/mpc5200: Make mpc5200 GPIO driver select the
 GENERIC_GPIO config

CONFIG_GENERIC_GPIO is needed for the gpio driver to work.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/powerpc/platforms/52xx/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index 981b84b7599..c1ca4f3ee5d 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -46,6 +46,7 @@ config PPC_MPC5200_BUGFIX
 config PPC_MPC5200_GPIO
 	bool "MPC5200 GPIO support"
 	depends on PPC_MPC52xx
+	select GENERIC_GPIO
 	select HAVE_GPIO_LIB
 	help
 	  Enable gpiolib support for mpc5200 based boards
-- 
GitLab


From a19dd1bd7df839c52a668abcf288c2239442c3c9 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 22 Jul 2008 01:13:54 -0600
Subject: [PATCH 024/853] powerpc/mpc5200: add PSC SICR bit definitions

Required by the PSC I2S audio driver.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/asm-powerpc/mpc52xx_psc.h | 32 ++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/asm-powerpc/mpc52xx_psc.h b/include/asm-powerpc/mpc52xx_psc.h
index 5467c2c0faa..8917ed63056 100644
--- a/include/asm-powerpc/mpc52xx_psc.h
+++ b/include/asm-powerpc/mpc52xx_psc.h
@@ -60,10 +60,12 @@
 #define MPC52xx_PSC_RXTX_FIFO_ALARM	0x0002
 #define MPC52xx_PSC_RXTX_FIFO_EMPTY	0x0001
 
-/* PSC interrupt mask bits */
+/* PSC interrupt status/mask bits */
 #define MPC52xx_PSC_IMR_TXRDY		0x0100
 #define MPC52xx_PSC_IMR_RXRDY		0x0200
 #define MPC52xx_PSC_IMR_DB		0x0400
+#define MPC52xx_PSC_IMR_TXEMP		0x0800
+#define MPC52xx_PSC_IMR_ORERR		0x1000
 #define MPC52xx_PSC_IMR_IPC		0x8000
 
 /* PSC input port change bit */
@@ -92,6 +94,34 @@
 
 #define MPC52xx_PSC_RFNUM_MASK	0x01ff
 
+#define MPC52xx_PSC_SICR_DTS1			(1 << 29)
+#define MPC52xx_PSC_SICR_SHDR			(1 << 28)
+#define MPC52xx_PSC_SICR_SIM_MASK		(0xf << 24)
+#define MPC52xx_PSC_SICR_SIM_UART		(0x0 << 24)
+#define MPC52xx_PSC_SICR_SIM_UART_DCD		(0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_8		(0x1 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_16		(0x2 << 24)
+#define MPC52xx_PSC_SICR_SIM_AC97		(0x3 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR		(0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR_DCD		(0xc << 24)
+#define MPC52xx_PSC_SICR_SIM_MIR		(0x5 << 24)
+#define MPC52xx_PSC_SICR_SIM_FIR		(0x6 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_24		(0x7 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_32		(0xf << 24)
+#define MPC52xx_PSC_SICR_GENCLK			(1 << 23)
+#define MPC52xx_PSC_SICR_I2S			(1 << 22)
+#define MPC52xx_PSC_SICR_CLKPOL			(1 << 21)
+#define MPC52xx_PSC_SICR_SYNCPOL		(1 << 20)
+#define MPC52xx_PSC_SICR_CELLSLAVE		(1 << 19)
+#define MPC52xx_PSC_SICR_CELL2XCLK		(1 << 18)
+#define MPC52xx_PSC_SICR_ESAI			(1 << 17)
+#define MPC52xx_PSC_SICR_ENAC97			(1 << 16)
+#define MPC52xx_PSC_SICR_SPI			(1 << 15)
+#define MPC52xx_PSC_SICR_MSTR			(1 << 14)
+#define MPC52xx_PSC_SICR_CPOL			(1 << 13)
+#define MPC52xx_PSC_SICR_CPHA			(1 << 12)
+#define MPC52xx_PSC_SICR_USEEOF			(1 << 11)
+#define MPC52xx_PSC_SICR_DISABLEEOF		(1 << 10)
 
 /* Structure of the hardware registers */
 struct mpc52xx_psc {
-- 
GitLab


From 79c28acb2b7d66ca48d23e1c8b5e9e043aa634f8 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Fri, 11 Jul 2008 16:17:57 -0600
Subject: [PATCH 025/853] of-bindings: Add binding documentation for SPI busses
 and devices

Add documentation about how to describe SPI busses in the device tree.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
---
 Documentation/powerpc/booting-without-of.txt | 57 ++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index aee243a846a..ee92fedada1 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -59,6 +59,7 @@ Table of Contents
       p) Freescale Synchronous Serial Interface
 	  q) USB EHCI controllers
       r) MDIO on GPIOs
+      s) SPI busses
 
   VII - Marvell Discovery mv64[345]6x System Controller chips
     1) The /system-controller node
@@ -1881,6 +1882,62 @@ platforms are moved over to use the flattened-device-tree model.
 			 &qe_pio_c 6>;
 	};
 
+    s) SPI (Serial Peripheral Interface) busses
+
+    SPI busses can be described with a node for the SPI master device
+    and a set of child nodes for each SPI slave on the bus.  For this
+    discussion, it is assumed that the system's SPI controller is in
+    SPI master mode.  This binding does not describe SPI controllers
+    in slave mode.
+
+    The SPI master node requires the following properties:
+    - #address-cells  - number of cells required to define a chip select
+			address on the SPI bus.
+    - #size-cells     - should be zero.
+    - compatible      - name of SPI bus controller following generic names
+			recommended practice.
+    No other properties are required in the SPI bus node.  It is assumed
+    that a driver for an SPI bus device will understand that it is an SPI bus.
+    However, the binding does not attempt to define the specific method for
+    assigning chip select numbers.  Since SPI chip select configuration is
+    flexible and non-standardized, it is left out of this binding with the
+    assumption that board specific platform code will be used to manage
+    chip selects.  Individual drivers can define additional properties to
+    support describing the chip select layout.
+
+    SPI slave nodes must be children of the SPI master node and can
+    contain the following properties.
+    - reg             - (required) chip select address of device.
+    - compatible      - (required) name of SPI device following generic names
+			recommended practice
+    - spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz
+    - spi-cpol        - (optional) Empty property indicating device requires
+			inverse clock polarity (CPOL) mode
+    - spi-cpha        - (optional) Empty property indicating device requires
+			shifted clock phase (CPHA) mode
+
+    SPI example for an MPC5200 SPI bus:
+		spi@f00 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
+			reg = <0xf00 0x20>;
+			interrupts = <2 13 0 2 14 0>;
+			interrupt-parent = <&mpc5200_pic>;
+
+			ethernet-switch@0 {
+				compatible = "micrel,ks8995m";
+				spi-max-frequency = <1000000>;
+				reg = <0>;
+			};
+
+			codec@1 {
+				compatible = "ti,tlv320aic26";
+				spi-max-frequency = <100000>;
+				reg = <1>;
+			};
+		};
+
 VII - Marvell Discovery mv64[345]6x System Controller chips
 ===========================================================
 
-- 
GitLab


From e4268aad42e9f37d01925022830b16bab3d0d5af Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Thu, 17 Jul 2008 11:13:32 -0600
Subject: [PATCH 026/853] PCI hotplug: fix error path in pci_slot's
 register_slot

Juha Leppänen noticed that an error path in register_slot() wasn't
returning appropriately, leading to a condition where we might access a
kfree'ed pointer, so let's fix that.

Additionally, fix up the copyright information in the file while
we're in there.

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_slot.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c
index b9ab030a52d..dd376f7ad09 100644
--- a/drivers/acpi/pci_slot.c
+++ b/drivers/acpi/pci_slot.c
@@ -6,8 +6,8 @@
  *  Thanks to Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> for code
  *  review and fixes.
  *
- *  Copyright (C) 2007 Alex Chiang <achiang@hp.com>
- *  Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
+ *  Copyright (C) 2007-2008 Hewlett-Packard Development Company, L.P.
+ *  	Alex Chiang <achiang@hp.com>
  *
  *  This program is free software; you can redistribute it and/or modify it
  *  under the terms and conditions of the GNU General Public License,
@@ -158,6 +158,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 	if (IS_ERR(pci_slot)) {
 		err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot));
 		kfree(slot);
+		return AE_OK;
 	}
 
 	slot->root_handle = parent_context->root_handle;
-- 
GitLab


From f42e86d95fa53d3a62b2795515da18b4f41b0480 Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Tue, 22 Jul 2008 14:14:18 -0700
Subject: [PATCH 027/853] PCI/DMAR: don't assume presence of RMRRs

RMRRs do not necessarily have to be present on all VT-d capable platforms.
The printk is just informational and does not need to be followed by an error
return.

Signed-off-by: Yong Y Wang <yong.y.wang@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: mark gross <mgross@linux.intel.com>
Cc: Keshavamurthy, Anil S <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/dmar.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index f941f609dbf..8bf86ae2333 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -317,10 +317,8 @@ int __init dmar_table_init(void)
 		return -ENODEV;
 	}
 
-	if (list_empty(&dmar_rmrr_units)) {
+	if (list_empty(&dmar_rmrr_units))
 		printk(KERN_INFO PREFIX "No RMRR found\n");
-		return -ENODEV;
-	}
 
 	return 0;
 }
-- 
GitLab


From dd5bdff83b19d9174126e0398b47117c3a80e22d Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@voltaire.com>
Date: Tue, 22 Jul 2008 14:14:22 -0700
Subject: [PATCH 028/853] RDMA/cma: Add RDMA_CM_EVENT_ADDR_CHANGE event

Add an RDMA_CM_EVENT_ADDR_CHANGE event that can be used by rdma-cm
consumers that wish to have their RDMA sessions always use the same
links (eg <hca/port>) as the IP stack does.  In the current code, this
does not happen when bonding is used and a fail-over has happened but
the IB link used by an already existing session is operating fine.

Use the netevent notification for sensing that a change has happened
in the IP stack, then scan the rdma-cm ID list to see if there is an
ID that is "misaligned" with respect to the IP stack, and deliver
RDMA_CM_EVENT_ADDR_CHANGE for this ID.  The consumer can act on the
event or just ignore it.

Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cma.c | 92 +++++++++++++++++++++++++++++++++++
 include/rdma/rdma_cm.h        |  3 +-
 2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index ae11d5cc74d..79792c92e6f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -168,6 +168,12 @@ struct cma_work {
 	struct rdma_cm_event	event;
 };
 
+struct cma_ndev_work {
+	struct work_struct	work;
+	struct rdma_id_private	*id;
+	struct rdma_cm_event	event;
+};
+
 union cma_ip_addr {
 	struct in6_addr ip6;
 	struct {
@@ -1598,6 +1604,30 @@ out:
 	kfree(work);
 }
 
+static void cma_ndev_work_handler(struct work_struct *_work)
+{
+	struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+	struct rdma_id_private *id_priv = work->id;
+	int destroy = 0;
+
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state == CMA_DESTROYING ||
+	    id_priv->state == CMA_DEVICE_REMOVAL)
+		goto out;
+
+	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+		cma_exch(id_priv, CMA_DESTROYING);
+		destroy = 1;
+	}
+
+out:
+	mutex_unlock(&id_priv->handler_mutex);
+	cma_deref_id(id_priv);
+	if (destroy)
+		rdma_destroy_id(&id_priv->id);
+	kfree(work);
+}
+
 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
 {
 	struct rdma_route *route = &id_priv->id.route;
@@ -2723,6 +2753,65 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 }
 EXPORT_SYMBOL(rdma_leave_multicast);
 
+static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
+{
+	struct rdma_dev_addr *dev_addr;
+	struct cma_ndev_work *work;
+
+	dev_addr = &id_priv->id.route.addr.dev_addr;
+
+	if ((dev_addr->src_dev == ndev) &&
+	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
+		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
+		       ndev->name, &id_priv->id);
+		work = kzalloc(sizeof *work, GFP_KERNEL);
+		if (!work)
+			return -ENOMEM;
+
+		INIT_WORK(&work->work, cma_ndev_work_handler);
+		work->id = id_priv;
+		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
+		atomic_inc(&id_priv->refcount);
+		queue_work(cma_wq, &work->work);
+	}
+
+	return 0;
+}
+
+static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
+			       void *ctx)
+{
+	struct net_device *ndev = (struct net_device *)ctx;
+	struct cma_device *cma_dev;
+	struct rdma_id_private *id_priv;
+	int ret = NOTIFY_DONE;
+
+	if (dev_net(ndev) != &init_net)
+		return NOTIFY_DONE;
+
+	if (event != NETDEV_BONDING_FAILOVER)
+		return NOTIFY_DONE;
+
+	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+		return NOTIFY_DONE;
+
+	mutex_lock(&lock);
+	list_for_each_entry(cma_dev, &dev_list, list)
+		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+			ret = cma_netdev_change(ndev, id_priv);
+			if (ret)
+				goto out;
+		}
+
+out:
+	mutex_unlock(&lock);
+	return ret;
+}
+
+static struct notifier_block cma_nb = {
+	.notifier_call = cma_netdev_callback
+};
+
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
@@ -2831,6 +2920,7 @@ static int cma_init(void)
 
 	ib_sa_register_client(&sa_client);
 	rdma_addr_register_client(&addr_client);
+	register_netdevice_notifier(&cma_nb);
 
 	ret = ib_register_client(&cma_client);
 	if (ret)
@@ -2838,6 +2928,7 @@ static int cma_init(void)
 	return 0;
 
 err:
+	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
@@ -2847,6 +2938,7 @@ err:
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 22bb2e7bab1..001d606517f 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -57,7 +57,8 @@ enum rdma_cm_event_type {
 	RDMA_CM_EVENT_DISCONNECTED,
 	RDMA_CM_EVENT_DEVICE_REMOVAL,
 	RDMA_CM_EVENT_MULTICAST_JOIN,
-	RDMA_CM_EVENT_MULTICAST_ERROR
+	RDMA_CM_EVENT_MULTICAST_ERROR,
+	RDMA_CM_EVENT_ADDR_CHANGE
 };
 
 enum rdma_port_space {
-- 
GitLab


From 38ca83a588662f0af684ba2567dd910a564268ab Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.co.il>
Date: Tue, 22 Jul 2008 14:14:23 -0700
Subject: [PATCH 029/853] RDMA/cma: Add RDMA_CM_EVENT_TIMEWAIT_EXIT event

Consumers that want to re-use their QPs in new connections need to
know when the QP has exited the timewait state.  Report the timewait
event through the rdma_cm.

Signed-off-by: Amir Vadai <amirv@mellanox.co.il>
Acked-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cma.c | 7 ++++++-
 include/rdma/rdma_cm.h        | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 79792c92e6f..e980ff3335d 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -920,7 +920,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	struct rdma_cm_event event;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, CMA_CONNECT))
+	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
+		cma_disable_callback(id_priv, CMA_CONNECT)) ||
+	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
+		cma_disable_callback(id_priv, CMA_DISCONNECT)))
 		return 0;
 
 	memset(&event, 0, sizeof event);
@@ -956,6 +959,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		event.event = RDMA_CM_EVENT_DISCONNECTED;
 		break;
 	case IB_CM_TIMEWAIT_EXIT:
+		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
+		break;
 	case IB_CM_MRA_RECEIVED:
 		/* ignore event */
 		goto out;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 001d606517f..df7faf09d66 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -58,7 +58,8 @@ enum rdma_cm_event_type {
 	RDMA_CM_EVENT_DEVICE_REMOVAL,
 	RDMA_CM_EVENT_MULTICAST_JOIN,
 	RDMA_CM_EVENT_MULTICAST_ERROR,
-	RDMA_CM_EVENT_ADDR_CHANGE
+	RDMA_CM_EVENT_ADDR_CHANGE,
+	RDMA_CM_EVENT_TIMEWAIT_EXIT
 };
 
 enum rdma_port_space {
-- 
GitLab


From 2f5de1512884da8c74bec2c76e8f114b972ab4be Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@voltaire.com>
Date: Tue, 22 Jul 2008 14:16:21 -0700
Subject: [PATCH 030/853] IB/iser: Add support for RDMA_CM_EVENT_ADDR_CHANGE
 event

Enhance iser to act upon notification on network stack changes that
make its RDMA connection unaligned with the link used by the stack for
the <src,dst> IPs used to establish the connection.

When RDMA_CM_EVENT_ADDR_CHANGE arrives, just disconnect the
connection, assuming that the user space iscsid daemon will reconnect,
and the new connection will be aligned with the IP stack.

Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/iser/iser_verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 3a917c1f796..63462ecca14 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -483,6 +483,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
 		break;
 	case RDMA_CM_EVENT_DISCONNECTED:
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+	case RDMA_CM_EVENT_ADDR_CHANGE:
 		iser_disconnected_handler(cma_id);
 		break;
 	default:
-- 
GitLab


From 5b673b71c8ca0fbdb99dc1b1434cfb554212d6ff Mon Sep 17 00:00:00 2001
From: Joachim Fenkes <fenkes@de.ibm.com>
Date: Tue, 22 Jul 2008 14:18:07 -0700
Subject: [PATCH 031/853] IB/ehca: Filter PATH_MIG events if QP was never armed

Certain firmware versions sometimes cause spurious PATH_MIG events to
occur during QP creation.  Filter these events by making sure PATH_MIG
events are only handed down when they actually make sense (i.e. when
the QP has been armed at least once).

Signed-off-by: Joachim Fenkes <fenkes@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ehca/ehca_classes.h | 1 +
 drivers/infiniband/hw/ehca/ehca_irq.c     | 4 ++++
 drivers/infiniband/hw/ehca/ehca_qp.c      | 2 ++
 3 files changed, 7 insertions(+)

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1e9e99a1393..0b0618edd64 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -194,6 +194,7 @@ struct ehca_qp {
 	u32 packet_count;
 	atomic_t nr_events; /* events seen */
 	wait_queue_head_t wait_completion;
+	int mig_armed;
 };
 
 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 0792d930c48..99642a6e17c 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -178,6 +178,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
 {
 	struct ib_event event;
 
+	/* PATH_MIG without the QP ever having been armed is false alarm */
+	if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
+		return;
+
 	event.device = &shca->ib_device;
 	event.event = event_type;
 
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 3f59587338e..ea13efddf17 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -1460,6 +1460,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 			goto modify_qp_exit2;
 		}
 		mqpcb->path_migration_state = attr->path_mig_state + 1;
+		if (attr->path_mig_state == IB_MIG_REARM)
+			my_qp->mig_armed = 1;
 		update_mask |=
 			EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
 	}
-- 
GitLab


From 593e4d4a05c8263a6dbd5452c21d47c5bdadd40c Mon Sep 17 00:00:00 2001
From: Joachim Fenkes <fenkes@de.ibm.com>
Date: Tue, 22 Jul 2008 14:18:08 -0700
Subject: [PATCH 032/853] IB/ehca: Use default value for Local CA ACK Delay if
 FW returns 0

Some firmware versions report a Local CA ACK Delay of 0.  In that
case, return a more sensible default value of 12 (-> 16 msec) instead.

Signed-off-by: Joachim Fenkes <fenkes@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ehca/ehca_hca.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index bc3b37d2070..46288220cfb 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -114,7 +114,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
 	}
 
 	props->max_pkeys           = 16;
-	props->local_ca_ack_delay  = min_t(u8, rblock->local_ca_ack_delay, 255);
+	/* Some FW versions say 0 here; insert sensible value in that case */
+	props->local_ca_ack_delay  = rblock->local_ca_ack_delay ?
+		min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
 	props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
 	props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
 	props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
-- 
GitLab


From 1a867c33bb65f2921351a9bdd98548bb96f0ff8c Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 22 Jul 2008 14:18:10 -0700
Subject: [PATCH 033/853] IB/ehca: Release mutex in error path of
 alloc_small_queue_page()

The pd->lock mutex is released on a successful return, so it should be
released on an error return as well.

The semantic patch that makes this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
expression l;
@@

mutex_lock(l);
... when != mutex_unlock(l)
    when any
    when strict
(
if (...) { ... when != mutex_unlock(l)
+   mutex_unlock(l);
    return ...;
}
|
mutex_unlock(l);
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ehca/ipz_pt_fn.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 661f8db6270..c3a32846543 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -163,6 +163,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
 
 out:
 	ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
+	mutex_unlock(&pd->lock);
 	return 0;
 }
 
-- 
GitLab


From 64b784b583061ebfe1d484dd1fdc5a26c6d4293f Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Tue, 22 Jul 2008 14:18:33 -0700
Subject: [PATCH 034/853] IB/sa_query: Check if sm_ah is NULL in
 ib_sa_remove_one()

If update_sm_ah() fails, it leaves the port's sm_ah as NULL.  Then if
the device or module is removed, ib_sa_remove_one() will dereference a
NULL pointer when it calls kref_put().  Fix this by testing if sm_ah
is NULL before dropping the reference.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/sa_query.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 1341de793e5..7863a50d56f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1064,7 +1064,8 @@ static void ib_sa_remove_one(struct ib_device *device)
 
 	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
 		ib_unregister_mad_agent(sa_dev->port[i].agent);
-		kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
+		if (sa_dev->port[i].sm_ah)
+			kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
 	}
 
 	kfree(sa_dev);
-- 
GitLab


From 01b3fc8b15432f7931e40fe099839e1559fb0e09 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@voltaire.com>
Date: Tue, 22 Jul 2008 14:18:34 -0700
Subject: [PATCH 035/853] IPoIB: Include err code in trace message for
 ib_sa_path_rec_get() failures

Print the return code of ib_sa_path_rec_get() if it fails to help
debug errors.

Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8be9ea0436e..f51201b17bf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -548,7 +548,7 @@ static int path_rec_start(struct net_device *dev,
 				   path_rec_completion,
 				   path, &path->query);
 	if (path->query_id < 0) {
-		ipoib_warn(priv, "ib_sa_path_rec_get failed\n");
+		ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
 		path->query = NULL;
 		return path->query_id;
 	}
-- 
GitLab


From 1ca8d15619f725e223c19137350b0336b9196193 Mon Sep 17 00:00:00 2001
From: Dotan Barak <dotanba@gmail.com>
Date: Tue, 22 Jul 2008 14:18:34 -0700
Subject: [PATCH 036/853] RDMA/iwcm: Remove IB_ACCESS_LOCAL_WRITE from remote
 QP attributes

Remove IB_ACCESS_LOCAL_WRITE from qp.qp_access_flags because this
attribute is only used to set remote permissions.

Signed-off-by: Dotan Barak <dotanba@gmail.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/iwcm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 81c9195b512..8f9509e1ebf 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -942,8 +942,7 @@ static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
 	case IW_CM_STATE_CONN_RECV:
 	case IW_CM_STATE_ESTABLISHED:
 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
-		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
-					   IB_ACCESS_REMOTE_WRITE|
+		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
 					   IB_ACCESS_REMOTE_READ;
 		ret = 0;
 		break;
-- 
GitLab


From 51f5f0ee22b98980f7816d42647467cd5f4b3b45 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 22 Jul 2008 14:19:37 -0700
Subject: [PATCH 037/853] mlx4_core: Add module parameter to enable QoS support

Add a module parameter "enable_qos" to mlx4_core.  If this param is
set, enable support for QoS in the INIT_HCA command.  By default, the
parameter is set to 0 (disabled).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/fw.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 2b5006b9be6..0851ebdddfd 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -46,6 +46,10 @@ enum {
 extern void __buggy_use_of_MLX4_GET(void);
 extern void __buggy_use_of_MLX4_PUT(void);
 
+static int enable_qos;
+module_param(enable_qos, bool, 0444);
+MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)");
+
 #define MLX4_GET(dest, source, offset)				      \
 	do {							      \
 		void *__p = (char *) (source) + (offset);	      \
@@ -737,6 +741,10 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
 		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3);
 
+	/* Enable QoS support if module parameter set */
+	if (enable_qos)
+		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2);
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
-- 
GitLab


From 47b374752aed1c029f995473c7c463ee3ae5fbaa Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Tue, 22 Jul 2008 14:19:39 -0700
Subject: [PATCH 038/853] IB/mlx4: Rename struct mlx4_lso_seg to
 mlx4_wqe_lso_seg

Make the struct name consistent with other WQE segment struct types
defined in <linux/mlx4/qp.h>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/qp.c | 2 +-
 include/linux/mlx4/qp.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 89eb6cbe592..bda0859a5ac 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1395,7 +1395,7 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 	dseg->addr       = cpu_to_be64(sg->addr);
 }
 
-static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
 {
 	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 7f128b266fa..f02e9ed36cf 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -219,7 +219,7 @@ struct mlx4_wqe_datagram_seg {
 	__be32			reservd[2];
 };
 
-struct mlx4_lso_seg {
+struct mlx4_wqe_lso_seg {
 	__be32			mss_hdr_size;
 	__be32			header[0];
 };
-- 
GitLab


From 899698dad72340b562478b8b770317f2f0fe0c09 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 22 Jul 2008 14:19:39 -0700
Subject: [PATCH 039/853] mlx4_core: Add missing FW status return code

Add ICM_ERROR firmware status code.  In mapping to errnos, -ENFILE
seems closest.

This is in preparation for providing more detailed log info using
mlx4_err() in low-level driver when a non-zero status is returned.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/cmd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 70dff94a8bc..04d5bc69a6f 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -67,6 +67,8 @@ enum {
 	CMD_STAT_BAD_INDEX	= 0x0a,
 	/* FW image corrupted: */
 	CMD_STAT_BAD_NVMEM	= 0x0b,
+	/* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */
+	CMD_STAT_ICM_ERROR	= 0x0c,
 	/* Attempt to modify a QP/EE which is not in the presumed state: */
 	CMD_STAT_BAD_QP_STATE   = 0x10,
 	/* Bad segment parameters (Address/Size): */
@@ -119,6 +121,7 @@ static int mlx4_status_to_errno(u8 status)
 		[CMD_STAT_BAD_RES_STATE]  = -EBADF,
 		[CMD_STAT_BAD_INDEX]	  = -EBADF,
 		[CMD_STAT_BAD_NVMEM]	  = -EFAULT,
+		[CMD_STAT_ICM_ERROR]	  = -ENFILE,
 		[CMD_STAT_BAD_QP_STATE]   = -EINVAL,
 		[CMD_STAT_BAD_SEG_PARAM]  = -EFAULT,
 		[CMD_STAT_REG_BOUND]	  = -EBUSY,
-- 
GitLab


From e4044cfc493338cd09870bd45dc646336bb66e9f Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Tue, 22 Jul 2008 14:19:40 -0700
Subject: [PATCH 040/853] mlx4_core: Keep free count for MTT buddy allocator

MTT entries are allocated with a buddy allocator, which just keeps
bitmaps for each level of the buddy table.  However, all free space
starts out at the highest order, and small allocations start scanning
from the lowest order.  When the lowest order tables have no free
space, this can lead to scanning potentially millions of bits before
finding a free entry at a higher order.

We can avoid this by just keeping a count of how many free entries
each order has, and skipping the bitmap scan when an order is
completely empty.  This provides a nice performance boost for a
negligible increase in memory usage.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/mlx4.h |  1 +
 drivers/net/mlx4/mr.c   | 26 ++++++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index a4023c2dd05..78038499cff 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -118,6 +118,7 @@ struct mlx4_bitmap {
 
 struct mlx4_buddy {
 	unsigned long	      **bits;
+	unsigned int	       *num_free;
 	int			max_order;
 	spinlock_t		lock;
 };
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 03a9abcce52..b3ea93b9868 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -79,23 +79,26 @@ static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
 
 	spin_lock(&buddy->lock);
 
-	for (o = order; o <= buddy->max_order; ++o) {
-		m = 1 << (buddy->max_order - o);
-		seg = find_first_bit(buddy->bits[o], m);
-		if (seg < m)
-			goto found;
-	}
+	for (o = order; o <= buddy->max_order; ++o)
+		if (buddy->num_free[o]) {
+			m = 1 << (buddy->max_order - o);
+			seg = find_first_bit(buddy->bits[o], m);
+			if (seg < m)
+				goto found;
+		}
 
 	spin_unlock(&buddy->lock);
 	return -1;
 
  found:
 	clear_bit(seg, buddy->bits[o]);
+	--buddy->num_free[o];
 
 	while (o > order) {
 		--o;
 		seg <<= 1;
 		set_bit(seg ^ 1, buddy->bits[o]);
+		++buddy->num_free[o];
 	}
 
 	spin_unlock(&buddy->lock);
@@ -113,11 +116,13 @@ static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
 
 	while (test_bit(seg ^ 1, buddy->bits[order])) {
 		clear_bit(seg ^ 1, buddy->bits[order]);
+		--buddy->num_free[order];
 		seg >>= 1;
 		++order;
 	}
 
 	set_bit(seg, buddy->bits[order]);
+	++buddy->num_free[order];
 
 	spin_unlock(&buddy->lock);
 }
@@ -131,7 +136,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
 
 	buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
 			      GFP_KERNEL);
-	if (!buddy->bits)
+	buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
+				  GFP_KERNEL);
+	if (!buddy->bits || !buddy->num_free)
 		goto err_out;
 
 	for (i = 0; i <= buddy->max_order; ++i) {
@@ -143,6 +150,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
 	}
 
 	set_bit(0, buddy->bits[buddy->max_order]);
+	buddy->num_free[buddy->max_order] = 1;
 
 	return 0;
 
@@ -150,9 +158,10 @@ err_out_free:
 	for (i = 0; i <= buddy->max_order; ++i)
 		kfree(buddy->bits[i]);
 
+err_out:
 	kfree(buddy->bits);
+	kfree(buddy->num_free);
 
-err_out:
 	return -ENOMEM;
 }
 
@@ -164,6 +173,7 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
 		kfree(buddy->bits[i]);
 
 	kfree(buddy->bits);
+	kfree(buddy->num_free);
 }
 
 static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
-- 
GitLab


From e8bb4beb2b1f90d499134f2849727ed04c3bedc4 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Tue, 22 Jul 2008 14:20:05 -0700
Subject: [PATCH 041/853] IB/mthca: Keep free count for MTT buddy allocator

MTT entries are allocated with a buddy allocator, which just keeps
bitmaps for each level of the buddy table.  However, all free space
starts out at the highest order, and small allocations start scanning
from the lowest order.  When the lowest order tables have no free
space, this can lead to scanning potentially millions of bits before
finding a free entry at a higher order.

We can avoid this by just keeping a count of how many free entries
each order has, and skipping the bitmap scan when an order is
completely empty.  This provides a nice performance boost for a
negligible increase in memory usage.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_dev.h |  1 +
 drivers/infiniband/hw/mthca/mthca_mr.c  | 26 +++++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index ee4d073c889..252590116df 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -202,6 +202,7 @@ struct mthca_pd_table {
 
 struct mthca_buddy {
 	unsigned long **bits;
+	int	       *num_free;
 	int             max_order;
 	spinlock_t      lock;
 };
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 8489b1e81c0..882e6b73591 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -89,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
 
 	spin_lock(&buddy->lock);
 
-	for (o = order; o <= buddy->max_order; ++o) {
-		m = 1 << (buddy->max_order - o);
-		seg = find_first_bit(buddy->bits[o], m);
-		if (seg < m)
-			goto found;
-	}
+	for (o = order; o <= buddy->max_order; ++o)
+		if (buddy->num_free[o]) {
+			m = 1 << (buddy->max_order - o);
+			seg = find_first_bit(buddy->bits[o], m);
+			if (seg < m)
+				goto found;
+		}
 
 	spin_unlock(&buddy->lock);
 	return -1;
 
  found:
 	clear_bit(seg, buddy->bits[o]);
+	--buddy->num_free[o];
 
 	while (o > order) {
 		--o;
 		seg <<= 1;
 		set_bit(seg ^ 1, buddy->bits[o]);
+		++buddy->num_free[o];
 	}
 
 	spin_unlock(&buddy->lock);
@@ -123,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
 
 	while (test_bit(seg ^ 1, buddy->bits[order])) {
 		clear_bit(seg ^ 1, buddy->bits[order]);
+		--buddy->num_free[order];
 		seg >>= 1;
 		++order;
 	}
 
 	set_bit(seg, buddy->bits[order]);
+	++buddy->num_free[order];
 
 	spin_unlock(&buddy->lock);
 }
@@ -141,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
 
 	buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
 			      GFP_KERNEL);
-	if (!buddy->bits)
+	buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
+				  GFP_KERNEL);
+	if (!buddy->bits || !buddy->num_free)
 		goto err_out;
 
 	for (i = 0; i <= buddy->max_order; ++i) {
@@ -154,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
 	}
 
 	set_bit(0, buddy->bits[buddy->max_order]);
+	buddy->num_free[buddy->max_order] = 1;
 
 	return 0;
 
@@ -161,9 +169,10 @@ err_out_free:
 	for (i = 0; i <= buddy->max_order; ++i)
 		kfree(buddy->bits[i]);
 
+err_out:
 	kfree(buddy->bits);
+	kfree(buddy->num_free);
 
-err_out:
 	return -ENOMEM;
 }
 
@@ -175,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
 		kfree(buddy->bits[i]);
 
 	kfree(buddy->bits);
+	kfree(buddy->num_free);
 }
 
 static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
-- 
GitLab


From e5899e1b7d73e67de758a32174a859cc2586c0b9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 19 Jul 2008 14:39:24 +0200
Subject: [PATCH 042/853] PCI PM: make more PCI PM core functionality available
 to drivers

Make more PCI PM core functionality available to drivers

* Export pci_pme_capable() so that it can be called directly by
  drivers (for example, tg3 needs that).

* Move the state choosing part of pci_prepare_to_sleep() to a
  separate function, pci_target_state(), that can be called directly
  by drivers (for example, tg3 needs that).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c   | 34 ++++++++++++++++++++++++----------
 include/linux/pci.h |  2 ++
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d00f0e0d845..e9c356236d2 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1040,7 +1040,7 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
  * @dev: PCI device to handle.
  * @state: PCI state from which device will issue PME#.
  */
-static bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
+bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
 {
 	if (!dev->pm_cap)
 		return false;
@@ -1123,17 +1123,10 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
 }
 
 /**
- * pci_prepare_to_sleep - prepare PCI device for system-wide transition into a sleep state
- * @dev: Device to handle.
- *
- * Choose the power state appropriate for the device depending on whether
- * it can wake up the system and/or is power manageable by the platform
- * (PCI_D3hot is the default) and put the device into that state.
  */
-int pci_prepare_to_sleep(struct pci_dev *dev)
+pci_power_t pci_target_state(struct pci_dev *dev)
 {
 	pci_power_t target_state = PCI_D3hot;
-	int error;
 
 	if (platform_pci_power_manageable(dev)) {
 		/*
@@ -1160,7 +1153,7 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
 		 * to generate PME#.
 		 */
 		if (!dev->pm_cap)
-			return -EIO;
+			return PCI_POWER_ERROR;
 
 		if (dev->pme_support) {
 			while (target_state
@@ -1169,6 +1162,25 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
 		}
 	}
 
+	return target_state;
+}
+
+/**
+ * pci_prepare_to_sleep - prepare PCI device for system-wide transition into a sleep state
+ * @dev: Device to handle.
+ *
+ * Choose the power state appropriate for the device depending on whether
+ * it can wake up the system and/or is power manageable by the platform
+ * (PCI_D3hot is the default) and put the device into that state.
+ */
+int pci_prepare_to_sleep(struct pci_dev *dev)
+{
+	pci_power_t target_state = pci_target_state(dev);
+	int error;
+
+	if (target_state == PCI_POWER_ERROR)
+		return -EIO;
+
 	pci_enable_wake(dev, target_state, true);
 
 	error = pci_set_power_state(dev, target_state);
@@ -1918,7 +1930,9 @@ EXPORT_SYMBOL(pci_select_bars);
 EXPORT_SYMBOL(pci_set_power_state);
 EXPORT_SYMBOL(pci_save_state);
 EXPORT_SYMBOL(pci_restore_state);
+EXPORT_SYMBOL(pci_pme_capable);
 EXPORT_SYMBOL(pci_enable_wake);
+EXPORT_SYMBOL(pci_target_state);
 EXPORT_SYMBOL(pci_prepare_to_sleep);
 EXPORT_SYMBOL(pci_back_from_sleep);
 EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a6a088e1a80..1d296d31abe 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -638,7 +638,9 @@ int pci_save_state(struct pci_dev *dev);
 int pci_restore_state(struct pci_dev *dev);
 int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
 pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
+bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
+pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 
-- 
GitLab


From f17a077e61b627e58db5926bc474cf308318dad9 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 22 Jul 2008 14:40:47 -0700
Subject: [PATCH 043/853] PCI: fixup sparse endianness warnings in proc.c

drivers/pci/proc.c:91:3: warning: cast from restricted __le16
drivers/pci/proc.c:100:3: warning: cast from restricted __le32
drivers/pci/proc.c:109:3: warning: cast from restricted __le16
drivers/pci/proc.c:161:40: warning: cast to restricted __le16
drivers/pci/proc.c:170:41: warning: cast to restricted __le32
drivers/pci/proc.c:179:40: warning: cast to restricted __le16

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/proc.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 4400dffbd93..e1098c302c4 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -88,7 +88,7 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp
 	if ((pos & 3) && cnt > 2) {
 		unsigned short val;
 		pci_user_read_config_word(dev, pos, &val);
-		__put_user(cpu_to_le16(val), (unsigned short __user *) buf);
+		__put_user(cpu_to_le16(val), (__le16 __user *) buf);
 		buf += 2;
 		pos += 2;
 		cnt -= 2;
@@ -97,7 +97,7 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp
 	while (cnt >= 4) {
 		unsigned int val;
 		pci_user_read_config_dword(dev, pos, &val);
-		__put_user(cpu_to_le32(val), (unsigned int __user *) buf);
+		__put_user(cpu_to_le32(val), (__le32 __user *) buf);
 		buf += 4;
 		pos += 4;
 		cnt -= 4;
@@ -106,7 +106,7 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp
 	if (cnt >= 2) {
 		unsigned short val;
 		pci_user_read_config_word(dev, pos, &val);
-		__put_user(cpu_to_le16(val), (unsigned short __user *) buf);
+		__put_user(cpu_to_le16(val), (__le16 __user *) buf);
 		buf += 2;
 		pos += 2;
 		cnt -= 2;
@@ -156,8 +156,8 @@ proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, lof
 	}
 
 	if ((pos & 3) && cnt > 2) {
-		unsigned short val;
-		__get_user(val, (unsigned short __user *) buf);
+		__le16 val;
+		__get_user(val, (__le16 __user *) buf);
 		pci_user_write_config_word(dev, pos, le16_to_cpu(val));
 		buf += 2;
 		pos += 2;
@@ -165,8 +165,8 @@ proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, lof
 	}
 
 	while (cnt >= 4) {
-		unsigned int val;
-		__get_user(val, (unsigned int __user *) buf);
+		__le32 val;
+		__get_user(val, (__le32 __user *) buf);
 		pci_user_write_config_dword(dev, pos, le32_to_cpu(val));
 		buf += 4;
 		pos += 4;
@@ -174,8 +174,8 @@ proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, lof
 	}
 
 	if (cnt >= 2) {
-		unsigned short val;
-		__get_user(val, (unsigned short __user *) buf);
+		__le16 val;
+		__get_user(val, (__le16 __user *) buf);
 		pci_user_write_config_word(dev, pos, le16_to_cpu(val));
 		buf += 2;
 		pos += 2;
-- 
GitLab


From 9bcab8405c98c34849c5795c717b7e6a3e2d3875 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Fri, 11 Jul 2008 09:03:09 +1000
Subject: [PATCH 044/853] powerpc/spufs: correct kcalloc usage

kcalloc is supposed to be called with the count as its first argument and
the element size as the second.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
---
 arch/powerpc/platforms/cell/spufs/sputrace.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 8c0e95766a6..92d20e993ed 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -196,8 +196,7 @@ static int __init sputrace_init(void)
 	struct proc_dir_entry *entry;
 	int i, error = -ENOMEM;
 
-	sputrace_log = kcalloc(sizeof(struct sputrace),
-				bufsize, GFP_KERNEL);
+	sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
 	if (!sputrace_log)
 		goto out;
 
-- 
GitLab


From 8a6d2ea0cd121e3bfff4dbce5bc111874cf9e9d2 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 22 Jul 2008 21:53:40 -0700
Subject: [PATCH 045/853] sky2: don't stop queue on shutdown

It is unnecessary to stop the queue and turn off the carrier in the
shutdown routine. With the new netdev_queue this causes warnings.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sky2.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 711e4a8948e..5257cf464f1 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1829,9 +1829,6 @@ static int sky2_down(struct net_device *dev)
 	if (netif_msg_ifdown(sky2))
 		printk(KERN_INFO PFX "%s: disabling interface\n", dev->name);
 
-	/* Stop more packets from being queued */
-	netif_stop_queue(dev);
-
 	/* Disable port IRQ */
 	imask = sky2_read32(hw, B0_IMSK);
 	imask &= ~portirq_msk[port];
@@ -1887,8 +1884,6 @@ static int sky2_down(struct net_device *dev)
 
 	sky2_phy_power_down(hw, port);
 
-	netif_carrier_off(dev);
-
 	/* turn off LED's */
 	sky2_write16(hw, B0_Y2LED, LED_STAT_OFF);
 
-- 
GitLab


From deca05c3e81df4fcc38aa891eb8d8add14bce68b Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@goober.(none)>
Date: Wed, 4 Jun 2008 21:20:32 +1000
Subject: [PATCH 046/853] m68knommu: change to a configs directory for board
 configurations

Remove the old example m68knommu defconfig. Create a configs directory
for specific board configurations. Make the m5208evb the default.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/configs/m5208evb_defconfig | 610 ++++++++++++++++++++++
 1 file changed, 610 insertions(+)
 create mode 100644 arch/m68knommu/configs/m5208evb_defconfig

diff --git a/arch/m68knommu/configs/m5208evb_defconfig b/arch/m68knommu/configs/m5208evb_defconfig
new file mode 100644
index 00000000000..6fae33a05e2
--- /dev/null
+++ b/arch/m68knommu/configs/m5208evb_defconfig
@@ -0,0 +1,610 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.26-rc1
+#
+CONFIG_M68K=y
+# CONFIG_MMU is not set
+# CONFIG_FPU is not set
+CONFIG_ZONE_DMA=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_TIME_LOW_RES=y
+CONFIG_NO_IOPORT=y
+CONFIG_ARCH_SUPPORTS_AOUT=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SYSVIPC is not set
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+# CONFIG_SYSCTL_SYSCALL is not set
+# CONFIG_KALLSYMS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_BASE_FULL=y
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+# CONFIG_HAVE_OPROFILE is not set
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_SLABINFO=y
+CONFIG_TINY_SHMEM=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+CONFIG_CLASSIC_RCU=y
+
+#
+# Processor type and features
+#
+# CONFIG_M68328 is not set
+# CONFIG_M68EZ328 is not set
+# CONFIG_M68VZ328 is not set
+# CONFIG_M68360 is not set
+# CONFIG_M5206 is not set
+# CONFIG_M5206e is not set
+CONFIG_M520x=y
+# CONFIG_M523x is not set
+# CONFIG_M5249 is not set
+# CONFIG_M5271 is not set
+# CONFIG_M5272 is not set
+# CONFIG_M5275 is not set
+# CONFIG_M528x is not set
+# CONFIG_M5307 is not set
+# CONFIG_M532x is not set
+# CONFIG_M5407 is not set
+CONFIG_COLDFIRE=y
+CONFIG_CLOCK_SET=y
+CONFIG_CLOCK_FREQ=166666666
+CONFIG_CLOCK_DIV=2
+
+#
+# Platform
+#
+CONFIG_M5208EVB=y
+CONFIG_FREESCALE=y
+# CONFIG_4KSTACKS is not set
+CONFIG_HZ=100
+
+#
+# RAM configuration
+#
+CONFIG_RAMBASE=0x40000000
+CONFIG_RAMSIZE=0x2000000
+CONFIG_VECTORBASE=0x40000000
+CONFIG_KERNELBASE=0x40020000
+# CONFIG_RAMAUTOBIT is not set
+# CONFIG_RAM8BIT is not set
+CONFIG_RAM16BIT=y
+# CONFIG_RAM32BIT is not set
+
+#
+# ROM configuration
+#
+# CONFIG_ROM is not set
+CONFIG_RAMKERNEL=y
+# CONFIG_ROMKERNEL is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_VIRT_TO_BUS=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+# CONFIG_PCI is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_FLAT=y
+# CONFIG_BINFMT_ZFLAT is not set
+# CONFIG_BINFMT_SHARED_FLAT is not set
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+CONFIG_MTD_RAM=y
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+# CONFIG_MTD_PHYSMAP is not set
+CONFIG_MTD_UCLINUX=y
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+# CONFIG_MII is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
+CONFIG_FEC=y
+# CONFIG_FEC2 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI is not set
+# CONFIG_IWLWIFI_LEDS is not set
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_COLDFIRE is not set
+CONFIG_SERIAL_MCF=y
+CONFIG_SERIAL_MCF_BAUDRATE=115200
+CONFIG_SERIAL_MCF_CONSOLE=y
+# CONFIG_UNIX98_PTYS is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4DEV_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+# CONFIG_SYSFS is not set
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+CONFIG_ROMFS_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_FULLDEBUG=y
+# CONFIG_HIGHPROFILE is not set
+# CONFIG_BOOTPARAM is not set
+# CONFIG_NO_KERNEL_MSG is not set
+# CONFIG_BDM_DISABLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_DMA=y
-- 
GitLab


From 8c81b0574fd8877b5214f2d33816b199c62e3335 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@goober.(none)>
Date: Wed, 4 Jun 2008 21:22:18 +1000
Subject: [PATCH 047/853] m68knommu: defconfig for M5249EVB board

Add a defconfig for the Freescale M5249EVB board.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/configs/m5249evb_defconfig | 497 ++++++++++++++++++++++
 1 file changed, 497 insertions(+)
 create mode 100644 arch/m68knommu/configs/m5249evb_defconfig

diff --git a/arch/m68knommu/configs/m5249evb_defconfig b/arch/m68knommu/configs/m5249evb_defconfig
new file mode 100644
index 00000000000..cc6458333d6
--- /dev/null
+++ b/arch/m68knommu/configs/m5249evb_defconfig
@@ -0,0 +1,497 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.26-rc1
+#
+CONFIG_M68K=y
+# CONFIG_MMU is not set
+# CONFIG_FPU is not set
+CONFIG_ZONE_DMA=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_TIME_LOW_RES=y
+CONFIG_NO_IOPORT=y
+CONFIG_ARCH_SUPPORTS_AOUT=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SYSVIPC is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+# CONFIG_SYSCTL_SYSCALL is not set
+# CONFIG_KALLSYMS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_BASE_FULL=y
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+# CONFIG_HAVE_OPROFILE is not set
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_SLABINFO=y
+CONFIG_TINY_SHMEM=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+CONFIG_CLASSIC_RCU=y
+
+#
+# Processor type and features
+#
+# CONFIG_M68328 is not set
+# CONFIG_M68EZ328 is not set
+# CONFIG_M68VZ328 is not set
+# CONFIG_M68360 is not set
+# CONFIG_M5206 is not set
+# CONFIG_M5206e is not set
+# CONFIG_M520x is not set
+# CONFIG_M523x is not set
+CONFIG_M5249=y
+# CONFIG_M5271 is not set
+# CONFIG_M5272 is not set
+# CONFIG_M5275 is not set
+# CONFIG_M528x is not set
+# CONFIG_M5307 is not set
+# CONFIG_M532x is not set
+# CONFIG_M5407 is not set
+CONFIG_COLDFIRE=y
+CONFIG_CLOCK_SET=y
+CONFIG_CLOCK_FREQ=140000000
+CONFIG_CLOCK_DIV=2
+
+#
+# Platform
+#
+CONFIG_M5249C3=y
+CONFIG_FREESCALE=y
+CONFIG_4KSTACKS=y
+CONFIG_HZ=100
+
+#
+# RAM configuration
+#
+CONFIG_RAMBASE=0x00000000
+CONFIG_RAMSIZE=0x00800000
+CONFIG_VECTORBASE=0x00000000
+CONFIG_KERNELBASE=0x00020000
+CONFIG_RAMAUTOBIT=y
+# CONFIG_RAM8BIT is not set
+# CONFIG_RAM16BIT is not set
+# CONFIG_RAM32BIT is not set
+
+#
+# ROM configuration
+#
+# CONFIG_ROM is not set
+CONFIG_RAMKERNEL=y
+# CONFIG_ROMKERNEL is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_VIRT_TO_BUS=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+# CONFIG_PCI is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_FLAT=y
+# CONFIG_BINFMT_ZFLAT is not set
+# CONFIG_BINFMT_SHARED_FLAT is not set
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# Networking
+#
+# CONFIG_NET is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_SYS_HYPERVISOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+# CONFIG_MTD_CFI is not set
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_RAM=y
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_UCLINUX=y
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_MD is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_COLDFIRE is not set
+CONFIG_SERIAL_MCF=y
+CONFIG_SERIAL_MCF_BAUDRATE=19200
+CONFIG_SERIAL_MCF_CONSOLE=y
+# CONFIG_UNIX98_PTYS is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4DEV_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+CONFIG_ROMFS_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SAMPLES is not set
+# CONFIG_FULLDEBUG is not set
+# CONFIG_HIGHPROFILE is not set
+# CONFIG_BOOTPARAM is not set
+# CONFIG_NO_KERNEL_MSG is not set
+# CONFIG_BDM_DISABLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_DMA=y
-- 
GitLab


From c26def07f97d662ee96362795400cbf6f44d11ce Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@goober.(none)>
Date: Wed, 4 Jun 2008 21:23:43 +1000
Subject: [PATCH 048/853] m68knommu: defconfig for M5275EVB board

Add a defconfig for the Freescale M5275EVB board.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/configs/m5275evb_defconfig | 627 ++++++++++++++++++++++
 1 file changed, 627 insertions(+)
 create mode 100644 arch/m68knommu/configs/m5275evb_defconfig

diff --git a/arch/m68knommu/configs/m5275evb_defconfig b/arch/m68knommu/configs/m5275evb_defconfig
new file mode 100644
index 00000000000..0d1256f5add
--- /dev/null
+++ b/arch/m68knommu/configs/m5275evb_defconfig
@@ -0,0 +1,627 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.26-rc1
+#
+CONFIG_M68K=y
+# CONFIG_MMU is not set
+# CONFIG_FPU is not set
+CONFIG_ZONE_DMA=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_TIME_LOW_RES=y
+CONFIG_NO_IOPORT=y
+CONFIG_ARCH_SUPPORTS_AOUT=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SYSVIPC is not set
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+# CONFIG_SYSCTL_SYSCALL is not set
+# CONFIG_KALLSYMS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_BASE_FULL=y
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+# CONFIG_HAVE_OPROFILE is not set
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_SLABINFO=y
+CONFIG_TINY_SHMEM=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+CONFIG_CLASSIC_RCU=y
+
+#
+# Processor type and features
+#
+# CONFIG_M68328 is not set
+# CONFIG_M68EZ328 is not set
+# CONFIG_M68VZ328 is not set
+# CONFIG_M68360 is not set
+# CONFIG_M5206 is not set
+# CONFIG_M5206e is not set
+# CONFIG_M520x is not set
+# CONFIG_M523x is not set
+# CONFIG_M5249 is not set
+# CONFIG_M5271 is not set
+# CONFIG_M5272 is not set
+CONFIG_M5275=y
+# CONFIG_M528x is not set
+# CONFIG_M5307 is not set
+# CONFIG_M532x is not set
+# CONFIG_M5407 is not set
+CONFIG_M527x=y
+CONFIG_COLDFIRE=y
+CONFIG_CLOCK_SET=y
+CONFIG_CLOCK_FREQ=150000000
+CONFIG_CLOCK_DIV=2
+
+#
+# Platform
+#
+CONFIG_M5275EVB=y
+CONFIG_FREESCALE=y
+# CONFIG_4KSTACKS is not set
+CONFIG_HZ=100
+
+#
+# RAM configuration
+#
+CONFIG_RAMBASE=0x00000000
+CONFIG_RAMSIZE=0x00000000
+CONFIG_VECTORBASE=0x00000000
+CONFIG_KERNELBASE=0x00020000
+CONFIG_RAMAUTOBIT=y
+# CONFIG_RAM8BIT is not set
+# CONFIG_RAM16BIT is not set
+# CONFIG_RAM32BIT is not set
+
+#
+# ROM configuration
+#
+# CONFIG_ROM is not set
+CONFIG_RAMKERNEL=y
+# CONFIG_ROMKERNEL is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_VIRT_TO_BUS=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+# CONFIG_PCI is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_FLAT=y
+# CONFIG_BINFMT_ZFLAT is not set
+# CONFIG_BINFMT_SHARED_FLAT is not set
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+# CONFIG_MTD_CFI is not set
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_RAM=y
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_UCLINUX=y
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+# CONFIG_MII is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
+CONFIG_FEC=y
+CONFIG_FEC2=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI is not set
+# CONFIG_IWLWIFI_LEDS is not set
+# CONFIG_WAN is not set
+CONFIG_PPP=y
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+# CONFIG_PPP_ASYNC is not set
+# CONFIG_PPP_SYNC_TTY is not set
+# CONFIG_PPP_DEFLATE is not set
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
+# CONFIG_PPPOE is not set
+# CONFIG_PPPOL2TP is not set
+# CONFIG_SLIP is not set
+CONFIG_SLHC=y
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_COLDFIRE is not set
+CONFIG_SERIAL_MCF=y
+CONFIG_SERIAL_MCF_BAUDRATE=19200
+CONFIG_SERIAL_MCF_CONSOLE=y
+# CONFIG_UNIX98_PTYS is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+
+#
+# Multimedia drivers
+#
+CONFIG_DAB=y
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4DEV_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+CONFIG_ROMFS_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+# CONFIG_NFS_FS is not set
+# CONFIG_NFSD is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SAMPLES is not set
+# CONFIG_FULLDEBUG is not set
+# CONFIG_HIGHPROFILE is not set
+# CONFIG_BOOTPARAM is not set
+# CONFIG_NO_KERNEL_MSG is not set
+# CONFIG_BDM_DISABLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_DMA=y
-- 
GitLab


From f3a64eaf9ea87d70487b482a6d733cfd52ae4499 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@goober.(none)>
Date: Wed, 4 Jun 2008 21:24:54 +1000
Subject: [PATCH 049/853] m68knommu: defconfig for M5307C3 board

Add a defconfig for the Freescale M5307C3 board.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/configs/m5307c3_defconfig | 580 +++++++++++++++++++++++
 1 file changed, 580 insertions(+)
 create mode 100644 arch/m68knommu/configs/m5307c3_defconfig

diff --git a/arch/m68knommu/configs/m5307c3_defconfig b/arch/m68knommu/configs/m5307c3_defconfig
new file mode 100644
index 00000000000..fe2acdfa4d7
--- /dev/null
+++ b/arch/m68knommu/configs/m5307c3_defconfig
@@ -0,0 +1,580 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.26-rc1
+#
+CONFIG_M68K=y
+# CONFIG_MMU is not set
+# CONFIG_FPU is not set
+CONFIG_ZONE_DMA=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_TIME_LOW_RES=y
+CONFIG_NO_IOPORT=y
+CONFIG_ARCH_SUPPORTS_AOUT=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SYSVIPC is not set
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+# CONFIG_SYSCTL_SYSCALL is not set
+# CONFIG_KALLSYMS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_BASE_FULL=y
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+# CONFIG_HAVE_OPROFILE is not set
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_SLABINFO=y
+CONFIG_TINY_SHMEM=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+CONFIG_CLASSIC_RCU=y
+
+#
+# Processor type and features
+#
+# CONFIG_M68328 is not set
+# CONFIG_M68EZ328 is not set
+# CONFIG_M68VZ328 is not set
+# CONFIG_M68360 is not set
+# CONFIG_M5206 is not set
+# CONFIG_M5206e is not set
+# CONFIG_M520x is not set
+# CONFIG_M523x is not set
+# CONFIG_M5249 is not set
+# CONFIG_M5271 is not set
+# CONFIG_M5272 is not set
+# CONFIG_M5275 is not set
+# CONFIG_M528x is not set
+CONFIG_M5307=y
+# CONFIG_M532x is not set
+# CONFIG_M5407 is not set
+CONFIG_COLDFIRE=y
+CONFIG_CLOCK_SET=y
+CONFIG_CLOCK_FREQ=90000000
+CONFIG_CLOCK_DIV=2
+# CONFIG_OLDMASK is not set
+
+#
+# Platform
+#
+# CONFIG_ARN5307 is not set
+CONFIG_M5307C3=y
+# CONFIG_eLIA is not set
+# CONFIG_SECUREEDGEMP3 is not set
+# CONFIG_CLEOPATRA is not set
+# CONFIG_NETtel is not set
+CONFIG_FREESCALE=y
+# CONFIG_4KSTACKS is not set
+CONFIG_HZ=100
+
+#
+# RAM configuration
+#
+CONFIG_RAMBASE=0x00000000
+CONFIG_RAMSIZE=0x00800000
+CONFIG_VECTORBASE=0x00000000
+CONFIG_KERNELBASE=0x00020000
+CONFIG_RAMAUTOBIT=y
+# CONFIG_RAM8BIT is not set
+# CONFIG_RAM16BIT is not set
+# CONFIG_RAM32BIT is not set
+
+#
+# ROM configuration
+#
+# CONFIG_ROM is not set
+CONFIG_RAMKERNEL=y
+# CONFIG_ROMKERNEL is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_VIRT_TO_BUS=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+# CONFIG_PCI is not set
+# CONFIG_COMEMPCI is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_FLAT=y
+# CONFIG_BINFMT_ZFLAT is not set
+# CONFIG_BINFMT_SHARED_FLAT is not set
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+# CONFIG_MII is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI is not set
+# CONFIG_IWLWIFI_LEDS is not set
+# CONFIG_WAN is not set
+CONFIG_PPP=y
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+# CONFIG_PPP_ASYNC is not set
+# CONFIG_PPP_SYNC_TTY is not set
+# CONFIG_PPP_DEFLATE is not set
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
+# CONFIG_PPPOE is not set
+# CONFIG_PPPOL2TP is not set
+CONFIG_SLIP=y
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLHC=y
+# CONFIG_SLIP_SMART is not set
+# CONFIG_SLIP_MODE_SLIP6 is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_COLDFIRE is not set
+CONFIG_SERIAL_MCF=y
+CONFIG_SERIAL_MCF_BAUDRATE=19200
+CONFIG_SERIAL_MCF_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+
+#
+# Multimedia drivers
+#
+CONFIG_DAB=y
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4DEV_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+CONFIG_ROMFS_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_FULLDEBUG=y
+# CONFIG_HIGHPROFILE is not set
+# CONFIG_BOOTPARAM is not set
+# CONFIG_NO_KERNEL_MSG is not set
+# CONFIG_BDM_DISABLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_DMA=y
-- 
GitLab


From ab88e474c8ffa300660f03a8e6b08ea660956bef Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@goober.(none)>
Date: Wed, 4 Jun 2008 21:26:38 +1000
Subject: [PATCH 050/853] m68knommu: defconfig for M5407C3 board

Add a defconfig for the Freescale M5407C3 board.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/configs/m5407c3_defconfig | 641 +++++++++++++++++++++++
 1 file changed, 641 insertions(+)
 create mode 100644 arch/m68knommu/configs/m5407c3_defconfig

diff --git a/arch/m68knommu/configs/m5407c3_defconfig b/arch/m68knommu/configs/m5407c3_defconfig
new file mode 100644
index 00000000000..1118936d20e
--- /dev/null
+++ b/arch/m68knommu/configs/m5407c3_defconfig
@@ -0,0 +1,641 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.26-rc1
+# Wed May  7 10:25:16 2008
+#
+CONFIG_M68K=y
+# CONFIG_MMU is not set
+# CONFIG_FPU is not set
+CONFIG_ZONE_DMA=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+CONFIG_TIME_LOW_RES=y
+CONFIG_NO_IOPORT=y
+CONFIG_ARCH_SUPPORTS_AOUT=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SYSVIPC is not set
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+# CONFIG_SYSCTL_SYSCALL is not set
+# CONFIG_KALLSYMS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_BASE_FULL=y
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+# CONFIG_HAVE_OPROFILE is not set
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_SLABINFO=y
+CONFIG_TINY_SHMEM=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+CONFIG_CLASSIC_RCU=y
+
+#
+# Processor type and features
+#
+# CONFIG_M68328 is not set
+# CONFIG_M68EZ328 is not set
+# CONFIG_M68VZ328 is not set
+# CONFIG_M68360 is not set
+# CONFIG_M5206 is not set
+# CONFIG_M5206e is not set
+# CONFIG_M520x is not set
+# CONFIG_M523x is not set
+# CONFIG_M5249 is not set
+# CONFIG_M5271 is not set
+# CONFIG_M5272 is not set
+# CONFIG_M5275 is not set
+# CONFIG_M528x is not set
+# CONFIG_M5307 is not set
+# CONFIG_M532x is not set
+CONFIG_M5407=y
+CONFIG_COLDFIRE=y
+CONFIG_CLOCK_SET=y
+CONFIG_CLOCK_FREQ=50000000
+CONFIG_CLOCK_DIV=1
+
+#
+# Platform
+#
+CONFIG_M5407C3=y
+# CONFIG_CLEOPATRA is not set
+CONFIG_FREESCALE=y
+CONFIG_4KSTACKS=y
+CONFIG_HZ=100
+
+#
+# RAM configuration
+#
+CONFIG_RAMBASE=0x00000000
+CONFIG_RAMSIZE=0x00000000
+CONFIG_VECTORBASE=0x00000000
+CONFIG_KERNELBASE=0x00020000
+CONFIG_RAMAUTOBIT=y
+# CONFIG_RAM8BIT is not set
+# CONFIG_RAM16BIT is not set
+# CONFIG_RAM32BIT is not set
+
+#
+# ROM configuration
+#
+# CONFIG_ROM is not set
+CONFIG_RAMKERNEL=y
+# CONFIG_ROMKERNEL is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_VIRT_TO_BUS=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+# CONFIG_PCI is not set
+# CONFIG_COMEMPCI is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_FLAT=y
+# CONFIG_BINFMT_ZFLAT is not set
+# CONFIG_BINFMT_SHARED_FLAT is not set
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+# CONFIG_MTD_CFI is not set
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_RAM=y
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_UCLINUX=y
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+# CONFIG_MII is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI is not set
+# CONFIG_IWLWIFI_LEDS is not set
+# CONFIG_WAN is not set
+CONFIG_PPP=y
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+# CONFIG_PPP_ASYNC is not set
+# CONFIG_PPP_SYNC_TTY is not set
+# CONFIG_PPP_DEFLATE is not set
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
+# CONFIG_PPPOE is not set
+# CONFIG_PPPOL2TP is not set
+# CONFIG_SLIP is not set
+CONFIG_SLHC=y
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_COLDFIRE is not set
+CONFIG_SERIAL_MCF=y
+CONFIG_SERIAL_MCF_BAUDRATE=19200
+CONFIG_SERIAL_MCF_CONSOLE=y
+# CONFIG_UNIX98_PTYS is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+
+#
+# Multimedia drivers
+#
+CONFIG_DAB=y
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4DEV_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+CONFIG_ROMFS_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SAMPLES is not set
+# CONFIG_FULLDEBUG is not set
+# CONFIG_HIGHPROFILE is not set
+# CONFIG_BOOTPARAM is not set
+# CONFIG_NO_KERNEL_MSG is not set
+# CONFIG_BDM_DISABLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_DMA=y
-- 
GitLab


From 5732b38ddb770b98110ea218232fc072e5626b87 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Fri, 9 May 2008 16:18:33 +0200
Subject: [PATCH 051/853] m68knommu: Add Coldfire DMA Timer support

This one could be used as a hrtimer.

Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/platform/coldfire/Makefile    |  2 +-
 arch/m68knommu/platform/coldfire/dma_timer.c | 68 ++++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 arch/m68knommu/platform/coldfire/dma_timer.c

diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile
index 40cf20be1b9..4f416a91a82 100644
--- a/arch/m68knommu/platform/coldfire/Makefile
+++ b/arch/m68knommu/platform/coldfire/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_COLDFIRE)	+= dma.o entry.o vectors.o
 obj-$(CONFIG_M5206)	+= timers.o
 obj-$(CONFIG_M5206e)	+= timers.o
 obj-$(CONFIG_M520x)	+= pit.o
-obj-$(CONFIG_M523x)	+= pit.o
+obj-$(CONFIG_M523x)	+= pit.o dma_timer.o
 obj-$(CONFIG_M5249)	+= timers.o
 obj-$(CONFIG_M527x)	+= pit.o
 obj-$(CONFIG_M5272)	+= timers.o
diff --git a/arch/m68knommu/platform/coldfire/dma_timer.c b/arch/m68knommu/platform/coldfire/dma_timer.c
new file mode 100644
index 00000000000..b623c993219
--- /dev/null
+++ b/arch/m68knommu/platform/coldfire/dma_timer.c
@@ -0,0 +1,68 @@
+/*
+ * dma_timer.c -- Freescale ColdFire DMA Timer.
+ *
+ * Copyright (C) 2007, Benedikt Spranger <b.spranger@linutronix.de>
+ * Copyright (C) 2008. Sebastian Siewior, Linutronix
+ *
+ */
+
+#include <linux/clocksource.h>
+#include <linux/io.h>
+
+#include <asm/machdep.h>
+#include <asm/coldfire.h>
+#include <asm/mcfpit.h>
+#include <asm/mcfsim.h>
+
+#define DMA_TIMER_0	(0x00)
+#define DMA_TIMER_1	(0x40)
+#define DMA_TIMER_2	(0x80)
+#define DMA_TIMER_3	(0xc0)
+
+#define DTMR0	(MCF_IPSBAR + DMA_TIMER_0 + 0x400)
+#define DTXMR0	(MCF_IPSBAR + DMA_TIMER_0 + 0x402)
+#define DTER0	(MCF_IPSBAR + DMA_TIMER_0 + 0x403)
+#define DTRR0	(MCF_IPSBAR + DMA_TIMER_0 + 0x404)
+#define DTCR0	(MCF_IPSBAR + DMA_TIMER_0 + 0x408)
+#define DTCN0	(MCF_IPSBAR + DMA_TIMER_0 + 0x40c)
+
+#define DMA_FREQ    ((MCF_CLK / 2) / 16)
+
+/* DTMR */
+#define DMA_DTMR_RESTART	(1 << 3)
+#define DMA_DTMR_CLK_DIV_1	(1 << 1)
+#define DMA_DTMR_CLK_DIV_16	(2 << 1)
+#define DMA_DTMR_ENABLE		(1 << 0)
+
+static cycle_t cf_dt_get_cycles(void)
+{
+	return __raw_readl(DTCN0);
+}
+
+static struct clocksource clocksource_cf_dt = {
+	.name		= "coldfire_dma_timer",
+	.rating		= 200,
+	.read		= cf_dt_get_cycles,
+	.mask		= CLOCKSOURCE_MASK(32),
+	.shift		= 20,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int __init  init_cf_dt_clocksource(void)
+{
+	/*
+	 * We setup DMA timer 0 in free run mode. This incrementing counter is
+	 * used as a highly precise clock source. With MCF_CLK = 150 MHz we
+	 * get a ~213 ns resolution and the 32bit register will overflow almost
+	 * every 15 minutes.
+	 */
+	__raw_writeb(0x00, DTXMR0);
+	__raw_writeb(0x00, DTER0);
+	__raw_writel(0x00000000, DTRR0);
+	__raw_writew(DMA_DTMR_CLK_DIV_16 | DMA_DTMR_ENABLE, DTMR0);
+	clocksource_cf_dt.mult = clocksource_hz2mult(DMA_FREQ,
+						     clocksource_cf_dt.shift);
+	return clocksource_register(&clocksource_cf_dt);
+}
+
+arch_initcall(init_cf_dt_clocksource);
-- 
GitLab


From 1fda83d83c664ad74bfec8ce093a86d4d962f093 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Fri, 9 May 2008 16:13:36 +0200
Subject: [PATCH 052/853] m68knommu: m68knommu: add old stack trace method

The old method is used when frame pointers are not available.
Also fix formatting with CONFIG_KALLSYMS=n which eliminates \n.

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/kernel/traps.c | 38 ++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/arch/m68knommu/kernel/traps.c b/arch/m68knommu/kernel/traps.c
index ec9aea652e7..46f8f9d0c40 100644
--- a/arch/m68knommu/kernel/traps.c
+++ b/arch/m68knommu/kernel/traps.c
@@ -103,12 +103,28 @@ asmlinkage void buserr_c(struct frame *fp)
 	force_sig(SIGSEGV, current);
 }
 
+/* Print one call-trace entry; @i is its index, maintained by the caller. */
+static void print_this_address(unsigned long addr, int i)
+{
+#ifdef CONFIG_KALLSYMS
+	printk(KERN_EMERG " [%08lx] ", addr);
+	print_symbol(KERN_CONT "%s\n", addr);
+#else
+	if (i % 5)
+		printk(KERN_CONT " [%08lx] ", addr);
+	else
+		printk(KERN_CONT "\n" KERN_EMERG " [%08lx] ", addr);
+#endif
+}
+
 int kstack_depth_to_print = 48;
 
 static void __show_stack(struct task_struct *task, unsigned long *stack)
 {
 	unsigned long *endstack, addr;
+#ifdef CONFIG_FRAME_POINTER
 	unsigned long *last_stack;
+#endif
 	int i;
 
 	if (!stack)
@@ -126,6 +142,7 @@ static void __show_stack(struct task_struct *task, unsigned long *stack)
 		printk(" %08lx", *(stack + i));
 	}
 	printk("\n");
+	i = 0;
 
 #ifdef CONFIG_FRAME_POINTER
 	printk(KERN_EMERG "Call Trace:\n");
@@ -134,15 +151,30 @@ static void __show_stack(struct task_struct *task, unsigned long *stack)
 	while (stack <= endstack && stack > last_stack) {
 
 		addr = *(stack + 1);
-		printk(KERN_EMERG " [%08lx] ", addr);
-		print_symbol(KERN_CONT "%s\n", addr);
+		print_this_address(addr, i);
+		i++;
 
 		last_stack = stack;
 		stack = (unsigned long *)*stack;
 	}
 	printk("\n");
 #else
-	printk(KERN_EMERG "CONFIG_FRAME_POINTER disabled, no symbolic call trace\n");
+	printk(KERN_EMERG "Call Trace with CONFIG_FRAME_POINTER disabled:\n");
+	while (stack <= endstack) {
+		addr = *stack++;
+		/*
+		 * If the address is either in the text segment of the kernel,
+		 * or in a region which is occupied by a module then it *may*
+		 * be the address of a calling routine; if so, print it so that
+		 * someone tracing down the cause of the crash will be able to
+		 * figure out the call path that was taken.
+		 */
+		if (__kernel_text_address(addr)) {
+			print_this_address(addr, i);
+			i++;
+		}
+	}
+	printk(KERN_CONT "\n");
 #endif
 }
 
-- 
GitLab


From 0df185f5a1430ab8b437be402d286ee0728ef9f8 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Mon, 28 Apr 2008 11:43:00 +0200
Subject: [PATCH 053/853] m68knommu: move code within time.c

This patch creates two functions do_set_rtc() and read_rtc_mmss()
based on already available code.

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/kernel/time.c | 40 +++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index 0ccfb2ad638..d33ed9a84cc 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -33,22 +33,11 @@ static inline int set_rtc_mmss(unsigned long nowtime)
 	return -1;
 }
 
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-irqreturn_t arch_timer_interrupt(int irq, void *dummy)
+static inline void do_set_rtc(void)
 {
 	/* last time the cmos clock got updated */
 	static long last_rtc_update=0;
 
-	if (current->pid)
-		profile_tick(CPU_PROFILING);
-
-	write_seqlock(&xtime_lock);
-
-	do_timer(1);
-
 	/*
 	 * If we have an externally synchronized Linux clock, then update
 	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
@@ -63,6 +52,23 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
 	  else
 	    last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
 	}
+}
+
+/*
+ * timer_interrupt() needs to keep up the real-time clock,
+ * as well as call the "do_timer()" routine every clocktick
+ */
+irqreturn_t arch_timer_interrupt(int irq, void *dummy)
+{
+
+	if (current->pid)
+		profile_tick(CPU_PROFILING);
+
+	write_seqlock(&xtime_lock);
+
+	do_timer(1);
+
+	do_set_rtc();
 
 	write_sequnlock(&xtime_lock);
 
@@ -72,7 +78,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
 	return(IRQ_HANDLED);
 }
 
-void time_init(void)
+static unsigned long read_rtc_mmss(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
 
@@ -83,7 +89,13 @@ void time_init(void)
 
 	if ((year += 1900) < 1970)
 		year += 100;
-	xtime.tv_sec = mktime(year, mon, day, hour, min, sec);
+
+	return  mktime(year, mon, day, hour, min, sec);;
+}
+
+void time_init(void)
+{
+	xtime.tv_sec = read_rtc_mmss();
 	xtime.tv_nsec = 0;
 	wall_to_monotonic.tv_sec = -xtime.tv_sec;
 
-- 
GitLab


From 95469bd64a7a9ab405b566deb8c81d4aaf67ed9e Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Mon, 28 Apr 2008 11:43:01 +0200
Subject: [PATCH 054/853] m68knommu: complete generic time

do_set_rtc() isn't required because the work it handles is already
done by the generic code if read_persistent_clock() and
update_persistent_clock() are implemented and
CONFIG_GENERIC_CMOS_UPDATE is enabled. sync_cmos_clock() looks very
familiar :)

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/Kconfig       |  4 ++++
 arch/m68knommu/kernel/time.c | 38 ++++++++++--------------------------
 2 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 8e8441587c2..bfd35304d58 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -58,6 +58,10 @@ config GENERIC_TIME
 	bool
 	default y
 
+config GENERIC_CMOS_UPDATE
+	bool
+	default y
+
 config TIME_LOW_RES
 	bool
 	default y
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index d33ed9a84cc..67944aa2728 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -33,27 +33,6 @@ static inline int set_rtc_mmss(unsigned long nowtime)
 	return -1;
 }
 
-static inline void do_set_rtc(void)
-{
-	/* last time the cmos clock got updated */
-	static long last_rtc_update=0;
-
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 */
-	if (ntp_synced() &&
-	    xtime.tv_sec > last_rtc_update + 660 &&
-	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
-	    (xtime.tv_nsec  / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-	  if (set_rtc_mmss(xtime.tv_sec) == 0)
-	    last_rtc_update = xtime.tv_sec;
-	  else
-	    last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
-	}
-}
-
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
@@ -68,8 +47,6 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
 
 	do_timer(1);
 
-	do_set_rtc();
-
 	write_sequnlock(&xtime_lock);
 
 #ifndef CONFIG_SMP
@@ -93,12 +70,17 @@ static unsigned long read_rtc_mmss(void)
 	return  mktime(year, mon, day, hour, min, sec);;
 }
 
-void time_init(void)
+unsigned long read_persistent_clock(void)
+{
+	return read_rtc_mmss();
+}
+
+int update_persistent_clock(struct timespec now)
 {
-	xtime.tv_sec = read_rtc_mmss();
-	xtime.tv_nsec = 0;
-	wall_to_monotonic.tv_sec = -xtime.tv_sec;
+	return set_rtc_mmss(now.tv_sec);
+}
 
+void time_init(void)
+{
 	hw_timer_init();
 }
-
-- 
GitLab


From 5bed10a5ee272fbf18ce0ce764245bbb8f28e2e6 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Mon, 28 Apr 2008 11:43:03 +0200
Subject: [PATCH 055/853] m68knommu: add sched_clock() for the DMA timer

With this, printk() and other sched_clock() users get more precise
timestamps. The highly optimized math is from arch/x86/kernel/tsc_32.c.

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/platform/coldfire/dma_timer.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/m68knommu/platform/coldfire/dma_timer.c b/arch/m68knommu/platform/coldfire/dma_timer.c
index b623c993219..772578b1084 100644
--- a/arch/m68knommu/platform/coldfire/dma_timer.c
+++ b/arch/m68knommu/platform/coldfire/dma_timer.c
@@ -66,3 +66,19 @@ static int __init  init_cf_dt_clocksource(void)
 }
 
 arch_initcall(init_cf_dt_clocksource);
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+#define CYC2NS_SCALE	((1000000 << CYC2NS_SCALE_FACTOR) / (DMA_FREQ / 1000))
+
+static unsigned long long cycles2ns(unsigned long cycl)
+{
+	return (unsigned long long) ((unsigned long long)cycl *
+			CYC2NS_SCALE) >> CYC2NS_SCALE_FACTOR;
+}
+
+unsigned long long sched_clock(void)
+{
+	unsigned long cycl = __raw_readl(DTCN0);
+
+	return cycles2ns(cycl);
+}
-- 
GitLab


From a6260ef84103fa8a51a67b6a58e5e16c676e08ad Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Fri, 9 May 2008 16:10:37 +0200
Subject: [PATCH 056/853] m68knommu: add ffs and __ffs plattform which support
 ISA A+ or ISA C

The ff1 and bitrev opcodes appear in ISA C and ISA A+, which aren't
supported by all platforms. The assembly optimization is automatically
enabled if the compiler understands the required cpu keyword.
My m5235 seems to boot and run fine so far.

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/Makefile        | 11 +++++++----
 include/asm-m68knommu/bitops.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/arch/m68knommu/Makefile b/arch/m68knommu/Makefile
index e0b5f62e395..b63bbcf874f 100644
--- a/arch/m68knommu/Makefile
+++ b/arch/m68knommu/Makefile
@@ -8,6 +8,8 @@
 # (C) Copyright 2002, Greg Ungerer <gerg@snapgear.com>
 #
 
+KBUILD_DEFCONFIG := m5208evb_defconfig
+
 platform-$(CONFIG_M68328)	:= 68328
 platform-$(CONFIG_M68EZ328)	:= 68EZ328
 platform-$(CONFIG_M68VZ328)	:= 68VZ328
@@ -90,13 +92,14 @@ export PLATFORM BOARD MODEL CPUCLASS
 cflags-$(CONFIG_M5206)		:= -m5200
 cflags-$(CONFIG_M5206e)		:= -m5200
 cflags-$(CONFIG_M520x)		:= -m5307
-cflags-$(CONFIG_M523x)		:= -m5307
+cflags-$(CONFIG_M523x)		:= $(call cc-option,-mcpu=523x,-m5307)
 cflags-$(CONFIG_M5249)		:= -m5200
-cflags-$(CONFIG_M527x)		:= -m5307
+cflags-$(CONFIG_M5271)		:= $(call cc-option,-mcpu=5271,-m5307)
 cflags-$(CONFIG_M5272)		:= -m5307
-cflags-$(CONFIG_M528x)		:= -m5307
+cflags-$(CONFIG_M5275)		:= $(call cc-option,-mcpu=5275,-m5307)
+cflags-$(CONFIG_M528x)		:= $(call cc-option,-m528x,-m5307)
 cflags-$(CONFIG_M5307)		:= -m5307
-cflags-$(CONFIG_M532x)		:= -m5307
+cflags-$(CONFIG_M532x)		:= $(call cc-option,-mcpu=532x,-m5307)
 cflags-$(CONFIG_M5407)		:= -m5200
 cflags-$(CONFIG_M68328)		:= -m68000
 cflags-$(CONFIG_M68EZ328)	:= -m68000
diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h
index c142fbf2f37..6f3685eab44 100644
--- a/include/asm-m68knommu/bitops.h
+++ b/include/asm-m68knommu/bitops.h
@@ -14,8 +14,38 @@
 #error only <linux/bitops.h> can be included directly
 #endif
 
+#if defined (__mcfisaaplus__) || defined (__mcfisac__)
+static inline int ffs(unsigned int val)
+{
+        if (!val)
+                return 0;
+
+        asm volatile(
+                        "bitrev %0\n\t"
+                        "ff1 %0\n\t"
+                        : "=d" (val)
+                        : "0" (val)
+		    );
+        val++;
+        return val;
+}
+
+static inline int __ffs(unsigned int val)
+{
+        asm volatile(
+                        "bitrev %0\n\t"
+                        "ff1 %0\n\t"
+                        : "=d" (val)
+                        : "0" (val)
+		    );
+        return val;
+}
+
+#else
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/__ffs.h>
+#endif
+
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/ffz.h>
 
-- 
GitLab


From e872504b311cec52f7a316a0037fb959080dbea0 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Sat, 17 May 2008 21:51:15 +0200
Subject: [PATCH 057/853] m68knommu: add byteswap assembly opcode for ISA A+

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 include/asm-m68knommu/byteorder.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/asm-m68knommu/byteorder.h b/include/asm-m68knommu/byteorder.h
index 8fcde907b0f..20bb4426b61 100644
--- a/include/asm-m68knommu/byteorder.h
+++ b/include/asm-m68knommu/byteorder.h
@@ -1,13 +1,27 @@
 #ifndef _M68KNOMMU_BYTEORDER_H
 #define _M68KNOMMU_BYTEORDER_H
 
-#include <asm/types.h>
+#include <linux/types.h>
 
 #if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
 #  define __BYTEORDER_HAS_U64__
 #  define __SWAB_64_THRU_32__
 #endif
 
+#if defined (__mcfisaaplus__) || defined (__mcfisac__)
+static inline __attribute_const__ __u32 ___arch__swab32(__u32 val)
+{
+	asm(
+			"byterev %0"
+			: "=d" (val)
+			: "0" (val)
+	   );
+	return val;
+}
+
+#define __arch__swab32(x) ___arch__swab32(x)
+#endif
+
 #include <linux/byteorder/big_endian.h>
 
 #endif /* _M68KNOMMU_BYTEORDER_H */
-- 
GitLab


From 6dbeb456baaba05d60e7ca8213da26142062408a Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Tue, 13 May 2008 18:52:44 +0200
Subject: [PATCH 058/853] m68knommu: add read_barrier_depends() and
 irqs_disabled_flags()

/home/bigeasy/git/linux-2.6-ftrace/kernel/trace/trace.c: In function 'tracing_generic_entry_update':
/home/bigeasy/git/linux-2.6-ftrace/kernel/trace/trace.c:802: error: implicit declaration of function 'irqs_disabled_flags'
make[3]: *** [kernel/trace/trace.o] Error 1
/home/bigeasy/git/linux-2.6-ftrace/kernel/trace/ftrace.c: In function 'ftrace_list_func':
/home/bigeasy/git/linux-2.6-ftrace/kernel/trace/ftrace.c:61: error: implicit declaration of function 'read_barrier_depends'

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 include/asm-m68knommu/system.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/asm-m68knommu/system.h b/include/asm-m68knommu/system.h
index 64c64432bbb..40f49de6982 100644
--- a/include/asm-m68knommu/system.h
+++ b/include/asm-m68knommu/system.h
@@ -118,6 +118,8 @@ asmlinkage void resume(void);
 #define smp_read_barrier_depends()	do { } while(0)
 #endif
 
+#define read_barrier_depends()  ((void)0)
+
 #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
 
 struct __xchg_dummy { unsigned long a[100]; };
@@ -310,4 +312,13 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 #endif
 #define arch_align_stack(x) (x)
 
+/*
+ * Report whether a saved status register value has interrupts masked:
+ * a non-zero interrupt priority mask (SR bits 0x0700) means disabled.
+ */
+static inline int irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & 0x0700) != 0;
+}
+
 #endif /* _M68KNOMMU_SYSTEM_H */
-- 
GitLab


From 2b9a69861c39ae4c232385def833816acc07a0a4 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Mon, 28 Apr 2008 11:43:04 +0200
Subject: [PATCH 059/853] m68knommu: MCF5307 PIT GENERIC_CLOCKEVENTS support

The PIT code has been changed in order to support GENERIC_CLOCKEVENTS.
The priority of the PIT clocksource has been decreased in favor of the
DMA timer.

pit_cycles_per_jiffy becomes a constant (PIT_CYCLES_PER_JIFFY) because it
is known at compile time and does not change afterwards.

Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/Kconfig                 | 11 ++++
 arch/m68knommu/kernel/time.c           |  2 +
 arch/m68knommu/platform/coldfire/pit.c | 91 ++++++++++++++++++++++----
 3 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index bfd35304d58..2e7515e8db9 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -66,6 +66,10 @@ config TIME_LOW_RES
 	bool
 	default y
 
+config GENERIC_CLOCKEVENTS
+	bool
+	default n
+
 config NO_IOPORT
 	def_bool y
 
@@ -112,11 +116,13 @@ config M5206e
 
 config M520x
 	bool "MCF520x"
+	select GENERIC_CLOCKEVENTS
 	help
 	   Freescale Coldfire 5207/5208 processor support.
 
 config M523x
 	bool "MCF523x"
+	select GENERIC_CLOCKEVENTS
 	help
 	  Freescale Coldfire 5230/1/2/4/5 processor support
 
@@ -142,6 +148,7 @@ config M5275
 
 config M528x
 	bool "MCF528x"
+	select GENERIC_CLOCKEVENTS
 	help
 	  Motorola ColdFire 5280/5282 processor support.
 
@@ -165,6 +172,7 @@ endchoice
 config M527x
 	bool
 	depends on (M5271 || M5275)
+	select GENERIC_CLOCKEVENTS
 	default y
 
 config COLDFIRE
@@ -678,6 +686,9 @@ endchoice
 if COLDFIRE
 source "kernel/Kconfig.preempt"
 endif
+
+source "kernel/time/Kconfig"
+
 source "mm/Kconfig"
 
 endmenu
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index 67944aa2728..d182b2f7221 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -33,6 +33,7 @@ static inline int set_rtc_mmss(unsigned long nowtime)
 	return -1;
 }
 
+#ifndef CONFIG_GENERIC_CLOCKEVENTS
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
@@ -54,6 +55,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
 #endif
 	return(IRQ_HANDLED);
 }
+#endif
 
 static unsigned long read_rtc_mmss(void)
 {
diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c
index 4290638012e..c5b916700b2 100644
--- a/arch/m68knommu/platform/coldfire/pit.c
+++ b/arch/m68knommu/platform/coldfire/pit.c
@@ -18,7 +18,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/clocksource.h>
+#include <linux/clockchips.h>
 #include <asm/machdep.h>
 #include <asm/io.h>
 #include <asm/coldfire.h>
@@ -33,22 +33,86 @@
 #define	FREQ	((MCF_CLK / 2) / 64)
 #define	TA(a)	(MCF_IPSBAR + MCFPIT_BASE1 + (a))
 #define	INTC0	(MCF_IPSBAR + MCFICM_INTC0)
+#define PIT_CYCLES_PER_JIFFY (FREQ / HZ)
 
-static u32 pit_cycles_per_jiffy;
 static u32 pit_cnt;
 
+/*
+ * Initialize the PIT timer.
+ *
+ * This is also called after resume to bring the PIT into operation again.
+ */
+
+static void init_cf_pit_timer(enum clock_event_mode mode,
+                             struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+
+		__raw_writew(MCFPIT_PCSR_DISABLE, TA(MCFPIT_PCSR));
+		__raw_writew(PIT_CYCLES_PER_JIFFY, TA(MCFPIT_PMR));
+		__raw_writew(MCFPIT_PCSR_EN | MCFPIT_PCSR_PIE | \
+				MCFPIT_PCSR_OVW | MCFPIT_PCSR_RLD | \
+				MCFPIT_PCSR_CLK64, TA(MCFPIT_PCSR));
+		break;
+
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_UNUSED:
+
+		__raw_writew(MCFPIT_PCSR_DISABLE, TA(MCFPIT_PCSR));
+		break;
+
+	case CLOCK_EVT_MODE_ONESHOT:
+
+		__raw_writew(MCFPIT_PCSR_DISABLE, TA(MCFPIT_PCSR));
+		__raw_writew(MCFPIT_PCSR_EN | MCFPIT_PCSR_PIE | \
+				MCFPIT_PCSR_OVW | MCFPIT_PCSR_CLK64, \
+				TA(MCFPIT_PCSR));
+		break;
+
+	case CLOCK_EVT_MODE_RESUME:
+		/* Nothing to do here */
+		break;
+	}
+}
+
+/*
+ * Program the next event in oneshot mode
+ *
+ * Delta is given in PIT ticks
+ */
+static int cf_pit_next_event(unsigned long delta,
+		struct clock_event_device *evt)
+{
+	__raw_writew(delta, TA(MCFPIT_PMR));
+	return 0;
+}
+
+struct clock_event_device cf_pit_clockevent = {
+	.name		= "pit",
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	.set_mode	= init_cf_pit_timer,
+	.set_next_event	= cf_pit_next_event,
+	.shift		= 32,
+	.irq		= MCFINT_VECBASE + MCFINT_PIT1,
+};
+
+
+
 /***************************************************************************/
 
 static irqreturn_t pit_tick(int irq, void *dummy)
 {
+	struct clock_event_device *evt = &cf_pit_clockevent;
 	u16 pcsr;
 
 	/* Reset the ColdFire timer */
 	pcsr = __raw_readw(TA(MCFPIT_PCSR));
 	__raw_writew(pcsr | MCFPIT_PCSR_PIF, TA(MCFPIT_PCSR));
 
-	pit_cnt += pit_cycles_per_jiffy;
-	return arch_timer_interrupt(irq, dummy);
+	pit_cnt += PIT_CYCLES_PER_JIFFY;
+	evt->event_handler(evt);
+	return IRQ_HANDLED;
 }
 
 /***************************************************************************/
@@ -72,14 +136,14 @@ static cycle_t pit_read_clk(void)
 	cycles = pit_cnt;
 	local_irq_restore(flags);
 
-	return cycles + pit_cycles_per_jiffy - pcntr;
+	return cycles + PIT_CYCLES_PER_JIFFY - pcntr;
 }
 
 /***************************************************************************/
 
 static struct clocksource pit_clk = {
 	.name	= "pit",
-	.rating	= 250,
+	.rating	= 100,
 	.read	= pit_read_clk,
 	.shift	= 20,
 	.mask	= CLOCKSOURCE_MASK(32),
@@ -92,6 +156,14 @@ void hw_timer_init(void)
 {
 	u32 imr;
 
+	cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32);
+	cf_pit_clockevent.max_delta_ns =
+		clockevent_delta2ns(0xFFFF, &cf_pit_clockevent);
+	cf_pit_clockevent.min_delta_ns =
+		clockevent_delta2ns(0x3f, &cf_pit_clockevent);
+	clockevents_register_device(&cf_pit_clockevent);
+
 	setup_irq(MCFINT_VECBASE + MCFINT_PIT1, &pit_irq);
 
 	__raw_writeb(ICR_INTRCONF, INTC0 + MCFINTC_ICR0 + MCFINT_PIT1);
@@ -99,13 +171,6 @@ void hw_timer_init(void)
 	imr &= ~MCFPIT_IMR_IBIT;
 	__raw_writel(imr, INTC0 + MCFPIT_IMR);
 
-	/* Set up PIT timer 1 as poll clock */
-	pit_cycles_per_jiffy = FREQ / HZ;
-	__raw_writew(MCFPIT_PCSR_DISABLE, TA(MCFPIT_PCSR));
-	__raw_writew(pit_cycles_per_jiffy, TA(MCFPIT_PMR));
-	__raw_writew(MCFPIT_PCSR_EN | MCFPIT_PCSR_PIE | MCFPIT_PCSR_OVW |
-		MCFPIT_PCSR_RLD | MCFPIT_PCSR_CLK64, TA(MCFPIT_PCSR));
-
 	pit_clk.mult = clocksource_hz2mult(FREQ, pit_clk.shift);
 	clocksource_register(&pit_clk);
 }
-- 
GitLab


From 0d176af5b7e658490b75427ccd23ff6a158c472b Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Mon, 28 Apr 2008 11:43:20 +0200
Subject: [PATCH 060/853] m68knommu: fec: remove FADS

I found config FADS only in ppc/Kconfig. Bye bye relic.

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 drivers/net/fec.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 32a4f17d35f..6abbcd5f7c3 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -2,12 +2,6 @@
  * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx.
  * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
  *
- * This version of the driver is specific to the FADS implementation,
- * since the board contains control registers external to the processor
- * for the control of the LevelOne LXT970 transceiver.  The MPC860T manual
- * describes connections using the internal parallel port I/O, which
- * is basically all of Port D.
- *
  * Right now, I am very wasteful with the buffers.  I allocate memory
  * pages and then divide them into 2K frame buffers.  This way I know I
  * have buffers large enough to hold one frame within one buffer descriptor.
@@ -1809,10 +1803,6 @@ static void __inline__ fec_request_intrs(struct net_device *dev)
 	*/
 	*((uint *) RPX_CSR_ADDR) &= ~BCSR2_FETHLEDMODE;
 #endif
-#ifdef CONFIG_FADS
-	if (request_8xxirq(SIU_IRQ2, mii_link_interrupt, 0, "mii", dev) != 0)
-		panic("Could not allocate MII IRQ!");
-#endif
 }
 
 static void __inline__ fec_get_mac(struct net_device *dev)
-- 
GitLab


From c1863bed8c88324405dc2a922c153fe5d7df716c Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Mon, 28 Apr 2008 11:43:17 +0200
Subject: [PATCH 061/853] m68knommu: remove RPXCLASSIC from the m68k tree

These ifdefs are leftovers from the time when the driver was running
on a ppc.
Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 drivers/net/fec.c                | 42 --------------------------------
 include/asm-m68knommu/commproc.h | 19 ---------------
 2 files changed, 61 deletions(-)

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 6abbcd5f7c3..0ef7226efd5 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -43,17 +43,9 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 
-#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || \
-    defined(CONFIG_M5272) || defined(CONFIG_M528x) || \
-    defined(CONFIG_M520x) || defined(CONFIG_M532x)
 #include <asm/coldfire.h>
 #include <asm/mcfsim.h>
 #include "fec.h"
-#else
-#include <asm/8xx_immap.h>
-#include <asm/mpc8xx.h>
-#include "commproc.h"
-#endif
 
 #if defined(CONFIG_FEC2)
 #define	FEC_MAX_PORTS	2
@@ -1229,14 +1221,9 @@ static phy_info_t const * const phy_info[] = {
 
 /* ------------------------------------------------------------------------- */
 #ifdef HAVE_mii_link_interrupt
-#ifdef CONFIG_RPXCLASSIC
-static void
-mii_link_interrupt(void *dev_id);
-#else
 static irqreturn_t
 mii_link_interrupt(int irq, void * dev_id);
 #endif
-#endif
 
 #if defined(CONFIG_M5272)
 /*
@@ -1789,20 +1776,6 @@ static void __inline__ fec_request_intrs(struct net_device *dev)
 
 	if (request_8xxirq(FEC_INTERRUPT, fec_enet_interrupt, 0, "fec", dev) != 0)
 		panic("Could not allocate FEC IRQ!");
-
-#ifdef CONFIG_RPXCLASSIC
-	/* Make Port C, bit 15 an input that causes interrupts.
-	*/
-	immap->im_ioport.iop_pcpar &= ~0x0001;
-	immap->im_ioport.iop_pcdir &= ~0x0001;
-	immap->im_ioport.iop_pcso &= ~0x0001;
-	immap->im_ioport.iop_pcint |= 0x0001;
-	cpm_install_handler(CPMVEC_PIO_PC15, mii_link_interrupt, dev);
-
-	/* Make LEDS reflect Link status.
-	*/
-	*((uint *) RPX_CSR_ADDR) &= ~BCSR2_FETHLEDMODE;
-#endif
 }
 
 static void __inline__ fec_get_mac(struct net_device *dev)
@@ -1811,16 +1784,6 @@ static void __inline__ fec_get_mac(struct net_device *dev)
 
 	bd = (bd_t *)__res;
 	memcpy(dev->dev_addr, bd->bi_enetaddr, ETH_ALEN);
-
-#ifdef CONFIG_RPXCLASSIC
-	/* The Embedded Planet boards have only one MAC address in
-	 * the EEPROM, but can have two Ethernet ports.  For the
-	 * FEC port, we create another address by setting one of
-	 * the address bits above something that would have (up to
-	 * now) been allocated.
-	 */
-	dev->dev_adrd[3] |= 0x80;
-#endif
 }
 
 static void __inline__ fec_set_mii(struct net_device *dev, struct fec_enet_private *fep)
@@ -2099,13 +2062,8 @@ mii_discover_phy(uint mii_reg, struct net_device *dev)
 /* This interrupt occurs when the PHY detects a link change.
 */
 #ifdef HAVE_mii_link_interrupt
-#ifdef CONFIG_RPXCLASSIC
-static void
-mii_link_interrupt(void *dev_id)
-#else
 static irqreturn_t
 mii_link_interrupt(int irq, void * dev_id)
-#endif
 {
 	struct	net_device *dev = dev_id;
 	struct fec_enet_private *fep = netdev_priv(dev);
diff --git a/include/asm-m68knommu/commproc.h b/include/asm-m68knommu/commproc.h
index 36e870b468e..edf5eb6c08d 100644
--- a/include/asm-m68knommu/commproc.h
+++ b/include/asm-m68knommu/commproc.h
@@ -519,25 +519,6 @@ typedef struct scc_enet {
 #define SICR_ENET_CLKRT	((uint)0x00002c00)
 #endif
 
-#ifdef CONFIG_RPXCLASSIC
-/* Bits in parallel I/O port registers that have to be set/cleared
- * to configure the pins for SCC1 use.
- */
-#define PA_ENET_RXD	((ushort)0x0001)
-#define PA_ENET_TXD	((ushort)0x0002)
-#define PA_ENET_TCLK	((ushort)0x0200)
-#define PA_ENET_RCLK	((ushort)0x0800)
-#define PB_ENET_TENA	((uint)0x00001000)
-#define PC_ENET_CLSN	((ushort)0x0010)
-#define PC_ENET_RENA	((ushort)0x0020)
-
-/* Control bits in the SICR to route TCLK (CLK2) and RCLK (CLK4) to
- * SCC1.  Also, make sure GR1 (bit 24) and SC1 (bit 25) are zero.
- */
-#define SICR_ENET_MASK	((uint)0x000000ff)
-#define SICR_ENET_CLKRT	((uint)0x0000003d)
-#endif
-
 /* SCC Event register as used by Ethernet.
 */
 #define SCCE_ENET_GRA	((ushort)0x0080)	/* Graceful stop complete */
-- 
GitLab


From 87f4abb45bc640638e6986f0f4d412b2d0ea21e1 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Fri, 6 Jun 2008 15:55:36 +1000
Subject: [PATCH 062/853] m68knommu: remove last use of CONFIG_FADS and
 CONFIG_RPXCLASSIC

They have never been used in this port of the driver. It has only
ever been used on the ColdFire SoC ethernet core.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 drivers/net/fec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 0ef7226efd5..ecd5c71a7a8 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -53,7 +53,7 @@
 #define	FEC_MAX_PORTS	1
 #endif
 
-#if defined(CONFIG_FADS) || defined(CONFIG_RPXCLASSIC) || defined(CONFIG_M5272)
+#if defined(CONFIG_M5272)
 #define HAVE_mii_link_interrupt
 #endif
 
-- 
GitLab


From 9b0e74102494971ca37a425c63031fea68bb5b79 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Fri, 11 Jul 2008 15:29:36 +1000
Subject: [PATCH 063/853] m68knommu: put ColdFire head code into .text.head
 section

Switch the ColdFire head start up code to be in the .text.head segment.
And make sure that segment is at the start of the final linked text
segment. Fixes the linker warnings about section use mis-matches:

  WARNING: vmlinux.o(.text+0xa8): Section mismatch in reference from the variable _clear_bss to the function .init.text:start_kernel()
  The function _clear_bss() references
  the function __init start_kernel().
  This is often because _clear_bss lacks a __init
  annotation or the annotation of start_kernel is wrong.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68knommu/kernel/vmlinux.lds.S     | 1 +
 arch/m68knommu/platform/coldfire/head.S | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S
index 93e69236ed6..69ba9b10767 100644
--- a/arch/m68knommu/kernel/vmlinux.lds.S
+++ b/arch/m68knommu/kernel/vmlinux.lds.S
@@ -62,6 +62,7 @@ SECTIONS {
 	.text : {
 		_text = .;
 		_stext = . ;
+		HEAD_TEXT
 		TEXT_TEXT
 		SCHED_TEXT
 		LOCK_TEXT
diff --git a/arch/m68knommu/platform/coldfire/head.S b/arch/m68knommu/platform/coldfire/head.S
index b9aa0ca29bf..2b0d73c0cc3 100644
--- a/arch/m68knommu/platform/coldfire/head.S
+++ b/arch/m68knommu/platform/coldfire/head.S
@@ -10,6 +10,7 @@
 
 #include <linux/sys.h>
 #include <linux/linkage.h>
+#include <linux/init.h>
 #include <asm/asm-offsets.h>
 #include <asm/coldfire.h>
 #include <asm/mcfcache.h>
@@ -126,7 +127,7 @@ _ramend:
 
 /*****************************************************************************/
 
-.text
+__HEAD
 
 /*
  *	This is the codes first entry point. This is where it all
-- 
GitLab


From b7c2a75725dee9b5643a0aae3a4cb47f52e00a49 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 22 Jul 2008 22:34:29 -0700
Subject: [PATCH 064/853] sparc64: Fix lockdep issues in LDC protocol layer.

We're calling request_irq() with IRQs disabled.

No straightforward fix exists because we want to
enable these IRQs and setup state atomically before
getting into the IRQ handler the first time.

What happens now is that we mark the VIRQ to not be
automatically enabled by request_irq().  Then we
make explicit enable_irq() calls when we grab the
LDC channel.

This way we don't need to call request_irq() illegally
under the LDC channel lock any more.

Bump LDC version and release date.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/irq.c | 10 +++++++++-
 arch/sparc64/kernel/ldc.c | 38 +++++++++++++++++++-------------------
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index b441a26b73b..c481673d249 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -621,8 +621,9 @@ unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
 unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
 {
 	struct irq_handler_data *data;
-	struct ino_bucket *bucket;
 	unsigned long hv_err, cookie;
+	struct ino_bucket *bucket;
+	struct irq_desc *desc;
 	unsigned int virt_irq;
 
 	bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
@@ -643,6 +644,13 @@ unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
 	if (unlikely(!data))
 		return 0;
 
+	/* In order to make the LDC channel startup sequence easier,
+	 * especially wrt. locking, we do not let request_irq() enable
+	 * the interrupt.
+	 */
+	desc = irq_desc + virt_irq;
+	desc->status |= IRQ_NOAUTOEN;
+
 	set_irq_chip_data(virt_irq, data);
 
 	/* Catch accidental accesses to these things.  IMAP/ICLR handling
diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c
index 63969f61028..d68982330f6 100644
--- a/arch/sparc64/kernel/ldc.c
+++ b/arch/sparc64/kernel/ldc.c
@@ -1,6 +1,6 @@
 /* ldc.c: Logical Domain Channel link-layer protocol driver.
  *
- * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
  */
 
 #include <linux/kernel.h>
@@ -23,8 +23,8 @@
 
 #define DRV_MODULE_NAME		"ldc"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"1.0"
-#define DRV_MODULE_RELDATE	"June 25, 2007"
+#define DRV_MODULE_VERSION	"1.1"
+#define DRV_MODULE_RELDATE	"July 22, 2008"
 
 static char version[] __devinitdata =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -1235,13 +1235,9 @@ int ldc_bind(struct ldc_channel *lp, const char *name)
 	unsigned long hv_err, flags;
 	int err = -EINVAL;
 
-	spin_lock_irqsave(&lp->lock, flags);
-
-	if (!name)
-		goto out_err;
-
-	if (lp->state != LDC_STATE_INIT)
-		goto out_err;
+	if (!name ||
+	    (lp->state != LDC_STATE_INIT))
+		return -EINVAL;
 
 	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
 	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
@@ -1250,25 +1246,32 @@ int ldc_bind(struct ldc_channel *lp, const char *name)
 			  IRQF_SAMPLE_RANDOM | IRQF_SHARED,
 			  lp->rx_irq_name, lp);
 	if (err)
-		goto out_err;
+		return err;
 
 	err = request_irq(lp->cfg.tx_irq, ldc_tx,
 			  IRQF_SAMPLE_RANDOM | IRQF_SHARED,
 			  lp->tx_irq_name, lp);
-	if (err)
-		goto out_free_rx_irq;
+	if (err) {
+		free_irq(lp->cfg.rx_irq, lp);
+		return err;
+	}
+
 
+	spin_lock_irqsave(&lp->lock, flags);
+
+	enable_irq(lp->cfg.rx_irq);
+	enable_irq(lp->cfg.tx_irq);
 
 	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
 
 	err = -ENODEV;
 	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
 	if (hv_err)
-		goto out_free_tx_irq;
+		goto out_free_irqs;
 
 	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 	if (hv_err)
-		goto out_free_tx_irq;
+		goto out_free_irqs;
 
 	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
 	if (hv_err)
@@ -1304,14 +1307,11 @@ out_unmap_rx:
 out_unmap_tx:
 	sun4v_ldc_tx_qconf(lp->id, 0, 0);
 
-out_free_tx_irq:
+out_free_irqs:
 	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
 	free_irq(lp->cfg.tx_irq, lp);
-
-out_free_rx_irq:
 	free_irq(lp->cfg.rx_irq, lp);
 
-out_err:
 	spin_unlock_irqrestore(&lp->lock, flags);
 
 	return err;
-- 
GitLab


From e14fa82439d33cef67eaafc1a48960bbfa610c8e Mon Sep 17 00:00:00 2001
From: Riku Voipio <riku.voipio@iki.fi>
Date: Sat, 31 May 2008 14:43:41 +0100
Subject: [PATCH 065/853] leds: Add pca9532 led driver

NXP pca9532 is a LED dimmer/controller attached to i2c bus.  It allows
attaching up to 16 leds which can either be on, off or dimmed and/or blinked
with the two PWM modulators available.

This driver is a "new-style" i2c driver that adheres to the driver model and
implements the led framework api.  Since the leds connected to the driver are
platform specific, it is only useful when platform data is passed to the
driver to define what leds are connected to which pins.

Signed-off-by: Riku Voipio <riku.voipio@iki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/leds/Kconfig         |   8 +
 drivers/leds/Makefile        |   1 +
 drivers/leds/leds-pca9532.c  | 337 +++++++++++++++++++++++++++++++++++
 include/linux/leds-pca9532.h |  45 +++++
 4 files changed, 391 insertions(+)
 create mode 100644 drivers/leds/leds-pca9532.c
 create mode 100644 include/linux/leds-pca9532.h

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 86a369bc57d..1c35dfaef72 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -103,6 +103,14 @@ config LEDS_HP6XX
 	  This option enables led support for the handheld
 	  HP Jornada 620/660/680/690.
 
+config LEDS_PCA9532
+	tristate "LED driver for PCA9532 dimmer"
+	depends on LEDS_CLASS && I2C && INPUT && EXPERIMENTAL
+	help
+	  This option enables support for NXP pca9532
+	  led controller. It is generally only usefull
+	  as a platform driver
+
 config LEDS_GPIO
 	tristate "LED Support for GPIO connected LEDs"
 	depends on LEDS_CLASS && GENERIC_GPIO
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 973d626f5f4..7156f9970fa 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_LEDS_WRAP)			+= leds-wrap.o
 obj-$(CONFIG_LEDS_H1940)		+= leds-h1940.o
 obj-$(CONFIG_LEDS_COBALT_QUBE)		+= leds-cobalt-qube.o
 obj-$(CONFIG_LEDS_COBALT_RAQ)		+= leds-cobalt-raq.o
+obj-$(CONFIG_LEDS_PCA9532)		+= leds-pca9532.o
 obj-$(CONFIG_LEDS_GPIO)			+= leds-gpio.o
 obj-$(CONFIG_LEDS_CM_X270)              += leds-cm-x270.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)		+= leds-clevo-mail.o
diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c
new file mode 100644
index 00000000000..4064d4f6b33
--- /dev/null
+++ b/drivers/leds/leds-pca9532.c
@@ -0,0 +1,337 @@
+/*
+ * pca9532.c - 16-bit Led dimmer
+ *
+ * Copyright (C) 2008 Riku Voipio <riku.voipio@movial.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * Datasheet: http://www.nxp.com/acrobat/datasheets/PCA9532_3.pdf
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+#include <linux/mutex.h>
+#include <linux/leds-pca9532.h>
+
+static const unsigned short normal_i2c[] = { /*0x60,*/ I2C_CLIENT_END};
+I2C_CLIENT_INSMOD_1(pca9532);
+
+#define PCA9532_REG_PSC(i) (0x2+(i)*2)
+#define PCA9532_REG_PWM(i) (0x3+(i)*2)
+#define PCA9532_REG_LS0  0x6
+#define LED_REG(led) ((led>>2)+PCA9532_REG_LS0)
+#define LED_NUM(led) (led & 0x3)
+
+#define ldev_to_led(c)       container_of(c, struct pca9532_led, ldev)
+
+struct pca9532_data {
+	struct i2c_client *client;
+	struct pca9532_led leds[16];
+	struct mutex update_lock;
+	struct input_dev    *idev;
+	u8 pwm[2];
+	u8 psc[2];
+};
+
+static int pca9532_probe(struct i2c_client *client,
+	const struct i2c_device_id *id);
+static int pca9532_remove(struct i2c_client *client);
+
+static const struct i2c_device_id pca9532_id[] = {
+	{ "pca9532", 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, pca9532_id);
+
+static struct i2c_driver pca9532_driver = {
+	.driver = {
+		.name   = "pca9532",
+	},
+	.probe  = pca9532_probe,
+	.remove = pca9532_remove,
+	.id_table = pca9532_id,
+};
+
+/* We have two pwm/blinkers, but 16 possible leds to drive. Additionaly,
+ * the clever Thecus people are using one pwm to drive the beeper. So,
+ * as a compromise we average one pwm to the values requested by all
+ * leds that are not ON/OFF.
+ * */
+static int pca9532_setpwm(struct i2c_client *client, int pwm, int blink,
+	enum led_brightness value)
+{
+	int a = 0, b = 0, i = 0;
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	for (i = 0; i < 16; i++) {
+		if (data->leds[i].type == PCA9532_TYPE_LED &&
+			data->leds[i].state == PCA9532_PWM0+pwm) {
+				a++;
+				b += data->leds[i].ldev.brightness;
+		}
+	}
+	if (a == 0) {
+		dev_err(&client->dev,
+		"fear of division by zero %d/%d, wanted %d\n",
+			b, a, value);
+		return -EINVAL;
+	}
+	b = b/a;
+	if (b > 0xFF)
+		return -EINVAL;
+	mutex_lock(&data->update_lock);
+	data->pwm[pwm] = b;
+	i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(pwm),
+		data->pwm[pwm]);
+	data->psc[pwm] = blink;
+	i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(pwm),
+		data->psc[pwm]);
+	mutex_unlock(&data->update_lock);
+	return 0;
+}
+
+/* Set LED routing */
+static void pca9532_setled(struct pca9532_led *led)
+{
+	struct i2c_client *client = led->client;
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	char reg;
+
+	mutex_lock(&data->update_lock);
+	reg = i2c_smbus_read_byte_data(client, LED_REG(led->id));
+	/* zero led bits */
+	reg = reg & ~(0x3<<LED_NUM(led->id)*2);
+	/* set the new value */
+	reg = reg | (led->state << LED_NUM(led->id)*2);
+	i2c_smbus_write_byte_data(client, LED_REG(led->id), reg);
+	mutex_unlock(&data->update_lock);
+}
+
+static void pca9532_set_brightness(struct led_classdev *led_cdev,
+	enum led_brightness value)
+{
+	int err = 0;
+	struct pca9532_led *led = ldev_to_led(led_cdev);
+
+	if (value == LED_OFF)
+		led->state = PCA9532_OFF;
+	else if (value == LED_FULL)
+		led->state = PCA9532_ON;
+	else {
+		led->state = PCA9532_PWM0; /* Thecus: hardcode one pwm */
+		err = pca9532_setpwm(led->client, 0, 0, value);
+		if (err)
+			return; /* XXX: led api doesn't allow error code? */
+	}
+	pca9532_setled(led);
+}
+
+static int pca9532_set_blink(struct led_classdev *led_cdev,
+	unsigned long *delay_on, unsigned long *delay_off)
+{
+	struct pca9532_led *led = ldev_to_led(led_cdev);
+	struct i2c_client *client = led->client;
+	int psc;
+
+	if (*delay_on == 0 && *delay_off == 0) {
+	/* led subsystem ask us for a blink rate */
+		*delay_on  = 1000;
+		*delay_off = 1000;
+	}
+	if (*delay_on != *delay_off || *delay_on > 1690 || *delay_on < 6)
+		return -EINVAL;
+
+	/* Thecus specific: only use PSC/PWM 0 */
+	psc = (*delay_on * 152-1)/1000;
+	return pca9532_setpwm(client, 0, psc, led_cdev->brightness);
+}
+
+int pca9532_event(struct input_dev *dev, unsigned int type, unsigned int code,
+	int value)
+{
+	struct pca9532_data *data = input_get_drvdata(dev);
+
+	if (type != EV_SND && (code != SND_BELL || code != SND_TONE))
+		return -1;
+
+	/* XXX: allow different kind of beeps with psc/pwm modifications */
+	if (value > 1 && value < 32767)
+		data->pwm[1] = 127;
+	else
+		data->pwm[1] = 0;
+
+	dev_info(&dev->dev, "setting beep to %d \n", data->pwm[1]);
+	mutex_lock(&data->update_lock);
+	i2c_smbus_write_byte_data(data->client, PCA9532_REG_PWM(1),
+		data->pwm[1]);
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static int pca9532_configure(struct i2c_client *client,
+	struct pca9532_data *data, struct pca9532_platform_data *pdata)
+{
+	int i, err = 0;
+
+	for (i = 0; i < 2; i++)	{
+		data->pwm[i] = pdata->pwm[i];
+		data->psc[i] = pdata->psc[i];
+		i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(i),
+			data->pwm[i]);
+		i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(i),
+			data->psc[i]);
+	}
+
+	for (i = 0; i < 16; i++) {
+		struct pca9532_led *led = &data->leds[i];
+		struct pca9532_led *pled = &pdata->leds[i];
+		led->client = client;
+		led->id = i;
+		led->type = pled->type;
+		switch (led->type) {
+		case PCA9532_TYPE_NONE:
+			break;
+		case PCA9532_TYPE_LED:
+			led->state = pled->state;
+			led->name =  pled->name;
+			led->ldev.name = led->name;
+			led->ldev.brightness = LED_OFF;
+			led->ldev.brightness_set = pca9532_set_brightness;
+			led->ldev.blink_set = pca9532_set_blink;
+			if (led_classdev_register(&client->dev,
+				&led->ldev) < 0)	{
+				dev_err(&client->dev,
+					"couldn't register LED %s\n",
+					led->name);
+				goto exit;
+			}
+			pca9532_setled(led);
+			break;
+		case PCA9532_TYPE_N2100_BEEP:
+			BUG_ON(data->idev);
+			led->state = PCA9532_PWM1;
+			pca9532_setled(led);
+			data->idev = input_allocate_device();
+			if (data->idev == NULL) {
+				err = -ENOMEM;
+				goto exit;
+			}
+			data->idev->name = pled->name;
+			data->idev->phys = "i2c/pca9532";
+			data->idev->id.bustype = BUS_HOST;
+			data->idev->id.vendor  = 0x001f;
+			data->idev->id.product = 0x0001;
+			data->idev->id.version = 0x0100;
+			data->idev->evbit[0] = BIT_MASK(EV_SND);
+			data->idev->sndbit[0] = BIT_MASK(SND_BELL) |
+						BIT_MASK(SND_TONE);
+			data->idev->event = pca9532_event;
+			input_set_drvdata(data->idev, data);
+			err = input_register_device(data->idev);
+			if (err) {
+				input_free_device(data->idev);
+				data->idev = NULL;
+				goto exit;
+			}
+			break;
+		}
+	}
+	return 0;
+
+exit:
+	if (i > 0)
+		for (i = i - 1; i >= 0; i--)
+			switch (data->leds[i].type) {
+			case PCA9532_TYPE_NONE:
+				break;
+			case PCA9532_TYPE_LED:
+				led_classdev_unregister(&data->leds[i].ldev);
+				break;
+			case PCA9532_TYPE_N2100_BEEP:
+				if (data->idev != NULL) {
+					input_unregister_device(data->idev);
+					input_free_device(data->idev);
+					data->idev = NULL;
+				}
+				break;
+			}
+
+	return err;
+
+}
+
+static int pca9532_probe(struct i2c_client *client,
+	const struct i2c_device_id *id)
+{
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	struct pca9532_platform_data *pca9532_pdata = client->dev.platform_data;
+
+	if (!i2c_check_functionality(client->adapter,
+		I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EIO;
+
+	data = kzalloc(sizeof(struct pca9532_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	dev_info(&client->dev, "setting platform data\n");
+	i2c_set_clientdata(client, data);
+	data->client = client;
+	mutex_init(&data->update_lock);
+
+	if (pca9532_pdata == NULL)
+		return -EIO;
+
+	pca9532_configure(client, data, pca9532_pdata);
+	return 0;
+
+}
+
+static int pca9532_remove(struct i2c_client *client)
+{
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	int i;
+	for (i = 0; i < 16; i++)
+		switch (data->leds[i].type) {
+		case PCA9532_TYPE_NONE:
+			break;
+		case PCA9532_TYPE_LED:
+			led_classdev_unregister(&data->leds[i].ldev);
+			break;
+		case PCA9532_TYPE_N2100_BEEP:
+			if (data->idev != NULL) {
+				input_unregister_device(data->idev);
+				input_free_device(data->idev);
+				data->idev = NULL;
+			}
+			break;
+		}
+
+	kfree(data);
+	i2c_set_clientdata(client, NULL);
+	return 0;
+}
+
+static int __init pca9532_init(void)
+{
+	return i2c_add_driver(&pca9532_driver);
+}
+
+static void __exit pca9532_exit(void)
+{
+	i2c_del_driver(&pca9532_driver);
+}
+
+MODULE_AUTHOR("Riku Voipio <riku.voipio@movial.fi>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PCA 9532 LED dimmer");
+
+module_init(pca9532_init);
+module_exit(pca9532_exit);
+
diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h
new file mode 100644
index 00000000000..81b4207deb9
--- /dev/null
+++ b/include/linux/leds-pca9532.h
@@ -0,0 +1,45 @@
+/*
+ * pca9532.h - platform data structure for pca9532 led controller
+ *
+ * Copyright (C) 2008 Riku Voipio <riku.voipio@movial.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * Datasheet: http://www.nxp.com/acrobat/datasheets/PCA9532_3.pdf
+ *
+ */
+
+#ifndef __LINUX_PCA9532_H
+#define __LINUX_PCA9532_H
+
+#include <linux/leds.h>
+
+enum pca9532_state {
+	PCA9532_OFF  = 0x0,
+	PCA9532_ON   = 0x1,
+	PCA9532_PWM0 = 0x2,
+	PCA9532_PWM1 = 0x3
+};
+
+enum pca9532_type { PCA9532_TYPE_NONE, PCA9532_TYPE_LED,
+	PCA9532_TYPE_N2100_BEEP };
+
+struct pca9532_led {
+	u8 id;
+	struct i2c_client *client;
+	char *name;
+	struct led_classdev ldev;
+	enum pca9532_type type;
+	enum pca9532_state state;
+};
+
+struct pca9532_platform_data {
+	struct pca9532_led leds[16];
+	u8 pwm[2];
+	u8 psc[2];
+};
+
+#endif /* __LINUX_PCA9532_H */
+
-- 
GitLab


From 30be0486791fb637e758c771956c8f73bef3467c Mon Sep 17 00:00:00 2001
From: Riku Voipio <riku.voipio@iki.fi>
Date: Sat, 31 May 2008 14:45:16 +0100
Subject: [PATCH 066/853] leds: Add pca9532 platform data for Thecus N2100

Thecus N2100 has leds and a buzzer attached to a pca9532 controller.  Attach
the driver to the i2c bus and define the pca9532 pin configuration for this
platform in n2100_leds.

With this patch, support for N2100 should be complete in mainline Linux.

Signed-off-by: Riku Voipio <riku.voipio@iki.fi>
Acked-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 arch/arm/mach-iop32x/n2100.c | 52 ++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c
index 2741063bf36..28f164ea472 100644
--- a/arch/arm/mach-iop32x/n2100.c
+++ b/arch/arm/mach-iop32x/n2100.c
@@ -17,6 +17,7 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/f75375s.h>
+#include <linux/leds-pca9532.h>
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
@@ -206,6 +207,53 @@ static struct f75375s_platform_data n2100_f75375s = {
 	.pwm_enable = { 0, 0 },
 };
 
+static struct pca9532_platform_data n2100_leds = {
+	.leds = {
+	{	.name = "n2100:red:satafail0",
+		.state = PCA9532_OFF,
+		.type = PCA9532_TYPE_LED,
+	},
+	{	.name = "n2100:red:satafail1",
+		.state = PCA9532_OFF,
+		.type = PCA9532_TYPE_LED,
+	},
+	{	.name = "n2100:blue:usb",
+		.state = PCA9532_OFF,
+		.type = PCA9532_TYPE_LED,
+	},
+	{ 	.type = PCA9532_TYPE_NONE },
+
+	{ 	.type = PCA9532_TYPE_NONE },
+	{ 	.type = PCA9532_TYPE_NONE },
+	{ 	.type = PCA9532_TYPE_NONE },
+	{	.name = "n2100:red:usb",
+		.state = PCA9532_OFF,
+		.type = PCA9532_TYPE_LED,
+	},
+
+	{	.type = PCA9532_TYPE_NONE }, /* power OFF gpio */
+	{	.type = PCA9532_TYPE_NONE }, /* reset gpio */
+	{	.type = PCA9532_TYPE_NONE },
+	{	.type = PCA9532_TYPE_NONE },
+
+	{	.type = PCA9532_TYPE_NONE },
+	{	.name = "n2100:orange:system",
+		.state = PCA9532_OFF,
+		.type = PCA9532_TYPE_LED,
+	},
+	{	.name = "n2100:red:system",
+		.state = PCA9532_OFF,
+		.type = PCA9532_TYPE_LED,
+	},
+	{	.name = "N2100 beeper"  ,
+		.state =  PCA9532_OFF,
+		.type = PCA9532_TYPE_N2100_BEEP,
+	},
+	},
+	.psc = { 0, 0 },
+	.pwm = { 0, 0 },
+};
+
 static struct i2c_board_info __initdata n2100_i2c_devices[] = {
 	{
 		I2C_BOARD_INFO("rs5c372b", 0x32),
@@ -214,6 +262,10 @@ static struct i2c_board_info __initdata n2100_i2c_devices[] = {
 		I2C_BOARD_INFO("f75375", 0x2e),
 		.platform_data = &n2100_f75375s,
 	},
+	{
+		I2C_BOARD_INFO("pca9532", 0x60),
+		.platform_data = &n2100_leds,
+	},
 };
 
 /*
-- 
GitLab


From e49575f46cdb40014e14789a18e637f8fb917317 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Sat, 31 May 2008 15:18:55 +0100
Subject: [PATCH 067/853] leds: fix unsigned value overflow in atmel pwm driver

Fix an unsigned value overflow in the error handling code in the
Atmel PWM driver.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/leds/leds-atmel-pwm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/leds/leds-atmel-pwm.c b/drivers/leds/leds-atmel-pwm.c
index 28db6c1444e..52297c3ab24 100644
--- a/drivers/leds/leds-atmel-pwm.c
+++ b/drivers/leds/leds-atmel-pwm.c
@@ -37,7 +37,7 @@ static int __init pwmled_probe(struct platform_device *pdev)
 {
 	const struct gpio_led_platform_data	*pdata;
 	struct pwmled				*leds;
-	unsigned				i;
+	int					i;
 	int					status;
 
 	pdata = pdev->dev.platform_data;
-- 
GitLab


From 781a54e7664cc0089287a90d27086e9656ac68a1 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Sat, 31 May 2008 15:23:19 +0100
Subject: [PATCH 068/853] leds: mark led_classdev.default_trigger as const

LED classdev core doesn't modify memory pointed by the default_trigger,
so mark it as const and we'll be able to pass const char *s without getting
compiler warnings.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 include/linux/leds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 519df72e939..e7a5e89932f 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -48,7 +48,7 @@ struct led_classdev {
 
 	struct device		*dev;
 	struct list_head	 node;			/* LED Device list */
-	char			*default_trigger;	/* Trigger to use */
+	const char		*default_trigger;	/* Trigger to use */
 
 #ifdef CONFIG_LEDS_TRIGGERS
 	/* Protects the trigger data below */
-- 
GitLab


From dd1160dc1842ae172495a6da274a77e35c593ed8 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Mon, 9 Jun 2008 22:00:49 +0100
Subject: [PATCH 069/853] leds: Fix sparse warnings in leds-h1940 driver

Fixes the following sparse errors:
drivers/leds/leds-h1940.c:26:6: warning: symbol 'h1940_greenled_set' was not declared. Should it be static?
drivers/leds/leds-h1940.c:55:6: warning: symbol 'h1940_redled_set' was not declared. Should it be static?
drivers/leds/leds-h1940.c:85:6: warning: symbol 'h1940_blueled_set' was not declared. Should it be static?

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/leds/leds-h1940.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/leds/leds-h1940.c b/drivers/leds/leds-h1940.c
index bcec4223038..73c70502168 100644
--- a/drivers/leds/leds-h1940.c
+++ b/drivers/leds/leds-h1940.c
@@ -23,7 +23,8 @@
 /*
  * Green led.
  */
-void h1940_greenled_set(struct led_classdev *led_dev, enum led_brightness value)
+static void h1940_greenled_set(struct led_classdev *led_dev,
+			       enum led_brightness value)
 {
 	switch (value) {
 	case LED_HALF:
@@ -52,7 +53,8 @@ static struct led_classdev h1940_greenled = {
 /*
  * Red led.
  */
-void h1940_redled_set(struct led_classdev *led_dev, enum led_brightness value)
+static void h1940_redled_set(struct led_classdev *led_dev,
+			     enum led_brightness value)
 {
 	switch (value) {
 	case LED_HALF:
@@ -82,7 +84,8 @@ static struct led_classdev h1940_redled = {
  * Blue led.
  * (it can only be blue flashing led)
  */
-void h1940_blueled_set(struct led_classdev *led_dev, enum led_brightness value)
+static void h1940_blueled_set(struct led_classdev *led_dev,
+			      enum led_brightness value)
 {
 	if (value) {
 		/* flashing Blue */
-- 
GitLab


From f46e9203d9a100bae216cc06e17f2e77351aa8d8 Mon Sep 17 00:00:00 2001
From: Nate Case <ncase@xes-inc.com>
Date: Wed, 16 Jul 2008 22:49:55 +0100
Subject: [PATCH 070/853] leds: Add support for Philips PCA955x I2C LED drivers

This driver supports the PCA9550, PCA9551, PCA9552, and PCA9553
LED driver chips.

Signed-off-by: Nate Case <ncase@xes-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/leds/Kconfig        |   8 +
 drivers/leds/Makefile       |   1 +
 drivers/leds/leds-pca955x.c | 384 ++++++++++++++++++++++++++++++++++++
 include/linux/leds.h        |  14 ++
 4 files changed, 407 insertions(+)
 create mode 100644 drivers/leds/leds-pca955x.c

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 1c35dfaef72..9556262dda5 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -155,6 +155,14 @@ config LEDS_CLEVO_MAIL
 	  To compile this driver as a module, choose M here: the
 	  module will be called leds-clevo-mail.
 
+config LEDS_PCA955X
+	tristate "LED Support for PCA955x I2C chips"
+	depends on LEDS_CLASS && I2C
+	help
+	  This option enables support for LEDs connected to PCA955x
+	  LED driver chips accessed via the I2C bus.  Supported
+	  devices include PCA9550, PCA9551, PCA9552, and PCA9553.
+
 comment "LED Triggers"
 
 config LEDS_TRIGGERS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 7156f9970fa..ff7982b4456 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_LEDS_CM_X270)              += leds-cm-x270.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)		+= leds-clevo-mail.o
 obj-$(CONFIG_LEDS_HP6XX)		+= leds-hp6xx.o
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
+obj-$(CONFIG_LEDS_PCA955X)		+= leds-pca955x.o
 
 # LED Triggers
 obj-$(CONFIG_LEDS_TRIGGER_TIMER)	+= ledtrig-timer.o
diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
new file mode 100644
index 00000000000..146c0697286
--- /dev/null
+++ b/drivers/leds/leds-pca955x.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2007-2008 Extreme Engineering Solutions, Inc.
+ *
+ * Author: Nate Case <ncase@xes-inc.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License.  See the file COPYING in the main
+ * directory of this archive for more details.
+ *
+ * LED driver for various PCA955x I2C LED drivers
+ *
+ * Supported devices:
+ *
+ *	Device		Description		7-bit slave address
+ *	------		-----------		-------------------
+ *	PCA9550		2-bit driver		0x60 .. 0x61
+ *	PCA9551		8-bit driver		0x60 .. 0x67
+ *	PCA9552		16-bit driver		0x60 .. 0x67
+ *	PCA9553/01	4-bit driver		0x62
+ *	PCA9553/02	4-bit driver		0x63
+ *
+ * Philips PCA955x LED driver chips follow a register map as shown below:
+ *
+ *	Control Register		Description
+ *	----------------		-----------
+ *	0x0				Input register 0
+ *					..
+ *	NUM_INPUT_REGS - 1		Last Input register X
+ *
+ *	NUM_INPUT_REGS			Frequency prescaler 0
+ *	NUM_INPUT_REGS + 1		PWM register 0
+ *	NUM_INPUT_REGS + 2		Frequency prescaler 1
+ *	NUM_INPUT_REGS + 3		PWM register 1
+ *
+ *	NUM_INPUT_REGS + 4		LED selector 0
+ *	NUM_INPUT_REGS + 4
+ *	    + NUM_LED_REGS - 1		Last LED selector
+ *
+ *  where NUM_INPUT_REGS and NUM_LED_REGS vary depending on how many
+ *  bits the chip supports.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/leds.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/workqueue.h>
+
+/* LED select registers determine the source that drives LED outputs */
+#define PCA955X_LS_LED_ON	0x0	/* Output LOW */
+#define PCA955X_LS_LED_OFF	0x1	/* Output HI-Z */
+#define PCA955X_LS_BLINK0	0x2	/* Blink at PWM0 rate */
+#define PCA955X_LS_BLINK1	0x3	/* Blink at PWM1 rate */
+
+enum pca955x_type {
+	pca9550,
+	pca9551,
+	pca9552,
+	pca9553,
+};
+
+struct pca955x_chipdef {
+	int			bits;
+	u8			slv_addr;	/* 7-bit slave address mask */
+	int			slv_addr_shift;	/* Number of bits to ignore */
+};
+
+static struct pca955x_chipdef pca955x_chipdefs[] = {
+	[pca9550] = {
+		.bits		= 2,
+		.slv_addr	= /* 110000x */ 0x60,
+		.slv_addr_shift	= 1,
+	},
+	[pca9551] = {
+		.bits		= 8,
+		.slv_addr	= /* 1100xxx */ 0x60,
+		.slv_addr_shift	= 3,
+	},
+	[pca9552] = {
+		.bits		= 16,
+		.slv_addr	= /* 1100xxx */ 0x60,
+		.slv_addr_shift	= 3,
+	},
+	[pca9553] = {
+		.bits		= 4,
+		.slv_addr	= /* 110001x */ 0x62,
+		.slv_addr_shift	= 1,
+	},
+};
+
+static const struct i2c_device_id pca955x_id[] = {
+	{ "pca9550", pca9550 },
+	{ "pca9551", pca9551 },
+	{ "pca9552", pca9552 },
+	{ "pca9553", pca9553 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, pca955x_id);
+
+struct pca955x_led {
+	struct pca955x_chipdef	*chipdef;
+	struct i2c_client	*client;
+	struct work_struct	work;
+	spinlock_t		lock;
+	enum led_brightness	brightness;
+	struct led_classdev	led_cdev;
+	int			led_num;	/* 0 .. 15 potentially */
+	char			name[32];
+};
+
+/* 8 bits per input register */
+static inline int pca95xx_num_input_regs(int bits)
+{
+	return (bits + 7) / 8;
+}
+
+/* 4 bits per LED selector register */
+static inline int pca95xx_num_led_regs(int bits)
+{
+	return (bits + 3)  / 4;
+}
+
+/*
+ * Return oldval with the 2-bit field of LED led_num (0-3) set to state.
+ */
+static inline u8 pca955x_ledsel(u8 oldval, int led_num, int state)
+{
+	int shift = led_num << 1;	/* two selector bits per LED */
+
+	return (oldval & ~(0x3 << shift)) | ((state & 0x3) << shift);
+}
+
+/*
+ * Write to frequency prescaler register, used to program the
+ * period of the PWM output.  period = (PSCx + 1) / 38
+ */
+static void pca955x_write_psc(struct i2c_client *client, int n, u8 val)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+
+	i2c_smbus_write_byte_data(client,
+		pca95xx_num_input_regs(pca955x->chipdef->bits) + 2*n,
+		val);
+}
+
+/*
+ * Write to PWM register, which determines the duty cycle of the
+ * output.  LED is OFF when the count is less than the value of this
+ * register, and ON when it is greater.  If PWMx == 0, LED is always OFF.
+ *
+ * Duty cycle is (256 - PWMx) / 256
+ */
+static void pca955x_write_pwm(struct i2c_client *client, int n, u8 val)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+
+	i2c_smbus_write_byte_data(client,
+		pca95xx_num_input_regs(pca955x->chipdef->bits) + 1 + 2*n,
+		val);
+}
+
+/*
+ * Write to LED selector register, which determines the source that
+ * drives the LED output.
+ */
+static void pca955x_write_ls(struct i2c_client *client, int n, u8 val)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+
+	i2c_smbus_write_byte_data(client,
+		pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n,
+		val);
+}
+
+/*
+ * Read the LED selector register, which determines the source that
+ * drives the LED output.
+ */
+static u8 pca955x_read_ls(struct i2c_client *client, int n)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+
+	return (u8) i2c_smbus_read_byte_data(client,
+		pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n);
+}
+
+static void pca955x_led_work(struct work_struct *work)
+{
+	struct pca955x_led *pca955x;
+	u8 ls;
+	int chip_ls;	/* which LSx to use (0-3 potentially) */
+	int ls_led;	/* which set of bits within LSx to use (0-3) */
+
+	pca955x = container_of(work, struct pca955x_led, work);
+	chip_ls = pca955x->led_num / 4;
+	ls_led = pca955x->led_num % 4;
+
+	ls = pca955x_read_ls(pca955x->client, chip_ls);
+
+	switch (pca955x->brightness) {
+	case LED_FULL:
+		ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_ON);
+		break;
+	case LED_OFF:
+		ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_OFF);
+		break;
+	case LED_HALF:
+		ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_BLINK0);
+		break;
+	default:
+		/*
+		 * Use PWM1 for all other values.  This has the unwanted
+		 * side effect of making all LEDs on the chip share the
+		 * same brightness level if set to a value other than
+		 * OFF, HALF, or FULL.  But, this is probably better than
+		 * just turning off for all other values.
+		 */
+		pca955x_write_pwm(pca955x->client, 1, 255-pca955x->brightness);
+		ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_BLINK1);
+		break;
+	}
+
+	pca955x_write_ls(pca955x->client, chip_ls, ls);
+}
+
+/* brightness_set hook: latch the level and let the work item do the I2C */
+static void pca955x_led_set(struct led_classdev *led_cdev,
+			    enum led_brightness value)
+{
+	struct pca955x_led *pca955x =
+		container_of(led_cdev, struct pca955x_led, led_cdev);
+
+	spin_lock(&pca955x->lock);
+	pca955x->brightness = value;
+
+	/*
+	 * Must use workqueue for the actual I/O since I2C operations
+	 * can sleep.
+	 */
+	schedule_work(&pca955x->work);
+	spin_unlock(&pca955x->lock);
+}
+
+/*
+ * Bind to a chip: register one LED class device per output bit.  Per-LED
+ * state is one array, kept as client data, that pca955x_remove() frees.
+ */
+static int __devinit pca955x_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct pca955x_led *pca955x;
+	int i, err;
+	struct pca955x_chipdef *chip;
+	struct i2c_adapter *adapter;
+	struct led_platform_data *pdata;
+
+	chip = &pca955x_chipdefs[id->driver_data];
+	adapter = to_i2c_adapter(client->dev.parent);
+	pdata = client->dev.platform_data;
+
+	/* Make sure the slave address / chip type combo given is possible */
+	if ((client->addr & ~((1 << chip->slv_addr_shift) - 1)) !=
+	    chip->slv_addr) {
+		dev_err(&client->dev, "invalid slave address %02x\n",
+				client->addr);
+		return -ENODEV;
+	}
+
+	printk(KERN_INFO "leds-pca955x: Using %s %d-bit LED driver at "
+			"slave address 0x%02x\n",
+			id->name, chip->bits, client->addr);
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
+		return -EIO;
+
+	if (pdata && pdata->num_leds != chip->bits) {
+		dev_err(&client->dev, "board info claims %d LEDs"
+				" on a %d-bit chip\n",
+				pdata->num_leds, chip->bits);
+		return -ENODEV;
+	}
+
+	pca955x = kcalloc(chip->bits, sizeof(*pca955x), GFP_KERNEL);
+	if (!pca955x)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, pca955x);
+
+	for (i = 0; i < chip->bits; i++) {
+		pca955x[i].chipdef = chip;
+		pca955x[i].client = client;
+		pca955x[i].led_num = i;
+
+		/* Platform data may supply names/triggers; default to index */
+		if (pdata && pdata->leds[i].name)
+			snprintf(pca955x[i].name, sizeof(pca955x[i].name),
+				 "pca955x:%s", pdata->leds[i].name);
+		else
+			snprintf(pca955x[i].name, sizeof(pca955x[i].name),
+				 "pca955x:%d", i);
+		if (pdata && pdata->leds[i].default_trigger)
+			pca955x[i].led_cdev.default_trigger =
+				pdata->leds[i].default_trigger;
+
+		spin_lock_init(&pca955x[i].lock);
+		INIT_WORK(&pca955x[i].work, pca955x_led_work);
+
+		pca955x[i].led_cdev.name = pca955x[i].name;
+		pca955x[i].led_cdev.brightness_set = pca955x_led_set;
+
+		err = led_classdev_register(&client->dev,
+					    &pca955x[i].led_cdev);
+		if (err < 0)
+			goto exit;
+	}
+
+	/* Turn off LEDs */
+	for (i = 0; i < pca95xx_num_led_regs(chip->bits); i++)
+		pca955x_write_ls(client, i, 0x55);
+
+	/* PWM0 is used for half brightness or 50% duty cycle */
+	pca955x_write_pwm(client, 0, 255-LED_HALF);
+
+	/* PWM1 is used for variable brightness, default to OFF */
+	pca955x_write_pwm(client, 1, 0);
+
+	/* Set to fast frequency so we do not see flashing */
+	pca955x_write_psc(client, 0, 0);
+	pca955x_write_psc(client, 1, 0);
+
+	return 0;
+exit:
+	/* Unwind only the LEDs that were successfully registered */
+	while (i--) {
+		led_classdev_unregister(&pca955x[i].led_cdev);
+		cancel_work_sync(&pca955x[i].work);
+	}
+	kfree(pca955x);
+	return err;
+}
+
+/* Unregister every LED of the chip and free the array set up by probe */
+static int __devexit pca955x_remove(struct i2c_client *client)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < pca955x->chipdef->bits; i++) {
+		led_classdev_unregister(&pca955x[i].led_cdev);
+		cancel_work_sync(&pca955x[i].work);
+	}
+	kfree(pca955x);
+	return 0;
+}
+
+static struct i2c_driver pca955x_driver = {
+	.driver = {
+		.name	= "leds-pca955x",
+		.owner	= THIS_MODULE,
+	},
+	.probe	= pca955x_probe,
+	.remove	= __devexit_p(pca955x_remove),
+	.id_table = pca955x_id,
+};
+
+static int __init pca955x_leds_init(void)
+{
+	return i2c_add_driver(&pca955x_driver);
+}
+
+static void __exit pca955x_leds_exit(void)
+{
+	i2c_del_driver(&pca955x_driver);
+}
+
+module_init(pca955x_leds_init);
+module_exit(pca955x_leds_exit);
+
+MODULE_AUTHOR("Nate Case <ncase@xes-inc.com>");
+MODULE_DESCRIPTION("PCA955x LED driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/leds.h b/include/linux/leds.h
index e7a5e89932f..d41ccb56146 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -118,6 +118,20 @@ extern void ledtrig_ide_activity(void);
 #define ledtrig_ide_activity() do {} while(0)
 #endif
 
+/*
+ * Generic LED platform data for describing LED names and default triggers.
+ */
+struct led_info {
+	const char	*name;
+	char		*default_trigger;
+	int		flags;
+};
+
+struct led_platform_data {
+	int		num_leds;
+	struct led_info	*leds;
+};
+
 /* For the leds-gpio driver */
 struct gpio_led {
 	const char *name;
-- 
GitLab


From fe3025b55c8ed06929afe94e9c9095fc19d15aa0 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Wed, 16 Jul 2008 22:51:14 +0100
Subject: [PATCH 071/853] leds: Ensure led->trigger is set earlier

Make sure led->trigger is valid before calling trigger->activate

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/leds/led-triggers.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 0f242b3f09b..f910eaffe3a 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -111,16 +111,17 @@ void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trigger)
 			flags);
 		if (led_cdev->trigger->deactivate)
 			led_cdev->trigger->deactivate(led_cdev);
+		led_cdev->trigger = NULL;
 		led_set_brightness(led_cdev, LED_OFF);
 	}
 	if (trigger) {
 		write_lock_irqsave(&trigger->leddev_list_lock, flags);
 		list_add_tail(&led_cdev->trig_list, &trigger->led_cdevs);
 		write_unlock_irqrestore(&trigger->leddev_list_lock, flags);
+		led_cdev->trigger = trigger;
 		if (trigger->activate)
 			trigger->activate(led_cdev);
 	}
-	led_cdev->trigger = trigger;
 }
 EXPORT_SYMBOL_GPL(led_trigger_set);
 
-- 
GitLab


From 7be35c72e6454059a33ad844153349973d22fcb7 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Mon, 9 Jun 2008 21:56:16 +0100
Subject: [PATCH 072/853] backlight: Add Nvidia-based Apple Macbook Pro
 backlight driver

Nvidia-based Apple Macbook Pros don't appear to handle backlight control
through the graphics card registers or ACPI, but instead trigger changes
via SMI calls. This driver registers a generic backlight device that
lets existing userspace deal with it. Code derived from Julien Blache's
Pommed application.

Signed-off-by: Julien Blache <jb@jblache.org>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/video/backlight/Kconfig         |   9 ++
 drivers/video/backlight/Makefile        |   2 +
 drivers/video/backlight/mbp_nvidia_bl.c | 116 ++++++++++++++++++++++++
 3 files changed, 127 insertions(+)
 create mode 100644 drivers/video/backlight/mbp_nvidia_bl.c

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 30bf7f2f163..62547bd2ea4 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -119,3 +119,12 @@ config BACKLIGHT_PWM
 	help
 	  If you have a LCD backlight adjustable by PWM, say Y to enable
 	  this driver.
+
+config BACKLIGHT_MBP_NVIDIA
+       tristate "MacBook Pro Nvidia Backlight Driver"
+       depends on BACKLIGHT_CLASS_DEVICE && X86
+       default n
+       help
+         If you have an Apple Macbook Pro with Nvidia graphics hardware, say Y
+	 to enable a driver for its backlight.
+
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index b51a7cd1250..c7c4d95fdc1 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -11,3 +11,5 @@ obj-$(CONFIG_BACKLIGHT_OMAP1)	+= omap1_bl.o
 obj-$(CONFIG_BACKLIGHT_PROGEAR) += progear_bl.o
 obj-$(CONFIG_BACKLIGHT_CARILLO_RANCH) += cr_bllcd.o
 obj-$(CONFIG_BACKLIGHT_PWM)	+= pwm_bl.o
+obj-$(CONFIG_BACKLIGHT_MBP_NVIDIA) += mbp_nvidia_bl.o
+
diff --git a/drivers/video/backlight/mbp_nvidia_bl.c b/drivers/video/backlight/mbp_nvidia_bl.c
new file mode 100644
index 00000000000..385cba40ea8
--- /dev/null
+++ b/drivers/video/backlight/mbp_nvidia_bl.c
@@ -0,0 +1,116 @@
+/*
+ *  Backlight Driver for Nvidia 8600 in Macbook Pro
+ *
+ *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Based on code from Pommed:
+ *  Copyright (C) 2006 Nicolas Boichat <nicolas @boichat.ch>
+ *  Copyright (C) 2006 Felipe Alfaro Solana <felipe_alfaro @linuxmail.org>
+ *  Copyright (C) 2007 Julien BLACHE <jb@jblache.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This driver triggers SMIs which cause the firmware to change the
+ *  backlight brightness. This is icky in many ways, but it's impractical to
+ *  get at the firmware code in order to figure out what it's actually doing.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/backlight.h>
+#include <linux/err.h>
+#include <linux/dmi.h>
+#include <linux/io.h>
+
+static struct backlight_device *mbp_backlight_device;
+
+static struct dmi_system_id __initdata mbp_device_table[] = {
+	{
+		.ident = "3,1",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro3,1"),
+		},
+	},
+	{
+		.ident = "3,2",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro3,2"),
+		},
+	},
+	{
+		.ident = "4,1",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4,1"),
+		},
+	},
+	{ }
+};
+
+static int mbp_send_intensity(struct backlight_device *bd)
+{
+	int intensity = bd->props.brightness;
+
+	outb(0x04 | (intensity << 4), 0xb3);
+	outb(0xbf, 0xb2);
+
+	return 0;
+}
+
+static int mbp_get_intensity(struct backlight_device *bd)
+{
+	outb(0x03, 0xb3);
+	outb(0xbf, 0xb2);
+	return inb(0xb3) >> 4;
+}
+
+static struct backlight_ops mbp_ops = {
+	.get_brightness = mbp_get_intensity,
+	.update_status  = mbp_send_intensity,
+};
+
+static int __init mbp_init(void)
+{
+	if (!dmi_check_system(mbp_device_table))
+		return -ENODEV;
+
+	if (!request_region(0xb2, 2, "Macbook Pro backlight"))
+		return -ENXIO;
+
+	mbp_backlight_device = backlight_device_register("mbp_backlight",
+							 NULL, NULL,
+							 &mbp_ops);
+	if (IS_ERR(mbp_backlight_device)) {
+		release_region(0xb2, 2);
+		return PTR_ERR(mbp_backlight_device);
+	}
+
+	mbp_backlight_device->props.max_brightness = 15;
+	mbp_backlight_device->props.brightness =
+		mbp_get_intensity(mbp_backlight_device);
+	backlight_update_status(mbp_backlight_device);
+
+	return 0;
+}
+
+static void __exit mbp_exit(void)
+{
+	backlight_device_unregister(mbp_backlight_device);
+
+	release_region(0xb2, 2);
+}
+
+module_init(mbp_init);
+module_exit(mbp_exit);
+
+MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
+MODULE_DESCRIPTION("Nvidia-based Macbook Pro Backlight Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("svnAppleInc.:pnMacBookPro3,1");
+MODULE_ALIAS("svnAppleInc.:pnMacBookPro3,2");
+MODULE_ALIAS("svnAppleInc.:pnMacBookPro4,1");
-- 
GitLab


From f6ec2d96796d0accda6c325890206f3629130729 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Wed, 16 Jul 2008 23:05:49 +0100
Subject: [PATCH 073/853] backlight: Fix missing kernel doc entry

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/video/backlight/backlight.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 39394757679..fab0bc874b5 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -191,6 +191,7 @@ static struct device_attribute bl_device_attributes[] = {
  *   backlight_device class.
  * @name: the name of the new object(must be the same as the name of the
  *   respective framebuffer device).
+ * @parent: a pointer to the parent device
  * @devdata: an optional pointer to be stored for private driver use. The
  *   methods may retrieve it by using bl_get_data(bd).
  * @ops: the backlight operations structure.
-- 
GitLab


From 422037bafde8083acc3c539ceba3dfc60a04110c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 23 Jul 2008 11:16:38 +0200
Subject: [PATCH 074/853] sched: fix hrtick & generic-ipi dependency

Andrew Morton reported this s390 allmodconfig build failure:

 kernel/built-in.o: In function `hrtick_start_fair':
 sched.c:(.text+0x69c6): undefined reference to `__smp_call_function_single'

the reason is that s390 is not a generic-ipi SMP platform yet, while
the hrtick code relies on it. Fix the dependency.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/Kconfig.hz | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a84675..382dd5a8b2d 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
 	default 1000 if HZ_1000
 
 config SCHED_HRTICK
-	def_bool HIGH_RES_TIMERS
+	def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS
-- 
GitLab


From 36bd53d07243ae83c1b73bae549086cea2252854 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 23 Jul 2008 00:58:13 -0700
Subject: [PATCH 075/853] arch/mips/kernel/stacktrace.c: Heiko can't type

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/mips/kernel/stacktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c
index 702e2e92a1c..0632e2a849c 100644
--- a/arch/mips/kernel/stacktrace.c
+++ b/arch/mips/kernel/stacktrace.c
@@ -7,7 +7,7 @@
  */
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
-#include <linux/module.h
+#include <linux/module.h>
 #include <asm/stacktrace.h>
 
 /*
-- 
GitLab


From 95d04f0735b4fc837bff9aedcc3f3efb20ddc3d1 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 23 Jul 2008 08:12:26 -0700
Subject: [PATCH 076/853] IB/mlx4: Add support for memory management extensions
 and local DMA L_Key

Add support for the following operations to mlx4 when device firmware
supports them:

 - Send with invalidate and local invalidate send queue work requests;
 - Allocate/free fast register MRs;
 - Allocate/free fast register MR page lists;
 - Fast register MR send queue work requests;
 - Local DMA L_Key.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/cq.c      | 12 +++++
 drivers/infiniband/hw/mlx4/main.c    | 11 +++++
 drivers/infiniband/hw/mlx4/mlx4_ib.h | 15 ++++++
 drivers/infiniband/hw/mlx4/mr.c      | 70 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx4/qp.c      | 72 ++++++++++++++++++++++++++--
 drivers/net/mlx4/fw.c                | 10 ++--
 drivers/net/mlx4/fw.h                |  2 +-
 drivers/net/mlx4/main.c              |  2 +
 drivers/net/mlx4/mr.c                | 23 +++++++--
 include/linux/mlx4/device.h          | 10 ++++
 include/linux/mlx4/qp.h              | 16 +++++--
 11 files changed, 221 insertions(+), 22 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 299f20832ab..0b191a4842c 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -637,6 +637,7 @@ repoll:
 		case MLX4_OPCODE_SEND_IMM:
 			wc->wc_flags |= IB_WC_WITH_IMM;
 		case MLX4_OPCODE_SEND:
+		case MLX4_OPCODE_SEND_INVAL:
 			wc->opcode    = IB_WC_SEND;
 			break;
 		case MLX4_OPCODE_RDMA_READ:
@@ -657,6 +658,12 @@ repoll:
 		case MLX4_OPCODE_LSO:
 			wc->opcode    = IB_WC_LSO;
 			break;
+		case MLX4_OPCODE_FMR:
+			wc->opcode    = IB_WC_FAST_REG_MR;
+			break;
+		case MLX4_OPCODE_LOCAL_INVAL:
+			wc->opcode    = IB_WC_LOCAL_INV;
+			break;
 		}
 	} else {
 		wc->byte_len = be32_to_cpu(cqe->byte_cnt);
@@ -667,6 +674,11 @@ repoll:
 			wc->wc_flags	= IB_WC_WITH_IMM;
 			wc->ex.imm_data = cqe->immed_rss_invalid;
 			break;
+		case MLX4_RECV_OPCODE_SEND_INVAL:
+			wc->opcode	= IB_WC_RECV;
+			wc->wc_flags	= IB_WC_WITH_INVALIDATE;
+			wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
+			break;
 		case MLX4_RECV_OPCODE_SEND:
 			wc->opcode   = IB_WC_RECV;
 			wc->wc_flags = 0;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bcf50648fa1..38d6907ab52 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -104,6 +104,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 	if (dev->dev->caps.max_gso_sz)
 		props->device_cap_flags |= IB_DEVICE_UD_TSO;
+	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
+		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+	if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
+	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
+	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
+		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 
 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 		0xffffff;
@@ -127,6 +133,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->max_srq		   = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
 	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
 	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
+	props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64);
 	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
 		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
@@ -565,6 +572,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
 	ibdev->ib_dev.owner		= THIS_MODULE;
 	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
+	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
 	ibdev->ib_dev.phys_port_cnt	= dev->caps.num_ports;
 	ibdev->ib_dev.num_comp_vectors	= 1;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
@@ -627,6 +635,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.get_dma_mr	= mlx4_ib_get_dma_mr;
 	ibdev->ib_dev.reg_user_mr	= mlx4_ib_reg_user_mr;
 	ibdev->ib_dev.dereg_mr		= mlx4_ib_dereg_mr;
+	ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+	ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
+	ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
 	ibdev->ib_dev.attach_mcast	= mlx4_ib_mcg_attach;
 	ibdev->ib_dev.detach_mcast	= mlx4_ib_mcg_detach;
 	ibdev->ib_dev.process_mad	= mlx4_ib_process_mad;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c4cf5b69eef..d26a91317d4 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -83,6 +83,11 @@ struct mlx4_ib_mr {
 	struct ib_umem	       *umem;
 };
 
+struct mlx4_ib_fast_reg_page_list {
+	struct ib_fast_reg_page_list	ibfrpl;
+	dma_addr_t			map;
+};
+
 struct mlx4_ib_fmr {
 	struct ib_fmr           ibfmr;
 	struct mlx4_fmr         mfmr;
@@ -199,6 +204,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
 	return container_of(ibmr, struct mlx4_ib_mr, ibmr);
 }
 
+static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{
+	return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
+}
+
 static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
 {
 	return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -239,6 +249,11 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+					int max_page_list_len);
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+							       int page_list_len);
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
 
 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 68e92485fc7..db2086faa4e 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -183,6 +183,76 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 	return 0;
 }
 
+/* Allocate and enable a fast register MR; zeroed so mr->umem is NULL. */
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+					int max_page_list_len)
+{
+	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct mlx4_ib_mr *mr;
+	int err;
+
+	mr = kzalloc(sizeof *mr, GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
+			    max_page_list_len, 0, &mr->mmr);
+	if (err)
+		goto err_free;
+
+	err = mlx4_mr_enable(dev->dev, &mr->mmr);
+	if (err)
+		goto err_mr;
+
+	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+	return &mr->ibmr;
+err_mr:
+	mlx4_mr_free(dev->dev, &mr->mmr);
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+							       int page_list_len)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibdev);
+	struct mlx4_ib_fast_reg_page_list *mfrpl;
+	int size = page_list_len * sizeof (u64);
+
+	if (size > PAGE_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
+	if (!mfrpl)
+		return ERR_PTR(-ENOMEM);
+
+	mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+						     size, &mfrpl->map,
+						     GFP_KERNEL);
+	if (!mfrpl->ibfrpl.page_list)
+		goto err_free;
+
+	WARN_ON(mfrpl->map & 0x3f);
+
+	return &mfrpl->ibfrpl;
+
+err_free:
+	kfree(mfrpl);
+	return ERR_PTR(-ENOMEM);
+}
+
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+	struct mlx4_ib_dev *dev = to_mdev(page_list->device);
+	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+	int size = page_list->max_page_list_len * sizeof (u64);
+
+	dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list,
+			  mfrpl->map);
+	kfree(mfrpl);
+}
+
 struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
 				 struct ib_fmr_attr *fmr_attr)
 {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index bda0859a5ac..02a99bc4442 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -78,6 +78,9 @@ static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_RDMA_READ]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
 	[IB_WR_ATOMIC_CMP_AND_SWP]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
 	[IB_WR_ATOMIC_FETCH_AND_ADD]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+	[IB_WR_SEND_WITH_INV]		= __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+	[IB_WR_LOCAL_INV]		= __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+	[IB_WR_FAST_REG_MR]		= __constant_cpu_to_be32(MLX4_OPCODE_FMR),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -976,6 +979,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	context->pd	    = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
 	context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
 
+	/* Set "fast registration enabled" for all kernel QPs */
+	if (!qp->ibqp.uobject)
+		context->params1 |= cpu_to_be32(1 << 11);
+
 	if (attr_mask & IB_QP_RNR_RETRY) {
 		context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
 		optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
@@ -1322,6 +1329,38 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
 	return cur + nreq >= wq->max_post;
 }
 
+static __be32 convert_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC)       : 0) |
+	       (acc & IB_ACCESS_REMOTE_WRITE  ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ   ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ)  : 0) |
+	       (acc & IB_ACCESS_LOCAL_WRITE   ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE)  : 0) |
+		cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
+}
+
+static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+{
+	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
+
+	fseg->flags		= convert_access(wr->wr.fast_reg.access_flags);
+	fseg->mem_key		= cpu_to_be32(wr->wr.fast_reg.rkey);
+	fseg->buf_list		= cpu_to_be64(mfrpl->map);
+	fseg->start_addr	= cpu_to_be64(wr->wr.fast_reg.iova_start);
+	fseg->reg_len		= cpu_to_be64(wr->wr.fast_reg.length);
+	fseg->offset		= 0; /* XXX -- is this just for ZBVA? */
+	fseg->page_size		= cpu_to_be32(wr->wr.fast_reg.page_shift);
+	fseg->reserved[0]	= 0;
+	fseg->reserved[1]	= 0;
+}
+
+static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
+{
+	iseg->flags	= 0;
+	iseg->mem_key	= cpu_to_be32(rkey);
+	iseg->guest_id	= 0;
+	iseg->pa	= 0;
+}
+
 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
 					  u64 remote_addr, u32 rkey)
 {
@@ -1423,6 +1462,21 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
+static __be32 send_ieth(struct ib_send_wr *wr)
+{
+	switch (wr->opcode) {
+	case IB_WR_SEND_WITH_IMM:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		return wr->ex.imm_data;
+
+	case IB_WR_SEND_WITH_INV:
+		return cpu_to_be32(wr->ex.invalidate_rkey);
+
+	default:
+		return 0;
+	}
+}
+
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		      struct ib_send_wr **bad_wr)
 {
@@ -1469,11 +1523,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				     MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
 			qp->sq_signal_bits;
 
-		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
-		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-			ctrl->imm = wr->ex.imm_data;
-		else
-			ctrl->imm = 0;
+		ctrl->imm = send_ieth(wr);
 
 		wqe += sizeof *ctrl;
 		size = sizeof *ctrl / 16;
@@ -1505,6 +1555,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
 				break;
 
+			case IB_WR_LOCAL_INV:
+				set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
+				wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
+				size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
+				break;
+
+			case IB_WR_FAST_REG_MR:
+				set_fmr_seg(wqe, wr);
+				wqe  += sizeof (struct mlx4_wqe_fmr_seg);
+				size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+				break;
+
 			default:
 				/* No extra segments required for sends */
 				break;
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 0851ebdddfd..57278224ba1 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -202,7 +202,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET	0x8e
 #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET	0x90
 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET	0x92
-#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET		0x97
+#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET		0x94
 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET		0x98
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET		0xa0
 
@@ -377,12 +377,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		}
 	}
 
-	if (dev_cap->bmme_flags & 1)
-		mlx4_dbg(dev, "Base MM extensions: yes "
-			 "(flags %d, rsvd L_Key %08x)\n",
-			 dev_cap->bmme_flags, dev_cap->reserved_lkey);
-	else
-		mlx4_dbg(dev, "Base MM extensions: no\n");
+	mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
+		 dev_cap->bmme_flags, dev_cap->reserved_lkey);
 
 	/*
 	 * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index a0e046c149b..fbf0e22be12 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -98,7 +98,7 @@ struct mlx4_dev_cap {
 	int cmpt_entry_sz;
 	int mtt_entry_sz;
 	int resize_srq;
-	u8  bmme_flags;
+	u32 bmme_flags;
 	u32 reserved_lkey;
 	u64 max_icm_sz;
 	int max_gso_sz;
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index d3736013fe9..8e1d24cda1b 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -158,6 +158,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags		     = dev_cap->flags;
+	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
+	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index b3ea93b9868..a3c04c5f12c 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -47,7 +47,7 @@ struct mlx4_mpt_entry {
 	__be32 flags;
 	__be32 qpn;
 	__be32 key;
-	__be32 pd;
+	__be32 pd_flags;
 	__be64 start;
 	__be64 length;
 	__be32 lkey;
@@ -61,11 +61,15 @@ struct mlx4_mpt_entry {
 } __attribute__((packed));
 
 #define MLX4_MPT_FLAG_SW_OWNS	    (0xfUL << 28)
+#define MLX4_MPT_FLAG_FREE	    (0x3UL << 28)
 #define MLX4_MPT_FLAG_MIO	    (1 << 17)
 #define MLX4_MPT_FLAG_BIND_ENABLE   (1 << 15)
 #define MLX4_MPT_FLAG_PHYSICAL	    (1 <<  9)
 #define MLX4_MPT_FLAG_REGION	    (1 <<  8)
 
+#define MLX4_MPT_PD_FLAG_FAST_REG   (1 << 26)
+#define MLX4_MPT_PD_FLAG_EN_INV	    (3 << 24)
+
 #define MLX4_MTT_FLAG_PRESENT		1
 
 #define MLX4_MPT_STATUS_SW		0xF0
@@ -324,21 +328,30 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 
 	memset(mpt_entry, 0, sizeof *mpt_entry);
 
-	mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS	 |
-				       MLX4_MPT_FLAG_MIO	 |
+	mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO	 |
 				       MLX4_MPT_FLAG_REGION	 |
 				       mr->access);
 
 	mpt_entry->key	       = cpu_to_be32(key_to_hw_index(mr->key));
-	mpt_entry->pd	       = cpu_to_be32(mr->pd);
+	mpt_entry->pd_flags    = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV);
 	mpt_entry->start       = cpu_to_be64(mr->iova);
 	mpt_entry->length      = cpu_to_be64(mr->size);
 	mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
 	if (mr->mtt.order < 0) {
 		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
 		mpt_entry->mtt_seg = 0;
-	} else
+	} else {
 		mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
+	}
+
+	if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
+		/* fast register MR in free state */
+		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
+		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG);
+	} else {
+		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
+	}
 
 	err = mlx4_SW2HW_MPT(dev, mailbox,
 			     key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 81b3dd5206e..655ea0d1ee1 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -68,6 +68,14 @@ enum {
 	MLX4_DEV_CAP_FLAG_UD_MCAST	= 1 << 21
 };
 
+enum {
+	MLX4_BMME_FLAG_LOCAL_INV	= 1 <<  6,
+	MLX4_BMME_FLAG_REMOTE_INV	= 1 <<  7,
+	MLX4_BMME_FLAG_TYPE_2_WIN	= 1 <<  9,
+	MLX4_BMME_FLAG_RESERVED_LKEY	= 1 << 10,
+	MLX4_BMME_FLAG_FAST_REG_WR	= 1 << 11,
+};
+
 enum mlx4_event {
 	MLX4_EVENT_TYPE_COMP		   = 0x00,
 	MLX4_EVENT_TYPE_PATH_MIG	   = 0x01,
@@ -184,6 +192,8 @@ struct mlx4_caps {
 	u32			max_msg_sz;
 	u32			page_size_cap;
 	u32			flags;
+	u32			bmme_flags;
+	u32			reserved_lkey;
 	u16			stat_rate_support;
 	u8			port_width_cap[MLX4_MAX_PORTS + 1];
 	int			max_gso_sz;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index f02e9ed36cf..e27082cd650 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -233,6 +233,14 @@ struct mlx4_wqe_bind_seg {
 	__be64			length;
 };
 
+enum {
+	MLX4_WQE_FMR_PERM_LOCAL_READ	= 1 << 27,
+	MLX4_WQE_FMR_PERM_LOCAL_WRITE	= 1 << 28,
+	MLX4_WQE_FMR_PERM_REMOTE_READ	= 1 << 29,
+	MLX4_WQE_FMR_PERM_REMOTE_WRITE	= 1 << 30,
+	MLX4_WQE_FMR_PERM_ATOMIC	= 1 << 31
+};
+
 struct mlx4_wqe_fmr_seg {
 	__be32			flags;
 	__be32			mem_key;
@@ -255,11 +263,11 @@ struct mlx4_wqe_fmr_ext_seg {
 };
 
 struct mlx4_wqe_local_inval_seg {
-	u8			flags;
-	u8			reserved1[3];
+	__be32			flags;
+	u32			reserved1;
 	__be32			mem_key;
-	u8			reserved2[3];
-	u8			guest_id;
+	u32			reserved2[2];
+	__be32			guest_id;
 	__be64			pa;
 };
 
-- 
GitLab


From 76442640829163d0cdb67c2bf0cb4b81a0fe537b Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 23 Jul 2008 08:12:47 -0700
Subject: [PATCH 077/853] mlx4_core: Improve error message when not enough UAR
 pages are available

If an mlx4 device with default FW (which gives a UAR BAR size of 8 MB)
is used in a system with 64 KB pages, then there are only 8192/64==128
UAR pages available.  However, the first 128 UAR pages are reserved
for use with event queue doorbells, so no UAR pages are available to
do anything else with, which means that the driver cannot work.

The current driver fails with a fairly cryptic "Failed to allocate
driver access region, aborting" message in this situation.  Fix the
driver to detect the problem earlier and print out a clearer
description of the problem and a suggestion of how to fix it (use a
new firmware image).

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/pd.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index 3a93c5f0f7a..aa616892d09 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c
@@ -91,6 +91,13 @@ EXPORT_SYMBOL_GPL(mlx4_uar_free);
 
 int mlx4_init_uar_table(struct mlx4_dev *dev)
 {
+	if (dev->caps.num_uars <= 128) {
+		mlx4_err(dev, "Only %d UAR pages (need more than 128)\n",
+			 dev->caps.num_uars);
+		mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
+		return -ENODEV;
+	}
+
 	return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
 				dev->caps.num_uars, dev->caps.num_uars - 1,
 				max(128, dev->caps.reserved_uars));
-- 
GitLab


From 5b3ab1dbd401b36ba2f9bfee2d2dae252fd62cd8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 23 Jul 2008 14:01:29 -0700
Subject: [PATCH 078/853] netdev: Remove warning from __netif_schedule().

It isn't helping anything and we aren't going to be able to change all
the drivers that do queue wakeups in strange situations.

Just letting a noop_qdisc get scheduled will work because when
qdisc_run() executes via net_tx_work() it will simply find no packets
pending when it makes the ->dequeue() call in qdisc_restart.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 6bf217da9d8..ccf97f9f37e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1341,9 +1341,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 void __netif_schedule(struct Qdisc *q)
 {
-	if (WARN_ON_ONCE(q == &noop_qdisc))
-		return;
-
 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
 		struct softnet_data *sd;
 		unsigned long flags;
-- 
GitLab


From b4942af65028c5eb516fdd9053020ccb2ee186ce Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Wed, 23 Jul 2008 14:06:04 -0700
Subject: [PATCH 079/853] net: Update entry in af_family_clock_key_strings

In the merge phase of the CAN subsystem the
af_family_clock_key_strings[] have been added to sock.c in commit
443aef0eddfa44c158d1b94ebb431a70638fcab4
(lockdep: fixup sk_callback_lock annotation). This trivial patch adds
the missing name for address family 29 (AF_CAN).

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 10a64d57078..91f8bbc9352 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -180,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
   "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
-  "clock-27"       , "clock-28"          , "clock-29"          ,
+  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
   "clock-AF_RXRPC" , "clock-AF_MAX"
 };
-- 
GitLab


From 1fa6d8181b7bb0361512170c30e436dcc95591ee Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 23 Jul 2008 14:20:12 -0700
Subject: [PATCH 080/853] MAINTAINERS: Remove Glenn Streiff from NetEffect
 entry

Glenn is no longer at NetEffect.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 11944b44c2f..2a73da0cd07 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2915,8 +2915,6 @@ P:	Faisal Latif
 M:	flatif@neteffect.com
 P:	Chien Tung
 M:	ctung@neteffect.com
-P:	Glenn Streiff
-M:	gstreiff@neteffect.com
 L:	general@lists.openfabrics.org
 W:	http://www.neteffect.com
 S:	Supported
-- 
GitLab


From e8ebe3b893792887317bc24cc4608753f81b81d3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 23 Jul 2008 15:30:52 -0700
Subject: [PATCH 081/853] e1000e: fix e1000_netpoll(), remove extraneous
 e1000_clean_tx_irq() call

Evgeniy Polyakov noticed that drivers/net/e1000e/netdev.c:e1000_netpoll()
was calling e1000_clean_tx_irq() without taking the TX lock.

David Miller suggested removing the call altogether, since in this
call path there are periodic calls to ->poll() anyway, which will do
e1000_clean_tx_irq() and garbage-collect any finished TX ring
descriptors.

This fix solved the e1000e+netconsole crashes I've been seeing:

=============================================================================
BUG skbuff_head_cache: Poison overwritten
-----------------------------------------------------------------------------

INFO: 0xf658ae9c-0xf658ae9c. First byte 0x6a instead of 0x6b
INFO: Allocated in __alloc_skb+0x2c/0x110 age=0 cpu=0 pid=5098
INFO: Freed in __kfree_skb+0x31/0x80 age=0 cpu=1 pid=4440
INFO: Slab 0xc16cc140 objects=16 used=1 fp=0xf658ae00 flags=0x400000c3
INFO: Object 0xf658ae00 @offset=3584 fp=0xf658af00

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/e1000e/netdev.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 869544b8c05..9c0f56b3c51 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -4067,8 +4067,6 @@ static void e1000_netpoll(struct net_device *netdev)
 	disable_irq(adapter->pdev->irq);
 	e1000_intr(adapter->pdev->irq, netdev);
 
-	e1000_clean_tx_irq(adapter);
-
 	enable_irq(adapter->pdev->irq);
 }
 #endif
-- 
GitLab


From 7ae93f51d7fa8b9130d47e0b7d17979a165c5bc3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 23 Jul 2008 16:21:07 -0700
Subject: [PATCH 082/853] sparc64: Fix cpufreq notifier registry.

Based upon a report by Daniel Smolik.

We register the cpufreq notifier too early (from time_init()), which
triggers a BUG in cpufreq_register_notifier().  Do the registration
from a core_initcall instead.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/time.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index bedc4c159b1..a0c6a97eec6 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -884,6 +884,16 @@ static struct notifier_block sparc64_cpufreq_notifier_block = {
 	.notifier_call	= sparc64_cpufreq_notifier
 };
 
+static int __init register_sparc64_cpufreq_notifier(void)
+{
+
+	cpufreq_register_notifier(&sparc64_cpufreq_notifier_block,
+				  CPUFREQ_TRANSITION_NOTIFIER);
+	return 0;
+}
+
+core_initcall(register_sparc64_cpufreq_notifier);
+
 #endif /* CONFIG_CPU_FREQ */
 
 static int sparc64_next_event(unsigned long delta,
@@ -1050,11 +1060,6 @@ void __init time_init(void)
 	       sparc64_clockevent.mult, sparc64_clockevent.shift);
 
 	setup_sparc64_timer();
-
-#ifdef CONFIG_CPU_FREQ
-	cpufreq_register_notifier(&sparc64_cpufreq_notifier_block,
-				  CPUFREQ_TRANSITION_NOTIFIER);
-#endif
 }
 
 unsigned long long sched_clock(void)
-- 
GitLab


From 4b53fb67e385b856a991d402096379dab462170a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 23 Jul 2008 16:38:45 -0700
Subject: [PATCH 083/853] tcp: Clear probes_out more aggressively in tcp_ack().

This is based upon an excellent bug report from Eric Dumazet.

tcp_ack() should clear ->icsk_probes_out even if there are packets
outstanding.  Otherwise if we get a sequence of ACKs while we do have
packets outstanding over and over again, we'll never clear the
probes_out value and eventually think the connection is too sick and
we'll reset it.

This appears to be some "optimization" added to tcp_ack() in the 2.4.x
timeframe.  In 2.2.x, probes_out is pretty much always cleared by
tcp_ack().

Here is Eric's original report:

----------------------------------------
Apparently, we can in some situations reset TCP connections in a couple of seconds when some frames are lost.

In order to reproduce the problem, please try the following program on linux-2.6.25.*

Setup some iptables rules to allow two frames per second sent on loopback interface to tcp destination port 12000

iptables -N SLOWLO
iptables -A SLOWLO -m hashlimit --hashlimit 2 --hashlimit-burst 1 --hashlimit-mode dstip --hashlimit-name slow2 -j ACCEPT
iptables -A SLOWLO -j DROP

iptables -A OUTPUT -o lo -p tcp --dport 12000 -j SLOWLO

Then run the attached program and see the output :

# ./loop
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,1)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,3)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,5)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,7)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,9)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,11)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,201ms,13)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,188ms,15)
write(): Connection timed out
wrote 890 bytes but was interrupted after 9 seconds
ESTAB      0      0                 127.0.0.1:12000            127.0.0.1:54455
Exiting read() because no data available (4000 ms timeout).
read 860 bytes

While this TCP session makes progress (sending frames with 50 bytes of payload every 500 ms), the Linux TCP stack decides to reset it when tcp_retries2 is reached (default value: 15).

tcpdump :

15:30:28.856695 IP 127.0.0.1.56554 > 127.0.0.1.12000: S 33788768:33788768(0) win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
15:30:28.856711 IP 127.0.0.1.12000 > 127.0.0.1.56554: S 33899253:33899253(0) ack 33788769 win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
15:30:29.356947 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 1:61(60) ack 1 win 257
15:30:29.356966 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 61 win 257
15:30:29.866415 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 61:111(50) ack 1 win 257
15:30:29.866427 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 111 win 257
15:30:30.366516 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 111:161(50) ack 1 win 257
15:30:30.366527 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 161 win 257
15:30:30.876196 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 161:211(50) ack 1 win 257
15:30:30.876207 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 211 win 257
15:30:31.376282 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 211:261(50) ack 1 win 257
15:30:31.376290 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 261 win 257
15:30:31.885619 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 261:311(50) ack 1 win 257
15:30:31.885631 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 311 win 257
15:30:32.385705 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 311:361(50) ack 1 win 257
15:30:32.385715 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 361 win 257
15:30:32.895249 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 361:411(50) ack 1 win 257
15:30:32.895266 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 411 win 257
15:30:33.395341 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 411:461(50) ack 1 win 257
15:30:33.395351 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 461 win 257
15:30:33.918085 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 461:511(50) ack 1 win 257
15:30:33.918096 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 511 win 257
15:30:34.418163 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 511:561(50) ack 1 win 257
15:30:34.418172 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 561 win 257
15:30:34.927685 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 561:611(50) ack 1 win 257
15:30:34.927698 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 611 win 257
15:30:35.427757 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 611:661(50) ack 1 win 257
15:30:35.427766 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 661 win 257
15:30:35.937359 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 661:711(50) ack 1 win 257
15:30:35.937376 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 711 win 257
15:30:36.437451 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 711:761(50) ack 1 win 257
15:30:36.437464 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 761 win 257
15:30:36.947022 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 761:811(50) ack 1 win 257
15:30:36.947039 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 811 win 257
15:30:37.447135 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 811:861(50) ack 1 win 257
15:30:37.447203 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 861 win 257
15:30:41.448171 IP 127.0.0.1.12000 > 127.0.0.1.56554: F 1:1(0) ack 861 win 257
15:30:41.448189 IP 127.0.0.1.56554 > 127.0.0.1.12000: R 33789629:33789629(0) win 0

Source of program :

/*
 * small producer/consumer program.
 * setup a listener on 127.0.0.1:12000
 * Forks a child
 *   child connect to 127.0.0.1, and sends 10 bytes on this tcp socket every 100 ms
 * Father accepts connection, and read all data
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <stdio.h>
#include <time.h>
#include <sys/poll.h>

int port = 12000;
char buffer[4096];
/*
 * Reproducer entry point.
 *
 * Creates a listening TCP socket on 127.0.0.1:port, then forks:
 *   - the child connects to the listener and writes 10 bytes per loop
 *     iteration, sleeping 100 ms between writes, until write() fails or
 *     returns 0; it then reports how many bytes it wrote and how long it ran;
 *   - the parent accepts the connection and reads until EOF or until
 *     poll() reports 4000 ms without data, then reports the bytes read.
 *
 * argc/argv are unused.  Returns 0 on a normal run and 1 when socket
 * setup, connect() or accept() fails.
 */
int main(int argc, char *argv[])
{
        int lfd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in socket_address;
        time_t t0, t1;
        int on = 1, sfd, res;
        unsigned long total = 0;
        socklen_t alen = sizeof(socket_address);
        pid_t pid;

        /* Start timestamp; the child uses it to print the elapsed run time. */
        time(&t0);
        /*
         * Only family, port and address are filled in; the remaining bytes
         * of socket_address are left uninitialized.
         */
        socket_address.sin_family = AF_INET;
        socket_address.sin_port = htons(port);
        socket_address.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

        /* The socket() result from the declaration above is checked here. */
        if (lfd == -1) {
                perror("socket()");
                return 1;
        }
        /* Best effort: the setsockopt() return value is not checked. */
        setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int));
        if (bind(lfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) {
                perror("bind");
                close(lfd);
                return 1;
        }
        if (listen(lfd, 1) == -1) {
                perror("listen()");
                close(lfd);
                return 1;
        }
        /* NOTE(review): a fork() failure (-1) is not handled; that case falls through to the parent path. */
        pid = fork();
        if (pid == 0) {
                /* Child: the producer.  The cfd socket() result is not checked. */
                int i, cfd = socket(AF_INET, SOCK_STREAM, 0);
                close(lfd);
                if (connect(cfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) {
                        perror("connect()");
                        return 1;
                        }
                /* No loop condition: the loop ends only via the breaks below. */
                for (i = 0 ; ;) {
                        res = write(cfd, "blablabla\n", 10);
                        if (res > 0) total += res;
                        else if (res == -1) {
                                perror("write()");
                                break;
                        } else break;
                        /* 100000 us = 100 ms pause between writes. */
                        usleep(100000);
                        /* After every 10th write, dump the socket state with ss and restart the count. */
                        if (++i == 10) {
                                system("ss -on dst 127.0.0.1:12000");
                                i = 0;
                        }
                }
                time(&t1);
                fprintf(stderr, "wrote %lu bytes but was interrupted after %g seconds\n", total, difftime(t1, t0));
                system("ss -on | grep 127.0.0.1:12000");
                close(cfd);
                return 0;
        }
        /*
         * Parent: the consumer.  socket_address is reused as the output
         * buffer for the peer address filled in by accept().
         */
        sfd = accept(lfd, (struct sockaddr *)&socket_address, &alen);
        if (sfd == -1) {
                perror("accept");
                return 1;
        }
        close(lfd);
        while (1) {
                struct pollfd pfd[1];
                pfd[0].fd = sfd;
                pfd[0].events = POLLIN;
                /*
                 * Only a poll() result of 0 (timeout) ends the loop here; any
                 * other result, including -1, proceeds to read().
                 */
                if (poll(pfd, 1, 4000) == 0) {
                        fprintf(stderr, "Exiting read() because no data available (4000 ms timeout).\n");
                        break;
                }
                res = read(sfd, buffer, sizeof(buffer));
                if (res > 0) total += res;
                else if (res == 0) break;
                /* A read() error is reported but does not leave the loop. */
                else perror("read()");
        }
        fprintf(stderr, "read %lu bytes\n", total);
        close(sfd);
        return 0;
}
----------------------------------------

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1f5e6049883..75efd244f2a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3292,6 +3292,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	 * log. Something worked...
 	 */
 	sk->sk_err_soft = 0;
+	icsk->icsk_probes_out = 0;
 	tp->rcv_tstamp = tcp_time_stamp;
 	prior_packets = tp->packets_out;
 	if (!prior_packets)
@@ -3324,8 +3325,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	return 1;
 
 no_queue:
-	icsk->icsk_probes_out = 0;
-
 	/* If this ack opens up a zero window, clear backoff.  It was
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
-- 
GitLab


From 70eed75d76635ba7350651b9bd96529a306ec67a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jul 2008 16:42:42 -0700
Subject: [PATCH 084/853] netfilter: make security table depend on
 NETFILTER_ADVANCED

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig | 2 +-
 net/ipv6/netfilter/Kconfig | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f23e60c93ef..90eb7cb47e7 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -369,7 +369,7 @@ config IP_NF_SECURITY
 	tristate "Security table"
 	depends on IP_NF_IPTABLES
 	depends on SECURITY
-	default m if NETFILTER_ADVANCED=n
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `security' table to iptables, for use
 	  with Mandatory Access Control (MAC) policy.
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 689dec899c5..0cfcce7b18d 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -213,7 +213,7 @@ config IP6_NF_SECURITY
        tristate "Security table"
        depends on IP6_NF_IPTABLES
        depends on SECURITY
-       default m if NETFILTER_ADVANCED=n
+       depends on NETFILTER_ADVANCED
        help
          This option adds a `security' table to iptables, for use
          with Mandatory Access Control (MAC) policy.
-- 
GitLab


From 0855b543222e79cbbd9d66dd56cb54740e7d524f Mon Sep 17 00:00:00 2001
From: Andre Detsch <adetsch@br.ibm.com>
Date: Thu, 24 Jul 2008 10:57:26 +1000
Subject: [PATCH 085/853] powerpc/spufs: fix aff_mutex and
 cbe_spu_info[n].list_mutex deadlock

Currently, it is possible to lock aff_mutex and
cbe_spu_info[n].list_mutex in different orders, allowing a deadlock to
occur. With this change, aff_mutex is not taken within a list_mutex
critical section anymore.

Signed-off-by: Andre Detsch <adetsch@br.ibm.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
---
 arch/powerpc/platforms/cell/spufs/sched.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 34654743363..f293963cd85 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -389,6 +389,9 @@ static int has_affinity(struct spu_context *ctx)
 	if (list_empty(&ctx->aff_list))
 		return 0;
 
+	if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+		ctx->gang->aff_ref_spu = NULL;
+
 	if (!gang->aff_ref_spu) {
 		if (!(gang->aff_flags & AFF_MERGED))
 			aff_merge_remaining_ctxs(gang);
@@ -416,14 +419,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
  	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
 		atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
 
-	if (ctx->gang){
-		mutex_lock(&ctx->gang->aff_mutex);
-		if (has_affinity(ctx)) {
-			if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
-				ctx->gang->aff_ref_spu = NULL;
-		}
-		mutex_unlock(&ctx->gang->aff_mutex);
-	}
+	if (ctx->gang)
+		atomic_dec_if_positive(&ctx->gang->aff_sched_count);
 
 	spu_switch_notify(spu, NULL);
 	spu_unmap_mappings(ctx);
@@ -562,10 +559,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
 				goto found;
 			mutex_unlock(&cbe_spu_info[node].list_mutex);
 
-			mutex_lock(&ctx->gang->aff_mutex);
-			if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
-				ctx->gang->aff_ref_spu = NULL;
-			mutex_unlock(&ctx->gang->aff_mutex);
+			atomic_dec(&ctx->gang->aff_sched_count);
 			goto not_found;
 		}
 		mutex_unlock(&ctx->gang->aff_mutex);
-- 
GitLab


From ad1ede127760d6ca4903f44dfe1a8a38b3bfb36c Mon Sep 17 00:00:00 2001
From: Andre Detsch <adetsch@br.ibm.com>
Date: Thu, 24 Jul 2008 11:01:54 +1000
Subject: [PATCH 086/853] powerpc/spufs: better placement of spu affinity
 reference context

This patch adjusts the placement of a reference context from
a spu affinity chain. The reference context can now be placed
only on nodes that have enough spus not intended to be used by
another gang (already running on the node).

Signed-off-by: Andre Detsch <adetsch@br.ibm.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
---
 arch/powerpc/platforms/cell/spufs/sched.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index f293963cd85..2deeeba7ecc 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -312,10 +312,27 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
 	 */
 	node = cpu_to_node(raw_smp_processor_id());
 	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		int available_spus;
+
 		node = (node < MAX_NUMNODES) ? node : 0;
 		if (!node_allowed(ctx, node))
 			continue;
+
+		available_spus = 0;
 		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->ctx && spu->ctx->gang
+					&& spu->ctx->aff_offset == 0)
+				available_spus -=
+					(spu->ctx->gang->contexts - 1);
+			else
+				available_spus++;
+		}
+		if (available_spus < ctx->gang->contexts) {
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+			continue;
+		}
+
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			if ((!mem_aff || spu->has_mem_affinity) &&
 							sched_spu(spu)) {
-- 
GitLab


From 6f75a9b6426e686649ac440c37ec7c249501f9a5 Mon Sep 17 00:00:00 2001
From: Chas Williams <chas@cmf.nrl.navy.mil>
Date: Wed, 23 Jul 2008 20:29:21 -0700
Subject: [PATCH 087/853] atm: [fore200e] use MODULE_FIRMWARE() and other
 suggested cleanups

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/fore200e.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index d5c1bbfbe79..73338d231db 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -2562,7 +2562,8 @@ fore200e_load_and_start_fw(struct fore200e* fore200e)
     const struct firmware *firmware;
     struct device *device;
     struct fw_header *fw_header;
-    u32 *fw_data, fw_size;
+    const __le32 *fw_data;
+    u32 fw_size;
     u32 __iomem *load_addr;
     char buf[48];
     int err = -ENODEV;
@@ -2582,7 +2583,7 @@ fore200e_load_and_start_fw(struct fore200e* fore200e)
 	return err;
     }
 
-    fw_data = (u32 *) firmware->data;
+    fw_data = (__le32 *) firmware->data;
     fw_size = firmware->size / sizeof(u32);
     fw_header = (struct fw_header *) firmware->data;
     load_addr = fore200e->virt_base + le32_to_cpu(fw_header->load_offset);
@@ -3199,6 +3200,14 @@ static const struct fore200e_bus fore200e_bus[] = {
     {}
 };
 
-#ifdef MODULE_LICENSE
 MODULE_LICENSE("GPL");
+#ifdef CONFIG_PCI
+#ifdef __LITTLE_ENDIAN__
+MODULE_FIRMWARE("pca200e.bin");
+#else
+MODULE_FIRMWARE("pca200e_ecd.bin2");
+#endif
+#endif /* CONFIG_PCI */
+#ifdef CONFIG_SBUS
+MODULE_FIRMWARE("sba200e_ecd.bin2");
 #endif
-- 
GitLab


From f867e6af94239a04ec23aeec2fcda5aa58e41db7 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 23 Jul 2008 21:34:27 -0700
Subject: [PATCH 088/853] pkt_sched: sch_sfq: dump a real number of flows

Dump the "flows" number according to the number of active flows
instead of repeating the "limit".

Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8589da66656..73f53844ce9 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -536,7 +536,14 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	opt.limit = q->limit;
 	opt.divisor = SFQ_HASH_DIVISOR;
-	opt.flows = q->limit;
+	opt.flows = 0;
+	if (q->tail != SFQ_DEPTH) {
+		unsigned int i;
+
+		for (i = 0; i < SFQ_HASH_DIVISOR; i++)
+			if (q->ht[i] != SFQ_DEPTH)
+				opt.flows++;
+	}
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
-- 
GitLab


From 979c9296bdcfded58ebac41905c3397317df0355 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 14 May 2008 16:10:33 +0300
Subject: [PATCH 089/853] UBI: print error code

Print error code if checking failed which is very useful
to identify problems.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/vtbl.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index af36b12be27..3c4d68f2cfd 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -127,7 +127,7 @@ static int vtbl_check(const struct ubi_device *ubi,
 		      const struct ubi_vtbl_record *vtbl)
 {
 	int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len;
-	int upd_marker;
+	int upd_marker, err;
 	uint32_t crc;
 	const char *name;
 
@@ -153,7 +153,7 @@ static int vtbl_check(const struct ubi_device *ubi,
 		if (reserved_pebs == 0) {
 			if (memcmp(&vtbl[i], &empty_vtbl_record,
 						UBI_VTBL_RECORD_SIZE)) {
-				dbg_err("bad empty record");
+				err = 2;
 				goto bad;
 			}
 			continue;
@@ -161,56 +161,57 @@ static int vtbl_check(const struct ubi_device *ubi,
 
 		if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 ||
 		    name_len < 0) {
-			dbg_err("negative values");
+			err = 3;
 			goto bad;
 		}
 
 		if (alignment > ubi->leb_size || alignment == 0) {
-			dbg_err("bad alignment");
+			err = 4;
 			goto bad;
 		}
 
 		n = alignment % ubi->min_io_size;
 		if (alignment != 1 && n) {
-			dbg_err("alignment is not multiple of min I/O unit");
+			err = 5;
 			goto bad;
 		}
 
 		n = ubi->leb_size % alignment;
 		if (data_pad != n) {
 			dbg_err("bad data_pad, has to be %d", n);
+			err = 6;
 			goto bad;
 		}
 
 		if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
-			dbg_err("bad vol_type");
+			err = 7;
 			goto bad;
 		}
 
 		if (upd_marker != 0 && upd_marker != 1) {
-			dbg_err("bad upd_marker");
+			err = 8;
 			goto bad;
 		}
 
 		if (reserved_pebs > ubi->good_peb_count) {
 			dbg_err("too large reserved_pebs, good PEBs %d",
 				ubi->good_peb_count);
+			err = 9;
 			goto bad;
 		}
 
 		if (name_len > UBI_VOL_NAME_MAX) {
-			dbg_err("too long volume name, max %d",
-				UBI_VOL_NAME_MAX);
+			err = 10;
 			goto bad;
 		}
 
 		if (name[0] == '\0') {
-			dbg_err("NULL volume name");
+			err = 11;
 			goto bad;
 		}
 
 		if (name_len != strnlen(name, name_len + 1)) {
-			dbg_err("bad name_len");
+			err = 12;
 			goto bad;
 		}
 	}
@@ -235,7 +236,7 @@ static int vtbl_check(const struct ubi_device *ubi,
 	return 0;
 
 bad:
-	ubi_err("volume table check failed, record %d", i);
+	ubi_err("volume table check failed: record %d, error %d", i, err);
 	ubi_dbg_dump_vtbl_record(&vtbl[i], i);
 	return -EINVAL;
 }
@@ -620,30 +621,32 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
 static int check_sv(const struct ubi_volume *vol,
 		    const struct ubi_scan_volume *sv)
 {
+	int err;
+
 	if (sv->highest_lnum >= vol->reserved_pebs) {
-		dbg_err("bad highest_lnum");
+		err = 1;
 		goto bad;
 	}
 	if (sv->leb_count > vol->reserved_pebs) {
-		dbg_err("bad leb_count");
+		err = 2;
 		goto bad;
 	}
 	if (sv->vol_type != vol->vol_type) {
-		dbg_err("bad vol_type");
+		err = 3;
 		goto bad;
 	}
 	if (sv->used_ebs > vol->reserved_pebs) {
-		dbg_err("bad used_ebs");
+		err = 4;
 		goto bad;
 	}
 	if (sv->data_pad != vol->data_pad) {
-		dbg_err("bad data_pad");
+		err = 5;
 		goto bad;
 	}
 	return 0;
 
 bad:
-	ubi_err("bad scanning information");
+	ubi_err("bad scanning information, error %d", err);
 	ubi_dbg_dump_sv(sv);
 	ubi_dbg_dump_vol_info(vol);
 	return -EINVAL;
-- 
GitLab


From beeea636030622f6de67d15c61f5b311a03d188c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 20 May 2008 09:54:02 +0300
Subject: [PATCH 090/853] UBI: add a comment

It is not clear why we schedule PEB for scrubbing in case of
-EBADMSG. Elaborate.

Requested-by: Kyungmin Park <kmpark@infradead.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/vtbl.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 3c4d68f2cfd..42a7815086b 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -385,7 +385,16 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
 		err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
 				       ubi->vtbl_size);
 		if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
-			/* Scrub the PEB later */
+			/*
+			 * Scrub the PEB later. Note, -EBADMSG indicates an
+			 * uncorrectable ECC error, but we have our own CRC and
+			 * the data will be checked later. If the data is OK,
+			 * the PEB will be scrubbed (because we set
+			 * seb->scrub). If the data is not OK, the contents of
+			 * the PEB will be recovered from the second copy, and
+			 * seb->scrub will be cleared in
+			 * 'ubi_scan_add_used()'.
+			 */
 			seb->scrub = 1;
 		else if (err)
 			goto out_free;
-- 
GitLab


From a0fd1efd488092951f310fdb777b8a540cf84dcb Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kmpark@infradead.org>
Date: Wed, 21 May 2008 14:34:56 +0300
Subject: [PATCH 091/853] UBI: fix buffer padding

Instead of correctly padding the buffer which we are writing to the
eraseblock during update, we used a weird construct:

memset(buf + len, 0xFF, len - len);

Fix this.

Signed-off-by: Kyungmin Park <kmpark@infradead.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/upd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index ddaa1a56cc6..6fa1ab3f2a7 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -237,10 +237,10 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
 	int err;
 
 	if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
-		len = ALIGN(len, ubi->min_io_size);
-		memset(buf + len, 0xFF, len - len);
+		int l = ALIGN(len, ubi->min_io_size);
 
-		len = ubi_calc_data_len(ubi, buf, len);
+		memset(buf + len, 0xFF, l - len);
+		len = ubi_calc_data_len(ubi, buf, l);
 		if (len == 0) {
 			dbg_msg("all %d bytes contain 0xFF - skip", len);
 			return 0;
-- 
GitLab


From cadb40ccc16a26a738f1cbc963e35b21edd93e79 Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Thu, 22 May 2008 10:32:18 +0900
Subject: [PATCH 092/853] UBI: avoid unnecessary division operations

UBI already checks that @min_io_size is a power of 2 in io_init().
It is safe to use bit operations then.

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c | 8 ++++++--
 drivers/mtd/ubi/cdev.c  | 6 +++---
 drivers/mtd/ubi/eba.c   | 2 +-
 drivers/mtd/ubi/kapi.c  | 6 +++---
 drivers/mtd/ubi/misc.c  | 2 +-
 drivers/mtd/ubi/vmt.c   | 2 +-
 drivers/mtd/ubi/vtbl.c  | 5 ++---
 drivers/mtd/ubi/wl.c    | 3 +--
 8 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 961416ac061..ff4425de152 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -530,7 +530,11 @@ static int io_init(struct ubi_device *ubi)
 	ubi->min_io_size = ubi->mtd->writesize;
 	ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
 
-	/* Make sure minimal I/O unit is power of 2 */
+	/*
+	 * Make sure minimal I/O unit is power of 2. Note, there is no
+	 * fundamental reason for this assumption. It is just an optimization
+	 * which allows us to avoid costly division operations.
+	 */
 	if (!is_power_of_2(ubi->min_io_size)) {
 		ubi_err("min. I/O unit (%d) is not power of 2",
 			ubi->min_io_size);
@@ -581,7 +585,7 @@ static int io_init(struct ubi_device *ubi)
 	if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE ||
 	    ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE ||
 	    ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE ||
-	    ubi->leb_start % ubi->min_io_size) {
+	    ubi->leb_start & (ubi->min_io_size - 1)) {
 		ubi_err("bad VID header (%d) or data offsets (%d)",
 			ubi->vid_hdr_offset, ubi->leb_start);
 		return -EINVAL;
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 89193ba9451..0cdaf9fba7b 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -295,7 +295,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
 	off = do_div(tmp, vol->usable_leb_size);
 	lnum = tmp;
 
-	if (off % ubi->min_io_size) {
+	if (off & (ubi->min_io_size - 1)) {
 		dbg_err("unaligned position");
 		return -EINVAL;
 	}
@@ -304,7 +304,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
 		count_save = count = vol->used_bytes - *offp;
 
 	/* We can write only in fractions of the minimum I/O unit */
-	if (count % ubi->min_io_size) {
+	if (count & (ubi->min_io_size - 1)) {
 		dbg_err("unaligned write length");
 		return -EINVAL;
 	}
@@ -564,7 +564,7 @@ static int verify_mkvol_req(const struct ubi_device *ubi,
 	if (req->alignment > ubi->leb_size)
 		goto bad;
 
-	n = req->alignment % ubi->min_io_size;
+	n = req->alignment & (ubi->min_io_size - 1);
 	if (req->alignment != 1 && n)
 		goto bad;
 
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 7ce91ca742b..37d77844794 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -752,7 +752,7 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol,
 		/* If this is the last LEB @len may be unaligned */
 		len = ALIGN(data_size, ubi->min_io_size);
 	else
-		ubi_assert(len % ubi->min_io_size == 0);
+		ubi_assert(!(len & (ubi->min_io_size - 1)));
 
 	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 	if (!vid_hdr)
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index a70d58823f8..51508832566 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -397,8 +397,8 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
 		return -EROFS;
 
 	if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 ||
-	    offset + len > vol->usable_leb_size || offset % ubi->min_io_size ||
-	    len % ubi->min_io_size)
+	    offset + len > vol->usable_leb_size ||
+	    offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1))
 		return -EINVAL;
 
 	if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
@@ -447,7 +447,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
 		return -EROFS;
 
 	if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 ||
-	    len > vol->usable_leb_size || len % ubi->min_io_size)
+	    len > vol->usable_leb_size || len & (ubi->min_io_size - 1))
 		return -EINVAL;
 
 	if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c
index 93e05281201..22ad3140294 100644
--- a/drivers/mtd/ubi/misc.c
+++ b/drivers/mtd/ubi/misc.c
@@ -37,7 +37,7 @@ int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
 {
 	int i;
 
-	ubi_assert(length % ubi->min_io_size == 0);
+	ubi_assert(!(length & (ubi->min_io_size - 1)));
 
 	for (i = length - 1; i >= 0; i--)
 		if (((const uint8_t *)buf)[i] != 0xFF)
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 5be58d85c63..7402025ded9 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -727,7 +727,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
 		goto fail;
 	}
 
-	n = vol->alignment % ubi->min_io_size;
+	n = vol->alignment & (ubi->min_io_size - 1);
 	if (vol->alignment != 1 && n) {
 		ubi_err("alignment is not multiple of min I/O unit");
 		goto fail;
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 42a7815086b..d9af11a8682 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -170,7 +170,7 @@ static int vtbl_check(const struct ubi_device *ubi,
 			goto bad;
 		}
 
-		n = alignment % ubi->min_io_size;
+		n = alignment & (ubi->min_io_size - 1);
 		if (alignment != 1 && n) {
 			err = 5;
 			goto bad;
@@ -684,14 +684,13 @@ static int check_scanning_info(const struct ubi_device *ubi,
 		return -EINVAL;
 	}
 
-	if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&&
+	if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT &&
 	    si->highest_vol_id < UBI_INTERNAL_VOL_START) {
 		ubi_err("too large volume ID %d found by scanning",
 			si->highest_vol_id);
 		return -EINVAL;
 	}
 
-
 	for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
 		cond_resched();
 
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index a471a491f0a..cc8fe2934d2 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1368,7 +1368,7 @@ int ubi_thread(void *u)
 		int err;
 
 		if (kthread_should_stop())
-			goto out;
+			break;
 
 		if (try_to_freeze())
 			continue;
@@ -1403,7 +1403,6 @@ int ubi_thread(void *u)
 		cond_resched();
 	}
 
-out:
 	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
 	return 0;
 }
-- 
GitLab


From abc5e92262d87f9c5c628492bffc55f81c7dcb80 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 4 Jun 2008 16:48:12 +0300
Subject: [PATCH 093/853] UBI: fix memory leak

ubi_free_volume() function sets ubi->volumes[] to NULL, so
ubi_eba_close() is useless, it does not free what has to be freed.
So zap it and free vol->eba_tbl at the volume release function.

Pointed-out-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c |  2 --
 drivers/mtd/ubi/eba.c   | 17 -----------------
 drivers/mtd/ubi/ubi.h   |  1 -
 drivers/mtd/ubi/vmt.c   | 18 +++++++++---------
 4 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index ff4425de152..7b42b4d05b3 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -840,7 +840,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 out_uif:
 	uif_close(ubi);
 out_detach:
-	ubi_eba_close(ubi);
 	ubi_wl_close(ubi);
 	vfree(ubi->vtbl);
 out_free:
@@ -903,7 +902,6 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
 		kthread_stop(ubi->bgt_thread);
 
 	uif_close(ubi);
-	ubi_eba_close(ubi);
 	ubi_wl_close(ubi);
 	vfree(ubi->vtbl);
 	put_mtd_device(ubi->mtd);
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 37d77844794..623d25f4855 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1233,20 +1233,3 @@ out_free:
 	}
 	return err;
 }
-
-/**
- * ubi_eba_close - close EBA unit.
- * @ubi: UBI device description object
- */
-void ubi_eba_close(const struct ubi_device *ubi)
-{
-	int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
-
-	dbg_eba("close EBA unit");
-
-	for (i = 0; i < num_volumes; i++) {
-		if (!ubi->volumes[i])
-			continue;
-		kfree(ubi->volumes[i]->eba_tbl);
-	}
-}
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 67dcbd11c15..940f6b7deec 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -477,7 +477,6 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 		     struct ubi_vid_hdr *vid_hdr);
 int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
-void ubi_eba_close(const struct ubi_device *ubi);
 
 /* wl.c */
 int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 7402025ded9..367b04176e0 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -127,6 +127,7 @@ static void vol_release(struct device *dev)
 {
 	struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev);
 
+	kfree(vol->eba_tbl);
 	kfree(vol);
 }
 
@@ -201,7 +202,7 @@ static void volume_sysfs_close(struct ubi_volume *vol)
  */
 int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 {
-	int i, err, vol_id = req->vol_id, dont_free = 0;
+	int i, err, vol_id = req->vol_id, do_free = 1;
 	struct ubi_volume *vol;
 	struct ubi_vtbl_record vtbl_rec;
 	uint64_t bytes;
@@ -365,14 +366,14 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 
 out_sysfs:
 	/*
-	 * We have registered our device, we should not free the volume*
+	 * We have registered our device, we should not free the volume
 	 * description object in this function in case of an error - it is
 	 * freed by the release function.
 	 *
 	 * Get device reference to prevent the release function from being
 	 * called just after sysfs has been closed.
 	 */
-	dont_free = 1;
+	do_free = 0;
 	get_device(&vol->dev);
 	volume_sysfs_close(vol);
 out_gluebi:
@@ -382,17 +383,18 @@ out_gluebi:
 out_cdev:
 	cdev_del(&vol->cdev);
 out_mapping:
-	kfree(vol->eba_tbl);
+	if (do_free)
+		kfree(vol->eba_tbl);
 out_acc:
 	spin_lock(&ubi->volumes_lock);
 	ubi->rsvd_pebs -= vol->reserved_pebs;
 	ubi->avail_pebs += vol->reserved_pebs;
 out_unlock:
 	spin_unlock(&ubi->volumes_lock);
-	if (dont_free)
-		put_device(&vol->dev);
-	else
+	if (do_free)
 		kfree(vol);
+	else
+		put_device(&vol->dev);
 	ubi_err("cannot create volume %d, error %d", vol_id, err);
 	return err;
 }
@@ -445,8 +447,6 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
 			goto out_err;
 	}
 
-	kfree(vol->eba_tbl);
-	vol->eba_tbl = NULL;
 	cdev_del(&vol->cdev);
 	volume_sysfs_close(vol);
 
-- 
GitLab


From 505d1caa79cd61a70615e9a7eae2eab85e797a83 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 4 Jun 2008 17:00:35 +0300
Subject: [PATCH 094/853] UBI: do not forget to free internal volumes

UBI forgets to free internal volumes when detaching MTD device.
Fix this.

Pointed-out-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 7b42b4d05b3..33205e4c1f5 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -422,6 +422,10 @@ out_unreg:
 /**
  * uif_close - close user interfaces for an UBI device.
  * @ubi: UBI device description object
+ *
+ * Note, since this function un-registers UBI volume device objects (@vol->dev),
+ * the memory allocated for the volumes is freed as well (in the release
+ * function).
  */
 static void uif_close(struct ubi_device *ubi)
 {
@@ -431,6 +435,21 @@ static void uif_close(struct ubi_device *ubi)
 	unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
 }
 
+/**
+ * free_internal_volumes - free internal volumes.
+ * @ubi: UBI device description object
+ */
+static void free_internal_volumes(struct ubi_device *ubi)
+{
+	int i;
+
+	for (i = ubi->vtbl_slots;
+	     i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
+		kfree(ubi->volumes[i]->eba_tbl);
+		kfree(ubi->volumes[i]);
+	}
+}
+
 /**
  * attach_by_scanning - attach an MTD device using scanning method.
  * @ubi: UBI device descriptor
@@ -475,6 +494,7 @@ static int attach_by_scanning(struct ubi_device *ubi)
 out_wl:
 	ubi_wl_close(ubi);
 out_vtbl:
+	free_internal_volumes(ubi);
 	vfree(ubi->vtbl);
 out_si:
 	ubi_scan_destroy_si(si);
@@ -650,7 +670,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
 
 	/*
 	 * Clear the auto-resize flag in the volume in-memory copy of the
-	 * volume table, and 'ubi_resize_volume()' will propogate this change
+	 * volume table, and 'ubi_resize_volume()' will propagate this change
 	 * to the flash.
 	 */
 	ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG;
@@ -659,7 +679,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
 		struct ubi_vtbl_record vtbl_rec;
 
 		/*
-		 * No avalilable PEBs to re-size the volume, clear the flag on
+		 * No available PEBs to re-size the volume, clear the flag on
 		 * flash and exit.
 		 */
 		memcpy(&vtbl_rec, &ubi->vtbl[vol_id],
@@ -692,7 +712,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
  *
  * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number
  * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in
- * which case this function finds a vacant device nubert and assings it
+ * which case this function finds a vacant device number and assigns it
  * automatically. Returns the new UBI device number in case of success and a
  * negative error code in case of failure.
  *
@@ -841,6 +861,7 @@ out_uif:
 	uif_close(ubi);
 out_detach:
 	ubi_wl_close(ubi);
+	free_internal_volumes(ubi);
 	vfree(ubi->vtbl);
 out_free:
 	vfree(ubi->peb_buf1);
@@ -903,6 +924,7 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
 
 	uif_close(ubi);
 	ubi_wl_close(ubi);
+	free_internal_volumes(ubi);
 	vfree(ubi->vtbl);
 	put_mtd_device(ubi->mtd);
 	vfree(ubi->peb_buf1);
-- 
GitLab


From 472018f73e7308a7f29b753ee8c742b6f45f103f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 4 Jun 2008 17:58:37 +0300
Subject: [PATCH 095/853] UBI: fix memory leak on error path

Normally UBI volumes are freed in the release function of
the struct device object. However, on error path they may
have to be freed before the struct device objects have been
initialized.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 33205e4c1f5..a5b19944eca 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -354,16 +354,35 @@ static void kill_volumes(struct ubi_device *ubi)
 			ubi_free_volume(ubi, ubi->volumes[i]);
 }
 
+/**
+ * free_user_volumes - free all user volumes.
+ * @ubi: UBI device description object
+ *
+ * Normally the volumes are freed at the release function of the volume device
+ * objects. However, on error paths the volumes have to be freed before the
+ * device objects have been initialized.
+ */
+static void free_user_volumes(struct ubi_device *ubi)
+{
+	int i;
+
+	for (i = 0; i < ubi->vtbl_slots; i++)
+		if (ubi->volumes[i]) {
+			kfree(ubi->volumes[i]->eba_tbl);
+			kfree(ubi->volumes[i]);
+		}
+}
+
 /**
  * uif_init - initialize user interfaces for an UBI device.
  * @ubi: UBI device description object
  *
  * This function returns zero in case of success and a negative error code in
- * case of failure.
+ * case of failure. Note, this function destroys all volumes if it fails.
  */
 static int uif_init(struct ubi_device *ubi)
 {
-	int i, err;
+	int i, err, do_free = 0;
 	dev_t dev;
 
 	sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num);
@@ -410,10 +429,13 @@ static int uif_init(struct ubi_device *ubi)
 
 out_volumes:
 	kill_volumes(ubi);
+	do_free = 0;
 out_sysfs:
 	ubi_sysfs_close(ubi);
 	cdev_del(&ubi->cdev);
 out_unreg:
+	if (do_free)
+		free_user_volumes(ubi);
 	unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
 	ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err);
 	return err;
@@ -722,7 +744,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
 int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 {
 	struct ubi_device *ubi;
-	int i, err;
+	int i, err, do_free = 1;
 
 	/*
 	 * Check if we already have the same MTD device attached.
@@ -822,7 +844,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 
 	err = uif_init(ubi);
 	if (err)
-		goto out_detach;
+		goto out_nofree;
 
 	ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
 	if (IS_ERR(ubi->bgt_thread)) {
@@ -859,8 +881,12 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 
 out_uif:
 	uif_close(ubi);
+out_nofree:
+	do_free = 0;
 out_detach:
 	ubi_wl_close(ubi);
+	if (do_free)
+		free_user_volumes(ubi);
 	free_internal_volumes(ubi);
 	vfree(ubi->vtbl);
 out_free:
-- 
GitLab


From 23add7455c42eef63f8719bd268328047d4aed69 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 16 Jun 2008 13:35:23 +0300
Subject: [PATCH 096/853] UBI: fix LEB locking

leb_read_unlock() may be called simultaneously by several tasks.
They would race at the following code:

 up_read(&le->mutex);
 if (free)
         kfree(le);

And it is possible that one task frees 'le' before the other tasks
do 'up_read()'. Fix this by doing up_read and free inside the
'ubi->ltree' lock. Below is the oops we had because of this:

BUG: spinlock bad magic on CPU#0, integck/7504
BUG: unable to handle kernel paging request at 6b6b6c4f
IP: [<c0211221>] spin_bug+0x5c/0xdb
*pde = 00000000 Oops: 0000 [#1] PREEMPT SMP Modules linked in: ubifs ubi nandsim nand nand_ids nand_ecc video output

Pid: 7504, comm: integck Not tainted (2.6.26-rc3ubifs26 #8)
EIP: 0060:[<c0211221>] EFLAGS: 00010002 CPU: 0
EIP is at spin_bug+0x5c/0xdb
EAX: 00000032 EBX: 6b6b6b6b ECX: 6b6b6b6b EDX: f7f7ce30
ESI: f76491dc EDI: c044f51f EBP: e8a736cc ESP: e8a736a8
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process integck (pid: 7504, ti=e8a72000 task=f7f7ce30 task.ti=e8a72000)
Stack: c044f754 c044f51f 00000000 f7f7d024 00001d50 00000001 f76491dc 00000296       f6df50e0 e8a736d8 c02112f0 f76491dc e8a736e8 c039157a f7d9e830 f76491d8       e8a7370c c020b975 f76491dc 00000296 f76491f8 00000000 f76491d8 00000000 Call Trace:
[<c02112f0>] ? _raw_spin_unlock+0x50/0x7c
[<c039157a>] ? _spin_unlock_irqrestore+0x20/0x58
[<c020b975>] ? rwsem_wake+0x4b/0x122
[<c0390e0a>] ? call_rwsem_wake+0xa/0xc
[<c0139ee7>] ? up_read+0x28/0x31
[<f8873b3c>] ? leb_read_unlock+0x73/0x7b [ubi]
[<f88742a3>] ? ubi_eba_read_leb+0x195/0x2b0 [ubi]
[<f8872a04>] ? ubi_leb_read+0xaf/0xf8 [ubi]

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/eba.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 623d25f4855..8dc488fc0cd 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -223,22 +223,18 @@ static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum)
  */
 static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
 {
-	int free = 0;
 	struct ubi_ltree_entry *le;
 
 	spin_lock(&ubi->ltree_lock);
 	le = ltree_lookup(ubi, vol_id, lnum);
 	le->users -= 1;
 	ubi_assert(le->users >= 0);
+	up_read(&le->mutex);
 	if (le->users == 0) {
 		rb_erase(&le->rb, &ubi->ltree);
-		free = 1;
+		kfree(le);
 	}
 	spin_unlock(&ubi->ltree_lock);
-
-	up_read(&le->mutex);
-	if (free)
-		kfree(le);
 }
 
 /**
@@ -274,7 +270,6 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
  */
 static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
 {
-	int free;
 	struct ubi_ltree_entry *le;
 
 	le = ltree_add_entry(ubi, vol_id, lnum);
@@ -289,12 +284,9 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
 	ubi_assert(le->users >= 0);
 	if (le->users == 0) {
 		rb_erase(&le->rb, &ubi->ltree);
-		free = 1;
-	} else
-		free = 0;
-	spin_unlock(&ubi->ltree_lock);
-	if (free)
 		kfree(le);
+	}
+	spin_unlock(&ubi->ltree_lock);
 
 	return 1;
 }
@@ -307,23 +299,18 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
  */
 static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
 {
-	int free;
 	struct ubi_ltree_entry *le;
 
 	spin_lock(&ubi->ltree_lock);
 	le = ltree_lookup(ubi, vol_id, lnum);
 	le->users -= 1;
 	ubi_assert(le->users >= 0);
+	up_write(&le->mutex);
 	if (le->users == 0) {
 		rb_erase(&le->rb, &ubi->ltree);
-		free = 1;
-	} else
-		free = 0;
-	spin_unlock(&ubi->ltree_lock);
-
-	up_write(&le->mutex);
-	if (free)
 		kfree(le);
+	}
+	spin_unlock(&ubi->ltree_lock);
 }
 
 /**
-- 
GitLab


From 73789a3d9fd8e500e121c1d4a5a2b16dd748ab5f Mon Sep 17 00:00:00 2001
From: Bruce Leonard <brucle@selinc.com>
Date: Thu, 3 Jul 2008 10:35:49 +0300
Subject: [PATCH 097/853] UBI: fix 64-bit calculations

Signed-off-by: Bruce Leonard <brucle@selinc.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/cdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 0cdaf9fba7b..3e3449ec07f 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -437,7 +437,8 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file,
 			break;
 		}
 
-		rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad);
+		rsvd_bytes = (long long)vol->reserved_pebs *
+					(ubi->leb_size - vol->data_pad);
 		if (bytes < 0 || bytes > rsvd_bytes) {
 			err = -EINVAL;
 			break;
-- 
GitLab


From a5bf6190417cbbf80443a9f71c65b653e13e9982 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 10 Jul 2008 18:38:33 +0300
Subject: [PATCH 098/853] UBI: add ubi_sync() interface

To flush MTD device caches.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/kapi.c  | 24 ++++++++++++++++++++++++
 include/linux/mtd/ubi.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 51508832566..e65c8e0bcd5 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -632,3 +632,27 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
 	return vol->eba_tbl[lnum] >= 0;
 }
 EXPORT_SYMBOL_GPL(ubi_is_mapped);
+
+/**
+ * ubi_sync - synchronize UBI device buffers.
+ * @ubi_num: UBI device to synchronize
+ *
+ * The underlying MTD device may cache data in hardware or in software. This
+ * function ensures the caches are flushed. Returns zero in case of success and
+ * a negative error code in case of failure.
+ */
+int ubi_sync(int ubi_num)
+{
+	struct ubi_device *ubi;
+
+	ubi = ubi_get_device(ubi_num);
+	if (!ubi)
+		return -ENODEV;
+
+	if (ubi->mtd->sync)
+		ubi->mtd->sync(ubi->mtd);
+
+	ubi_put_device(ubi);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ubi_sync);
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index f71201d0f3e..83302bbbddb 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -152,6 +152,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum);
 int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum);
 int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
 int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum);
+int ubi_sync(int ubi_num);
 
 /*
  * This function is the same as the 'ubi_leb_read()' function, but it does not
-- 
GitLab


From a6ea440769e11c46828cddd20f91ab57261701d5 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 13 Jul 2008 21:46:24 +0300
Subject: [PATCH 099/853] UBI: improve mkvol request validation

Check that volume name is not shorter than 'name_len'.

No need to copy the trailing zero byte because whole array
was zeroed earlier.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/cdev.c | 7 +++++--
 drivers/mtd/ubi/vmt.c  | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 3e3449ec07f..4fb84e3e650 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -574,6 +574,10 @@ static int verify_mkvol_req(const struct ubi_device *ubi,
 		goto bad;
 	}
 
+	n = strnlen(req->name, req->name_len + 1);
+	if (n != req->name_len)
+		goto bad;
+
 	return 0;
 
 bad:
@@ -629,12 +633,11 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
 			break;
 		}
 
+		req.name[req.name_len] = '\0';
 		err = verify_mkvol_req(ubi, &req);
 		if (err)
 			break;
 
-		req.name[req.name_len] = '\0';
-
 		mutex_lock(&ubi->volumes_mutex);
 		err = ubi_create_volume(ubi, &req);
 		mutex_unlock(&ubi->volumes_mutex);
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 367b04176e0..bfa7c5d2e06 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -275,7 +275,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 	vol->data_pad  = ubi->leb_size % vol->alignment;
 	vol->vol_type  = req->vol_type;
 	vol->name_len  = req->name_len;
-	memcpy(vol->name, req->name, vol->name_len + 1);
+	memcpy(vol->name, req->name, vol->name_len);
 	vol->ubi = ubi;
 
 	/*
@@ -350,7 +350,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 		vtbl_rec.vol_type = UBI_VID_DYNAMIC;
 	else
 		vtbl_rec.vol_type = UBI_VID_STATIC;
-	memcpy(vtbl_rec.name, vol->name, vol->name_len + 1);
+	memcpy(vtbl_rec.name, vol->name, vol->name_len);
 
 	err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
 	if (err)
-- 
GitLab


From bb84c1a199558962edf4b4aeb4480fb09aa09b91 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 14 Jul 2008 12:57:27 +0300
Subject: [PATCH 100/853] UBI: fix error message

The ubi_err() macro will add \n.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/gluebi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index e909b390069..ae76ab638b2 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -299,7 +299,7 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
 		mtd->size = vol->used_bytes;
 
 	if (add_mtd_device(mtd)) {
-		ubi_err("cannot not add MTD device\n");
+		ubi_err("cannot add MTD device");
 		kfree(mtd->name);
 		return -ENFILE;
 	}
-- 
GitLab


From 85c6e6e28259e9b58b8984db536c45bc3161f40c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 16 Jul 2008 10:25:56 +0300
Subject: [PATCH 101/853] UBI: amend commentaries

Hch asked not to use "unit" for sub-systems, let it be so.
Also make some other comment modifications.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c     |  2 +-
 drivers/mtd/ubi/debug.h     |  6 +--
 drivers/mtd/ubi/eba.c       | 22 ++++-----
 drivers/mtd/ubi/io.c        | 22 ++++-----
 drivers/mtd/ubi/scan.c      | 28 ++++++-----
 drivers/mtd/ubi/scan.h      | 19 ++++----
 drivers/mtd/ubi/ubi-media.h | 23 ++++-----
 drivers/mtd/ubi/ubi.h       | 37 +++++++--------
 drivers/mtd/ubi/wl.c        | 94 ++++++++++++++++++-------------------
 include/linux/mtd/ubi.h     |  4 +-
 10 files changed, 129 insertions(+), 128 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index a5b19944eca..27271fe32e0 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -524,7 +524,7 @@ out_si:
 }
 
 /**
- * io_init - initialize I/O unit for a given UBI device.
+ * io_init - initialize I/O sub-system for a given UBI device.
  * @ubi: UBI device description object
  *
  * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 8ea99d8c9e1..7d8d77c31df 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -76,21 +76,21 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
 #endif /* CONFIG_MTD_UBI_DEBUG_MSG */
 
 #ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
-/* Messages from the eraseblock association unit */
+/* Messages from the eraseblock association sub-system */
 #define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
 #else
 #define dbg_eba(fmt, ...) ({})
 #endif
 
 #ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
-/* Messages from the wear-leveling unit */
+/* Messages from the wear-leveling sub-system */
 #define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
 #else
 #define dbg_wl(fmt, ...) ({})
 #endif
 
 #ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
-/* Messages from the input/output unit */
+/* Messages from the input/output sub-system */
 #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
 #else
 #define dbg_io(fmt, ...) ({})
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 8dc488fc0cd..613cd1e5164 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -19,20 +19,20 @@
  */
 
 /*
- * The UBI Eraseblock Association (EBA) unit.
+ * The UBI Eraseblock Association (EBA) sub-system.
  *
- * This unit is responsible for I/O to/from logical eraseblock.
+ * This sub-system is responsible for I/O to/from logical eraseblock.
  *
  * Although in this implementation the EBA table is fully kept and managed in
  * RAM, which assumes poor scalability, it might be (partially) maintained on
  * flash in future implementations.
  *
- * The EBA unit implements per-logical eraseblock locking. Before accessing a
- * logical eraseblock it is locked for reading or writing. The per-logical
- * eraseblock locking is implemented by means of the lock tree. The lock tree
- * is an RB-tree which refers all the currently locked logical eraseblocks. The
- * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by
- * (@vol_id, @lnum) pairs.
+ * The EBA sub-system implements per-logical eraseblock locking. Before
+ * accessing a logical eraseblock it is locked for reading or writing. The
+ * per-logical eraseblock locking is implemented by means of the lock tree. The
+ * lock tree is an RB-tree which refers all the currently locked logical
+ * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
+ * They are indexed by (@vol_id, @lnum) pairs.
  *
  * EBA also maintains the global sequence counter which is incremented each
  * time a logical eraseblock is mapped to a physical eraseblock and it is
@@ -1128,7 +1128,7 @@ out_unlock_leb:
 }
 
 /**
- * ubi_eba_init_scan - initialize the EBA unit using scanning information.
+ * ubi_eba_init_scan - initialize the EBA sub-system using scanning information.
  * @ubi: UBI device description object
  * @si: scanning information
  *
@@ -1143,7 +1143,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
 	struct ubi_scan_leb *seb;
 	struct rb_node *rb;
 
-	dbg_eba("initialize EBA unit");
+	dbg_eba("initialize EBA sub-system");
 
 	spin_lock_init(&ubi->ltree_lock);
 	mutex_init(&ubi->alc_mutex);
@@ -1209,7 +1209,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
 		ubi->rsvd_pebs  += ubi->beb_rsvd_pebs;
 	}
 
-	dbg_eba("EBA unit is initialized");
+	dbg_eba("EBA sub-system is initialized");
 	return 0;
 
 out_free:
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 4ac11df7b04..561e7b2f96c 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -20,15 +20,15 @@
  */
 
 /*
- * UBI input/output unit.
+ * UBI input/output sub-system.
  *
- * This unit provides a uniform way to work with all kinds of the underlying
- * MTD devices. It also implements handy functions for reading and writing UBI
- * headers.
+ * This sub-system provides a uniform way to work with all kinds of the
+ * underlying MTD devices. It also implements handy functions for reading and
+ * writing UBI headers.
  *
  * We are trying to have a paranoid mindset and not to trust to what we read
- * from the flash media in order to be more secure and robust. So this unit
- * validates every single header it reads from the flash media.
+ * from the flash media in order to be more secure and robust. So this
+ * sub-system validates every single header it reads from the flash media.
  *
  * Some words about how the eraseblock headers are stored.
  *
@@ -79,11 +79,11 @@
  * 512-byte chunks, we have to allocate one more buffer and copy our VID header
  * to offset 448 of this buffer.
  *
- * The I/O unit does the following trick in order to avoid this extra copy.
- * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header
- * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the
- * VID header is being written out, it shifts the VID header pointer back and
- * writes the whole sub-page.
+ * The I/O sub-system does the following trick in order to avoid this extra
+ * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID
+ * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer.
+ * When the VID header is being written out, it shifts the VID header pointer
+ * back and writes the whole sub-page.
  */
 
 #include <linux/crc32.h>
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 96d410e106a..892c2ba4977 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -19,9 +19,9 @@
  */
 
 /*
- * UBI scanning unit.
+ * UBI scanning sub-system.
  *
- * This unit is responsible for scanning the flash media, checking UBI
+ * This sub-system is responsible for scanning the flash media, checking UBI
  * headers and providing complete information about the UBI flash image.
  *
  * The scanning information is represented by a &struct ubi_scan_info' object.
@@ -103,7 +103,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
  * non-zero if an inconsistency was found and zero if not.
  *
  * Note, UBI does sanity check of everything it reads from the flash media.
- * Most of the checks are done in the I/O unit. Here we check that the
+ * Most of the checks are done in the I/O sub-system. Here we check that the
  * information in the VID header is consistent to the information in other VID
  * headers of the same volume.
  */
@@ -256,8 +256,8 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
 		 * that versions that are close to %0xFFFFFFFF are less then
 		 * versions that are close to %0.
 		 *
-		 * The UBI WL unit guarantees that the number of pending tasks
-		 * is not greater then %0x7FFFFFFF. So, if the difference
+		 * The UBI WL sub-system guarantees that the number of pending
+		 * tasks is not greater then %0x7FFFFFFF. So, if the difference
 		 * between any two versions is greater or equivalent to
 		 * %0x7FFFFFFF, there was an overflow and the logical
 		 * eraseblock with lower version is actually newer then the one
@@ -645,9 +645,9 @@ void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv)
  *
  * This function erases physical eraseblock 'pnum', and writes the erase
  * counter header to it. This function should only be used on UBI device
- * initialization stages, when the EBA unit had not been yet initialized. This
- * function returns zero in case of success and a negative error code in case
- * of failure.
+ * initialization stages, when the EBA sub-system had not been yet initialized.
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
  */
 int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si,
 		       int pnum, int ec)
@@ -687,9 +687,10 @@ out_free:
  * @si: scanning information
  *
  * This function returns a free physical eraseblock. It is supposed to be
- * called on the UBI initialization stages when the wear-leveling unit is not
- * initialized yet. This function picks a physical eraseblocks from one of the
- * lists, writes the EC header if it is needed, and removes it from the list.
+ * called on the UBI initialization stages when the wear-leveling sub-system is
+ * not initialized yet. This function picks a physical eraseblock from one of
+ * the lists, writes the EC header if it is needed, and removes it from the
+ * list.
  *
  * This function returns scanning physical eraseblock information in case of
  * success and an error code in case of failure.
@@ -764,8 +765,9 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum
 		return err;
 	else if (err) {
 		/*
-		 * FIXME: this is actually duty of the I/O unit to initialize
-		 * this, but MTD does not provide enough information.
+		 * FIXME: this is actually duty of the I/O sub-system to
+		 * initialize this, but MTD does not provide enough
+		 * information.
 		 */
 		si->bad_peb_count += 1;
 		return 0;
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h
index 966b9b682a4..4e2e3cc0bec 100644
--- a/drivers/mtd/ubi/scan.h
+++ b/drivers/mtd/ubi/scan.h
@@ -59,16 +59,16 @@ struct ubi_scan_leb {
  * @leb_count: number of logical eraseblocks in this volume
  * @vol_type: volume type
  * @used_ebs: number of used logical eraseblocks in this volume (only for
- * static volumes)
+ *            static volumes)
  * @last_data_size: amount of data in the last logical eraseblock of this
- * volume (always equivalent to the usable logical eraseblock size in case of
- * dynamic volumes)
+ *                  volume (always equivalent to the usable logical eraseblock
+ *                  size in case of dynamic volumes)
  * @data_pad: how many bytes at the end of logical eraseblocks of this volume
- * are not used (due to volume alignment)
+ *            are not used (due to volume alignment)
  * @compat: compatibility flags of this volume
  * @rb: link in the volume RB-tree
  * @root: root of the RB-tree containing all the eraseblock belonging to this
- * volume (&struct ubi_scan_leb objects)
+ *        volume (&struct ubi_scan_leb objects)
  *
  * One object of this type is allocated for each volume during scanning.
  */
@@ -92,8 +92,8 @@ struct ubi_scan_volume {
  * @free: list of free physical eraseblocks
  * @erase: list of physical eraseblocks which have to be erased
  * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ *         those belonging to "preserve"-compatible internal volumes)
  * @bad_peb_count: count of bad physical eraseblocks
- * those belonging to "preserve"-compatible internal volumes)
  * @vols_found: number of volumes found during scanning
  * @highest_vol_id: highest volume ID
  * @alien_peb_count: count of physical eraseblocks in the @alien list
@@ -106,8 +106,8 @@ struct ubi_scan_volume {
  * @ec_count: a temporary variable used when calculating @mean_ec
  *
  * This data structure contains the result of scanning and may be used by other
- * UBI units to build final UBI data structures, further error-recovery and so
- * on.
+ * UBI sub-systems to build final UBI data structures, further error-recovery
+ * and so on.
  */
 struct ubi_scan_info {
 	struct rb_root volumes;
@@ -132,8 +132,7 @@ struct ubi_device;
 struct ubi_vid_hdr;
 
 /*
- * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a
- * list.
+ * ubi_scan_move_to_list - move a PEB from the volume tree to a list.
  *
  * @sv: volume scanning information
  * @seb: scanning eraseblock infprmation
diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h
index c3185d9fd04..26bb7af9787 100644
--- a/drivers/mtd/ubi/ubi-media.h
+++ b/drivers/mtd/ubi/ubi-media.h
@@ -98,10 +98,11 @@ enum {
  * Compatibility constants used by internal volumes.
  *
  * @UBI_COMPAT_DELETE: delete this internal volume before anything is written
- * to the flash
+ *                     to the flash
  * @UBI_COMPAT_RO: attach this device in read-only mode
  * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its
- * physical eraseblocks, don't allow the wear-leveling unit to move them
+ *                       physical eraseblocks, don't allow the wear-leveling
+ *                       sub-system to move them
  * @UBI_COMPAT_REJECT: reject this UBI image
  */
 enum {
@@ -123,7 +124,7 @@ enum {
  * struct ubi_ec_hdr - UBI erase counter header.
  * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC)
  * @version: version of UBI implementation which is supposed to accept this
- * UBI image
+ *           UBI image
  * @padding1: reserved for future, zeroes
  * @ec: the erase counter
  * @vid_hdr_offset: where the VID header starts
@@ -159,20 +160,20 @@ struct ubi_ec_hdr {
  * struct ubi_vid_hdr - on-flash UBI volume identifier header.
  * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC)
  * @version: UBI implementation version which is supposed to accept this UBI
- * image (%UBI_VERSION)
+ *           image (%UBI_VERSION)
  * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC)
  * @copy_flag: if this logical eraseblock was copied from another physical
- * eraseblock (for wear-leveling reasons)
+ *             eraseblock (for wear-leveling reasons)
  * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE,
- * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
+ *          %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
  * @vol_id: ID of this volume
  * @lnum: logical eraseblock number
  * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be
- * removed, kept only for not breaking older UBI users)
+ *           removed, kept only for not breaking older UBI users)
  * @data_size: how many bytes of data this logical eraseblock contains
  * @used_ebs: total number of used logical eraseblocks in this volume
  * @data_pad: how many bytes at the end of this physical eraseblock are not
- * used
+ *            used
  * @data_crc: CRC checksum of the data stored in this logical eraseblock
  * @padding1: reserved for future, zeroes
  * @sqnum: sequence number
@@ -248,9 +249,9 @@ struct ubi_ec_hdr {
  * The @data_crc field contains the CRC checksum of the contents of the logical
  * eraseblock if this is a static volume. In case of dynamic volumes, it does
  * not contain the CRC checksum as a rule. The only exception is when the
- * data of the physical eraseblock was moved by the wear-leveling unit, then
- * the wear-leveling unit calculates the data CRC and stores it in the
- * @data_crc field. And of course, the @copy_flag is %in this case.
+ * data of the physical eraseblock was moved by the wear-leveling sub-system,
+ * then the wear-leveling sub-system calculates the data CRC and stores it in
+ * the @data_crc field. And of course, the @copy_flag is set in this case.
  *
  * The @data_size field is used only for static volumes because UBI has to know
  * how many bytes of data are stored in this eraseblock. For dynamic volumes,
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 940f6b7deec..1fc32c863b7 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -74,15 +74,15 @@
 #define UBI_IO_RETRIES 3
 
 /*
- * Error codes returned by the I/O unit.
+ * Error codes returned by the I/O sub-system.
  *
  * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
- * 0xFF bytes
+ *                   %0xFF bytes
  * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
- * valid erase counter header, and the rest are %0xFF bytes
+ *                  valid erase counter header, and the rest are %0xFF bytes
  * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
  * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
- * CRC)
+ *                     CRC)
  * UBI_IO_BITFLIPS: bit-flips were detected and corrected
  */
 enum {
@@ -99,9 +99,9 @@ enum {
  * @ec: erase counter
  * @pnum: physical eraseblock number
  *
- * This data structure is used in the WL unit. Each physical eraseblock has a
- * corresponding &struct wl_entry object which may be kept in different
- * RB-trees. See WL unit for details.
+ * This data structure is used in the WL sub-system. Each physical eraseblock
+ * has a corresponding &struct wl_entry object which may be kept in different
+ * RB-trees. See WL sub-system for details.
  */
 struct ubi_wl_entry {
 	struct rb_node rb;
@@ -118,10 +118,10 @@ struct ubi_wl_entry {
  * @mutex: read/write mutex to implement read/write access serialization to
  *         the (@vol_id, @lnum) logical eraseblock
  *
- * This data structure is used in the EBA unit to implement per-LEB locking.
- * When a logical eraseblock is being locked - corresponding
+ * This data structure is used in the EBA sub-system to implement per-LEB
+ * locking. When a logical eraseblock is being locked - corresponding
  * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree).
- * See EBA unit for details.
+ * See EBA sub-system for details.
  */
 struct ubi_ltree_entry {
 	struct rb_node rb;
@@ -225,7 +225,7 @@ struct ubi_volume {
 #ifdef CONFIG_MTD_UBI_GLUEBI
 	/*
 	 * Gluebi-related stuff may be compiled out.
-	 * TODO: this should not be built into UBI but should be a separate
+	 * Note: this should not be built into UBI but should be a separate
 	 * ubimtd driver which works on top of UBI and emulates MTD devices.
 	 */
 	struct ubi_volume_desc *gluebi_desc;
@@ -235,8 +235,7 @@ struct ubi_volume {
 };
 
 /**
- * struct ubi_volume_desc - descriptor of the UBI volume returned when it is
- * opened.
+ * struct ubi_volume_desc - UBI volume descriptor returned when it is opened.
  * @vol: reference to the corresponding volume description object
  * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
  */
@@ -316,11 +315,11 @@ struct ubi_wl_entry;
  * @ro_mode: if the UBI device is in read-only mode
  * @leb_size: logical eraseblock size
  * @leb_start: starting offset of logical eraseblocks within physical
- * eraseblocks
+ *             eraseblocks
  * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
  * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
  * @vid_hdr_offset: starting offset of the volume identifier header (might be
- * unaligned)
+ *                  unaligned)
  * @vid_hdr_aloffset: starting offset of the VID header aligned to
  * @hdrs_min_io_size
  * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
@@ -356,16 +355,16 @@ struct ubi_device {
 	struct mutex volumes_mutex;
 
 	int max_ec;
-	/* TODO: mean_ec is not updated run-time, fix */
+	/* Note, mean_ec is not updated run-time - should be fixed */
 	int mean_ec;
 
-	/* EBA unit's stuff */
+	/* EBA sub-system's stuff */
 	unsigned long long global_sqnum;
 	spinlock_t ltree_lock;
 	struct rb_root ltree;
 	struct mutex alc_mutex;
 
-	/* Wear-leveling unit's stuff */
+	/* Wear-leveling sub-system's stuff */
 	struct rb_root used;
 	struct rb_root free;
 	struct rb_root scrub;
@@ -388,7 +387,7 @@ struct ubi_device {
 	int thread_enabled;
 	char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
 
-	/* I/O unit's stuff */
+	/* I/O sub-system's stuff */
 	long long flash_size;
 	int peb_count;
 	int peb_size;
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index cc8fe2934d2..761952ba125 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -19,22 +19,22 @@
  */
 
 /*
- * UBI wear-leveling unit.
+ * UBI wear-leveling sub-system.
  *
- * This unit is responsible for wear-leveling. It works in terms of physical
- * eraseblocks and erase counters and knows nothing about logical eraseblocks,
- * volumes, etc. From this unit's perspective all physical eraseblocks are of
- * two types - used and free. Used physical eraseblocks are those that were
- * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are
- * those that were put by the 'ubi_wl_put_peb()' function.
+ * This sub-system is responsible for wear-leveling. It works in terms of
+ * physical eraseblocks and erase counters and knows nothing about logical
+ * eraseblocks, volumes, etc. From this sub-system's perspective all physical
+ * eraseblocks are of two types - used and free. Used physical eraseblocks are
+ * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
+ * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
  *
  * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
- * header. The rest of the physical eraseblock contains only 0xFF bytes.
+ * header. The rest of the physical eraseblock contains only %0xFF bytes.
  *
- * When physical eraseblocks are returned to the WL unit by means of the
+ * When physical eraseblocks are returned to the WL sub-system by means of the
  * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
  * done asynchronously in context of the per-UBI device background thread,
- * which is also managed by the WL unit.
+ * which is also managed by the WL sub-system.
  *
  * The wear-leveling is ensured by means of moving the contents of used
  * physical eraseblocks with low erase counter to free physical eraseblocks
@@ -43,34 +43,36 @@
  * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
  * an "optimal" physical eraseblock. For example, when it is known that the
  * physical eraseblock will be "put" soon because it contains short-term data,
- * the WL unit may pick a free physical eraseblock with low erase counter, and
- * so forth.
+ * the WL sub-system may pick a free physical eraseblock with low erase
+ * counter, and so forth.
  *
- * If the WL unit fails to erase a physical eraseblock, it marks it as bad.
+ * If the WL sub-system fails to erase a physical eraseblock, it marks it as
+ * bad.
  *
- * This unit is also responsible for scrubbing. If a bit-flip is detected in a
- * physical eraseblock, it has to be moved. Technically this is the same as
- * moving it for wear-leveling reasons.
+ * This sub-system is also responsible for scrubbing. If a bit-flip is detected
+ * in a physical eraseblock, it has to be moved. Technically this is the same
+ * as moving it for wear-leveling reasons.
  *
- * As it was said, for the UBI unit all physical eraseblocks are either "free"
- * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used
- * eraseblocks are kept in a set of different RB-trees: @wl->used,
+ * As it was said, for the UBI sub-system all physical eraseblocks are either
+ * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
+ * used eraseblocks are kept in a set of different RB-trees: @wl->used,
  * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
  *
  * Note, in this implementation, we keep a small in-RAM object for each physical
  * eraseblock. This is surely not a scalable solution. But it appears to be good
  * enough for moderately large flashes and it is simple. In future, one may
- * re-work this unit and make it more scalable.
+ * re-work this sub-system and make it more scalable.
  *
- * At the moment this unit does not utilize the sequence number, which was
- * introduced relatively recently. But it would be wise to do this because the
- * sequence number of a logical eraseblock characterizes how old is it. For
+ * At the moment this sub-system does not utilize the sequence number, which
+ * was introduced relatively recently. But it would be wise to do this because
+ * the sequence number of a logical eraseblock characterizes how old is it. For
  * example, when we move a PEB with low erase counter, and we need to pick the
  * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
  * pick target PEB with an average EC if our PEB is not very "old". This is a
- * room for future re-works of the WL unit.
+ * room for future re-works of the WL sub-system.
  *
- * FIXME: looks too complex, should be simplified (later).
+ * Note: the stuff with protection trees looks too complex and is difficult to
+ * understand. Should be fixed.
  */
 
 #include <linux/slab.h>
@@ -92,20 +94,21 @@
 
 /*
  * Maximum difference between two erase counters. If this threshold is
- * exceeded, the WL unit starts moving data from used physical eraseblocks with
- * low erase counter to free physical eraseblocks with high erase counter.
+ * exceeded, the WL sub-system starts moving data from used physical
+ * eraseblocks with low erase counter to free physical eraseblocks with high
+ * erase counter.
  */
 #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
 
 /*
- * When a physical eraseblock is moved, the WL unit has to pick the target
+ * When a physical eraseblock is moved, the WL sub-system has to pick the target
  * physical eraseblock to move to. The simplest way would be just to pick the
  * one with the highest erase counter. But in certain workloads this could lead
  * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
  * situation when the picked physical eraseblock is constantly erased after the
  * data is written to it. So, we have a constant which limits the highest erase
- * counter of the free physical eraseblock to pick. Namely, the WL unit does
- * not pick eraseblocks with erase counter greater then the lowest erase
+ * counter of the free physical eraseblock to pick. Namely, the WL sub-system
+ * does not pick eraseblocks with erase counter greater than the lowest erase
  * counter plus %WL_FREE_MAX_DIFF.
  */
 #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
@@ -123,11 +126,11 @@
  * @abs_ec: the absolute erase counter value when the protection ends
  * @e: the wear-leveling entry of the physical eraseblock under protection
  *
- * When the WL unit returns a physical eraseblock, the physical eraseblock is
- * protected from being moved for some "time". For this reason, the physical
- * eraseblock is not directly moved from the @wl->free tree to the @wl->used
- * tree. There is one more tree in between where this physical eraseblock is
- * temporarily stored (@wl->prot).
+ * When the WL sub-system returns a physical eraseblock, the physical
+ * eraseblock is protected from being moved for some "time". For this reason,
+ * the physical eraseblock is not directly moved from the @wl->free tree to the
+ * @wl->used tree. There is one more tree in between where this physical
+ * eraseblock is temporarily stored (@wl->prot).
  *
  * All this protection stuff is needed because:
  *  o we don't want to move physical eraseblocks just after we have given them
@@ -175,7 +178,6 @@ struct ubi_wl_prot_entry {
  * @list: a link in the list of pending works
  * @func: worker function
  * @priv: private data of the worker function
- *
  * @e: physical eraseblock to erase
  * @torture: if the physical eraseblock has to be tortured
  *
@@ -1136,7 +1138,7 @@ out_ro:
 }
 
 /**
- * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
+ * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
  * @ubi: UBI device description object
  * @pnum: physical eraseblock to return
  * @torture: if this physical eraseblock has to be tortured
@@ -1175,11 +1177,11 @@ retry:
 		/*
 		 * User is putting the physical eraseblock which was selected
 		 * as the target the data is moved to. It may happen if the EBA
-		 * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but
-		 * the WL unit has not put the PEB to the "used" tree yet, but
-		 * it is about to do this. So we just set a flag which will
-		 * tell the WL worker that the PEB is not needed anymore and
-		 * should be scheduled for erasure.
+		 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
+		 * but the WL sub-system has not put the PEB to the "used" tree
+		 * yet, but it is about to do this. So we just set a flag which
+		 * will tell the WL worker that the PEB is not needed anymore
+		 * and should be scheduled for erasure.
 		 */
 		dbg_wl("PEB %d is the target of data moving", pnum);
 		ubi_assert(!ubi->move_to_put);
@@ -1425,8 +1427,7 @@ static void cancel_pending(struct ubi_device *ubi)
 }
 
 /**
- * ubi_wl_init_scan - initialize the wear-leveling unit using scanning
- * information.
+ * ubi_wl_init_scan - initialize the WL sub-system using scanning information.
  * @ubi: UBI device description object
  * @si: scanning information
  *
@@ -1583,13 +1584,12 @@ static void protection_trees_destroy(struct ubi_device *ubi)
 }
 
 /**
- * ubi_wl_close - close the wear-leveling unit.
+ * ubi_wl_close - close the wear-leveling sub-system.
  * @ubi: UBI device description object
  */
 void ubi_wl_close(struct ubi_device *ubi)
 {
-	dbg_wl("close the UBI wear-leveling unit");
-
+	dbg_wl("close the WL sub-system");
 	cancel_pending(ubi);
 	protection_trees_destroy(ubi);
 	tree_destroy(&ubi->used);
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index 83302bbbddb..6316fafe5c2 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -45,13 +45,13 @@ enum {
  * @size: how many physical eraseblocks are reserved for this volume
  * @used_bytes: how many bytes of data this volume contains
  * @used_ebs: how many physical eraseblocks of this volume actually contain any
- * data
+ *            data
  * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
  * @corrupted: non-zero if the volume is corrupted (static volumes only)
  * @upd_marker: non-zero if the volume has update marker set
  * @alignment: volume alignment
  * @usable_leb_size: how many bytes are available in logical eraseblocks of
- * this volume
+ *                   this volume
  * @name_len: volume name length
  * @name: volume name
  * @cdev: UBI volume character device major and minor numbers
-- 
GitLab


From c8566350a3229ca505b84313c65d1403b4d0cbfc Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 16 Jul 2008 17:40:22 +0300
Subject: [PATCH 102/853] UBI: fix and re-work debugging stuff

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c  |   2 +-
 drivers/mtd/ubi/cdev.c   |  26 +++----
 drivers/mtd/ubi/debug.c  | 160 ++++++++++++++++++++-------------------
 drivers/mtd/ubi/debug.h  |  68 +++++++++++------
 drivers/mtd/ubi/gluebi.c |  10 +--
 drivers/mtd/ubi/io.c     |   4 +-
 drivers/mtd/ubi/kapi.c   |  20 ++---
 drivers/mtd/ubi/scan.c   |   2 +-
 drivers/mtd/ubi/upd.c    |  16 ++--
 drivers/mtd/ubi/vmt.c    |  73 +++++++++---------
 drivers/mtd/ubi/vtbl.c   |   2 +-
 11 files changed, 206 insertions(+), 177 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 27271fe32e0..7210e1da1fc 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -403,7 +403,7 @@ static int uif_init(struct ubi_device *ubi)
 
 	ubi_assert(MINOR(dev) == 0);
 	cdev_init(&ubi->cdev, &ubi_cdev_operations);
-	dbg_msg("%s major is %u", ubi->ubi_name, MAJOR(dev));
+	dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev));
 	ubi->cdev.owner = THIS_MODULE;
 
 	err = cdev_add(&ubi->cdev, dev, 1);
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 4fb84e3e650..7c19918cc91 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -116,7 +116,7 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
 	else
 		mode = UBI_READONLY;
 
-	dbg_msg("open volume %d, mode %d", vol_id, mode);
+	dbg_gen("open volume %d, mode %d", vol_id, mode);
 
 	desc = ubi_open_volume(ubi_num, vol_id, mode);
 	unlock_kernel();
@@ -132,7 +132,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file)
 	struct ubi_volume_desc *desc = file->private_data;
 	struct ubi_volume *vol = desc->vol;
 
-	dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode);
+	dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode);
 
 	if (vol->updating) {
 		ubi_warn("update of volume %d not finished, volume is damaged",
@@ -141,7 +141,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file)
 		vol->updating = 0;
 		vfree(vol->upd_buf);
 	} else if (vol->changing_leb) {
-		dbg_msg("only %lld of %lld bytes received for atomic LEB change"
+		dbg_gen("only %lld of %lld bytes received for atomic LEB change"
 			" for volume %d:%d, cancel", vol->upd_received,
 			vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id);
 		vol->changing_leb = 0;
@@ -183,7 +183,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin)
 		return -EINVAL;
 	}
 
-	dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld",
+	dbg_gen("seek volume %d, offset %lld, origin %d, new offset %lld",
 		vol->vol_id, offset, origin, new_offset);
 
 	file->f_pos = new_offset;
@@ -201,7 +201,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
 	void *tbuf;
 	uint64_t tmp;
 
-	dbg_msg("read %zd bytes from offset %lld of volume %d",
+	dbg_gen("read %zd bytes from offset %lld of volume %d",
 		count, *offp, vol->vol_id);
 
 	if (vol->updating) {
@@ -216,7 +216,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
 		return 0;
 
 	if (vol->corrupted)
-		dbg_msg("read from corrupted volume %d", vol->vol_id);
+		dbg_gen("read from corrupted volume %d", vol->vol_id);
 
 	if (*offp + count > vol->used_bytes)
 		count_save = count = vol->used_bytes - *offp;
@@ -285,7 +285,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
 	char *tbuf;
 	uint64_t tmp;
 
-	dbg_msg("requested: write %zd bytes to offset %lld of volume %u",
+	dbg_gen("requested: write %zd bytes to offset %lld of volume %u",
 		count, *offp, vol->vol_id);
 
 	if (vol->vol_type == UBI_STATIC_VOLUME)
@@ -514,7 +514,7 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file,
 			break;
 		}
 
-		dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+		dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
 		err = ubi_eba_unmap_leb(ubi, vol, lnum);
 		if (err)
 			break;
@@ -626,7 +626,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
 	{
 		struct ubi_mkvol_req req;
 
-		dbg_msg("create volume");
+		dbg_gen("create volume");
 		err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req));
 		if (err) {
 			err = -EFAULT;
@@ -656,7 +656,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
 	{
 		int vol_id;
 
-		dbg_msg("remove volume");
+		dbg_gen("remove volume");
 		err = get_user(vol_id, (__user int32_t *)argp);
 		if (err) {
 			err = -EFAULT;
@@ -689,7 +689,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
 		uint64_t tmp;
 		struct ubi_rsvol_req req;
 
-		dbg_msg("re-size volume");
+		dbg_gen("re-size volume");
 		err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req));
 		if (err) {
 			err = -EFAULT;
@@ -742,7 +742,7 @@ static int ctrl_cdev_ioctl(struct inode *inode, struct file *file,
 		struct ubi_attach_req req;
 		struct mtd_info *mtd;
 
-		dbg_msg("attach MTD device");
+		dbg_gen("attach MTD device");
 		err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req));
 		if (err) {
 			err = -EFAULT;
@@ -782,7 +782,7 @@ static int ctrl_cdev_ioctl(struct inode *inode, struct file *file,
 	{
 		int ubi_num;
 
-		dbg_msg("dettach MTD device");
+		dbg_gen("detach MTD device");
 		err = get_user(ubi_num, (__user int32_t *)argp);
 		if (err) {
 			err = -EFAULT;
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index 56956ec2845..21e0d7d76a4 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -24,7 +24,7 @@
  * changes.
  */
 
-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+#ifdef CONFIG_MTD_UBI_DEBUG
 
 #include "ubi.h"
 
@@ -34,14 +34,19 @@
  */
 void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
 {
-	dbg_msg("erase counter header dump:");
-	dbg_msg("magic          %#08x", be32_to_cpu(ec_hdr->magic));
-	dbg_msg("version        %d",    (int)ec_hdr->version);
-	dbg_msg("ec             %llu",  (long long)be64_to_cpu(ec_hdr->ec));
-	dbg_msg("vid_hdr_offset %d",    be32_to_cpu(ec_hdr->vid_hdr_offset));
-	dbg_msg("data_offset    %d",    be32_to_cpu(ec_hdr->data_offset));
-	dbg_msg("hdr_crc        %#08x", be32_to_cpu(ec_hdr->hdr_crc));
-	dbg_msg("erase counter header hexdump:");
+	printk(KERN_DEBUG "Erase counter header dump:\n");
+	printk(KERN_DEBUG "\tmagic          %#08x\n",
+	       be32_to_cpu(ec_hdr->magic));
+	printk(KERN_DEBUG "\tversion        %d\n", (int)ec_hdr->version);
+	printk(KERN_DEBUG "\tec             %llu\n",
+	       (long long)be64_to_cpu(ec_hdr->ec));
+	printk(KERN_DEBUG "\tvid_hdr_offset %d\n",
+	       be32_to_cpu(ec_hdr->vid_hdr_offset));
+	printk(KERN_DEBUG "\tdata_offset    %d\n",
+	       be32_to_cpu(ec_hdr->data_offset));
+	printk(KERN_DEBUG "\thdr_crc        %#08x\n",
+	       be32_to_cpu(ec_hdr->hdr_crc));
+	printk(KERN_DEBUG "erase counter header hexdump:\n");
 	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
 		       ec_hdr, UBI_EC_HDR_SIZE, 1);
 }
@@ -52,22 +57,24 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
  */
 void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
 {
-	dbg_msg("volume identifier header dump:");
-	dbg_msg("magic     %08x", be32_to_cpu(vid_hdr->magic));
-	dbg_msg("version   %d",   (int)vid_hdr->version);
-	dbg_msg("vol_type  %d",   (int)vid_hdr->vol_type);
-	dbg_msg("copy_flag %d",   (int)vid_hdr->copy_flag);
-	dbg_msg("compat    %d",   (int)vid_hdr->compat);
-	dbg_msg("vol_id    %d",   be32_to_cpu(vid_hdr->vol_id));
-	dbg_msg("lnum      %d",   be32_to_cpu(vid_hdr->lnum));
-	dbg_msg("leb_ver   %u",   be32_to_cpu(vid_hdr->leb_ver));
-	dbg_msg("data_size %d",   be32_to_cpu(vid_hdr->data_size));
-	dbg_msg("used_ebs  %d",   be32_to_cpu(vid_hdr->used_ebs));
-	dbg_msg("data_pad  %d",   be32_to_cpu(vid_hdr->data_pad));
-	dbg_msg("sqnum     %llu",
+	printk(KERN_DEBUG "Volume identifier header dump:\n");
+	printk(KERN_DEBUG "\tmagic     %08x\n", be32_to_cpu(vid_hdr->magic));
+	printk(KERN_DEBUG "\tversion   %d\n",   (int)vid_hdr->version);
+	printk(KERN_DEBUG "\tvol_type  %d\n",   (int)vid_hdr->vol_type);
+	printk(KERN_DEBUG "\tcopy_flag %d\n",   (int)vid_hdr->copy_flag);
+	printk(KERN_DEBUG "\tcompat    %d\n",   (int)vid_hdr->compat);
+	printk(KERN_DEBUG "\tvol_id    %d\n",   be32_to_cpu(vid_hdr->vol_id));
+	printk(KERN_DEBUG "\tlnum      %d\n",   be32_to_cpu(vid_hdr->lnum));
+	printk(KERN_DEBUG "\tleb_ver   %u\n",   be32_to_cpu(vid_hdr->leb_ver));
+	printk(KERN_DEBUG "\tdata_size %d\n",   be32_to_cpu(vid_hdr->data_size));
+	printk(KERN_DEBUG "\tused_ebs  %d\n",   be32_to_cpu(vid_hdr->used_ebs));
+	printk(KERN_DEBUG "\tdata_pad  %d\n",   be32_to_cpu(vid_hdr->data_pad));
+	printk(KERN_DEBUG "\tsqnum     %llu\n",
 		(unsigned long long)be64_to_cpu(vid_hdr->sqnum));
-	dbg_msg("hdr_crc   %08x", be32_to_cpu(vid_hdr->hdr_crc));
-	dbg_msg("volume identifier header hexdump:");
+	printk(KERN_DEBUG "\thdr_crc   %08x\n", be32_to_cpu(vid_hdr->hdr_crc));
+	printk(KERN_DEBUG "Volume identifier header hexdump:\n");
+	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+		       vid_hdr, UBI_VID_HDR_SIZE, 1);
 }
 
 /**
@@ -76,27 +83,27 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
  */
 void ubi_dbg_dump_vol_info(const struct ubi_volume *vol)
 {
-	dbg_msg("volume information dump:");
-	dbg_msg("vol_id          %d", vol->vol_id);
-	dbg_msg("reserved_pebs   %d", vol->reserved_pebs);
-	dbg_msg("alignment       %d", vol->alignment);
-	dbg_msg("data_pad        %d", vol->data_pad);
-	dbg_msg("vol_type        %d", vol->vol_type);
-	dbg_msg("name_len        %d", vol->name_len);
-	dbg_msg("usable_leb_size %d", vol->usable_leb_size);
-	dbg_msg("used_ebs        %d", vol->used_ebs);
-	dbg_msg("used_bytes      %lld", vol->used_bytes);
-	dbg_msg("last_eb_bytes   %d", vol->last_eb_bytes);
-	dbg_msg("corrupted       %d", vol->corrupted);
-	dbg_msg("upd_marker      %d", vol->upd_marker);
+	printk(KERN_DEBUG "Volume information dump:\n");
+	printk(KERN_DEBUG "\tvol_id          %d\n", vol->vol_id);
+	printk(KERN_DEBUG "\treserved_pebs   %d\n", vol->reserved_pebs);
+	printk(KERN_DEBUG "\talignment       %d\n", vol->alignment);
+	printk(KERN_DEBUG "\tdata_pad        %d\n", vol->data_pad);
+	printk(KERN_DEBUG "\tvol_type        %d\n", vol->vol_type);
+	printk(KERN_DEBUG "\tname_len        %d\n", vol->name_len);
+	printk(KERN_DEBUG "\tusable_leb_size %d\n", vol->usable_leb_size);
+	printk(KERN_DEBUG "\tused_ebs        %d\n", vol->used_ebs);
+	printk(KERN_DEBUG "\tused_bytes      %lld\n", vol->used_bytes);
+	printk(KERN_DEBUG "\tlast_eb_bytes   %d\n", vol->last_eb_bytes);
+	printk(KERN_DEBUG "\tcorrupted       %d\n", vol->corrupted);
+	printk(KERN_DEBUG "\tupd_marker      %d\n", vol->upd_marker);
 
 	if (vol->name_len <= UBI_VOL_NAME_MAX &&
 	    strnlen(vol->name, vol->name_len + 1) == vol->name_len) {
-		dbg_msg("name            %s", vol->name);
+		printk(KERN_DEBUG "\tname            %s\n", vol->name);
 	} else {
-		dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c",
-			vol->name[0], vol->name[1], vol->name[2],
-			vol->name[3], vol->name[4]);
+		printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
+		       vol->name[0], vol->name[1], vol->name[2],
+		       vol->name[3], vol->name[4]);
 	}
 }
 
@@ -109,28 +116,29 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx)
 {
 	int name_len = be16_to_cpu(r->name_len);
 
-	dbg_msg("volume table record %d dump:", idx);
-	dbg_msg("reserved_pebs   %d", be32_to_cpu(r->reserved_pebs));
-	dbg_msg("alignment       %d", be32_to_cpu(r->alignment));
-	dbg_msg("data_pad        %d", be32_to_cpu(r->data_pad));
-	dbg_msg("vol_type        %d", (int)r->vol_type);
-	dbg_msg("upd_marker      %d", (int)r->upd_marker);
-	dbg_msg("name_len        %d", name_len);
+	printk(KERN_DEBUG "Volume table record %d dump:\n", idx);
+	printk(KERN_DEBUG "\treserved_pebs   %d\n",
+	       be32_to_cpu(r->reserved_pebs));
+	printk(KERN_DEBUG "\talignment       %d\n", be32_to_cpu(r->alignment));
+	printk(KERN_DEBUG "\tdata_pad        %d\n", be32_to_cpu(r->data_pad));
+	printk(KERN_DEBUG "\tvol_type        %d\n", (int)r->vol_type);
+	printk(KERN_DEBUG "\tupd_marker      %d\n", (int)r->upd_marker);
+	printk(KERN_DEBUG "\tname_len        %d\n", name_len);
 
 	if (r->name[0] == '\0') {
-		dbg_msg("name            NULL");
+		printk(KERN_DEBUG "\tname            NULL\n");
 		return;
 	}
 
 	if (name_len <= UBI_VOL_NAME_MAX &&
 	    strnlen(&r->name[0], name_len + 1) == name_len) {
-		dbg_msg("name            %s", &r->name[0]);
+		printk(KERN_DEBUG "\tname            %s\n", &r->name[0]);
 	} else {
-		dbg_msg("1st 5 characters of the name: %c%c%c%c%c",
+		printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
 			r->name[0], r->name[1], r->name[2], r->name[3],
 			r->name[4]);
 	}
-	dbg_msg("crc             %#08x", be32_to_cpu(r->crc));
+	printk(KERN_DEBUG "\tcrc             %#08x\n", be32_to_cpu(r->crc));
 }
 
 /**
@@ -139,15 +147,15 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx)
  */
 void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
 {
-	dbg_msg("volume scanning information dump:");
-	dbg_msg("vol_id         %d", sv->vol_id);
-	dbg_msg("highest_lnum   %d", sv->highest_lnum);
-	dbg_msg("leb_count      %d", sv->leb_count);
-	dbg_msg("compat         %d", sv->compat);
-	dbg_msg("vol_type       %d", sv->vol_type);
-	dbg_msg("used_ebs       %d", sv->used_ebs);
-	dbg_msg("last_data_size %d", sv->last_data_size);
-	dbg_msg("data_pad       %d", sv->data_pad);
+	printk(KERN_DEBUG "Volume scanning information dump:\n");
+	printk(KERN_DEBUG "\tvol_id         %d\n", sv->vol_id);
+	printk(KERN_DEBUG "\thighest_lnum   %d\n", sv->highest_lnum);
+	printk(KERN_DEBUG "\tleb_count      %d\n", sv->leb_count);
+	printk(KERN_DEBUG "\tcompat         %d\n", sv->compat);
+	printk(KERN_DEBUG "\tvol_type       %d\n", sv->vol_type);
+	printk(KERN_DEBUG "\tused_ebs       %d\n", sv->used_ebs);
+	printk(KERN_DEBUG "\tlast_data_size %d\n", sv->last_data_size);
+	printk(KERN_DEBUG "\tdata_pad       %d\n", sv->data_pad);
 }
 
 /**
@@ -157,14 +165,14 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
  */
 void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type)
 {
-	dbg_msg("eraseblock scanning information dump:");
-	dbg_msg("ec       %d", seb->ec);
-	dbg_msg("pnum     %d", seb->pnum);
+	printk(KERN_DEBUG "eraseblock scanning information dump:\n");
+	printk(KERN_DEBUG "\tec       %d\n", seb->ec);
+	printk(KERN_DEBUG "\tpnum     %d\n", seb->pnum);
 	if (type == 0) {
-		dbg_msg("lnum     %d", seb->lnum);
-		dbg_msg("scrub    %d", seb->scrub);
-		dbg_msg("sqnum    %llu", seb->sqnum);
-		dbg_msg("leb_ver  %u", seb->leb_ver);
+		printk(KERN_DEBUG "\tlnum     %d\n", seb->lnum);
+		printk(KERN_DEBUG "\tscrub    %d\n", seb->scrub);
+		printk(KERN_DEBUG "\tsqnum    %llu\n", seb->sqnum);
+		printk(KERN_DEBUG "\tleb_ver  %u\n", seb->leb_ver);
 	}
 }
 
@@ -176,16 +184,16 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req)
 {
 	char nm[17];
 
-	dbg_msg("volume creation request dump:");
-	dbg_msg("vol_id    %d",   req->vol_id);
-	dbg_msg("alignment %d",   req->alignment);
-	dbg_msg("bytes     %lld", (long long)req->bytes);
-	dbg_msg("vol_type  %d",   req->vol_type);
-	dbg_msg("name_len  %d",   req->name_len);
+	printk(KERN_DEBUG "Volume creation request dump:\n");
+	printk(KERN_DEBUG "\tvol_id    %d\n",   req->vol_id);
+	printk(KERN_DEBUG "\talignment %d\n",   req->alignment);
+	printk(KERN_DEBUG "\tbytes     %lld\n", (long long)req->bytes);
+	printk(KERN_DEBUG "\tvol_type  %d\n",   req->vol_type);
+	printk(KERN_DEBUG "\tname_len  %d\n",   req->name_len);
 
 	memcpy(nm, req->name, 16);
 	nm[16] = 0;
-	dbg_msg("the 1st 16 characters of the name: %s", nm);
+	printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm);
 }
 
-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+#endif /* CONFIG_MTD_UBI_DEBUG */
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 7d8d77c31df..78e914d23ec 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -24,21 +24,16 @@
 #ifdef CONFIG_MTD_UBI_DEBUG
 #include <linux/random.h>
 
-#define ubi_assert(expr)  BUG_ON(!(expr))
 #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
-#else
-#define ubi_assert(expr)  ({})
-#define dbg_err(fmt, ...) ({})
-#endif
 
-#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
-#define DBG_DISABLE_BGT 1
-#else
-#define DBG_DISABLE_BGT 0
-#endif
+#define ubi_assert(expr)  do {                                               \
+        if (unlikely(!(expr))) {                                             \
+                printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
+                       __func__, __LINE__, current->pid);                    \
+                ubi_dbg_dump_stack();                                        \
+        }                                                                    \
+} while (0)
 
-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
-/* Generic debugging message */
 #define dbg_msg(fmt, ...)                                    \
 	printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \
 	       current->pid, __func__, ##__VA_ARGS__)
@@ -61,19 +56,12 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv);
 void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
 void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
 
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+/* General debugging messages */
+#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
 #else
-
-#define dbg_msg(fmt, ...)    ({})
-#define ubi_dbg_dump_stack() ({})
-#define ubi_dbg_dump_ec_hdr(ec_hdr)      ({})
-#define ubi_dbg_dump_vid_hdr(vid_hdr)    ({})
-#define ubi_dbg_dump_vol_info(vol)       ({})
-#define ubi_dbg_dump_vtbl_record(r, idx) ({})
-#define ubi_dbg_dump_sv(sv)              ({})
-#define ubi_dbg_dump_seb(seb, type)      ({})
-#define ubi_dbg_dump_mkvol_req(req)      ({})
-
-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+#define dbg_gen(fmt, ...) ({})
+#endif
 
 #ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
 /* Messages from the eraseblock association sub-system */
@@ -105,6 +93,12 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
 #define UBI_IO_DEBUG 0
 #endif
 
+#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
+#define DBG_DISABLE_BGT 1
+#else
+#define DBG_DISABLE_BGT 0
+#endif
+
 #ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS
 /**
  * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
@@ -149,4 +143,30 @@ static inline int ubi_dbg_is_erase_failure(void)
 #define ubi_dbg_is_erase_failure() 0
 #endif
 
+#else
+
+#define ubi_assert(expr)                 ({})
+#define dbg_err(fmt, ...)                ({})
+#define dbg_msg(fmt, ...)                ({})
+#define dbg_gen(fmt, ...)                ({})
+#define dbg_eba(fmt, ...)                ({})
+#define dbg_wl(fmt, ...)                 ({})
+#define dbg_io(fmt, ...)                 ({})
+#define dbg_bld(fmt, ...)                ({})
+#define ubi_dbg_dump_stack()             ({})
+#define ubi_dbg_dump_ec_hdr(ec_hdr)      ({})
+#define ubi_dbg_dump_vid_hdr(vid_hdr)    ({})
+#define ubi_dbg_dump_vol_info(vol)       ({})
+#define ubi_dbg_dump_vtbl_record(r, idx) ({})
+#define ubi_dbg_dump_sv(sv)              ({})
+#define ubi_dbg_dump_seb(seb, type)      ({})
+#define ubi_dbg_dump_mkvol_req(req)      ({})
+
+#define UBI_IO_DEBUG               0
+#define DBG_DISABLE_BGT            0
+#define ubi_dbg_is_bitflip()       0
+#define ubi_dbg_is_write_failure() 0
+#define ubi_dbg_is_erase_failure() 0
+
+#endif /* !CONFIG_MTD_UBI_DEBUG */
 #endif /* !__UBI_DEBUG_H__ */
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index ae76ab638b2..49f52dceea9 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -111,7 +111,7 @@ static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len,
 	struct ubi_device *ubi;
 	uint64_t tmp = from;
 
-	dbg_msg("read %zd bytes from offset %lld", len, from);
+	dbg_gen("read %zd bytes from offset %lld", len, from);
 
 	if (len < 0 || from < 0 || from + len > mtd->size)
 		return -EINVAL;
@@ -162,7 +162,7 @@ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len,
 	struct ubi_device *ubi;
 	uint64_t tmp = to;
 
-	dbg_msg("write %zd bytes to offset %lld", len, to);
+	dbg_gen("write %zd bytes to offset %lld", len, to);
 
 	if (len < 0 || to < 0 || len + to > mtd->size)
 		return -EINVAL;
@@ -215,7 +215,7 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
 	struct ubi_volume *vol;
 	struct ubi_device *ubi;
 
-	dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr);
+	dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr);
 
 	if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
 		return -EINVAL;
@@ -304,7 +304,7 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
 		return -ENFILE;
 	}
 
-	dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u",
+	dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u",
 		mtd->index, mtd->name, mtd->size, mtd->erasesize);
 	return 0;
 }
@@ -322,7 +322,7 @@ int ubi_destroy_gluebi(struct ubi_volume *vol)
 	int err;
 	struct mtd_info *mtd = &vol->gluebi_mtd;
 
-	dbg_msg("remove mtd%d", mtd->index);
+	dbg_gen("remove mtd%d", mtd->index);
 	err = del_mtd_device(mtd);
 	if (err)
 		return err;
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 561e7b2f96c..27b9c2c2fc6 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -187,7 +187,7 @@ retry:
 		ubi_assert(len == read);
 
 		if (ubi_dbg_is_bitflip()) {
-			dbg_msg("bit-flip (emulated)");
+			dbg_gen("bit-flip (emulated)");
 			err = UBI_IO_BITFLIPS;
 		}
 	}
@@ -1256,7 +1256,7 @@ static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
 
 fail:
 	ubi_err("paranoid check failed for PEB %d", pnum);
-	dbg_msg("hex dump of the %d-%d region", offset, offset + len);
+	ubi_msg("hex dump of the %d-%d region", offset, offset + len);
 	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
 		       ubi->dbg_peb_buf, len, 1);
 	err = 1;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index e65c8e0bcd5..5d9bcf109c1 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -106,7 +106,7 @@ struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode)
 	struct ubi_device *ubi;
 	struct ubi_volume *vol;
 
-	dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
+	dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
 
 	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
 		return ERR_PTR(-EINVAL);
@@ -215,7 +215,7 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
 	struct ubi_device *ubi;
 	struct ubi_volume_desc *ret;
 
-	dbg_msg("open volume %s, mode %d", name, mode);
+	dbg_gen("open volume %s, mode %d", name, mode);
 
 	if (!name)
 		return ERR_PTR(-EINVAL);
@@ -266,7 +266,7 @@ void ubi_close_volume(struct ubi_volume_desc *desc)
 	struct ubi_volume *vol = desc->vol;
 	struct ubi_device *ubi = vol->ubi;
 
-	dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode);
+	dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode);
 
 	spin_lock(&ubi->volumes_lock);
 	switch (desc->mode) {
@@ -323,7 +323,7 @@ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
 	struct ubi_device *ubi = vol->ubi;
 	int err, vol_id = vol->vol_id;
 
-	dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
+	dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
 
 	if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 ||
 	    lnum >= vol->used_ebs || offset < 0 || len < 0 ||
@@ -388,7 +388,7 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
 	struct ubi_device *ubi = vol->ubi;
 	int vol_id = vol->vol_id;
 
-	dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
+	dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
 
 	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
 		return -EINVAL;
@@ -438,7 +438,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
 	struct ubi_device *ubi = vol->ubi;
 	int vol_id = vol->vol_id;
 
-	dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
+	dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
 
 	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
 		return -EINVAL;
@@ -482,7 +482,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum)
 	struct ubi_device *ubi = vol->ubi;
 	int err;
 
-	dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+	dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
 
 	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
 		return -EROFS;
@@ -542,7 +542,7 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum)
 	struct ubi_volume *vol = desc->vol;
 	struct ubi_device *ubi = vol->ubi;
 
-	dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum);
+	dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
 
 	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
 		return -EROFS;
@@ -579,7 +579,7 @@ int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
 	struct ubi_volume *vol = desc->vol;
 	struct ubi_device *ubi = vol->ubi;
 
-	dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum);
+	dbg_gen("map LEB %d:%d", vol->vol_id, lnum);
 
 	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
 		return -EROFS;
@@ -621,7 +621,7 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
 {
 	struct ubi_volume *vol = desc->vol;
 
-	dbg_msg("test LEB %d:%d", vol->vol_id, lnum);
+	dbg_gen("test LEB %d:%d", vol->vol_id, lnum);
 
 	if (lnum < 0 || lnum >= vol->reserved_pebs)
 		return -EINVAL;
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 892c2ba4977..40eca9ce5fa 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -932,7 +932,7 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi)
 	for (pnum = 0; pnum < ubi->peb_count; pnum++) {
 		cond_resched();
 
-		dbg_msg("process PEB %d", pnum);
+		dbg_gen("process PEB %d", pnum);
 		err = process_eb(ubi, si, pnum);
 		if (err < 0)
 			goto out_vidh;
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 6fa1ab3f2a7..1230a5e1b53 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -56,11 +56,11 @@ static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol)
 	int err;
 	struct ubi_vtbl_record vtbl_rec;
 
-	dbg_msg("set update marker for volume %d", vol->vol_id);
+	dbg_gen("set update marker for volume %d", vol->vol_id);
 
 	if (vol->upd_marker) {
 		ubi_assert(ubi->vtbl[vol->vol_id].upd_marker);
-		dbg_msg("already set");
+		dbg_gen("already set");
 		return 0;
 	}
 
@@ -92,7 +92,7 @@ static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol,
 	uint64_t tmp;
 	struct ubi_vtbl_record vtbl_rec;
 
-	dbg_msg("clear update marker for volume %d", vol->vol_id);
+	dbg_gen("clear update marker for volume %d", vol->vol_id);
 
 	memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id],
 	       sizeof(struct ubi_vtbl_record));
@@ -133,7 +133,7 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol,
 	int i, err;
 	uint64_t tmp;
 
-	dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes);
+	dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes);
 	ubi_assert(!vol->updating && !vol->changing_leb);
 	vol->updating = 1;
 
@@ -183,7 +183,7 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
 {
 	ubi_assert(!vol->updating && !vol->changing_leb);
 
-	dbg_msg("start changing LEB %d:%d, %u bytes",
+	dbg_gen("start changing LEB %d:%d, %u bytes",
 		vol->vol_id, req->lnum, req->bytes);
 	if (req->bytes == 0)
 		return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0,
@@ -242,7 +242,7 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
 		memset(buf + len, 0xFF, l - len);
 		len = ubi_calc_data_len(ubi, buf, l);
 		if (len == 0) {
-			dbg_msg("all %d bytes contain 0xFF - skip", len);
+			dbg_gen("all %d bytes contain 0xFF - skip", len);
 			return 0;
 		}
 
@@ -283,7 +283,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
 	uint64_t tmp;
 	int lnum, offs, err = 0, len, to_write = count;
 
-	dbg_msg("write %d of %lld bytes, %lld already passed",
+	dbg_gen("write %d of %lld bytes, %lld already passed",
 		count, vol->upd_bytes, vol->upd_received);
 
 	if (ubi->ro_mode)
@@ -400,7 +400,7 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
 {
 	int err;
 
-	dbg_msg("write %d of %lld bytes, %lld already passed",
+	dbg_gen("write %d of %lld bytes, %lld already passed",
 		count, vol->upd_bytes, vol->upd_received);
 
 	if (ubi->ro_mode)
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index bfa7c5d2e06..2cd886a5ada 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -28,9 +28,9 @@
 #include "ubi.h"
 
 #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
-static void paranoid_check_volumes(struct ubi_device *ubi);
+static int paranoid_check_volumes(struct ubi_device *ubi);
 #else
-#define paranoid_check_volumes(ubi)
+#define paranoid_check_volumes(ubi) 0
 #endif
 
 static ssize_t vol_attribute_show(struct device *dev,
@@ -218,7 +218,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 	spin_lock(&ubi->volumes_lock);
 	if (vol_id == UBI_VOL_NUM_AUTO) {
 		/* Find unused volume ID */
-		dbg_msg("search for vacant volume ID");
+		dbg_gen("search for vacant volume ID");
 		for (i = 0; i < ubi->vtbl_slots; i++)
 			if (!ubi->volumes[i]) {
 				vol_id = i;
@@ -233,7 +233,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 		req->vol_id = vol_id;
 	}
 
-	dbg_msg("volume ID %d, %llu bytes, type %d, name %s",
+	dbg_gen("volume ID %d, %llu bytes, type %d, name %s",
 		vol_id, (unsigned long long)req->bytes,
 		(int)req->vol_type, req->name);
 
@@ -361,8 +361,8 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 	ubi->vol_count += 1;
 	spin_unlock(&ubi->volumes_lock);
 
-	paranoid_check_volumes(ubi);
-	return 0;
+	err = paranoid_check_volumes(ubi);
+	return err;
 
 out_sysfs:
 	/*
@@ -414,7 +414,7 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
 	struct ubi_device *ubi = vol->ubi;
 	int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs;
 
-	dbg_msg("remove UBI volume %d", vol_id);
+	dbg_gen("remove UBI volume %d", vol_id);
 	ubi_assert(desc->mode == UBI_EXCLUSIVE);
 	ubi_assert(vol == ubi->volumes[vol_id]);
 
@@ -465,8 +465,8 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
 	ubi->vol_count -= 1;
 	spin_unlock(&ubi->volumes_lock);
 
-	paranoid_check_volumes(ubi);
-	return 0;
+	err = paranoid_check_volumes(ubi);
+	return err;
 
 out_err:
 	ubi_err("cannot remove volume %d, error %d", vol_id, err);
@@ -497,7 +497,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
 	if (ubi->ro_mode)
 		return -EROFS;
 
-	dbg_msg("re-size volume %d to from %d to %d PEBs",
+	dbg_gen("re-size volume %d to from %d to %d PEBs",
 		vol_id, vol->reserved_pebs, reserved_pebs);
 
 	if (vol->vol_type == UBI_STATIC_VOLUME &&
@@ -586,8 +586,8 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
 			(long long)vol->used_ebs * vol->usable_leb_size;
 	}
 
-	paranoid_check_volumes(ubi);
-	return 0;
+	err = paranoid_check_volumes(ubi);
+	return err;
 
 out_acc:
 	if (pebs > 0) {
@@ -615,8 +615,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
 	int err, vol_id = vol->vol_id;
 	dev_t dev;
 
-	dbg_msg("add volume %d", vol_id);
-	ubi_dbg_dump_vol_info(vol);
+	dbg_gen("add volume %d", vol_id);
 
 	/* Register character device for the volume */
 	cdev_init(&vol->cdev, &ubi_vol_cdev_operations);
@@ -650,8 +649,8 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
 		return err;
 	}
 
-	paranoid_check_volumes(ubi);
-	return 0;
+	err = paranoid_check_volumes(ubi);
+	return err;
 
 out_gluebi:
 	err = ubi_destroy_gluebi(vol);
@@ -672,7 +671,7 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol)
 {
 	int err;
 
-	dbg_msg("free volume %d", vol->vol_id);
+	dbg_gen("free volume %d", vol->vol_id);
 
 	ubi->volumes[vol->vol_id] = NULL;
 	err = ubi_destroy_gluebi(vol);
@@ -686,8 +685,10 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol)
  * paranoid_check_volume - check volume information.
  * @ubi: UBI device description object
  * @vol_id: volume ID
+ *
+ * Returns zero if volume is all right and a negative error code if not.
  */
-static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
+static int paranoid_check_volume(struct ubi_device *ubi, int vol_id)
 {
 	int idx = vol_id2idx(ubi, vol_id);
 	int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker;
@@ -705,16 +706,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
 			goto fail;
 		}
 		spin_unlock(&ubi->volumes_lock);
-		return;
-	}
-
-	if (vol->exclusive) {
-		/*
-		 * The volume may be being created at the moment, do not check
-		 * it (e.g., it may be in the middle of ubi_create_volume().
-		 */
-		spin_unlock(&ubi->volumes_lock);
-		return;
+		return 0;
 	}
 
 	if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 ||
@@ -830,25 +822,34 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
 	}
 
 	spin_unlock(&ubi->volumes_lock);
-	return;
+	return 0;
 
 fail:
 	ubi_err("paranoid check failed for volume %d", vol_id);
-	ubi_dbg_dump_vol_info(vol);
-	ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
+	if (vol) {
+		ubi_dbg_dump_vol_info(vol);
+		ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
+	}
 	spin_unlock(&ubi->volumes_lock);
-	BUG();
+	return -EINVAL;
 }
 
 /**
  * paranoid_check_volumes - check information about all volumes.
  * @ubi: UBI device description object
+ *
+ * Returns zero if volumes are all right and a negative error code if not.
  */
-static void paranoid_check_volumes(struct ubi_device *ubi)
+static int paranoid_check_volumes(struct ubi_device *ubi)
 {
-	int i;
+	int i, err = 0;
 
-	for (i = 0; i < ubi->vtbl_slots; i++)
-		paranoid_check_volume(ubi, i);
+	for (i = 0; i < ubi->vtbl_slots; i++) {
+		err = paranoid_check_volume(ubi, i);
+		if (err)
+			break;
+	}
+
+	return err;
 }
 #endif
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index d9af11a8682..05fb72fd268 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -371,7 +371,7 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
 	 *    to LEB 0.
 	 */
 
-	dbg_msg("check layout volume");
+	dbg_gen("check layout volume");
 
 	/* Read both LEB 0 and LEB 1 into memory */
 	ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
-- 
GitLab


From f40ac9cdf6991287f19bdafe9b0752ee40137908 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 13 Jul 2008 21:47:47 +0300
Subject: [PATCH 103/853] UBI: implement multiple volumes rename

A quite useful ioctl which makes atomic system upgrades possible.
The idea belongs to Richard Titmuss <richard_titmuss@logitech.com>

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c |   1 +
 drivers/mtd/ubi/cdev.c  | 188 +++++++++++++++++++++++++++++++++++++++-
 drivers/mtd/ubi/ubi.h   |  33 ++++++-
 drivers/mtd/ubi/vmt.c   |  57 ++++++++++--
 drivers/mtd/ubi/vtbl.c  |  51 +++++++++++
 include/mtd/ubi-user.h  |  60 ++++++++++++-
 6 files changed, 375 insertions(+), 15 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 7210e1da1fc..4418a2369b5 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -806,6 +806,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 
 	mutex_init(&ubi->buf_mutex);
 	mutex_init(&ubi->ckvol_mutex);
+	mutex_init(&ubi->mult_mutex);
 	mutex_init(&ubi->volumes_mutex);
 	spin_lock_init(&ubi->volumes_lock);
 
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 7c19918cc91..bc8199c6a9f 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -605,6 +605,166 @@ static int verify_rsvol_req(const struct ubi_device *ubi,
 	return 0;
 }
 
+/**
+ * rename_volumes - rename UBI volumes.
+ * @ubi: UBI device description object
+ * @req: volumes re-name request
+ *
+ * This is a helper function for the volume re-name IOCTL which validates the
+ * request, opens the volumes and calls the corresponding volumes management
+ * function. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int rename_volumes(struct ubi_device *ubi,
+			  struct ubi_rnvol_req *req)
+{
+	int i, n, err;
+	struct list_head rename_list;
+	struct ubi_rename_entry *re, *re1;
+
+	if (req->count < 0 || req->count > UBI_MAX_RNVOL)
+		return -EINVAL;
+
+	if (req->count == 0)
+		return 0;
+
+	/* Validate volume IDs and names in the request */
+	for (i = 0; i < req->count; i++) {
+		if (req->ents[i].vol_id < 0 ||
+		    req->ents[i].vol_id >= ubi->vtbl_slots)
+			return -EINVAL;
+		if (req->ents[i].name_len < 0)
+			return -EINVAL;
+		if (req->ents[i].name_len > UBI_VOL_NAME_MAX)
+			return -ENAMETOOLONG;
+		req->ents[i].name[req->ents[i].name_len] = '\0';
+		n = strlen(req->ents[i].name);
+		if (n != req->ents[i].name_len)
+			return -EINVAL;
+	}
+
+	/* Make sure volume IDs and names are unique */
+	for (i = 0; i < req->count - 1; i++) {
+		for (n = i + 1; n < req->count; n++) {
+			if (req->ents[i].vol_id == req->ents[n].vol_id) {
+				dbg_err("duplicated volume id %d",
+					req->ents[i].vol_id);
+				return -EINVAL;
+			}
+			if (!strcmp(req->ents[i].name, req->ents[n].name)) {
+				dbg_err("duplicated volume name \"%s\"",
+					req->ents[i].name);
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* Create the re-name list */
+	INIT_LIST_HEAD(&rename_list);
+	for (i = 0; i < req->count; i++) {
+		int vol_id = req->ents[i].vol_id;
+		int name_len = req->ents[i].name_len;
+		const char *name = req->ents[i].name;
+
+		re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+		if (!re) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+
+		re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE);
+		if (IS_ERR(re->desc)) {
+			err = PTR_ERR(re->desc);
+			dbg_err("cannot open volume %d, error %d", vol_id, err);
+			kfree(re);
+			goto out_free;
+		}
+
+		/* Skip this re-naming if the name does not really change */
+		if (re->desc->vol->name_len == name_len &&
+		    !memcmp(re->desc->vol->name, name, name_len)) {
+			ubi_close_volume(re->desc);
+			kfree(re);
+			continue;
+		}
+
+		re->new_name_len = name_len;
+		memcpy(re->new_name, name, name_len);
+		list_add_tail(&re->list, &rename_list);
+		dbg_msg("will rename volume %d from \"%s\" to \"%s\"",
+			vol_id, re->desc->vol->name, name);
+	}
+
+	if (list_empty(&rename_list))
+		return 0;
+
+	/* Find out the volumes which have to be removed */
+	list_for_each_entry(re, &rename_list, list) {
+		struct ubi_volume_desc *desc;
+		int no_remove_needed = 0;
+
+		/*
+		 * Volume @re->vol_id is going to be re-named to
+		 * @re->new_name, while its current name is @name. If a volume
+		 * with name @re->new_name currently exists, it has to be
+		 * removed, unless it is also re-named in the request (@req).
+		 */
+		list_for_each_entry(re1, &rename_list, list) {
+			if (re->new_name_len == re1->desc->vol->name_len &&
+			    !memcmp(re->new_name, re1->desc->vol->name,
+				    re1->desc->vol->name_len)) {
+				no_remove_needed = 1;
+				break;
+			}
+		}
+
+		if (no_remove_needed)
+			continue;
+
+		/*
+		 * Remove the volume named @re->new_name, if it exists. Use @re1
+		 * for the new entry - @re is the cursor of the enclosing loop.
+		 */
+		desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE);
+		if (IS_ERR(desc)) {
+			err = PTR_ERR(desc);
+			if (err == -ENODEV)
+				/* Re-naming into a non-existing volume name */
+				continue;
+
+			/* The volume exists but busy, or an error occurred */
+			dbg_err("cannot open volume \"%s\", error %d",
+				re->new_name, err);
+			goto out_free;
+		}
+
+		re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+		if (!re1) {
+			err = -ENOMEM;
+			ubi_close_volume(desc);
+			goto out_free;
+		}
+
+		re1->remove = 1;
+		re1->desc = desc;
+		list_add(&re1->list, &rename_list);
+		dbg_msg("will remove volume %d, name \"%s\"",
+			re1->desc->vol->vol_id, re1->desc->vol->name);
+	}
+
+	mutex_lock(&ubi->volumes_mutex);
+	err = ubi_rename_volumes(ubi, &rename_list);
+	mutex_unlock(&ubi->volumes_mutex);
+
+out_free:
+	list_for_each_entry_safe(re, re1, &rename_list, list) {
+		ubi_close_volume(re->desc);
+		list_del(&re->list);
+		kfree(re);
+	}
+	return err;
+}
+
 static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
 			  unsigned int cmd, unsigned long arg)
 {
@@ -670,7 +830,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
 		}
 
 		mutex_lock(&ubi->volumes_mutex);
-		err = ubi_remove_volume(desc);
+		err = ubi_remove_volume(desc, 0);
 		mutex_unlock(&ubi->volumes_mutex);
 
 		/*
@@ -717,6 +877,32 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
 		break;
 	}
 
+	/* Re-name volumes command */
+	case UBI_IOCRNVOL:
+	{
+		struct ubi_rnvol_req *req;
+
+		dbg_msg("re-name volumes");
+		req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL);
+		if (!req) {
+			err = -ENOMEM;
+			break;
+		};
+
+		err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req));
+		if (err) {
+			err = -EFAULT;
+			kfree(req);
+			break;
+		}
+
+		mutex_lock(&ubi->mult_mutex);
+		err = rename_volumes(ubi, req);
+		mutex_unlock(&ubi->mult_mutex);
+		kfree(req);
+		break;
+	}
+
 	default:
 		err = -ENOTTY;
 		break;
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 1fc32c863b7..274c67916b3 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -131,6 +131,27 @@ struct ubi_ltree_entry {
 	struct rw_semaphore mutex;
 };
 
+/**
+ * struct ubi_rename_entry - volume re-name description data structure.
+ * @new_name_len: new volume name length
+ * @new_name: new volume name
+ * @remove: if not zero, this volume should be removed, not re-named
+ * @desc: descriptor of the volume
+ * @list: links re-name entries into a list
+ *
+ * This data structure is utilized in the multiple volume re-name code. Namely,
+ * UBI first creates a list of &struct ubi_rename_entry objects from the
+ * &struct ubi_rnvol_req request object, and then utilizes this list to do all
+ * the job.
+ */
+struct ubi_rename_entry {
+	int new_name_len;
+	char new_name[UBI_VOL_NAME_MAX + 1];
+	int remove;
+	struct ubi_volume_desc *desc;
+	struct list_head list;
+};
+
 struct ubi_volume_desc;
 
 /**
@@ -206,7 +227,7 @@ struct ubi_volume {
 	int alignment;
 	int data_pad;
 	int name_len;
-	char name[UBI_VOL_NAME_MAX+1];
+	char name[UBI_VOL_NAME_MAX + 1];
 
 	int upd_ebs;
 	int ch_lnum;
@@ -272,7 +293,7 @@ struct ubi_wl_entry;
  * @vtbl_size: size of the volume table in bytes
  * @vtbl: in-RAM volume table copy
  * @volumes_mutex: protects on-flash volume table and serializes volume
- *                 changes, like creation, deletion, update, resize
+ *                 changes, like creation, deletion, update, re-size and re-name
  *
  * @max_ec: current highest erase counter value
  * @mean_ec: current mean erase counter value
@@ -330,6 +351,8 @@ struct ubi_wl_entry;
  * @peb_buf1: a buffer of PEB size used for different purposes
  * @peb_buf2: another buffer of PEB size used for different purposes
  * @buf_mutex: proptects @peb_buf1 and @peb_buf2
+ * @ckvol_mutex: serializes static volume checking when opening
+ * @mult_mutex: serializes operations on multiple volumes, like re-naming
  * @dbg_peb_buf: buffer of PEB size used for debugging
  * @dbg_buf_mutex: proptects @dbg_peb_buf
  */
@@ -410,6 +433,7 @@ struct ubi_device {
 	void *peb_buf2;
 	struct mutex buf_mutex;
 	struct mutex ckvol_mutex;
+	struct mutex mult_mutex;
 #ifdef CONFIG_MTD_UBI_DEBUG
 	void *dbg_peb_buf;
 	struct mutex dbg_buf_mutex;
@@ -426,12 +450,15 @@ extern struct mutex ubi_devices_mutex;
 /* vtbl.c */
 int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
 			   struct ubi_vtbl_record *vtbl_rec);
+int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
+			    struct list_head *rename_list);
 int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si);
 
 /* vmt.c */
 int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req);
-int ubi_remove_volume(struct ubi_volume_desc *desc);
+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl);
 int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs);
+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list);
 int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol);
 void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol);
 
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 2cd886a5ada..4be4014c70d 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -402,13 +402,14 @@ out_unlock:
 /**
  * ubi_remove_volume - remove volume.
  * @desc: volume descriptor
+ * @no_vtbl: do not change volume table if not zero
  *
  * This function removes volume described by @desc. The volume has to be opened
  * in "exclusive" mode. Returns zero in case of success and a negative error
  * code in case of failure. The caller has to have the @ubi->volumes_mutex
  * locked.
  */
-int ubi_remove_volume(struct ubi_volume_desc *desc)
+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
 {
 	struct ubi_volume *vol = desc->vol;
 	struct ubi_device *ubi = vol->ubi;
@@ -437,9 +438,11 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
 	if (err)
 		goto out_err;
 
-	err = ubi_change_vtbl_record(ubi, vol_id, NULL);
-	if (err)
-		goto out_err;
+	if (!no_vtbl) {
+		err = ubi_change_vtbl_record(ubi, vol_id, NULL);
+		if (err)
+			goto out_err;
+	}
 
 	for (i = 0; i < vol->reserved_pebs; i++) {
 		err = ubi_eba_unmap_leb(ubi, vol, i);
@@ -465,7 +468,8 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
 	ubi->vol_count -= 1;
 	spin_unlock(&ubi->volumes_lock);
 
-	err = paranoid_check_volumes(ubi);
+	if (!no_vtbl)
+		err = paranoid_check_volumes(ubi);
 	return err;
 
 out_err:
@@ -601,6 +605,44 @@ out_free:
 	return err;
 }
 
+/**
+ * ubi_rename_volumes - re-name UBI volumes.
+ * @ubi: UBI device description object
+ * @rename_list: list of &struct ubi_rename_entry objects
+ *
+ * This function re-names or removes volumes specified in the re-name list.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list)
+{
+	int err;
+	struct ubi_rename_entry *re;
+
+	err = ubi_vtbl_rename_volumes(ubi, rename_list);
+	if (err)
+		return err;
+
+	list_for_each_entry(re, rename_list, list) {
+		if (re->remove) {
+			err = ubi_remove_volume(re->desc, 1);
+			if (err)
+				break;
+		} else {
+			struct ubi_volume *vol = re->desc->vol;
+
+			spin_lock(&ubi->volumes_lock);
+			vol->name_len = re->new_name_len;
+			memcpy(vol->name, re->new_name, re->new_name_len + 1);
+			spin_unlock(&ubi->volumes_lock);
+		}
+	}
+
+	if (!err)
+		err = paranoid_check_volumes(ubi);
+	return err;
+}
+
 /**
  * ubi_add_volume - add volume.
  * @ubi: UBI device description object
@@ -826,10 +868,9 @@ static int paranoid_check_volume(struct ubi_device *ubi, int vol_id)
 
 fail:
 	ubi_err("paranoid check failed for volume %d", vol_id);
-	if (vol) {
+	if (vol)
 		ubi_dbg_dump_vol_info(vol);
-		ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
-	}
+	ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
 	spin_unlock(&ubi->volumes_lock);
 	return -EINVAL;
 }
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 05fb72fd268..23c5376234b 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -114,6 +114,57 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
 	return 0;
 }
 
+/**
+ * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table.
+ * @ubi: UBI device description object
+ * @rename_list: list of &struct ubi_rename_entry objects
+ *
+ * This function re-names or removes the volumes listed in @rename_list in the
+ * volume table. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
+			    struct list_head *rename_list)
+{
+	int i, err;
+	struct ubi_rename_entry *re;
+	struct ubi_volume *layout_vol;
+
+	list_for_each_entry(re, rename_list, list) {
+		uint32_t crc;
+		struct ubi_volume *vol = re->desc->vol;
+		struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id];
+
+		if (re->remove) {
+			memcpy(vtbl_rec, &empty_vtbl_record,
+			       sizeof(struct ubi_vtbl_record));
+			continue;
+		}
+
+		vtbl_rec->name_len = cpu_to_be16(re->new_name_len);
+		memcpy(vtbl_rec->name, re->new_name, re->new_name_len);
+		memset(vtbl_rec->name + re->new_name_len, 0,
+		       UBI_VOL_NAME_MAX + 1 - re->new_name_len);
+		crc = crc32(UBI_CRC32_INIT, vtbl_rec,
+			    UBI_VTBL_RECORD_SIZE_CRC);
+		vtbl_rec->crc = cpu_to_be32(crc);
+	}
+
+	layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)];
+	for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
+		err = ubi_eba_unmap_leb(ubi, layout_vol, i);
+		if (err)
+			return err;
+
+		err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0,
+					ubi->vtbl_size, UBI_LONGTERM);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 /**
  * vtbl_check - check if volume table is not corrupted and contains sensible
  *              data.
diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h
index a7421f130cc..e8e57c3dfcd 100644
--- a/include/mtd/ubi-user.h
+++ b/include/mtd/ubi-user.h
@@ -58,6 +58,13 @@
  * device should be used. A &struct ubi_rsvol_req object has to be properly
  * filled and a pointer to it has to be passed to the IOCTL.
  *
+ * UBI volumes re-name
+ * ~~~~~~~~~~~~~~~~~~~
+ *
+ * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command
+ * of the UBI character device should be used. A &struct ubi_rnvol_req object
+ * has to be properly filled and a pointer to it has to be passed to the IOCTL.
+ *
  * UBI volume update
  * ~~~~~~~~~~~~~~~~~
  *
@@ -104,6 +111,8 @@
 #define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t)
 /* Re-size an UBI volume */
 #define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req)
+/* Re-name volumes */
+#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req)
 
 /* IOCTL commands of the UBI control character device */
 
@@ -128,6 +137,9 @@
 /* Maximum MTD device name length supported by UBI */
 #define MAX_UBI_MTD_NAME_LEN 127
 
+/* Maximum amount of UBI volumes that can be re-named at one go */
+#define UBI_MAX_RNVOL 32
+
 /*
  * UBI data type hint constants.
  *
@@ -189,7 +201,7 @@ struct ubi_attach_req {
 	int32_t ubi_num;
 	int32_t mtd_num;
 	int32_t vid_hdr_offset;
-	uint8_t padding[12];
+	int8_t padding[12];
 };
 
 /**
@@ -250,6 +262,48 @@ struct ubi_rsvol_req {
 	int32_t vol_id;
 } __attribute__ ((packed));
 
+/**
+ * struct ubi_rnvol_req - volumes re-name request.
+ * @count: count of volumes to re-name
+ * @padding1:  reserved for future, not used, has to be zeroed
+ * @vol_id: ID of the volume to re-name
+ * @name_len: name length
+ * @padding2:  reserved for future, not used, has to be zeroed
+ * @name: new volume name
+ *
+ * UBI allows to re-name up to %32 volumes at one go. The count of volumes to
+ * re-name is specified in the @count field. The ID of the volumes to re-name
+ * and the new names are specified in the @vol_id and @name fields.
+ *
+ * The UBI volume re-name operation is atomic, which means that should power cut
+ * happen, the volumes will have either old name or new name. So the possible
+ * use-cases of this command is atomic upgrade. Indeed, to upgrade, say, volumes
+ * A and B one may create temporary volumes %A1 and %B1 with the new contents,
+ * then atomically re-name A1->A and B1->B, in which case old %A and %B will
+ * be removed.
+ *
+ * If it is not desirable to remove old A and B, the re-name request has to
+ * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1
+ * become A and B, and old A and B will become A1 and B1.
+ *
+ * It is also OK to request: A1->A, A->X, B1->B, B->Y, in which case old A1
+ * and B1 become A and B, and old A and B become X and Y.
+ *
+ * In other words, in case of re-naming into an existing volume name, the
+ * existing volume is removed, unless it is re-named as well at the same
+ * re-name request.
+ */
+struct ubi_rnvol_req {
+	int32_t count;
+	int8_t padding1[12];
+	struct {
+		int32_t vol_id;
+		int16_t name_len;
+		int8_t  padding2[2];
+		char    name[UBI_MAX_VOLUME_NAME + 1];
+	} ents[UBI_MAX_RNVOL];
+} __attribute__ ((packed));
+
 /**
  * struct ubi_leb_change_req - a data structure used in atomic logical
  *                             eraseblock change requests.
@@ -261,8 +315,8 @@ struct ubi_rsvol_req {
 struct ubi_leb_change_req {
 	int32_t lnum;
 	int32_t bytes;
-	uint8_t dtype;
-	uint8_t padding[7];
+	int8_t  dtype;
+	int8_t  padding[7];
 } __attribute__ ((packed));
 
 #endif /* __UBI_USER_H__ */
-- 
GitLab


From 8c1e6ee10bd87d70faada065a8d1f70732c17382 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 18 Jul 2008 12:20:23 +0300
Subject: [PATCH 104/853] UBI: rework scrubbing messages

If bit-flips happen often, UBI prints too many messages. Lessen
the amount by only printing the messages when the PEB has been
scrubbed. Also, print torturing messages.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/io.c | 8 +++++++-
 drivers/mtd/ubi/wl.c | 6 +++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 27b9c2c2fc6..2bebb39d19b 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -156,8 +156,12 @@ retry:
 			/*
 			 * -EUCLEAN is reported if there was a bit-flip which
 			 * was corrected, so this is harmless.
+			 *
+			 * We do not report about it here unless debugging is
+			 * enabled. A corresponding message will be printed
+			 * later, when the PEB has been scrubbed.
 			 */
-			ubi_msg("fixable bit-flip detected at PEB %d", pnum);
+			dbg_msg("fixable bit-flip detected at PEB %d", pnum);
 			ubi_assert(len == read);
 			return UBI_IO_BITFLIPS;
 		}
@@ -391,6 +395,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum)
 {
 	int err, i, patt_count;
 
+	ubi_msg("run torture test for PEB %d", pnum);
 	patt_count = ARRAY_SIZE(patterns);
 	ubi_assert(patt_count > 0);
 
@@ -434,6 +439,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum)
 	}
 
 	err = patt_count;
+	ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum);
 
 out:
 	mutex_unlock(&ubi->buf_mutex);
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 761952ba125..6821952bcdb 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -873,6 +873,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	}
 
 	ubi_free_vid_hdr(ubi, vid_hdr);
+	if (scrubbing && !protect)
+		ubi_msg("scrubbed PEB %d, data moved to PEB %d",
+			e1->pnum, e2->pnum);
+
 	spin_lock(&ubi->wl_lock);
 	if (protect)
 		prot_tree_add(ubi, e1, pe, protect);
@@ -1231,7 +1235,7 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
 {
 	struct ubi_wl_entry *e;
 
-	ubi_msg("schedule PEB %d for scrubbing", pnum);
+	dbg_msg("schedule PEB %d for scrubbing", pnum);
 
 retry:
 	spin_lock(&ubi->wl_lock);
-- 
GitLab


From 4d88de4beb6f327dfc7c2221eab532dad5b2bb3e Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 18 Jul 2008 12:42:14 +0300
Subject: [PATCH 105/853] UBI: bugfix - do not torture PEB needlessly

This is probably a copy-paste bug - we torture the old PEB
in the atomic LEB change function, but we should not do this.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/eba.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 613cd1e5164..e14208152c3 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -906,7 +906,7 @@ retry:
 	}
 
 	if (vol->eba_tbl[lnum] >= 0) {
-		err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
+		err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0);
 		if (err)
 			goto out_leb_unlock;
 	}
-- 
GitLab


From 9c9ec147709e63e4e8ac6a037c6bb50688ff8e9c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 18 Jul 2008 13:19:52 +0300
Subject: [PATCH 106/853] UBI: fix checkpatch.pl errors and warnings

Just out of curiosity, ran checkpatch.pl on the whole of UBI
and discovered quite a few stylistic issues.
Fix them.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c  |  8 ++--
 drivers/mtd/ubi/cdev.c   |  4 +-
 drivers/mtd/ubi/eba.c    |  7 +--
 drivers/mtd/ubi/gluebi.c |  4 +-
 drivers/mtd/ubi/io.c     |  8 ++--
 drivers/mtd/ubi/scan.c   |  9 ++--
 drivers/mtd/ubi/ubi.h    |  3 +-
 drivers/mtd/ubi/upd.c    |  8 ++--
 drivers/mtd/ubi/vmt.c    |  4 +-
 drivers/mtd/ubi/vtbl.c   | 12 +++---
 drivers/mtd/ubi/wl.c     | 92 +++++++++++++++++++---------------------
 include/mtd/ubi-user.h   | 16 +++----
 12 files changed, 86 insertions(+), 89 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 4418a2369b5..535d9a8a6ba 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -51,14 +51,13 @@
  * @name: MTD device name or number string
  * @vid_hdr_offs: VID header offset
  */
-struct mtd_dev_param
-{
+struct mtd_dev_param {
 	char name[MTD_PARAM_LEN_MAX];
 	int vid_hdr_offs;
 };
 
 /* Numbers of elements set in the @mtd_dev_param array */
-static int mtd_devs = 0;
+static int mtd_devs;
 
 /* MTD devices specification parameters */
 static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
@@ -781,7 +780,8 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 			if (!ubi_devices[ubi_num])
 				break;
 		if (ubi_num == UBI_MAX_DEVICES) {
-			dbg_err("only %d UBI devices may be created", UBI_MAX_DEVICES);
+			dbg_err("only %d UBI devices may be created",
+				UBI_MAX_DEVICES);
 			return -ENFILE;
 		}
 	} else {
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index bc8199c6a9f..03c759b4eeb 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -39,9 +39,9 @@
 #include <linux/stat.h>
 #include <linux/ioctl.h>
 #include <linux/capability.h>
+#include <linux/uaccess.h>
 #include <linux/smp_lock.h>
 #include <mtd/ubi-user.h>
-#include <asm/uaccess.h>
 #include <asm/div64.h>
 #include "ubi.h"
 
@@ -352,7 +352,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
 }
 
 #else
-#define vol_cdev_direct_write(file, buf, count, offp) -EPERM
+#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM)
 #endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */
 
 static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index e14208152c3..e04bcf1dff8 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -189,9 +189,7 @@ static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi,
 	le->users += 1;
 	spin_unlock(&ubi->ltree_lock);
 
-	if (le_free)
-		kfree(le_free);
-
+	kfree(le_free);
 	return le;
 }
 
@@ -503,9 +501,8 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
 	struct ubi_vid_hdr *vid_hdr;
 
 	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
-	if (!vid_hdr) {
+	if (!vid_hdr)
 		return -ENOMEM;
-	}
 
 	mutex_lock(&ubi->buf_mutex);
 
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index 49f52dceea9..605812bb0b1 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -249,8 +249,8 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
 	if (err)
 		goto out_err;
 
-        instr->state = MTD_ERASE_DONE;
-        mtd_erase_callback(instr);
+	instr->state = MTD_ERASE_DONE;
+	mtd_erase_callback(instr);
 	return 0;
 
 out_err:
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 2bebb39d19b..a84f0db0a03 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -167,8 +167,8 @@ retry:
 		}
 
 		if (read != len && retries++ < UBI_IO_RETRIES) {
-			dbg_io("error %d while reading %d bytes from PEB %d:%d, "
-			       "read only %zd bytes, retry",
+			dbg_io("error %d while reading %d bytes from PEB %d:%d,"
+			       " read only %zd bytes, retry",
 			       err, len, pnum, offset, read);
 			yield();
 			goto retry;
@@ -705,8 +705,8 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
 
 	if (hdr_crc != crc) {
 		if (verbose) {
-			ubi_warn("bad EC header CRC at PEB %d, calculated %#08x,"
-				 " read %#08x", pnum, crc, hdr_crc);
+			ubi_warn("bad EC header CRC at PEB %d, calculated "
+				 "%#08x, read %#08x", pnum, crc, hdr_crc);
 			ubi_dbg_dump_ec_hdr(ec_hdr);
 		}
 		return UBI_IO_BAD_EC_HDR;
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 40eca9ce5fa..0bb7488862d 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -248,7 +248,8 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
 	unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);
 
 	if (seb->sqnum == 0 && sqnum2 == 0) {
-		long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);
+		long long abs;
+		long long v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);
 
 		/*
 		 * UBI constantly increases the logical eraseblock version
@@ -752,7 +753,8 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
  * This function returns a zero if the physical eraseblock was successfully
  * handled and a negative error code in case of failure.
  */
-static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum)
+static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
+		      int pnum)
 {
 	long long uninitialized_var(ec);
 	int err, bitflips = 0, vol_id, ec_corr = 0;
@@ -1301,8 +1303,7 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
 		if (err < 0) {
 			kfree(buf);
 			return err;
-		}
-		else if (err)
+		} else if (err)
 			buf[pnum] = 1;
 	}
 
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 274c67916b3..14a5596d2d9 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -473,7 +473,8 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
 			     const void __user *buf, int count);
 
 /* misc.c */
-int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length);
+int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
+		      int length);
 int ubi_check_volume(struct ubi_device *ubi, int vol_id);
 void ubi_calculate_reserved(struct ubi_device *ubi);
 
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 1230a5e1b53..3b8beb8545c 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -39,7 +39,7 @@
  */
 
 #include <linux/err.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/div64.h>
 #include "ubi.h"
 
@@ -246,7 +246,8 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
 			return 0;
 		}
 
-		err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN);
+		err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len,
+					UBI_UNKNOWN);
 	} else {
 		/*
 		 * When writing static volume, and this is the last logical
@@ -418,7 +419,8 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
 	if (vol->upd_received == vol->upd_bytes) {
 		int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size);
 
-		memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes);
+		memset(vol->upd_buf + vol->upd_bytes, 0xFF,
+		       len - vol->upd_bytes);
 		len = ubi_calc_data_len(ubi, vol->upd_buf, len);
 		err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum,
 						vol->upd_buf, len, UBI_UNKNOWN);
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 4be4014c70d..852482d8b18 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -253,7 +253,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 			goto out_unlock;
 		}
 
-        /* Calculate how many eraseblocks are requested */
+	/* Calculate how many eraseblocks are requested */
 	vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;
 	bytes = req->bytes;
 	if (do_div(bytes, vol->usable_leb_size))
@@ -858,7 +858,7 @@ static int paranoid_check_volume(struct ubi_device *ubi, int vol_id)
 
 	if (alignment != vol->alignment || data_pad != vol->data_pad ||
 	    upd_marker != vol->upd_marker || vol_type != vol->vol_type ||
-	    name_len!= vol->name_len || strncmp(name, vol->name, name_len)) {
+	    name_len != vol->name_len || strncmp(name, vol->name, name_len)) {
 		ubi_err("volume info is different");
 		goto fail;
 	}
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 23c5376234b..10c22257f60 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -461,7 +461,8 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
 	if (!leb_corrupted[0]) {
 		/* LEB 0 is OK */
 		if (leb[1])
-			leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size);
+			leb_corrupted[1] = memcmp(leb[0], leb[1],
+						  ubi->vtbl_size);
 		if (leb_corrupted[1]) {
 			ubi_warn("volume table copy #2 is corrupted");
 			err = create_vtbl(ubi, si, 1, leb[0]);
@@ -859,11 +860,10 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
 
 out_free:
 	vfree(ubi->vtbl);
-	for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++)
-		if (ubi->volumes[i]) {
-			kfree(ubi->volumes[i]);
-			ubi->volumes[i] = NULL;
-		}
+	for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
+		kfree(ubi->volumes[i]);
+		ubi->volumes[i] = NULL;
+	}
 	return err;
 }
 
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 6821952bcdb..2a5d2a0e14a 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -475,52 +475,47 @@ retry:
 	}
 
 	switch (dtype) {
-		case UBI_LONGTERM:
-			/*
-			 * For long term data we pick a physical eraseblock
-			 * with high erase counter. But the highest erase
-			 * counter we can pick is bounded by the the lowest
-			 * erase counter plus %WL_FREE_MAX_DIFF.
-			 */
-			e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
-			protect = LT_PROTECTION;
-			break;
-		case UBI_UNKNOWN:
-			/*
-			 * For unknown data we pick a physical eraseblock with
-			 * medium erase counter. But we by no means can pick a
-			 * physical eraseblock with erase counter greater or
-			 * equivalent than the lowest erase counter plus
-			 * %WL_FREE_MAX_DIFF.
-			 */
-			first = rb_entry(rb_first(&ubi->free),
-					 struct ubi_wl_entry, rb);
-			last = rb_entry(rb_last(&ubi->free),
-					struct ubi_wl_entry, rb);
+	case UBI_LONGTERM:
+		/*
+		 * For long term data we pick a physical eraseblock with high
+		 * erase counter. But the highest erase counter we can pick is
+		 * bounded by the the lowest erase counter plus
+		 * %WL_FREE_MAX_DIFF.
+		 */
+		e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+		protect = LT_PROTECTION;
+		break;
+	case UBI_UNKNOWN:
+		/*
+		 * For unknown data we pick a physical eraseblock with medium
+		 * erase counter. But we by no means can pick a physical
+		 * eraseblock with erase counter greater or equivalent than the
+		 * lowest erase counter plus %WL_FREE_MAX_DIFF.
+		 */
+		first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+		last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb);
 
-			if (last->ec - first->ec < WL_FREE_MAX_DIFF)
-				e = rb_entry(ubi->free.rb_node,
-						struct ubi_wl_entry, rb);
-			else {
-				medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
-				e = find_wl_entry(&ubi->free, medium_ec);
-			}
-			protect = U_PROTECTION;
-			break;
-		case UBI_SHORTTERM:
-			/*
-			 * For short term data we pick a physical eraseblock
-			 * with the lowest erase counter as we expect it will
-			 * be erased soon.
-			 */
-			e = rb_entry(rb_first(&ubi->free),
-				     struct ubi_wl_entry, rb);
-			protect = ST_PROTECTION;
-			break;
-		default:
-			protect = 0;
-			e = NULL;
-			BUG();
+		if (last->ec - first->ec < WL_FREE_MAX_DIFF)
+			e = rb_entry(ubi->free.rb_node,
+					struct ubi_wl_entry, rb);
+		else {
+			medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
+			e = find_wl_entry(&ubi->free, medium_ec);
+		}
+		protect = U_PROTECTION;
+		break;
+	case UBI_SHORTTERM:
+		/*
+		 * For short term data we pick a physical eraseblock with the
+		 * lowest erase counter as we expect it will be erased soon.
+		 */
+		e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+		protect = ST_PROTECTION;
+		break;
+	default:
+		protect = 0;
+		e = NULL;
+		BUG();
 	}
 
 	/*
@@ -584,7 +579,8 @@ found:
  * This function returns zero in case of success and a negative error code in
  * case of failure.
  */
-static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
+static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
+		      int torture)
 {
 	int err;
 	struct ubi_ec_hdr *ec_hdr;
@@ -1060,8 +1056,8 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
 		spin_unlock(&ubi->wl_lock);
 
 		/*
-		 * One more erase operation has happened, take care about protected
-		 * physical eraseblocks.
+		 * One more erase operation has happened, take care about
+		 * protected physical eraseblocks.
 		 */
 		check_protection_over(ubi);
 
diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h
index e8e57c3dfcd..ccdc562e444 100644
--- a/include/mtd/ubi-user.h
+++ b/include/mtd/ubi-user.h
@@ -188,14 +188,14 @@ enum {
  * it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages.
  *
  * But in rare cases, if this optimizes things, the VID header may be placed to
- * a different offset. For example, the boot-loader might do things faster if the
- * VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. As
- * the boot-loader would not normally need to read EC headers (unless it needs
- * UBI in RW mode), it might be faster to calculate ECC. This is weird example,
- * but it real-life example. So, in this example, @vid_hdr_offer would be
- * 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
- * aligned, which is OK, as UBI is clever enough to realize this is 4th sub-page
- * of the first page and add needed padding.
+ * a different offset. For example, the boot-loader might do things faster if
+ * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages.
+ * As the boot-loader would not normally need to read EC headers (unless it
+ * needs UBI in RW mode), it might be faster to calculate ECC. This is weird
+ * example, but it real-life example. So, in this example, @vid_hdr_offer would
+ * be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
+ * aligned, which is OK, as UBI is clever enough to realize this is 4th
+ * sub-page of the first page and add needed padding.
  */
 struct ubi_attach_req {
 	int32_t ubi_num;
-- 
GitLab


From ebaaf1af3e9ef05c4fb7c61e4530c15e1ad10e3b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 18 Jul 2008 13:34:32 +0300
Subject: [PATCH 107/853] UBI: fix kernel-doc errors and warnings

No functional changes, just tweak comments to make kernel-doc
work fine and stop complaining.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c |  8 +++-----
 drivers/mtd/ubi/io.c    |  6 ++----
 drivers/mtd/ubi/scan.c  | 18 ++++++------------
 drivers/mtd/ubi/ubi.h   |  1 +
 drivers/mtd/ubi/upd.c   |  2 ++
 drivers/mtd/ubi/vmt.c   |  2 +-
 drivers/mtd/ubi/vtbl.c  |  8 +++-----
 drivers/mtd/ubi/wl.c    | 13 +++++--------
 8 files changed, 23 insertions(+), 35 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 535d9a8a6ba..eba760b3b8c 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -159,8 +159,7 @@ void ubi_put_device(struct ubi_device *ubi)
 }
 
 /**
- * ubi_get_by_major - get UBI device description object by character device
- *                    major number.
+ * ubi_get_by_major - get UBI device by character device major number.
  * @major: major number
  *
  * This function is similar to 'ubi_get_device()', but it searches the device
@@ -727,7 +726,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
 
 /**
  * ubi_attach_mtd_dev - attach an MTD device.
- * @mtd_dev: MTD device description object
+ * @mtd: MTD device description object
  * @ubi_num: number to assign to the new UBI device
  * @vid_hdr_offset: VID header offset
  *
@@ -1095,8 +1094,7 @@ static void __exit ubi_exit(void)
 module_exit(ubi_exit);
 
 /**
- * bytes_str_to_int - convert a string representing number of bytes to an
- * integer.
+ * bytes_str_to_int - convert a number of bytes string into an integer.
  * @str: the string to convert
  *
  * This function returns positive resulting integer in case of success and a
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index a84f0db0a03..2fb64be44f1 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -1101,8 +1101,7 @@ fail:
 }
 
 /**
- * paranoid_check_peb_ec_hdr - check that the erase counter header of a
- * physical eraseblock is in-place and is all right.
+ * paranoid_check_peb_ec_hdr - check erase counter header.
  * @ubi: UBI device description object
  * @pnum: the physical eraseblock number to check
  *
@@ -1180,8 +1179,7 @@ fail:
 }
 
 /**
- * paranoid_check_peb_vid_hdr - check that the volume identifier header of a
- * physical eraseblock is in-place and is all right.
+ * paranoid_check_peb_vid_hdr - check volume identifier header.
  * @ubi: UBI device description object
  * @pnum: the physical eraseblock number to check
  *
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 0bb7488862d..4dfbf27b065 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -93,8 +93,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
 }
 
 /**
- * validate_vid_hdr - check that volume identifier header is correct and
- * consistent.
+ * validate_vid_hdr - check volume identifier header.
  * @vid_hdr: the volume identifier header to check
  * @sv: information about the volume this logical eraseblock belongs to
  * @pnum: physical eraseblock number the VID header came from
@@ -380,8 +379,7 @@ out_free_vidh:
 }
 
 /**
- * ubi_scan_add_used - add information about a physical eraseblock to the
- * scanning information.
+ * ubi_scan_add_used - add physical eraseblock to the scanning information.
  * @ubi: UBI device description object
  * @si: scanning information
  * @pnum: the physical eraseblock number
@@ -555,8 +553,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 }
 
 /**
- * ubi_scan_find_sv - find information about a particular volume in the
- * scanning information.
+ * ubi_scan_find_sv - find volume in the scanning information.
  * @si: scanning information
  * @vol_id: the requested volume ID
  *
@@ -585,8 +582,7 @@ struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si,
 }
 
 /**
- * ubi_scan_find_seb - find information about a particular logical
- * eraseblock in the volume scanning information.
+ * ubi_scan_find_seb - find LEB in the volume scanning information.
  * @sv: a pointer to the volume scanning information
  * @lnum: the requested logical eraseblock
  *
@@ -744,8 +740,7 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
 }
 
 /**
- * process_eb - read UBI headers, check them and add corresponding data
- * to the scanning information.
+ * process_eb - read, check UBI headers, and add them to scanning information.
  * @ubi: UBI device description object
  * @si: scanning information
  * @pnum: the physical eraseblock number
@@ -1083,8 +1078,7 @@ void ubi_scan_destroy_si(struct ubi_scan_info *si)
 #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
 
 /**
- * paranoid_check_si - check if the scanning information is correct and
- * consistent.
+ * paranoid_check_si - check the scanning information.
  * @ubi: UBI device description object
  * @si: scanning information
  *
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 14a5596d2d9..1c3fa18c26a 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -313,6 +313,7 @@ struct ubi_wl_entry;
  *           @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
  *           fields
  * @move_mutex: serializes eraseblock moves
+ * @work_sem: sycnhronizes the WL worker with use tasks
  * @wl_scheduled: non-zero if the wear-leveling was scheduled
  * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
  *             physical eraseblock
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 3b8beb8545c..8b89cc18ff0 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -268,6 +268,7 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
 
 /**
  * ubi_more_update_data - write more update data.
+ * @ubi: UBI device description object
  * @vol: volume description object
  * @buf: write data (user-space memory buffer)
  * @count: how much bytes to write
@@ -385,6 +386,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
 
 /**
  * ubi_more_leb_change_data - accept more data for atomic LEB change.
+ * @ubi: UBI device description object
  * @vol: volume description object
  * @buf: write data (user-space memory buffer)
  * @count: how much bytes to write
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 852482d8b18..d40066833ab 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -608,7 +608,7 @@ out_free:
 /**
  * ubi_rename_volumes - re-name UBI volumes.
  * @ubi: UBI device description object
- * @renam_list: list of &struct ubi_rename_entry objects
+ * @rename_list: list of &struct ubi_rename_entry objects
  *
  * This function re-names or removes volumes specified in the re-name list.
  * Returns zero in case of success and a negative error code in case of
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 10c22257f60..4e1c489a3ba 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -117,7 +117,7 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
 /**
  * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table.
  * @ubi: UBI device description object
- * @renam_list: list of &struct ubi_rename_entry objects
+ * @rename_list: list of &struct ubi_rename_entry objects
  *
  * This function re-names multiple volumes specified in @req in the volume
  * table. Returns zero in case of success and a negative error code in case of
@@ -166,8 +166,7 @@ int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
 }
 
 /**
- * vtbl_check - check if volume table is not corrupted and contains sensible
- *              data.
+ * vtbl_check - check if volume table is not corrupted and sensible.
  * @ubi: UBI device description object
  * @vtbl: volume table
  *
@@ -780,8 +779,7 @@ static int check_scanning_info(const struct ubi_device *ubi,
 }
 
 /**
- * ubi_read_volume_table - read volume table.
- * information.
+ * ubi_read_volume_table - read the volume table.
  * @ubi: UBI device description object
  * @si: scanning information
  *
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 2a5d2a0e14a..05d70937b54 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -632,8 +632,7 @@ out_free:
 }
 
 /**
- * check_protection_over - check if it is time to stop protecting some
- * physical eraseblocks.
+ * check_protection_over - check if it is time to stop protecting some PEBs.
  * @ubi: UBI device description object
  *
  * This function is called after each erase operation, when the absolute erase
@@ -1601,8 +1600,7 @@ void ubi_wl_close(struct ubi_device *ubi)
 #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
 
 /**
- * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
- * is correct.
+ * paranoid_check_ec - make sure that the erase counter of a PEB is correct.
  * @ubi: UBI device description object
  * @pnum: the physical eraseblock number to check
  * @ec: the erase counter to check
@@ -1643,13 +1641,12 @@ out_free:
 }
 
 /**
- * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
- * in a WL RB-tree.
+ * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
  * @e: the wear-leveling entry to check
  * @root: the root of the tree
  *
- * This function returns zero if @e is in the @root RB-tree and %1 if it
- * is not.
+ * This function returns zero if @e is in the @root RB-tree and %1 if it is
+ * not.
  */
 static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
 				     struct rb_root *root)
-- 
GitLab


From 9869cd801c107bbae91663c3f4edbb6b5715919f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 18 Jul 2008 13:53:39 +0300
Subject: [PATCH 108/853] UBI: remove pre-sqnum images support

Before UBI got into mainline, there was a slight flash format
change - we did not have sequence number support, then added it.

We have carried full support for those ancient images until
now. This patch removes that support - well, not entirely.

Now UBI will support only _clean_ old images, which were cleanly
detached last time (just before kernel upgrade). This is most
likely the case.

But we will not support unclean ancient images. Surprisingly,
this allows us to remove a big chunk of legacy code.

And the same should be true for downgrading: clean images should
downgrade fine, but unclean ones will not.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/debug.c     |  2 -
 drivers/mtd/ubi/scan.c      | 87 ++++++++++---------------------------
 drivers/mtd/ubi/scan.h      |  2 -
 drivers/mtd/ubi/ubi-media.h | 17 +++-----
 drivers/mtd/ubi/vtbl.c      |  1 -
 5 files changed, 30 insertions(+), 79 deletions(-)

diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index 21e0d7d76a4..c0ed60e8ade 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -65,7 +65,6 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
 	printk(KERN_DEBUG "\tcompat    %d\n",   (int)vid_hdr->compat);
 	printk(KERN_DEBUG "\tvol_id    %d\n",   be32_to_cpu(vid_hdr->vol_id));
 	printk(KERN_DEBUG "\tlnum      %d\n",   be32_to_cpu(vid_hdr->lnum));
-	printk(KERN_DEBUG "\tleb_ver   %u\n",   be32_to_cpu(vid_hdr->leb_ver));
 	printk(KERN_DEBUG "\tdata_size %d\n",   be32_to_cpu(vid_hdr->data_size));
 	printk(KERN_DEBUG "\tused_ebs  %d\n",   be32_to_cpu(vid_hdr->used_ebs));
 	printk(KERN_DEBUG "\tdata_pad  %d\n",   be32_to_cpu(vid_hdr->data_pad));
@@ -172,7 +171,6 @@ void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type)
 		printk(KERN_DEBUG "\tlnum     %d\n", seb->lnum);
 		printk(KERN_DEBUG "\tscrub    %d\n", seb->scrub);
 		printk(KERN_DEBUG "\tsqnum    %llu\n", seb->sqnum);
-		printk(KERN_DEBUG "\tleb_ver  %u\n", seb->leb_ver);
 	}
 }
 
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 4dfbf27b065..967bb4406df 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -246,46 +246,21 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
 	struct ubi_vid_hdr *vh = NULL;
 	unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);
 
-	if (seb->sqnum == 0 && sqnum2 == 0) {
-		long long abs;
-		long long v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);
-
+	if (sqnum2 == seb->sqnum) {
 		/*
-		 * UBI constantly increases the logical eraseblock version
-		 * number and it can overflow. Thus, we have to bear in mind
-		 * that versions that are close to %0xFFFFFFFF are less then
-		 * versions that are close to %0.
-		 *
-		 * The UBI WL sub-system guarantees that the number of pending
-		 * tasks is not greater then %0x7FFFFFFF. So, if the difference
-		 * between any two versions is greater or equivalent to
-		 * %0x7FFFFFFF, there was an overflow and the logical
-		 * eraseblock with lower version is actually newer then the one
-		 * with higher version.
-		 *
-		 * FIXME: but this is anyway obsolete and will be removed at
-		 * some point.
+		 * This must be a really ancient UBI image which has been
+		 * created before sequence numbers support has been added. At
+		 * that times we used 32-bit LEB versions stored in logical
+		 * eraseblocks. That was before UBI got into mainline. We do not
+		 * support these images anymore. Well, those images will work
+		 * still work, but only if no unclean reboots happened.
 		 */
-		dbg_bld("using old crappy leb_ver stuff");
-
-		if (v1 == v2) {
-			ubi_err("PEB %d and PEB %d have the same version %lld",
-				seb->pnum, pnum, v1);
-			return -EINVAL;
-		}
+		ubi_err("unsupported on-flash UBI format\n");
+		return -EINVAL;
+	}
 
-		abs = v1 - v2;
-		if (abs < 0)
-			abs = -abs;
-
-		if (abs < 0x7FFFFFFF)
-			/* Non-overflow situation */
-			second_is_newer = (v2 > v1);
-		else
-			second_is_newer = (v2 < v1);
-	} else
-		/* Obviously the LEB with lower sequence counter is older */
-		second_is_newer = sqnum2 > seb->sqnum;
+	/* Obviously the LEB with lower sequence counter is older */
+	second_is_newer = !!(sqnum2 > seb->sqnum);
 
 	/*
 	 * Now we know which copy is newer. If the copy flag of the PEB with
@@ -293,7 +268,7 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
 	 * check data CRC. For the second PEB we already have the VID header,
 	 * for the first one - we'll need to re-read it from flash.
 	 *
-	 * FIXME: this may be optimized so that we wouldn't read twice.
+	 * Note: this may be optimized so that we wouldn't read twice.
 	 */
 
 	if (second_is_newer) {
@@ -399,7 +374,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 		      int bitflips)
 {
 	int err, vol_id, lnum;
-	uint32_t leb_ver;
 	unsigned long long sqnum;
 	struct ubi_scan_volume *sv;
 	struct ubi_scan_leb *seb;
@@ -408,10 +382,9 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 	vol_id = be32_to_cpu(vid_hdr->vol_id);
 	lnum = be32_to_cpu(vid_hdr->lnum);
 	sqnum = be64_to_cpu(vid_hdr->sqnum);
-	leb_ver = be32_to_cpu(vid_hdr->leb_ver);
 
-	dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d",
-		pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips);
+	dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d",
+		pnum, vol_id, lnum, ec, sqnum, bitflips);
 
 	sv = add_volume(si, vol_id, pnum, vid_hdr);
 	if (IS_ERR(sv) < 0)
@@ -444,25 +417,20 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 		 */
 
 		dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "
-			"LEB ver %u, EC %d", seb->pnum, seb->sqnum,
-			seb->leb_ver, seb->ec);
-
-		/*
-		 * Make sure that the logical eraseblocks have different
-		 * versions. Otherwise the image is bad.
-		 */
-		if (seb->leb_ver == leb_ver && leb_ver != 0) {
-			ubi_err("two LEBs with same version %u", leb_ver);
-			ubi_dbg_dump_seb(seb, 0);
-			ubi_dbg_dump_vid_hdr(vid_hdr);
-			return -EINVAL;
-		}
+			"EC %d", seb->pnum, seb->sqnum, seb->ec);
 
 		/*
 		 * Make sure that the logical eraseblocks have different
 		 * sequence numbers. Otherwise the image is bad.
 		 *
-		 * FIXME: remove 'sqnum != 0' check when leb_ver is removed.
+		 * However, if the sequence number is zero, we assume it must
+		 * be an ancient UBI image from the era when UBI did not have
+		 * sequence numbers. We still can attach these images, unless
+		 * there is a need to distinguish between old and new
+		 * eraseblocks, in which case we'll refuse the image in
+		 * 'compare_lebs()'. In other words, we attach old clean
+		 * images, but refuse attaching old images with duplicated
+		 * logical eraseblocks because there was an unclean reboot.
 		 */
 		if (seb->sqnum == sqnum && sqnum != 0) {
 			ubi_err("two LEBs with same sequence number %llu",
@@ -502,7 +470,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 			seb->pnum = pnum;
 			seb->scrub = ((cmp_res & 2) || bitflips);
 			seb->sqnum = sqnum;
-			seb->leb_ver = leb_ver;
 
 			if (sv->highest_lnum == lnum)
 				sv->last_data_size =
@@ -539,7 +506,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 	seb->lnum = lnum;
 	seb->sqnum = sqnum;
 	seb->scrub = bitflips;
-	seb->leb_ver = leb_ver;
 
 	if (sv->highest_lnum <= lnum) {
 		sv->highest_lnum = lnum;
@@ -1263,11 +1229,6 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
 				ubi_err("bad data_pad %d", sv->data_pad);
 				goto bad_vid_hdr;
 			}
-
-			if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) {
-				ubi_err("bad leb_ver %u", seb->leb_ver);
-				goto bad_vid_hdr;
-			}
 		}
 
 		if (!last_seb)
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h
index 4e2e3cc0bec..61df208e2f2 100644
--- a/drivers/mtd/ubi/scan.h
+++ b/drivers/mtd/ubi/scan.h
@@ -34,7 +34,6 @@
  * @u: unions RB-tree or @list links
  * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
  * @u.list: link in one of the eraseblock lists
- * @leb_ver: logical eraseblock version (obsolete)
  *
  * One object of this type is allocated for each physical eraseblock during
  * scanning.
@@ -49,7 +48,6 @@ struct ubi_scan_leb {
 		struct rb_node rb;
 		struct list_head list;
 	} u;
-	uint32_t leb_ver;
 };
 
 /**
diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h
index 26bb7af9787..2ad94040905 100644
--- a/drivers/mtd/ubi/ubi-media.h
+++ b/drivers/mtd/ubi/ubi-media.h
@@ -168,16 +168,15 @@ struct ubi_ec_hdr {
  *          %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
  * @vol_id: ID of this volume
  * @lnum: logical eraseblock number
- * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be
- *           removed, kept only for not breaking older UBI users)
+ * @padding1: reserved for future, zeroes
  * @data_size: how many bytes of data this logical eraseblock contains
  * @used_ebs: total number of used logical eraseblocks in this volume
  * @data_pad: how many bytes at the end of this physical eraseblock are not
  *            used
  * @data_crc: CRC checksum of the data stored in this logical eraseblock
- * @padding1: reserved for future, zeroes
- * @sqnum: sequence number
  * @padding2: reserved for future, zeroes
+ * @sqnum: sequence number
+ * @padding3: reserved for future, zeroes
  * @hdr_crc: volume identifier header CRC checksum
  *
  * The @sqnum is the value of the global sequence counter at the time when this
@@ -225,10 +224,6 @@ struct ubi_ec_hdr {
  * checksum is correct, this physical eraseblock is selected (P1). Otherwise
  * the older one (P) is selected.
  *
- * Note, there is an obsolete @leb_ver field which was used instead of @sqnum
- * in the past. But it is not used anymore and we keep it in order to be able
- * to deal with old UBI images. It will be removed at some point.
- *
  * There are 2 sorts of volumes in UBI: user volumes and internal volumes.
  * Internal volumes are not seen from outside and are used for various internal
  * UBI purposes. In this implementation there is only one internal volume - the
@@ -278,14 +273,14 @@ struct ubi_vid_hdr {
 	__u8    compat;
 	__be32  vol_id;
 	__be32  lnum;
-	__be32  leb_ver; /* obsolete, to be removed, don't use */
+	__u8    padding1[4];
 	__be32  data_size;
 	__be32  used_ebs;
 	__be32  data_pad;
 	__be32  data_crc;
-	__u8    padding1[4];
+	__u8    padding2[4];
 	__be64  sqnum;
-	__u8    padding2[12];
+	__u8    padding3[12];
 	__be32  hdr_crc;
 } __attribute__ ((packed));
 
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 4e1c489a3ba..217d0e111b2 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -338,7 +338,6 @@ retry:
 			     vid_hdr->data_pad = cpu_to_be32(0);
 	vid_hdr->lnum = cpu_to_be32(copy);
 	vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum);
-	vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0);
 
 	/* The EC header is already there, write the VID header */
 	err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
-- 
GitLab


From eeb16e87b6747c9a4f5769f33467c9d173e9f5ee Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 23 Jul 2008 15:51:46 +0300
Subject: [PATCH 109/853] UBI: fix gcc warning

Fix the following warning:

drivers/mtd/ubi/vmt.c: In function 'ubi_rename_volumes':
drivers/mtd/ubi/vmt.c:642: warning: statement with no effect

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/vmt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index d40066833ab..3531ca9a1e2 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -639,7 +639,7 @@ int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list)
 	}
 
 	if (!err)
-		paranoid_check_volumes(ubi);
+		err = paranoid_check_volumes(ubi);
 	return err;
 }
 
-- 
GitLab


From 58838cf3ca3337d76141c33d6c68376490263468 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 24 Jul 2008 12:43:13 +0200
Subject: [PATCH 110/853] sched: clean up compiler warning

Reported-by: Daniel Walker <dwalker@mvista.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched_rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 147004c651c..93ac8ee0827 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -253,7 +253,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
 
 		diff = iter->rt_runtime - iter->rt_time;
 		if (diff > 0) {
-			do_div(diff, weight);
+			diff = div_u64((u64)diff, weight);
 			if (rt_rq->rt_runtime + diff > rt_period)
 				diff = rt_period - rt_rq->rt_runtime;
 			iter->rt_runtime -= diff;
-- 
GitLab


From 78305de2f99e9f43ab860dd95bb430b20e26c695 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Wed, 23 Apr 2008 07:20:41 -0400
Subject: [PATCH 111/853] Remove mention of semaphores from kernel-locking

Since the consensus seems to be to eliminate semaphores where possible,
we shouldn't be educating people about how to use them as locks.  Use
mutexes instead.  Semaphores should be described in a separate document
if we end up keeping them.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/DocBook/kernel-locking.tmpl | 57 ++++++++++-------------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 2510763295d..084f6ad7b7a 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -219,10 +219,10 @@
    </para>
 
    <sect1 id="lock-intro">
-   <title>Three Main Types of Kernel Locks: Spinlocks, Mutexes and Semaphores</title>
+   <title>Two Main Types of Kernel Locks: Spinlocks and Mutexes</title>
 
    <para>
-     There are three main types of kernel locks.  The fundamental type
+     There are two main types of kernel locks.  The fundamental type
      is the spinlock 
      (<filename class="headerfile">include/asm/spinlock.h</filename>),
      which is a very simple single-holder lock: if you can't get the 
@@ -239,14 +239,6 @@
      can't sleep (see <xref linkend="sleeping-things"/>), and so have to
      use a spinlock instead.
    </para>
-   <para>
-     The third type is a semaphore
-     (<filename class="headerfile">include/linux/semaphore.h</filename>): it
-     can have more than one holder at any time (the number decided at
-     initialization time), although it is most commonly used as a
-     single-holder lock (a mutex).  If you can't get a semaphore, your
-     task will be suspended and later on woken up - just like for mutexes.
-   </para>
    <para>
      Neither type of lock is recursive: see
      <xref linkend="deadlock"/>.
@@ -278,7 +270,7 @@
     </para>
 
     <para>
-      Semaphores still exist, because they are required for
+      Mutexes still exist, because they are required for
       synchronization between <firstterm linkend="gloss-usercontext">user 
       contexts</firstterm>, as we will see below.
     </para>
@@ -289,18 +281,17 @@
 
      <para>
        If you have a data structure which is only ever accessed from
-       user context, then you can use a simple semaphore
-       (<filename>linux/linux/semaphore.h</filename>) to protect it.  This
-       is the most trivial case: you initialize the semaphore to the number 
-       of resources available (usually 1), and call
-       <function>down_interruptible()</function> to grab the semaphore, and 
-       <function>up()</function> to release it.  There is also a 
-       <function>down()</function>, which should be avoided, because it 
+       user context, then you can use a simple mutex
+       (<filename>include/linux/mutex.h</filename>) to protect it.  This
+       is the most trivial case: you initialize the mutex.  Then you can
+       call <function>mutex_lock_interruptible()</function> to grab the mutex,
+       and <function>mutex_unlock()</function> to release it.  There is also a 
+       <function>mutex_lock()</function>, which should be avoided, because it 
        will not return if a signal is received.
      </para>
 
      <para>
-       Example: <filename>linux/net/core/netfilter.c</filename> allows 
+       Example: <filename>net/netfilter/nf_sockopt.c</filename> allows 
        registration of new <function>setsockopt()</function> and 
        <function>getsockopt()</function> calls, with
        <function>nf_register_sockopt()</function>.  Registration and 
@@ -515,7 +506,7 @@
       <listitem>
 	<para>
           If you are in a process context (any syscall) and want to
-	lock other process out, use a semaphore.  You can take a semaphore
+	lock other process out, use a mutex.  You can take a mutex
 	and sleep (<function>copy_from_user*(</function> or
 	<function>kmalloc(x,GFP_KERNEL)</function>).
       </para>
@@ -662,7 +653,7 @@
 <entry>SLBH</entry>
 <entry>SLBH</entry>
 <entry>SLBH</entry>
-<entry>DI</entry>
+<entry>MLI</entry>
 <entry>None</entry>
 </row>
 
@@ -692,8 +683,8 @@
 <entry>spin_lock_bh</entry>
 </row>
 <row>
-<entry>DI</entry>
-<entry>down_interruptible</entry>
+<entry>MLI</entry>
+<entry>mutex_lock_interruptible</entry>
 </row>
 
 </tbody>
@@ -1310,7 +1301,7 @@ as Alan Cox says, <quote>Lock data, not code</quote>.
     <para>
       There is a coding bug where a piece of code tries to grab a
       spinlock twice: it will spin forever, waiting for the lock to
-      be released (spinlocks, rwlocks and semaphores are not
+      be released (spinlocks, rwlocks and mutexes are not
       recursive in Linux).  This is trivial to diagnose: not a
       stay-up-five-nights-talk-to-fluffy-code-bunnies kind of
       problem.
@@ -1335,7 +1326,7 @@ as Alan Cox says, <quote>Lock data, not code</quote>.
 
     <para>
       This complete lockup is easy to diagnose: on SMP boxes the
-      watchdog timer or compiling with <symbol>DEBUG_SPINLOCKS</symbol> set
+      watchdog timer or compiling with <symbol>DEBUG_SPINLOCK</symbol> set
       (<filename>include/linux/spinlock.h</filename>) will show this up 
       immediately when it happens.
     </para>
@@ -1558,7 +1549,7 @@ the amount of locking which needs to be done.
    <title>Read/Write Lock Variants</title>
 
    <para>
-      Both spinlocks and semaphores have read/write variants:
+      Both spinlocks and mutexes have read/write variants:
       <type>rwlock_t</type> and <structname>struct rw_semaphore</structname>.
       These divide users into two classes: the readers and the writers.  If
       you are only reading the data, you can get a read lock, but to write to
@@ -1681,7 +1672,7 @@ the amount of locking which needs to be done.
  #include &lt;linux/slab.h&gt;
  #include &lt;linux/string.h&gt;
 +#include &lt;linux/rcupdate.h&gt;
- #include &lt;linux/semaphore.h&gt;
+ #include &lt;linux/mutex.h&gt;
  #include &lt;asm/errno.h&gt;
 
  struct object
@@ -1913,7 +1904,7 @@ machines due to caching.
        </listitem>
        <listitem>
         <para>
-          <function> put_user()</function>
+          <function>put_user()</function>
         </para>
        </listitem>
       </itemizedlist>
@@ -1927,13 +1918,13 @@ machines due to caching.
 
      <listitem>
       <para>
-      <function>down_interruptible()</function> and
-      <function>down()</function>
+      <function>mutex_lock_interruptible()</function> and
+      <function>mutex_lock()</function>
       </para>
       <para>
-       There is a <function>down_trylock()</function> which can be
+       There is a <function>mutex_trylock()</function> which can be
        used inside interrupt context, as it will not sleep.
-       <function>up()</function> will also never sleep.
+       <function>mutex_unlock()</function> will also never sleep.
       </para>
      </listitem>
     </itemizedlist>
@@ -2023,7 +2014,7 @@ machines due to caching.
       <para>
         Prior to 2.5, or when <symbol>CONFIG_PREEMPT</symbol> is
         unset, processes in user context inside the kernel would not
-        preempt each other (ie. you had that CPU until you have it up,
+        preempt each other (ie. you had that CPU until you gave it up,
         except for interrupts).  With the addition of
         <symbol>CONFIG_PREEMPT</symbol> in 2.5.4, this changed: when
         in user context, higher priority tasks can "cut in": spinlocks
-- 
GitLab


From 0f17e4c796e89d1f69f13b653aba60e6ccfb8ae0 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Thu, 24 Jul 2008 08:30:48 -0400
Subject: [PATCH 112/853] Add missing semaphore.h includes

These files use semaphores but don't include semaphore.h

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/input/keyboard/hil_kbd.c | 1 +
 drivers/input/misc/hp_sdc_rtc.c  | 1 +
 drivers/input/serio/hp_sdc.c     | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/input/keyboard/hil_kbd.c b/drivers/input/keyboard/hil_kbd.c
index adbf29f0169..71c1971abf8 100644
--- a/drivers/input/keyboard/hil_kbd.c
+++ b/drivers/input/keyboard/hil_kbd.c
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/semaphore.h>
 #include <linux/slab.h>
 #include <linux/pci_ids.h>
 
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index 49d8abfe38f..daa9d422033 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -44,6 +44,7 @@
 #include <linux/proc_fs.h>
 #include <linux/poll.h>
 #include <linux/rtc.h>
+#include <linux/semaphore.h>
 
 MODULE_AUTHOR("Brian S. Julin <bri@calyx.com>");
 MODULE_DESCRIPTION("HP i8042 SDC + MSM-58321 RTC Driver");
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index 7b233a492ad..aad664d5259 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -67,6 +67,7 @@
 #include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/time.h>
+#include <linux/semaphore.h>
 #include <linux/slab.h>
 #include <linux/hil.h>
 #include <asm/io.h>
-- 
GitLab


From 6310e472717ed736c9bff9840febb71f7bb400ed Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Thu, 24 Jul 2008 08:08:09 -0400
Subject: [PATCH 113/853] Remove use of asm/semaphore.h

Change to use linux/semaphore.h

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 arch/arm/mach-ns9xxx/clock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-ns9xxx/clock.c b/arch/arm/mach-ns9xxx/clock.c
index f8639161068..44ed20d4a38 100644
--- a/arch/arm/mach-ns9xxx/clock.c
+++ b/arch/arm/mach-ns9xxx/clock.c
@@ -14,8 +14,8 @@
 #include <linux/clk.h>
 #include <linux/string.h>
 #include <linux/platform_device.h>
+#include <linux/semaphore.h>
 
-#include <asm/semaphore.h>
 #include "clock.h"
 
 static LIST_HEAD(clocks);
-- 
GitLab


From 2351ec533ed0dd56052ab96988d2161d5ecc8ed9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Thu, 24 Jul 2008 08:09:32 -0400
Subject: [PATCH 114/853] Remove asm/semaphore.h

All users have now been converted to linux/semaphore.h and we don't need
to keep these files around any longer.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 Documentation/feature-removal-schedule.txt | 8 --------
 include/asm-alpha/semaphore.h              | 1 -
 include/asm-arm/semaphore.h                | 1 -
 include/asm-avr32/semaphore.h              | 1 -
 include/asm-blackfin/semaphore.h           | 1 -
 include/asm-cris/semaphore.h               | 1 -
 include/asm-frv/semaphore.h                | 1 -
 include/asm-h8300/semaphore.h              | 1 -
 include/asm-ia64/semaphore.h               | 1 -
 include/asm-m32r/semaphore.h               | 1 -
 include/asm-m68k/semaphore.h               | 1 -
 include/asm-m68knommu/semaphore.h          | 1 -
 include/asm-mips/semaphore.h               | 1 -
 include/asm-mn10300/semaphore.h            | 1 -
 include/asm-parisc/semaphore.h             | 1 -
 include/asm-powerpc/semaphore.h            | 1 -
 include/asm-s390/semaphore.h               | 1 -
 include/asm-sh/semaphore.h                 | 1 -
 include/asm-sparc/semaphore.h              | 1 -
 include/asm-sparc64/semaphore.h            | 1 -
 include/asm-um/semaphore.h                 | 1 -
 include/asm-v850/semaphore.h               | 1 -
 include/asm-x86/semaphore.h                | 1 -
 include/asm-xtensa/semaphore.h             | 1 -
 24 files changed, 31 deletions(-)
 delete mode 100644 include/asm-alpha/semaphore.h
 delete mode 100644 include/asm-arm/semaphore.h
 delete mode 100644 include/asm-avr32/semaphore.h
 delete mode 100644 include/asm-blackfin/semaphore.h
 delete mode 100644 include/asm-cris/semaphore.h
 delete mode 100644 include/asm-frv/semaphore.h
 delete mode 100644 include/asm-h8300/semaphore.h
 delete mode 100644 include/asm-ia64/semaphore.h
 delete mode 100644 include/asm-m32r/semaphore.h
 delete mode 100644 include/asm-m68k/semaphore.h
 delete mode 100644 include/asm-m68knommu/semaphore.h
 delete mode 100644 include/asm-mips/semaphore.h
 delete mode 100644 include/asm-mn10300/semaphore.h
 delete mode 100644 include/asm-parisc/semaphore.h
 delete mode 100644 include/asm-powerpc/semaphore.h
 delete mode 100644 include/asm-s390/semaphore.h
 delete mode 100644 include/asm-sh/semaphore.h
 delete mode 100644 include/asm-sparc/semaphore.h
 delete mode 100644 include/asm-sparc64/semaphore.h
 delete mode 100644 include/asm-um/semaphore.h
 delete mode 100644 include/asm-v850/semaphore.h
 delete mode 100644 include/asm-x86/semaphore.h
 delete mode 100644 include/asm-xtensa/semaphore.h

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 9f73587219e..09c4a1efb8e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -300,14 +300,6 @@ Who:	ocfs2-devel@oss.oracle.com
 
 ---------------------------
 
-What:	asm/semaphore.h
-When:	2.6.26
-Why:	Implementation became generic; users should now include
-	linux/semaphore.h instead.
-Who:	Matthew Wilcox <willy@linux.intel.com>
-
----------------------------
-
 What:	SCTP_GET_PEER_ADDRS_NUM_OLD, SCTP_GET_PEER_ADDRS_OLD,
 	SCTP_GET_LOCAL_ADDRS_NUM_OLD, SCTP_GET_LOCAL_ADDRS_OLD
 When: 	June 2009
diff --git a/include/asm-alpha/semaphore.h b/include/asm-alpha/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-alpha/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-arm/semaphore.h b/include/asm-arm/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-arm/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-avr32/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-blackfin/semaphore.h b/include/asm-blackfin/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-blackfin/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-cris/semaphore.h b/include/asm-cris/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-cris/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-frv/semaphore.h b/include/asm-frv/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-frv/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-h8300/semaphore.h b/include/asm-h8300/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-h8300/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-ia64/semaphore.h b/include/asm-ia64/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-ia64/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-m32r/semaphore.h b/include/asm-m32r/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-m32r/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-m68k/semaphore.h b/include/asm-m68k/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-m68k/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-m68knommu/semaphore.h b/include/asm-m68knommu/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-m68knommu/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-mips/semaphore.h b/include/asm-mips/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-mips/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-mn10300/semaphore.h b/include/asm-mn10300/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-mn10300/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-parisc/semaphore.h b/include/asm-parisc/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-parisc/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-powerpc/semaphore.h b/include/asm-powerpc/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-powerpc/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-s390/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-sh/semaphore.h b/include/asm-sh/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-sh/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-sparc/semaphore.h b/include/asm-sparc/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-sparc/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-sparc64/semaphore.h b/include/asm-sparc64/semaphore.h
deleted file mode 100644
index 39362afde5f..00000000000
--- a/include/asm-sparc64/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-sparc/semaphore.h>
diff --git a/include/asm-um/semaphore.h b/include/asm-um/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-um/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-v850/semaphore.h b/include/asm-v850/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-v850/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-x86/semaphore.h b/include/asm-x86/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-x86/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-xtensa/semaphore.h b/include/asm-xtensa/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-xtensa/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
-- 
GitLab


From b552068999b0b05087c454e525b30b785c79dc9b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Wed, 23 Apr 2008 10:07:27 -0400
Subject: [PATCH 115/853] Remove __DECLARE_SEMAPHORE_GENERIC

There are no users of __DECLARE_SEMAPHORE_GENERIC in the kernel

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 include/linux/semaphore.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 9cae64b00d6..7415839ac89 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -26,10 +26,8 @@ struct semaphore {
 	.wait_list	= LIST_HEAD_INIT((name).wait_list),		\
 }
 
-#define __DECLARE_SEMAPHORE_GENERIC(name, count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name, count)
-
-#define DECLARE_MUTEX(name)	__DECLARE_SEMAPHORE_GENERIC(name, 1)
+#define DECLARE_MUTEX(name)	\
+	struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
 
 static inline void sema_init(struct semaphore *sem, int val)
 {
-- 
GitLab


From e108526e77aa41c89b3be96f75d97615db2b751c Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Jul 2008 21:26:44 -0700
Subject: [PATCH 116/853] move memory_read_from_buffer() from fs.h to string.h

James Bottomley warns that inclusion of linux/fs.h in a low level
driver was always a danger signal.  This patch moves
memory_read_from_buffer() from fs.h to string.h and fixes includes in
existing memory_read_from_buffer() users.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Bob Moore <robert.moore@intel.com>
Cc: Thomas Renninger <trenn@suse.de>
Cc: Len Brown <lenb@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/acpi/system.c       | 1 +
 drivers/zorro/zorro-sysfs.c | 1 -
 include/linux/fs.h          | 2 --
 include/linux/string.h      | 3 +++
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index d8e3f153b29..91dec448b3e 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -26,6 +26,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
+#include <linux/string.h>
 #include <asm/uaccess.h>
 
 #include <acpi/acpi_drivers.h>
diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c
index 3da712cc770..5290552d2ef 100644
--- a/drivers/zorro/zorro-sysfs.c
+++ b/drivers/zorro/zorro-sysfs.c
@@ -15,7 +15,6 @@
 #include <linux/zorro.h>
 #include <linux/stat.h>
 #include <linux/string.h>
-#include <linux/fs.h>
 
 #include "zorro.h"
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9c2ac5c0ef5..ff54ae4933f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2006,8 +2006,6 @@ extern void simple_release_fs(struct vfsmount **mount, int *count);
 
 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 			loff_t *ppos, const void *from, size_t available);
-extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
-			const void *from, size_t available);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
diff --git a/include/linux/string.h b/include/linux/string.h
index efdc44593b5..810d80df0a1 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -111,5 +111,8 @@ extern void argv_free(char **argv);
 
 extern bool sysfs_streq(const char *s1, const char *s2);
 
+extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
+			const void *from, size_t available);
+
 #endif
 #endif /* _LINUX_STRING_H_ */
-- 
GitLab


From d36e74c4392b5f26a5c4d94d7881a156ddc8e593 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Wed, 23 Jul 2008 21:26:46 -0700
Subject: [PATCH 117/853] hpet: clarify maintainer entry

The existing HPET maintainer entries are somewhat unclear about which one
applies to what part of the kernel.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5d8971c76a7..7ffd78c4e27 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1984,7 +1984,7 @@ P:	Carlos Corbacho
 M:	carlos@strangeworlds.co.uk
 S:	Odd Fixes
 
-HPET:	High Precision Event Timers driver (hpet.c)
+HPET:	High Precision Event Timers driver (drivers/char/hpet.c)
 P:	Clemens Ladisch
 M:	clemens@ladisch.de
 S:	Maintained
-- 
GitLab


From d7ce20b2024d318b9ba88859226af1441270d99f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:26:47 -0700
Subject: [PATCH 118/853] remove is_tty()

This patch removes the no longer used is_tty().

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index fa48dba5ba5..6f4d856df98 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1119,19 +1119,6 @@ int tty_hung_up_p(struct file *filp)
 
 EXPORT_SYMBOL(tty_hung_up_p);
 
-/**
- *	is_tty	-	checker whether file is a TTY
- *	@filp:		file handle that may be a tty
- *
- *	Check if the file handle is a tty handle.
- */
-
-int is_tty(struct file *filp)
-{
-	return filp->f_op->read == tty_read
-		|| filp->f_op->read == hung_up_tty_read;
-}
-
 static void session_clear_tty(struct pid *session)
 {
 	struct task_struct *p;
-- 
GitLab


From 9483a578df27fe7603605d565eefe039c1ba5845 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 23 Jul 2008 21:26:48 -0700
Subject: [PATCH 119/853] add HAVE_CLK to Kconfig, for driver dependencies

Flag platforms as HAVE_CLK (or not) in Kconfig, based on whether they
support <linux/clk.h> calls, so that otherwise portable drivers which need
those calls can list that dependency.

Something like this is a prerequisite for merging the musb_hdrc driver,
currently used on platforms including Davinci, OMAP2430, OMAP3xx ...  and
the discrete TUSB6010 chip, which doesn't have a natural platform
dependency.  (Used with OMAP 2420 in current Nokia N8x0 tablets.)

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Russell King <rmk@arm.linux.org.uk>
Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig         |  7 +++++++
 arch/arm/Kconfig     | 13 +++++++++++++
 arch/avr32/Kconfig   |  1 +
 arch/powerpc/Kconfig |  1 +
 arch/sh/Kconfig      |  1 +
 5 files changed, 23 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index ad89a33d8c6..4d5ebbc1e72 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -42,3 +42,10 @@ config HAVE_DMA_ATTRS
 
 config USE_GENERIC_SMP_HELPERS
 	def_bool n
+
+config HAVE_CLK
+	def_bool n
+	help
+	  The <linux/clk.h> calls support software clock gating and
+	  thus are a key power management tool on many systems.
+
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d048f6887d0..6fb4f03369f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -198,12 +198,14 @@ choice
 config ARCH_AAEC2000
 	bool "Agilent AAEC-2000 based"
 	select ARM_AMBA
+	select HAVE_CLK
 	help
 	  This enables support for systems based on the Agilent AAEC-2000
 
 config ARCH_INTEGRATOR
 	bool "ARM Ltd. Integrator family"
 	select ARM_AMBA
+	select HAVE_CLK
 	select ICST525
 	help
 	  Support for ARM's Integrator platform.
@@ -211,6 +213,7 @@ config ARCH_INTEGRATOR
 config ARCH_REALVIEW
 	bool "ARM Ltd. RealView family"
 	select ARM_AMBA
+	select HAVE_CLK
 	select ICST307
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
@@ -221,6 +224,7 @@ config ARCH_VERSATILE
 	bool "ARM Ltd. Versatile family"
 	select ARM_AMBA
 	select ARM_VIC
+	select HAVE_CLK
 	select ICST307
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
@@ -262,6 +266,8 @@ config ARCH_EP93XX
 	select ARM_AMBA
 	select ARM_VIC
 	select GENERIC_GPIO
+	select HAVE_CLK
+	select HAVE_CLK
 	select HAVE_GPIO_LIB
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
@@ -381,6 +387,7 @@ config ARCH_NS9XXX
 	select GENERIC_GPIO
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
+	select HAVE_CLK
 	help
 	  Say Y here if you intend to run this kernel on a NetSilicon NS9xxx
 	  System.
@@ -430,6 +437,7 @@ config ARCH_ORION5X
 
 config ARCH_PNX4008
 	bool "Philips Nexperia PNX4008 Mobile"
+	select HAVE_CLK
 	help
 	  This enables support for Philips PNX4008 mobile platform.
 
@@ -438,6 +446,7 @@ config ARCH_PXA
 	depends on MMU
 	select ARCH_MTD_XIP
 	select GENERIC_GPIO
+	select HAVE_CLK
 	select HAVE_GPIO_LIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
@@ -468,6 +477,7 @@ config ARCH_SA1100
 	select GENERIC_GPIO
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
+	select HAVE_CLK
 	select TICK_ONESHOT
 	select HAVE_GPIO_LIB
 	help
@@ -476,6 +486,7 @@ config ARCH_SA1100
 config ARCH_S3C2410
 	bool "Samsung S3C2410, S3C2412, S3C2413, S3C2440, S3C2442, S3C2443"
 	select GENERIC_GPIO
+	select HAVE_CLK
 	help
 	  Samsung S3C2410X CPU based systems, such as the Simtec Electronics
 	  BAST (<http://www.simtec.co.uk/products/EB110ITX/>), the IPAQ 1940 or
@@ -503,12 +514,14 @@ config ARCH_DAVINCI
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_GPIO
+	select HAVE_CLK
 	help
 	  Support for TI's DaVinci platform.
 
 config ARCH_OMAP
 	bool "TI OMAP"
 	select GENERIC_GPIO
+	select HAVE_CLK
 	select HAVE_GPIO_LIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 45d63c98601..df4adefedb4 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -10,6 +10,7 @@ config AVR32
 	# With EMBEDDED=n, we get lots of stuff automatically selected
 	# that we usually don't need on AVR32.
 	select EMBEDDED
+	select HAVE_CLK
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	help
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 737ebf9d12b..4d7e2ba10ba 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -843,6 +843,7 @@ source "crypto/Kconfig"
 config PPC_CLOCK
 	bool
 	default n
+	select HAVE_CLK
 
 config PPC_LIB_RHEAP
 	bool
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 3e7384f4619..8879938f335 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -8,6 +8,7 @@ mainmenu "Linux/SuperH Kernel Configuration"
 config SUPERH
 	def_bool y
 	select EMBEDDED
+	select HAVE_CLK
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	help
-- 
GitLab


From 6b74ab97bc12ce74acec900f1d89a4aee2e4d70d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:26:49 -0700
Subject: [PATCH 120/853] mm: add a basic debugging framework for memory
 initialisation

Boot initialisation is very complex, with significant numbers of
architecture-specific routines, hooks and code ordering.  While a significant
amount of the initialisation is architecture-independent, it trusts the data
received from the architecture layer.  This is a mistake, and has resulted in
a number of difficult-to-diagnose bugs.

This patchset adds some validation and tracing to memory initialisation.  It
also introduces a few basic defensive measures.  The validation code can be
explicitly disabled for embedded systems.

This patch:

Add additional debugging and verification code for memory initialisation.

Once enabled, the verification checks are always run and when required
additional debugging information may be outputted via a mminit_loglevel=
command-line parameter.

The verification code is placed in a new file mm/mm_init.c.  Ideally other mm
initialisation code will be moved here over time.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  8 ++++++++
 lib/Kconfig.debug                   | 12 ++++++++++++
 mm/Makefile                         |  1 +
 mm/internal.h                       | 27 +++++++++++++++++++++++++++
 mm/mm_init.c                        | 18 ++++++++++++++++++
 mm/page_alloc.c                     | 22 +++++++++++++---------
 6 files changed, 79 insertions(+), 9 deletions(-)
 create mode 100644 mm/mm_init.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 47e7d8794fc..5e20ccb5a73 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1225,6 +1225,14 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	mga=		[HW,DRM]
 
+	mminit_loglevel=
+			[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+			parameter allows control of the logging verbosity for
+			the additional memory initialisation checks. A value
+			of 0 disables mminit logging and a level of 4 will
+			log everything. Information is printed at KERN_DEBUG
+			so loglevel=8 may also need to be specified.
+
 	mousedev.tap_time=
 			[MOUSE] Maximum time between finger touching and
 			leaving touchpad surface for touch to be considered
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 882c5104899..e1d4764435e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -505,6 +505,18 @@ config DEBUG_WRITECOUNT
 
 	  If unsure, say N.
 
+config DEBUG_MEMORY_INIT
+	bool "Debug memory initialisation" if EMBEDDED
+	default !EMBEDDED
+	help
+	  Enable this for additional checks during memory initialisation.
+	  The sanity checks verify aspects of the VM such as the memory model
+	  and other information provided by the architecture. Verbose
+	  information will be printed at KERN_DEBUG loglevel depending
+	  on the mminit_loglevel= command-line option.
+
+	  If unsure, say Y
+
 config DEBUG_LIST
 	bool "Debug linked list manipulation"
 	depends on DEBUG_KERNEL
diff --git a/mm/Makefile b/mm/Makefile
index 18c143b3c46..4bbc8f094ff 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_SLAB) += slab.o
+obj-$(CONFIG_DEBUG_MEMORY_INIT) += mm_init.o
 obj-$(CONFIG_SLUB) += slub.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff --git a/mm/internal.h b/mm/internal.h
index 0034e947e4b..a7ee0525329 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -59,4 +59,31 @@ static inline unsigned long page_order(struct page *page)
 #define __paginginit __init
 #endif
 
+/* Memory initialisation debug and verification */
+enum mminit_level {
+	MMINIT_WARNING,
+	MMINIT_VERIFY,
+	MMINIT_TRACE
+};
+
+#ifdef CONFIG_DEBUG_MEMORY_INIT
+
+extern int mminit_loglevel;
+
+#define mminit_dprintk(level, prefix, fmt, arg...) \
+do { \
+	if (level < mminit_loglevel) { \
+		printk(level <= MMINIT_WARNING ? KERN_WARNING : KERN_DEBUG); \
+		printk(KERN_CONT "mminit::" prefix " " fmt, ##arg); \
+	} \
+} while (0)
+
+#else
+
+static inline void mminit_dprintk(enum mminit_level level,
+				const char *prefix, const char *fmt, ...)
+{
+}
+
+#endif /* CONFIG_DEBUG_MEMORY_INIT */
 #endif
diff --git a/mm/mm_init.c b/mm/mm_init.c
new file mode 100644
index 00000000000..c01d8dfec81
--- /dev/null
+++ b/mm/mm_init.c
@@ -0,0 +1,18 @@
+/*
+ * mm_init.c - Memory initialisation verification and debugging
+ *
+ * Copyright 2008 IBM Corporation, 2008
+ * Author Mel Gorman <mel@csn.ul.ie>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+int __meminitdata mminit_loglevel;
+
+static __init int set_mminit_loglevel(char *str)
+{
+	get_option(&str, &mminit_loglevel);
+	return 0;
+}
+early_param("mminit_loglevel", set_mminit_loglevel);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 79ac4afc908..0908352ba72 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2975,7 +2975,8 @@ void __init sparse_memory_present_with_active_regions(int nid)
 void __init push_node_boundaries(unsigned int nid,
 		unsigned long start_pfn, unsigned long end_pfn)
 {
-	printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n",
+	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
+			"Entering push_node_boundaries(%u, %lu, %lu)\n",
 			nid, start_pfn, end_pfn);
 
 	/* Initialise the boundary for this node if necessary */
@@ -2993,7 +2994,8 @@ void __init push_node_boundaries(unsigned int nid,
 static void __meminit account_node_boundary(unsigned int nid,
 		unsigned long *start_pfn, unsigned long *end_pfn)
 {
-	printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n",
+	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
+			"Entering account_node_boundary(%u, %lu, %lu)\n",
 			nid, *start_pfn, *end_pfn);
 
 	/* Return if boundary information has not been provided */
@@ -3368,8 +3370,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
 		if (realsize >= memmap_pages) {
 			realsize -= memmap_pages;
-			printk(KERN_DEBUG
-				"  %s zone: %lu pages used for memmap\n",
+			mminit_dprintk(MMINIT_TRACE, "memmap_init",
+				"%s zone: %lu pages used for memmap\n",
 				zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
@@ -3379,7 +3381,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		/* Account for reserved pages */
 		if (j == 0 && realsize > dma_reserve) {
 			realsize -= dma_reserve;
-			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
+			mminit_dprintk(MMINIT_TRACE, "memmap_init",
+					"%s zone: %lu pages reserved\n",
 					zone_names[0], dma_reserve);
 		}
 
@@ -3520,10 +3523,11 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 {
 	int i;
 
-	printk(KERN_DEBUG "Entering add_active_range(%d, %#lx, %#lx) "
-			  "%d entries of %d used\n",
-			  nid, start_pfn, end_pfn,
-			  nr_nodemap_entries, MAX_ACTIVE_REGIONS);
+	mminit_dprintk(MMINIT_TRACE, "memory_register",
+			"Entering add_active_range(%d, %#lx, %#lx) "
+			"%d entries of %d used\n",
+			nid, start_pfn, end_pfn,
+			nr_nodemap_entries, MAX_ACTIVE_REGIONS);
 
 	/* Merge with existing active regions if possible */
 	for (i = 0; i < nr_nodemap_entries; i++) {
-- 
GitLab


From 708614e6180f398cd307ea0048d48ba6fa274610 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:26:51 -0700
Subject: [PATCH 121/853] mm: verify the page links and memory model

Print out information on how the page flags are being used if mminit_loglevel
is MMINIT_VERIFY or higher, and unconditionally perform sanity checks on the
flags regardless of loglevel.

When the page flags are updated with section, node and zone information,
checks are made to ensure the values can be retrieved correctly.  Finally we
confirm that pfn_to_page and page_to_pfn are the correct inverse functions.

[akpm@linux-foundation.org: fix printk warnings]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/internal.h   | 12 +++++++++
 mm/mm_init.c    | 71 +++++++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c |  8 ++++++
 3 files changed, 91 insertions(+)

diff --git a/mm/internal.h b/mm/internal.h
index a7ee0525329..7a4a2885dc8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -78,6 +78,10 @@ do { \
 	} \
 } while (0)
 
+extern void mminit_verify_pageflags_layout(void);
+extern void mminit_verify_page_links(struct page *page,
+		enum zone_type zone, unsigned long nid, unsigned long pfn);
+
 #else
 
 static inline void mminit_dprintk(enum mminit_level level,
@@ -85,5 +89,13 @@ static inline void mminit_dprintk(enum mminit_level level,
 {
 }
 
+static inline void mminit_verify_pageflags_layout(void)
+{
+}
+
+static inline void mminit_verify_page_links(struct page *page,
+		enum zone_type zone, unsigned long nid, unsigned long pfn)
+{
+}
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 #endif
diff --git a/mm/mm_init.c b/mm/mm_init.c
index c01d8dfec81..e16990d629e 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -7,9 +7,80 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include "internal.h"
 
 int __meminitdata mminit_loglevel;
 
+void __init mminit_verify_pageflags_layout(void)
+{
+	int shift, width;
+	unsigned long or_mask, add_mask;
+
+	shift = 8 * sizeof(unsigned long);
+	width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH;
+	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
+		"Section %d Node %d Zone %d Flags %d\n",
+		SECTIONS_WIDTH,
+		NODES_WIDTH,
+		ZONES_WIDTH,
+		NR_PAGEFLAGS);
+	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
+		"Section %d Node %d Zone %d\n",
+#ifdef SECTIONS_SHIFT
+		SECTIONS_SHIFT,
+#else
+		0,
+#endif
+		NODES_SHIFT,
+		ZONES_SHIFT);
+	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_offsets",
+		"Section %lu Node %lu Zone %lu\n",
+		(unsigned long)SECTIONS_PGSHIFT,
+		(unsigned long)NODES_PGSHIFT,
+		(unsigned long)ZONES_PGSHIFT);
+	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_zoneid",
+		"Zone ID: %lu -> %lu\n",
+		(unsigned long)ZONEID_PGOFF,
+		(unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT));
+	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_usage",
+		"location: %d -> %d unused %d -> %d flags %d -> %d\n",
+		shift, width, width, NR_PAGEFLAGS, NR_PAGEFLAGS, 0);
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
+		"Node not in page flags");
+#endif
+
+	if (SECTIONS_WIDTH) {
+		shift -= SECTIONS_WIDTH;
+		BUG_ON(shift != SECTIONS_PGSHIFT);
+	}
+	if (NODES_WIDTH) {
+		shift -= NODES_WIDTH;
+		BUG_ON(shift != NODES_PGSHIFT);
+	}
+	if (ZONES_WIDTH) {
+		shift -= ZONES_WIDTH;
+		BUG_ON(shift != ZONES_PGSHIFT);
+	}
+
+	/* Check for bitmask overlaps */
+	or_mask = (ZONES_MASK << ZONES_PGSHIFT) |
+			(NODES_MASK << NODES_PGSHIFT) |
+			(SECTIONS_MASK << SECTIONS_PGSHIFT);
+	add_mask = (ZONES_MASK << ZONES_PGSHIFT) +
+			(NODES_MASK << NODES_PGSHIFT) +
+			(SECTIONS_MASK << SECTIONS_PGSHIFT);
+	BUG_ON(or_mask != add_mask);
+}
+
+void __meminit mminit_verify_page_links(struct page *page, enum zone_type zone,
+			unsigned long nid, unsigned long pfn)
+{
+	BUG_ON(page_to_nid(page) != nid);
+	BUG_ON(page_zonenum(page) != zone);
+	BUG_ON(page_to_pfn(page) != pfn);
+}
+
 static __init int set_mminit_loglevel(char *str)
 {
 	get_option(&str, &mminit_loglevel);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0908352ba72..acab6ad326d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2534,6 +2534,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		}
 		page = pfn_to_page(pfn);
 		set_page_links(page, zone, nid, pfn);
+		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		reset_page_mapcount(page);
 		SetPageReserved(page);
@@ -2836,6 +2837,12 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 
 	zone->zone_start_pfn = zone_start_pfn;
 
+	mminit_dprintk(MMINIT_TRACE, "memmap_init",
+			"Initialising map node %d zone %lu pfns %lu -> %lu\n",
+			pgdat->node_id,
+			(unsigned long)zone_idx(zone),
+			zone_start_pfn, (zone_start_pfn + size));
+
 	zone_init_free_lists(zone);
 
 	return 0;
@@ -3961,6 +3968,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 						early_node_map[i].end_pfn);
 
 	/* Initialise every node */
+	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
-- 
GitLab


From 2dbb51c49f4fecb8330e43247a0edfbc4b2b8974 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:26:52 -0700
Subject: [PATCH 122/853] mm: make defensive checks around PFN values
 registered for memory usage

There are a number of different views of how much memory is currently active.
There is the arch-independent zone-sizing view, the bootmem allocator view and
the memory model view.

Architectures register this information at different times, and the views are
not necessarily in sync, particularly with respect to some SPARSEMEM
limitations.

This patch introduces mminit_validate_memmodel_limits() which is able to
validate and correct PFN ranges with respect to the memory model.  It is only
SPARSEMEM that currently validates itself.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c    |  1 +
 mm/internal.h   | 12 ++++++++++++
 mm/page_alloc.c |  2 ++
 mm/sparse.c     | 37 +++++++++++++++++++++++++++++--------
 4 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8d9f60e06f6..9f4bbc5da73 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -91,6 +91,7 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
 	bootmem_data_t *bdata = pgdat->bdata;
 	unsigned long mapsize;
 
+	mminit_validate_memmodel_limits(&start, &end);
 	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
 	bdata->node_boot_start = PFN_PHYS(start);
 	bdata->node_low_pfn = end;
diff --git a/mm/internal.h b/mm/internal.h
index 7a4a2885dc8..5d17f3efac4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -98,4 +98,16 @@ static inline void mminit_verify_page_links(struct page *page,
 {
 }
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
+
+/* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
+#if defined(CONFIG_SPARSEMEM)
+extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
+				unsigned long *end_pfn);
+#else
+static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
+				unsigned long *end_pfn)
+{
+}
+#endif /* CONFIG_SPARSEMEM */
+
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index acab6ad326d..0adb66e711e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3536,6 +3536,8 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 			nid, start_pfn, end_pfn,
 			nr_nodemap_entries, MAX_ACTIVE_REGIONS);
 
+	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
+
 	/* Merge with existing active regions if possible */
 	for (i = 0; i < nr_nodemap_entries; i++) {
 		if (early_node_map[i].nid != nid)
diff --git a/mm/sparse.c b/mm/sparse.c
index 36511c7b5e2..7a3650923d9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -12,6 +12,7 @@
 #include <asm/dma.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
+#include "internal.h"
 
 /*
  * Permanent SPARSEMEM data:
@@ -147,22 +148,41 @@ static inline int sparse_early_nid(struct mem_section *section)
 	return (section->section_mem_map >> SECTION_NID_SHIFT);
 }
 
-/* Record a memory area against a node. */
-void __init memory_present(int nid, unsigned long start, unsigned long end)
+/* Validate the physical addressing limitations of the model */
+void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
+						unsigned long *end_pfn)
 {
-	unsigned long max_arch_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
-	unsigned long pfn;
+	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
 
 	/*
 	 * Sanity checks - do not allow an architecture to pass
 	 * in larger pfns than the maximum scope of sparsemem:
 	 */
-	if (start >= max_arch_pfn)
-		return;
-	if (end >= max_arch_pfn)
-		end = max_arch_pfn;
+	if (*start_pfn > max_sparsemem_pfn) {
+		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
+			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
+			*start_pfn, *end_pfn, max_sparsemem_pfn);
+		WARN_ON_ONCE(1);
+		*start_pfn = max_sparsemem_pfn;
+		*end_pfn = max_sparsemem_pfn;
+	}
+
+	if (*end_pfn > max_sparsemem_pfn) {
+		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
+			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
+			*start_pfn, *end_pfn, max_sparsemem_pfn);
+		WARN_ON_ONCE(1);
+		*end_pfn = max_sparsemem_pfn;
+	}
+}
+
+/* Record a memory area against a node. */
+void __init memory_present(int nid, unsigned long start, unsigned long end)
+{
+	unsigned long pfn;
 
 	start &= PAGE_SECTION_MASK;
+	mminit_validate_memmodel_limits(&start, &end);
 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 		unsigned long section = pfn_to_section_nr(pfn);
 		struct mem_section *ms;
@@ -187,6 +207,7 @@ unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
 	unsigned long pfn;
 	unsigned long nr_pages = 0;
 
+	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
 		if (nid != early_pfn_to_nid(pfn))
 			continue;
-- 
GitLab


From 68ad8df42e12037c3894c9706ab428bf5cd6426b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:26:52 -0700
Subject: [PATCH 123/853] mm: print out the zonelists on request for manual
 verification

This patch prints out the zonelists during boot for manual verification by the
user if the mminit_loglevel is MMINIT_VERIFY or higher.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/internal.h   |  5 +++++
 mm/mm_init.c    | 45 +++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c |  1 +
 3 files changed, 51 insertions(+)

diff --git a/mm/internal.h b/mm/internal.h
index 5d17f3efac4..50807e12490 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -81,6 +81,7 @@ do { \
 extern void mminit_verify_pageflags_layout(void);
 extern void mminit_verify_page_links(struct page *page,
 		enum zone_type zone, unsigned long nid, unsigned long pfn);
+extern void mminit_verify_zonelist(void);
 
 #else
 
@@ -97,6 +98,10 @@ static inline void mminit_verify_page_links(struct page *page,
 		enum zone_type zone, unsigned long nid, unsigned long pfn)
 {
 }
+
+static inline void mminit_verify_zonelist(void)
+{
+}
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 
 /* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index e16990d629e..ce445ca097e 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -11,6 +11,51 @@
 
 int __meminitdata mminit_loglevel;
 
+/* The zonelists are simply reported, validation is manual. */
+void mminit_verify_zonelist(void)
+{
+	int nid;
+
+	if (mminit_loglevel < MMINIT_VERIFY)
+		return;
+
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		struct zone *zone;
+		struct zoneref *z;
+		struct zonelist *zonelist;
+		int i, listid, zoneid;
+
+		BUG_ON(MAX_ZONELISTS > 2);
+		for (i = 0; i < MAX_ZONELISTS * MAX_NR_ZONES; i++) {
+
+			/* Identify the zone and nodelist */
+			zoneid = i % MAX_NR_ZONES;
+			listid = i / MAX_NR_ZONES;
+			zonelist = &pgdat->node_zonelists[listid];
+			zone = &pgdat->node_zones[zoneid];
+			if (!populated_zone(zone))
+				continue;
+
+			/* Print information about the zonelist */
+			printk(KERN_DEBUG "mminit::zonelist %s %d:%s = ",
+				listid > 0 ? "thisnode" : "general", nid,
+				zone->name);
+
+			/* Iterate the zonelist */
+			for_each_zone_zonelist(zone, z, zonelist, zoneid) {
+#ifdef CONFIG_NUMA
+				printk(KERN_CONT "%d:%s ",
+					zone->node, zone->name);
+#else
+				printk(KERN_CONT "0:%s ", zone->name);
+#endif /* CONFIG_NUMA */
+			}
+			printk(KERN_CONT "\n");
+		}
+	}
+}
+
 void __init mminit_verify_pageflags_layout(void)
 {
 	int shift, width;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0adb66e711e..9ece07ce65b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2352,6 +2352,7 @@ void build_all_zonelists(void)
 
 	if (system_state == SYSTEM_BOOTING) {
 		__build_all_zonelists(NULL);
+		mminit_verify_zonelist();
 		cpuset_init_current_mems_allowed();
 	} else {
 		/* we have to stop all cpus to guarantee there is no user
-- 
GitLab


From 8b05c7e6e159d2f33c9275281b8b909a89eb7c5d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 23 Jul 2008 21:26:53 -0700
Subject: [PATCH 124/853] add a helper function to test if an object is on the
 stack

lib/debugobjects.c has a function to test if an object is on the stack.
The block layer and IDE need it (they need to avoid DMA from/to stack
buffers).  This patch moves the function to include/linux/sched.h so that
everyone can use it.

lib/debugobjects.c uses current->stack but this patch uses a
task_stack_page() accessor, which is a preferable way to access the stack.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 7 +++++++
 lib/debugobjects.c    | 4 +---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dc7e592c473..6aca4a16e37 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1983,6 +1983,13 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 
 #endif
 
+static inline int object_is_on_stack(void *obj)
+{
+	void *stack = task_stack_page(current);
+
+	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
+}
+
 extern void thread_info_cache_init(void);
 
 /* set thread flags in other task's structures
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 85b18d79be8..f86196390cf 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -226,15 +226,13 @@ debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state),
 
 static void debug_object_is_on_stack(void *addr, int onstack)
 {
-	void *stack = current->stack;
 	int is_on_stack;
 	static int limit;
 
 	if (limit > 4)
 		return;
 
-	is_on_stack = (addr >= stack && addr < (stack + THREAD_SIZE));
-
+	is_on_stack = object_is_on_stack(addr);
 	if (is_on_stack == onstack)
 		return;
 
-- 
GitLab


From b61bfa3c462671c48a51fb5c31af337c5a996a04 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:26:55 -0700
Subject: [PATCH 125/853] mm: move bootmem descriptors definition to a single
 place

There are a lot of places that define either a single bootmem descriptor or an
array of them.  Use only one central array with MAX_NUMNODES items instead.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kyle McMartin <kyle@parisc-linux.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/mm/numa.c             |  8 ++++----
 arch/arm/mm/discontig.c          | 34 +++++++++++++++-----------------
 arch/ia64/mm/discontig.c         | 11 +++++------
 arch/m32r/mm/discontig.c         |  4 +---
 arch/m68k/mm/init.c              |  4 +---
 arch/mips/sgi-ip27/ip27-memory.c |  4 +---
 arch/parisc/mm/init.c            |  3 +--
 arch/powerpc/mm/numa.c           |  3 +--
 arch/sh/mm/numa.c                |  5 ++---
 arch/sparc64/mm/init.c           |  3 +--
 arch/x86/mm/discontig_32.c       |  3 +--
 arch/x86/mm/numa_64.c            |  4 +---
 include/linux/bootmem.h          |  2 ++
 mm/bootmem.c                     |  2 ++
 mm/page_alloc.c                  |  4 +---
 15 files changed, 40 insertions(+), 54 deletions(-)

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 10ab7833e83..a53fda0481c 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -19,7 +19,6 @@
 #include <asm/pgalloc.h>
 
 pg_data_t node_data[MAX_NUMNODES];
-bootmem_data_t node_bdata[MAX_NUMNODES];
 EXPORT_SYMBOL(node_data);
 
 #undef DEBUG_DISCONTIG
@@ -141,7 +140,7 @@ setup_memory_node(int nid, void *kernel_end)
 		printk(" not enough mem to reserve NODE_DATA");
 		return;
 	}
-	NODE_DATA(nid)->bdata = &node_bdata[nid];
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 
 	printk(" Detected node memory:   start %8lu, end %8lu\n",
 	       node_min_pfn, node_max_pfn);
@@ -304,8 +303,9 @@ void __init paging_init(void)
 	dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
 	for_each_online_node(nid) {
-		unsigned long start_pfn = node_bdata[nid].node_boot_start >> PAGE_SHIFT;
-		unsigned long end_pfn = node_bdata[nid].node_low_pfn;
+		bootmem_data_t *bdata = &bootmem_node_data[nid];
+		unsigned long start_pfn = bdata->node_boot_start >> PAGE_SHIFT;
+		unsigned long end_pfn = bdata->node_low_pfn;
 
 		if (dma_local_pfn >= end_pfn - start_pfn)
 			zones_size[ZONE_DMA] = end_pfn - start_pfn;
diff --git a/arch/arm/mm/discontig.c b/arch/arm/mm/discontig.c
index 1e560218950..c8c0c4b0f0a 100644
--- a/arch/arm/mm/discontig.c
+++ b/arch/arm/mm/discontig.c
@@ -21,26 +21,24 @@
  * Our node_data structure for discontiguous memory.
  */
 
-static bootmem_data_t node_bootmem_data[MAX_NUMNODES];
-
 pg_data_t discontig_node_data[MAX_NUMNODES] = {
-  { .bdata = &node_bootmem_data[0] },
-  { .bdata = &node_bootmem_data[1] },
-  { .bdata = &node_bootmem_data[2] },
-  { .bdata = &node_bootmem_data[3] },
+  { .bdata = &bootmem_node_data[0] },
+  { .bdata = &bootmem_node_data[1] },
+  { .bdata = &bootmem_node_data[2] },
+  { .bdata = &bootmem_node_data[3] },
 #if MAX_NUMNODES == 16
-  { .bdata = &node_bootmem_data[4] },
-  { .bdata = &node_bootmem_data[5] },
-  { .bdata = &node_bootmem_data[6] },
-  { .bdata = &node_bootmem_data[7] },
-  { .bdata = &node_bootmem_data[8] },
-  { .bdata = &node_bootmem_data[9] },
-  { .bdata = &node_bootmem_data[10] },
-  { .bdata = &node_bootmem_data[11] },
-  { .bdata = &node_bootmem_data[12] },
-  { .bdata = &node_bootmem_data[13] },
-  { .bdata = &node_bootmem_data[14] },
-  { .bdata = &node_bootmem_data[15] },
+  { .bdata = &bootmem_node_data[4] },
+  { .bdata = &bootmem_node_data[5] },
+  { .bdata = &bootmem_node_data[6] },
+  { .bdata = &bootmem_node_data[7] },
+  { .bdata = &bootmem_node_data[8] },
+  { .bdata = &bootmem_node_data[9] },
+  { .bdata = &bootmem_node_data[10] },
+  { .bdata = &bootmem_node_data[11] },
+  { .bdata = &bootmem_node_data[12] },
+  { .bdata = &bootmem_node_data[13] },
+  { .bdata = &bootmem_node_data[14] },
+  { .bdata = &bootmem_node_data[15] },
 #endif
 };
 
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 544dc420c65..2fcf8464331 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -36,7 +36,6 @@ struct early_node_data {
 	struct ia64_node_data *node_data;
 	unsigned long pernode_addr;
 	unsigned long pernode_size;
-	struct bootmem_data bootmem_data;
 	unsigned long num_physpages;
 #ifdef CONFIG_ZONE_DMA
 	unsigned long num_dma_physpages;
@@ -76,7 +75,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len,
 				  int node)
 {
 	unsigned long cstart, epfn, end = start + len;
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
 	cstart = GRANULEROUNDDOWN(start);
@@ -167,7 +166,7 @@ static void __init fill_pernode(int node, unsigned long pernode,
 {
 	void *cpu_data;
 	int cpus = early_nr_cpus_node(node);
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	mem_data[node].pernode_addr = pernode;
 	mem_data[node].pernode_size = pernodesize;
@@ -224,7 +223,7 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
 {
 	unsigned long epfn;
 	unsigned long pernodesize = 0, pernode, pages, mapsize;
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	epfn = (start + len) >> PAGE_SHIFT;
 
@@ -440,7 +439,7 @@ void __init find_memory(void)
 	efi_memmap_walk(find_max_min_low_pfn, NULL);
 
 	for_each_online_node(node)
-		if (mem_data[node].bootmem_data.node_low_pfn) {
+		if (bootmem_node_data[node].node_low_pfn) {
 			node_clear(node, memory_less_mask);
 			mem_data[node].min_pfn = ~0UL;
 		}
@@ -460,7 +459,7 @@ void __init find_memory(void)
 		else if (node_isset(node, memory_less_mask))
 			continue;
 
-		bdp = &mem_data[node].bootmem_data;
+		bdp = &bootmem_node_data[node];
 		pernode = mem_data[node].pernode_addr;
 		pernodesize = mem_data[node].pernode_size;
 		map = pernode + pernodesize;
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index 07c1af7dc0e..aa9145ef6cc 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -20,7 +20,6 @@ extern char _end[];
 
 struct pglist_data *node_data[MAX_NUMNODES];
 EXPORT_SYMBOL(node_data);
-static bootmem_data_t node_bdata[MAX_NUMNODES] __initdata;
 
 pg_data_t m32r_node_data[MAX_NUMNODES];
 
@@ -81,7 +80,7 @@ unsigned long __init setup_memory(void)
 	for_each_online_node(nid) {
 		mp = &mem_prof[nid];
 		NODE_DATA(nid)=(pg_data_t *)&m32r_node_data[nid];
-		NODE_DATA(nid)->bdata = &node_bdata[nid];
+		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 		min_pfn = mp->start_pfn;
 		max_pfn = mp->start_pfn + mp->pages;
 		bootmap_size = init_bootmem_node(NODE_DATA(nid), mp->free_pfn,
@@ -163,4 +162,3 @@ unsigned long __init zone_sizes_init(void)
 
 	return holes;
 }
-
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index d8fb9c5303c..79f5f94d480 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -32,8 +32,6 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-static bootmem_data_t __initdata bootmem_data[MAX_NUMNODES];
-
 pg_data_t pg_data_map[MAX_NUMNODES];
 EXPORT_SYMBOL(pg_data_map);
 
@@ -58,7 +56,7 @@ void __init m68k_setup_node(int node)
 		pg_data_table[i] = pg_data_map + node;
 	}
 #endif
-	pg_data_map[node].bdata = bootmem_data + node;
+	pg_data_map[node].bdata = bootmem_node_data + node;
 	node_set_online(node);
 }
 
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 42cd1095630..060d853d7b3 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -33,8 +33,6 @@
 #define SLOT_PFNSHIFT           (SLOT_SHIFT - PAGE_SHIFT)
 #define PFN_NASIDSHFT           (NASID_SHFT - PAGE_SHIFT)
 
-static struct bootmem_data __initdata plat_node_bdata[MAX_COMPACT_NODES];
-
 struct node_data *__node_data[MAX_COMPACT_NODES];
 
 EXPORT_SYMBOL(__node_data);
@@ -403,7 +401,7 @@ static void __init node_mem_init(cnodeid_t node)
 	 */
 	__node_data[node] = __va(slot_freepfn << PAGE_SHIFT);
 
-	NODE_DATA(node)->bdata = &plat_node_bdata[node];
+	NODE_DATA(node)->bdata = &bootmem_node_data[node];
 	NODE_DATA(node)->node_start_pfn = start_pfn;
 	NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
 
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b4d6c8777ed..0ddf4904640 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -36,7 +36,6 @@ extern int  data_start;
 
 #ifdef CONFIG_DISCONTIGMEM
 struct node_map_data node_data[MAX_NUMNODES] __read_mostly;
-bootmem_data_t bmem_data[MAX_NUMNODES] __read_mostly;
 unsigned char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
 #endif
 
@@ -262,7 +261,7 @@ static void __init setup_bootmem(void)
 #ifdef CONFIG_DISCONTIGMEM
 	for (i = 0; i < MAX_PHYSMEM_RANGES; i++) {
 		memset(NODE_DATA(i), 0, sizeof(pg_data_t));
-		NODE_DATA(i)->bdata = &bmem_data[i];
+		NODE_DATA(i)->bdata = &bootmem_node_data[i];
 	}
 	memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf4bffba6f7..d9a18135133 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -39,7 +39,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
 EXPORT_SYMBOL(numa_cpumask_lookup_table);
 EXPORT_SYMBOL(node_data);
 
-static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
@@ -816,7 +815,7 @@ void __init do_init_bootmem(void)
   		dbg("node %d\n", nid);
 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
 
-		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 		NODE_DATA(nid)->node_start_pfn = start_pfn;
 		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 1663199ce88..095d93bec7c 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -14,7 +14,6 @@
 #include <linux/pfn.h>
 #include <asm/sections.h>
 
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL_GPL(node_data);
 
@@ -35,7 +34,7 @@ void __init setup_memory(void)
 	NODE_DATA(0) = pfn_to_kaddr(free_pfn);
 	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
 	free_pfn += PFN_UP(sizeof(struct pglist_data));
-	NODE_DATA(0)->bdata = &plat_node_bdata[0];
+	NODE_DATA(0)->bdata = &bootmem_node_data[0];
 
 	/* Set up node 0 */
 	setup_bootmem_allocator(free_pfn);
@@ -66,7 +65,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	free_pfn += PFN_UP(sizeof(struct pglist_data));
 	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-	NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 	NODE_DATA(nid)->node_start_pfn = start_pfn;
 	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 84898c44dd4..71329747395 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -788,7 +788,6 @@ int numa_cpu_lookup_table[NR_CPUS];
 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct mdesc_mblock {
 	u64	base;
@@ -871,7 +870,7 @@ static void __init allocate_node_data(int nid)
 	NODE_DATA(nid) = __va(paddr);
 	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-	NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 #endif
 
 	p = NODE_DATA(nid);
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 5dfef9fa061..62fa440678d 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -42,7 +42,6 @@
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
-static bootmem_data_t node0_bdata;
 
 /*
  * numa interface - we expect the numa architecture specific code to have
@@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn,
 	for_each_online_node(nid)
 		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-	NODE_DATA(0)->bdata = &node0_bdata;
+	NODE_DATA(0)->bdata = &bootmem_node_data[0];
 	setup_bootmem_allocator();
 }
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9782f42dd31..a4dd793d600 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -23,8 +23,6 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
-
 struct memnode memnode;
 
 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
@@ -198,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 		nodedata_phys + pgdat_size - 1);
 
 	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
-	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
+	NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
 	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
 	NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
 
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index a1d9b79078e..2599c741405 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -38,6 +38,8 @@ typedef struct bootmem_data {
 	struct list_head list;
 } bootmem_data_t;
 
+extern bootmem_data_t bootmem_node_data[];
+
 extern unsigned long bootmem_bootmap_pages(unsigned long);
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
 extern void free_bootmem(unsigned long addr, unsigned long size);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9f4bbc5da73..35b3cb66703 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -36,6 +36,8 @@ static LIST_HEAD(bdata_list);
 unsigned long saved_max_pfn;
 #endif
 
+bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
+
 /* return the number of _pages_ that will be allocated for the boot bitmap */
 unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9ece07ce65b..e089b92cdff 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4040,9 +4040,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-static bootmem_data_t contig_bootmem_data;
-struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
-
+struct pglist_data contig_page_data = { .bdata = &bootmem_node_data[0] };
 EXPORT_SYMBOL(contig_page_data);
 #endif
 
-- 
GitLab


From 6b312c0e6e2f44b020e12953d1dd37eed60e3609 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:26:58 -0700
Subject: [PATCH 126/853] mm: fix free_all_bootmem_core alignment check

The check for node_boot_start is bogus because we start freeing at the
corresponding pfn.  Instead, check whether the pfn itself is properly
aligned, which is also more readable, and adjust the documentation.

Also remove an unneeded accounting variable.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 35b3cb66703..319a79bce7c 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -377,7 +377,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 	struct page *page;
 	unsigned long pfn;
 	bootmem_data_t *bdata = pgdat->bdata;
-	unsigned long i, count, total = 0;
+	unsigned long i, count;
 	unsigned long idx;
 	unsigned long *map; 
 	int gofast = 0;
@@ -389,10 +389,13 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 	pfn = PFN_DOWN(bdata->node_boot_start);
 	idx = bdata->node_low_pfn - pfn;
 	map = bdata->node_bootmem_map;
-	/* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
-	if (bdata->node_boot_start == 0 ||
-	    ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG))
+	/*
+	 * Check if we are aligned to BITS_PER_LONG pages.  If so, we might
+	 * be able to free page orders of that size at once.
+	 */
+	if (!(pfn & (BITS_PER_LONG-1)))
 		gofast = 1;
+
 	for (i = 0; i < idx; ) {
 		unsigned long v = ~map[i / BITS_PER_LONG];
 
@@ -420,23 +423,19 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 		}
 		pfn += BITS_PER_LONG;
 	}
-	total += count;
 
 	/*
 	 * Now free the allocator bitmap itself, it's not
 	 * needed anymore:
 	 */
 	page = virt_to_page(bdata->node_bootmem_map);
-	count = 0;
 	idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
-	for (i = 0; i < idx; i++, page++) {
+	for (i = 0; i < idx; i++, page++)
 		__free_pages_bootmem(page, 0);
-		count++;
-	}
-	total += count;
+	count += i;
 	bdata->node_bootmem_map = NULL;
 
-	return total;
+	return count;
 }
 
 unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
-- 
GitLab


From 8ae04463077324ed9f6b04ab3a5b17ae1ee4dd35 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:26:59 -0700
Subject: [PATCH 127/853] mm: normalize internal argument passing of bootmem
 data

All _core functions only need the bootmem data, not the whole node descriptor.
Adjust the two functions that needlessly take the node descriptor.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 319a79bce7c..251c66c5d96 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -87,10 +87,9 @@ static unsigned long __init get_mapsize(bootmem_data_t *bdata)
 /*
  * Called once to set up the allocator itself.
  */
-static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
+static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
 	unsigned long mapstart, unsigned long start, unsigned long end)
 {
-	bootmem_data_t *bdata = pgdat->bdata;
 	unsigned long mapsize;
 
 	mminit_validate_memmodel_limits(&start, &end);
@@ -372,11 +371,10 @@ found:
 	return ret;
 }
 
-static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
+static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
 	struct page *page;
 	unsigned long pfn;
-	bootmem_data_t *bdata = pgdat->bdata;
 	unsigned long i, count;
 	unsigned long idx;
 	unsigned long *map; 
@@ -441,7 +439,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 				unsigned long startpfn, unsigned long endpfn)
 {
-	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
+	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
 }
 
 int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
@@ -466,14 +464,14 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 {
 	register_page_bootmem_info_node(pgdat);
-	return free_all_bootmem_core(pgdat);
+	return free_all_bootmem_core(pgdat->bdata);
 }
 
 unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 {
 	max_low_pfn = pages;
 	min_low_pfn = start;
-	return init_bootmem_core(NODE_DATA(0), start, 0, pages);
+	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
 }
 
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
@@ -504,7 +502,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
 
 unsigned long __init free_all_bootmem(void)
 {
-	return free_all_bootmem_core(NODE_DATA(0));
+	return free_all_bootmem_core(NODE_DATA(0)->bdata);
 }
 
 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
-- 
GitLab


From ffc6421f0720f433b5b35b89ff56e998eabff93b Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:26:59 -0700
Subject: [PATCH 128/853] mm: unexport __alloc_bootmem_core()

This function has no external callers, so unexport it.  Also fix its naming
inconsistency.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  5 -----
 mm/bootmem.c            | 24 ++++++++++++------------
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 2599c741405..dd8fee6c46d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -56,11 +56,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 				      unsigned long size,
 				      unsigned long align,
 				      unsigned long goal);
-extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal,
-				  unsigned long limit);
 
 /*
  * flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 251c66c5d96..4bc6ae2fbaa 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -234,9 +234,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
  *
  * NOTE:  This function is _not_ reentrant.
  */
-void * __init
-__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
-	      unsigned long align, unsigned long goal, unsigned long limit)
+static void * __init
+alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
+		unsigned long align, unsigned long goal, unsigned long limit)
 {
 	unsigned long areasize, preferred;
 	unsigned long i, start = 0, incr, eidx, end_pfn;
@@ -245,7 +245,7 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 	void *node_bootmem_map;
 
 	if (!size) {
-		printk("__alloc_bootmem_core(): zero-sized request\n");
+		printk("alloc_bootmem_core(): zero-sized request\n");
 		BUG();
 	}
 	BUG_ON(align & (align-1));
@@ -512,7 +512,7 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 	void *ptr;
 
 	list_for_each_entry(bdata, &bdata_list, list) {
-		ptr = __alloc_bootmem_core(bdata, size, align, goal, 0);
+		ptr = alloc_bootmem_core(bdata, size, align, goal, 0);
 		if (ptr)
 			return ptr;
 	}
@@ -540,7 +540,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 {
 	void *ptr;
 
-	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
+	ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
 	if (ptr)
 		return ptr;
 
@@ -559,8 +559,8 @@ void * __init alloc_bootmem_section(unsigned long size,
 	goal = PFN_PHYS(pfn);
 	limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
 	pgdat = NODE_DATA(early_pfn_to_nid(pfn));
-	ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
-				   limit);
+	ptr = alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
+				limit);
 
 	if (!ptr)
 		return NULL;
@@ -589,8 +589,8 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 	void *ptr;
 
 	list_for_each_entry(bdata, &bdata_list, list) {
-		ptr = __alloc_bootmem_core(bdata, size, align, goal,
-						ARCH_LOW_ADDRESS_LIMIT);
+		ptr = alloc_bootmem_core(bdata, size, align, goal,
+					ARCH_LOW_ADDRESS_LIMIT);
 		if (ptr)
 			return ptr;
 	}
@@ -606,6 +606,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
-	return __alloc_bootmem_core(pgdat->bdata, size, align, goal,
-				    ARCH_LOW_ADDRESS_LIMIT);
+	return alloc_bootmem_core(pgdat->bdata, size, align, goal,
+				ARCH_LOW_ADDRESS_LIMIT);
 }
-- 
GitLab


From efe9e77997f6e0306fedc6efa98df491dcf5ecb0 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 23 Jul 2008 21:27:00 -0700
Subject: [PATCH 129/853] mspec: convert nopfn to fault

[akpm@linux-foundation.org: remove unused variable]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mspec.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index fe2a95b5d3c..30f095a8c2d 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -193,25 +193,23 @@ mspec_close(struct vm_area_struct *vma)
 }
 
 /*
- * mspec_nopfn
+ * mspec_fault
  *
  * Creates a mspec page and maps it to user space.
  */
-static unsigned long
-mspec_nopfn(struct vm_area_struct *vma, unsigned long address)
+static int
+mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	unsigned long paddr, maddr;
 	unsigned long pfn;
-	int index;
+	pgoff_t index = vmf->pgoff;
 	struct vma_data *vdata = vma->vm_private_data;
 
-	BUG_ON(address < vdata->vm_start || address >= vdata->vm_end);
-	index = (address - vdata->vm_start) >> PAGE_SHIFT;
 	maddr = (volatile unsigned long) vdata->maddr[index];
 	if (maddr == 0) {
 		maddr = uncached_alloc_page(numa_node_id(), 1);
 		if (maddr == 0)
-			return NOPFN_OOM;
+			return VM_FAULT_OOM;
 
 		spin_lock(&vdata->lock);
 		if (vdata->maddr[index] == 0) {
@@ -231,13 +229,20 @@ mspec_nopfn(struct vm_area_struct *vma, unsigned long address)
 
 	pfn = paddr >> PAGE_SHIFT;
 
-	return pfn;
+	/*
+	 * vm_insert_pfn can fail with -EBUSY, but in that case it will
+	 * be because another thread has installed the pte first, so it
+	 * is no problem.
+	 */
+	vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+
+	return VM_FAULT_NOPAGE;
 }
 
 static struct vm_operations_struct mspec_vm_ops = {
 	.open = mspec_open,
 	.close = mspec_close,
-	.nopfn = mspec_nopfn
+	.fault = mspec_fault,
 };
 
 /*
-- 
GitLab


From e4048e5dc4aecec670f48ed007a28779f09cebd6 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 23 Jul 2008 21:27:01 -0700
Subject: [PATCH 130/853] page allocator: inline some __alloc_pages() wrappers

Two zonelist patch series largely rewrote __alloc_pages().  Now it and
__alloc_pages_nodemask() are just wrappers around
__alloc_pages_internal().  Inlining them will save a function call.

[akpm@linux-foundation.org: export __alloc_pages_internal]
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 21 +++++++++++++++++----
 mm/page_alloc.c     | 19 ++-----------------
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index b414be38718..f640ed24142 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -173,11 +173,24 @@ static inline void arch_free_page(struct page *page, int order) { }
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
-extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *);
+struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+		       struct zonelist *zonelist, nodemask_t *nodemask);
+
+static inline struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist)
+{
+	return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+}
+
+static inline struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist, nodemask_t *nodemask)
+{
+	return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
+}
 
-extern struct page *
-__alloc_pages_nodemask(gfp_t, unsigned int,
-				struct zonelist *, nodemask_t *nodemask);
 
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e089b92cdff..35b1347d81b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1429,7 +1429,7 @@ try_next_zone:
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-static struct page *
+struct page *
 __alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
@@ -1632,22 +1632,7 @@ nopage:
 got_pg:
 	return page;
 }
-
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist)
-{
-	return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
-}
-
-struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist, nodemask_t *nodemask)
-{
-	return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
-}
-
-EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(__alloc_pages_internal);
 
 /*
  * Common helper functions.
-- 
GitLab


From 4f5ca265788973e3f5a1129a96ee4a9cbf587f2b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:27:02 -0700
Subject: [PATCH 131/853] mm/migrate.c should #include <linux/syscalls.h>

Every file should include the headers containing the externs for its
global functions (in this case for sys_move_pages()).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/migrate.c b/mm/migrate.c
index 55bd355d170..e7d13a708da 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -30,6 +30,7 @@
 #include <linux/vmalloc.h>
 #include <linux/security.h>
 #include <linux/memcontrol.h>
+#include <linux/syscalls.h>
 
 #include "internal.h"
 
-- 
GitLab


From c748e1340e0de3fa7fed86f8bdf499be9242afff Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:27:03 -0700
Subject: [PATCH 132/853] mm/vmstat.c: proper externs

This patch adds proper extern declarations for five variables to
include/linux/vmstat.h.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_misc.c    | 4 ----
 include/linux/vmstat.h | 6 ++++++
 kernel/sysctl.c        | 2 +-
 mm/vmstat.c            | 1 +
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc0..b14f43d25e9 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 #undef K
 }
 
-extern const struct seq_operations fragmentation_op;
 static int fragmentation_open(struct inode *inode, struct file *file)
 {
 	(void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
 	.release	= seq_release,
 };
 
-extern const struct seq_operations pagetypeinfo_op;
 static int pagetypeinfo_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
 	.release	= seq_release,
 };
 
-extern const struct seq_operations zoneinfo_op;
 static int zoneinfo_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
 	.release	= seq_release,
 };
 
-extern const struct seq_operations vmstat_op;
 static int vmstat_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &vmstat_op);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e83b69346d2..58334d43951 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -44,6 +44,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		NR_VM_EVENT_ITEMS
 };
 
+extern const struct seq_operations fragmentation_op;
+extern const struct seq_operations pagetypeinfo_op;
+extern const struct seq_operations zoneinfo_op;
+extern const struct seq_operations vmstat_op;
+extern int sysctl_stat_interval;
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 /*
  * Light weight per cpu counter implementation.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2a7b9d88706..1f7b3b76a16 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -43,6 +43,7 @@
 #include <linux/limits.h>
 #include <linux/dcache.h>
 #include <linux/syscalls.h>
+#include <linux/vmstat.h>
 #include <linux/nfs_fs.h>
 #include <linux/acpi.h>
 #include <linux/reboot.h>
@@ -80,7 +81,6 @@ extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
 extern int compat_log;
 extern int maps_protect;
-extern int sysctl_stat_interval;
 extern int latencytop_enabled;
 extern int sysctl_nr_open_min, sysctl_nr_open_max;
 #ifdef CONFIG_RCU_TORTURE_TEST
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c3d4a781802..b0d08e667ec 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
+#include <linux/vmstat.h>
 #include <linux/sched.h>
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
-- 
GitLab


From 75353bed36cfbbfb55bbde0896bbf5a02d9ba355 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:27:03 -0700
Subject: [PATCH 133/853] mm/hugetlb.c: fix duplicate variable

It's confusing that set_max_huge_pages() contained two different
variables named "ret", and although the code works correctly this should
be fixed.

The inner of the two variables can simply be removed.

Spotted by sparse.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc:  "KOSAKI Motohiro" <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ab171274ef2..2c5c9ee4220 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -603,7 +603,6 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	}
 
 	while (count > persistent_huge_pages) {
-		int ret;
 		/*
 		 * If this allocation races such that we no longer need the
 		 * page, free_huge_page will handle it by freeing the page
-- 
GitLab


From a969e903a944f69309ee5cc9e7c7b08310d1151e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 23 Jul 2008 21:27:04 -0700
Subject: [PATCH 134/853] kill generic_file_direct_IO()

generic_file_direct_IO is a common helper around the invocation of
->direct_IO.  But there's almost nothing shared between the read and write
side, so we're better off without this helper.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c | 117 ++++++++++++++++++++++-----------------------------
 1 file changed, 51 insertions(+), 66 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 65d9d9e2b75..6343f3c841b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -42,9 +42,6 @@
 
 #include <asm/mman.h>
 
-static ssize_t
-generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-	loff_t offset, unsigned long nr_segs);
 
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
@@ -1205,8 +1202,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 			goto out; /* skip atime */
 		size = i_size_read(inode);
 		if (pos < size) {
-			retval = generic_file_direct_IO(READ, iocb,
-						iov, pos, nr_segs);
+			retval = filemap_write_and_wait(mapping);
+			if (!retval) {
+				retval = mapping->a_ops->direct_IO(READ, iocb,
+							iov, pos, nr_segs);
+			}
 			if (retval > 0)
 				*ppos = pos + retval;
 		}
@@ -2004,11 +2004,55 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	struct address_space *mapping = file->f_mapping;
 	struct inode	*inode = mapping->host;
 	ssize_t		written;
+	size_t		write_len;
+	pgoff_t		end;
 
 	if (count != ocount)
 		*nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
 
-	written = generic_file_direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+	/*
+	 * Unmap all mmappings of the file up-front.
+	 *
+	 * This will cause any pte dirty bits to be propagated into the
+	 * pageframes for the subsequent filemap_write_and_wait().
+	 */
+	write_len = iov_length(iov, *nr_segs);
+	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
+	if (mapping_mapped(mapping))
+		unmap_mapping_range(mapping, pos, write_len, 0);
+
+	written = filemap_write_and_wait(mapping);
+	if (written)
+		goto out;
+
+	/*
+	 * After a write we want buffered reads to be sure to go to disk to get
+	 * the new data.  We invalidate clean cached page from the region we're
+	 * about to write.  We do this *before* the write so that we can return
+	 * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+	 */
+	if (mapping->nrpages) {
+		written = invalidate_inode_pages2_range(mapping,
+					pos >> PAGE_CACHE_SHIFT, end);
+		if (written)
+			goto out;
+	}
+
+	written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+
+	/*
+	 * Finally, try again to invalidate clean pages which might have been
+	 * cached by non-direct readahead, or faulted in by get_user_pages()
+	 * if the source of the write was an mmap'ed region of the file
+	 * we're writing.  Either one is a pretty crazy thing to do,
+	 * so we don't support it 100%.  If this invalidation
+	 * fails, tough, the write still worked...
+	 */
+	if (mapping->nrpages) {
+		invalidate_inode_pages2_range(mapping,
+					      pos >> PAGE_CACHE_SHIFT, end);
+	}
+
 	if (written > 0) {
 		loff_t end = pos + written;
 		if (end > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
@@ -2024,6 +2068,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	 * i_mutex is held, which protects generic_osync_inode() from
 	 * livelocking.  AIO O_DIRECT ops attempt to sync metadata here.
 	 */
+out:
 	if ((written >= 0 || written == -EIOCBQUEUED) &&
 	    ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 		int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);
@@ -2511,66 +2556,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 }
 EXPORT_SYMBOL(generic_file_aio_write);
 
-/*
- * Called under i_mutex for writes to S_ISREG files.   Returns -EIO if something
- * went wrong during pagecache shootdown.
- */
-static ssize_t
-generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-	loff_t offset, unsigned long nr_segs)
-{
-	struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
-	ssize_t retval;
-	size_t write_len;
-	pgoff_t end = 0; /* silence gcc */
-
-	/*
-	 * If it's a write, unmap all mmappings of the file up-front.  This
-	 * will cause any pte dirty bits to be propagated into the pageframes
-	 * for the subsequent filemap_write_and_wait().
-	 */
-	if (rw == WRITE) {
-		write_len = iov_length(iov, nr_segs);
-		end = (offset + write_len - 1) >> PAGE_CACHE_SHIFT;
-	       	if (mapping_mapped(mapping))
-			unmap_mapping_range(mapping, offset, write_len, 0);
-	}
-
-	retval = filemap_write_and_wait(mapping);
-	if (retval)
-		goto out;
-
-	/*
-	 * After a write we want buffered reads to be sure to go to disk to get
-	 * the new data.  We invalidate clean cached page from the region we're
-	 * about to write.  We do this *before* the write so that we can return
-	 * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
-	 */
-	if (rw == WRITE && mapping->nrpages) {
-		retval = invalidate_inode_pages2_range(mapping,
-					offset >> PAGE_CACHE_SHIFT, end);
-		if (retval)
-			goto out;
-	}
-
-	retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
-
-	/*
-	 * Finally, try again to invalidate clean pages which might have been
-	 * cached by non-direct readahead, or faulted in by get_user_pages()
-	 * if the source of the write was an mmap'ed region of the file
-	 * we're writing.  Either one is a pretty crazy thing to do,
-	 * so we don't support it 100%.  If this invalidation
-	 * fails, tough, the write still worked...
-	 */
-	if (rw == WRITE && mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping, offset >> PAGE_CACHE_SHIFT, end);
-	}
-out:
-	return retval;
-}
-
 /**
  * try_to_release_page() - release old fs-specific metadata on a page
  *
-- 
GitLab


From 0d71d10a4252a3938e6b70189bc776171c02e076 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 23 Jul 2008 21:27:05 -0700
Subject: [PATCH 135/853] mm: remove nopfn

There are no users of nopfn in the tree. Remove it.

[hugh@veritas.com: fix build error]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  9 -------
 mm/memory.c        | 67 +++++-----------------------------------------
 2 files changed, 7 insertions(+), 69 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2128ef7780c..eb815cfc1b3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -166,8 +166,6 @@ struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
-	unsigned long (*nopfn)(struct vm_area_struct *area,
-			unsigned long address);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
@@ -674,13 +672,6 @@ static inline int page_mapped(struct page *page)
 	return atomic_read(&(page)->_mapcount) >= 0;
 }
 
-/*
- * Error return values for the *_nopfn functions
- */
-#define NOPFN_SIGBUS	((unsigned long) -1)
-#define NOPFN_OOM	((unsigned long) -2)
-#define NOPFN_REFAULT	((unsigned long) -3)
-
 /*
  * Different kinds of faults, as returned by handle_mm_fault().
  * Used to decide whether a process gets delivered SIGBUS or
diff --git a/mm/memory.c b/mm/memory.c
index 2302d228fe0..46dbed4b744 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1058,11 +1058,9 @@ static inline int use_zero_page(struct vm_area_struct *vma)
 	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
 		return 0;
 	/*
-	 * And if we have a fault or a nopfn routine, it's not an
-	 * anonymous region.
+	 * And if we have a fault routine, it's not an anonymous region.
 	 */
-	return !vma->vm_ops ||
-		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+	return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
@@ -1338,6 +1336,11 @@ out:
  *
  * This function should only be called from a vm_ops->fault handler, and
  * in that case the handler should return NULL.
+ *
+ * vma cannot be a COW mapping.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
  */
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
@@ -2501,59 +2504,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
-
-/*
- * do_no_pfn() tries to create a new page mapping for a page without
- * a struct_page backing it
- *
- * As this is called only for pages that do not currently exist, we
- * do not need to flush old virtual caches or the TLB.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
- *
- * It is expected that the ->nopfn handler always returns the same pfn
- * for a given virtual mapping.
- *
- * Mark this `noinline' to prevent it from bloating the main pagefault code.
- */
-static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
-		     unsigned long address, pte_t *page_table, pmd_t *pmd,
-		     int write_access)
-{
-	spinlock_t *ptl;
-	pte_t entry;
-	unsigned long pfn;
-
-	pte_unmap(page_table);
-	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
-	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
-
-	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
-
-	BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
-
-	if (unlikely(pfn == NOPFN_OOM))
-		return VM_FAULT_OOM;
-	else if (unlikely(pfn == NOPFN_SIGBUS))
-		return VM_FAULT_SIGBUS;
-	else if (unlikely(pfn == NOPFN_REFAULT))
-		return 0;
-
-	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-	/* Only go through if we didn't race with anybody else... */
-	if (pte_none(*page_table)) {
-		entry = pfn_pte(pfn, vma->vm_page_prot);
-		if (write_access)
-			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		set_pte_at(mm, address, page_table, entry);
-	}
-	pte_unmap_unlock(page_table, ptl);
-	return 0;
-}
-
 /*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
@@ -2614,9 +2564,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 				if (likely(vma->vm_ops->fault))
 					return do_linear_fault(mm, vma, address,
 						pte, pmd, write_access, entry);
-				if (unlikely(vma->vm_ops->nopfn))
-					return do_no_pfn(mm, vma, address, pte,
-							 pmd, write_access);
 			}
 			return do_anonymous_page(mm, vma, address,
 						 pte, pmd, write_access);
-- 
GitLab


From 28b2ee20c7cba812b6f2ccf6d722cf86d00a84dc Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 23 Jul 2008 21:27:05 -0700
Subject: [PATCH 136/853] access_process_vm device memory infrastructure

In order to be able to debug things like the X server and programs using
the PPC Cell SPUs, the debugger needs to be able to access device memory
through ptrace and /proc/pid/mem.

This patch:

Add the generic_access_phys access function and put the hooks in place
to allow access_process_vm to access device or PPC Cell SPU memory.

[riel@redhat.com: Add documentation for the vm_ops->access function]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/Locking |   7 ++
 arch/Kconfig                      |   3 +
 arch/x86/Kconfig                  |   1 +
 arch/x86/mm/ioremap.c             |   8 ++
 include/asm-x86/io_32.h           |   2 +
 include/asm-x86/io_64.h           |   2 +
 include/linux/mm.h                |   8 ++
 mm/memory.c                       | 131 ++++++++++++++++++++++++++----
 8 files changed, 144 insertions(+), 18 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 8b22d7d8b99..680fb566b92 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -510,6 +510,7 @@ prototypes:
 	void (*close)(struct vm_area_struct*);
 	int (*fault)(struct vm_area_struct*, struct vm_fault *);
 	int (*page_mkwrite)(struct vm_area_struct *, struct page *);
+	int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
 
 locking rules:
 		BKL	mmap_sem	PageLocked(page)
@@ -517,6 +518,7 @@ open:		no	yes
 close:		no	yes
 fault:		no	yes
 page_mkwrite:	no	yes		no
+access:		no	yes
 
 	->page_mkwrite() is called when a previously read-only page is
 about to become writeable. The file system is responsible for
@@ -525,6 +527,11 @@ taking to lock out truncate, the page range should be verified to be
 within i_size. The page mapping should also be checked that it is not
 NULL.
 
+	->access() is called when get_user_pages() fails in
+access_process_vm(), typically used to debug a process through
+/proc/pid/mem or ptrace.  This function is needed only for
+VM_IO | VM_PFNMAP VMAs.
+
 ================================================================================
 			Dubious stuff
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 4d5ebbc1e72..6093c0be58b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -31,6 +31,9 @@ config KRETPROBES
 	def_bool y
 	depends on KPROBES && HAVE_KRETPROBES
 
+config HAVE_IOREMAP_PROT
+	def_bool n
+
 config HAVE_KPROBES
 	def_bool n
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 03980cb0429..b2ddfcf0172 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,6 +21,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_DYNAMIC_FTRACE
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 24c1d3c3018..016f335bbee 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
 	return (void __iomem *)ret;
 }
 
+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
+				unsigned long prot_val)
+{
+	return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
+				__builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
 /**
  * iounmap - Free a IO remapping
  * @addr: virtual address from ioremap_*
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 4df44ed5407..e876d89ac15 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -110,6 +110,8 @@ static inline void *phys_to_virt(unsigned long address)
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+				unsigned long prot_val);
 
 /*
  * The default ioremap() behavior is non-cached:
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index ddd8058a502..22995c5c5ad 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -175,6 +175,8 @@ extern void early_iounmap(void *addr, unsigned long size);
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+				unsigned long prot_val);
 
 /*
  * The default ioremap() behavior is non-cached:
diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb815cfc1b3..5c7f8f64f70 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -170,6 +170,12 @@ struct vm_operations_struct {
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
 	int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
+
+	/* called by access_process_vm when get_user_pages() fails, typically
+	 * for use by special VMAs that can switch between memory and hardware
+	 */
+	int (*access)(struct vm_area_struct *vma, unsigned long addr,
+		      void *buf, int len, int write);
 #ifdef CONFIG_NUMA
 	/*
 	 * set_policy() op must add a reference to any non-NULL @new mempolicy
@@ -771,6 +777,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+			void *buf, int len, int write);
 
 static inline void unmap_shared_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen)
diff --git a/mm/memory.c b/mm/memory.c
index 46dbed4b744..87350321e66 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2751,6 +2751,86 @@ int in_gate_area_no_task(unsigned long addr)
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+static resource_size_t follow_phys(struct vm_area_struct *vma,
+			unsigned long address, unsigned int flags,
+			unsigned long *prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	resource_size_t phys_addr = 0;
+	struct mm_struct *mm = vma->vm_mm;
+
+	VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto no_page_table;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto no_page_table;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto no_page_table;
+
+	/* We cannot handle huge page PFN maps. Luckily they don't exist. */
+	if (pmd_huge(*pmd))
+		goto no_page_table;
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (!ptep)
+		goto out;
+
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto unlock;
+	if ((flags & FOLL_WRITE) && !pte_write(pte))
+		goto unlock;
+	phys_addr = pte_pfn(pte);
+	phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
+
+	*prot = pgprot_val(pte_pgprot(pte));
+
+unlock:
+	pte_unmap_unlock(ptep, ptl);
+out:
+	return phys_addr;
+no_page_table:
+	return 0;
+}
+
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+			void *buf, int len, int write)
+{
+	resource_size_t phys_addr;
+	unsigned long prot = 0;
+	void *maddr;
+	int offset = addr & (PAGE_SIZE-1);
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		return -EINVAL;
+
+	phys_addr = follow_phys(vma, addr, write, &prot);
+
+	if (!phys_addr)
+		return -EINVAL;
+
+	maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+	if (write)
+		memcpy_toio(maddr + offset, buf, len);
+	else
+		memcpy_fromio(buf, maddr + offset, len);
+	iounmap(maddr);
+
+	return len;
+}
+#endif
+
 /*
  * Access another process' address space.
  * Source/target buffer must be kernel space,
@@ -2760,7 +2840,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 {
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	struct page *page;
 	void *old_buf = buf;
 
 	mm = get_task_mm(tsk);
@@ -2772,28 +2851,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	while (len) {
 		int bytes, ret, offset;
 		void *maddr;
+		struct page *page = NULL;
 
 		ret = get_user_pages(tsk, mm, addr, 1,
 				write, 1, &page, &vma);
-		if (ret <= 0)
-			break;
-
-		bytes = len;
-		offset = addr & (PAGE_SIZE-1);
-		if (bytes > PAGE_SIZE-offset)
-			bytes = PAGE_SIZE-offset;
-
-		maddr = kmap(page);
-		if (write) {
-			copy_to_user_page(vma, page, addr,
-					  maddr + offset, buf, bytes);
-			set_page_dirty_lock(page);
+		if (ret <= 0) {
+			/*
+			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
+			 * we can access using slightly different code.
+			 */
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+			vma = find_vma(mm, addr);
+			if (!vma)
+				break;
+			if (vma->vm_ops && vma->vm_ops->access)
+				ret = vma->vm_ops->access(vma, addr, buf,
+							  len, write);
+			if (ret <= 0)
+#endif
+				break;
+			bytes = ret;
 		} else {
-			copy_from_user_page(vma, page, addr,
-					    buf, maddr + offset, bytes);
+			bytes = len;
+			offset = addr & (PAGE_SIZE-1);
+			if (bytes > PAGE_SIZE-offset)
+				bytes = PAGE_SIZE-offset;
+
+			maddr = kmap(page);
+			if (write) {
+				copy_to_user_page(vma, page, addr,
+						  maddr + offset, buf, bytes);
+				set_page_dirty_lock(page);
+			} else {
+				copy_from_user_page(vma, page, addr,
+						    buf, maddr + offset, bytes);
+			}
+			kunmap(page);
+			page_cache_release(page);
 		}
-		kunmap(page);
-		page_cache_release(page);
 		len -= bytes;
 		buf += bytes;
 		addr += bytes;
-- 
GitLab


From 7ae8ed5053a39082d224a3f48409e016baca9c16 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 23 Jul 2008 21:27:07 -0700
Subject: [PATCH 137/853] use generic_access_phys for /dev/mem mappings

Use generic_access_phys as the access_process_vm access function for
/dev/mem mappings.  This makes it possible to debug the X server.

[akpm@linux-foundation.org: repair all the architectures which broke]
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/pci/i386.c | 1 +
 drivers/char/mem.c  | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 2aafb67dc5f..a09505806b8 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -280,6 +280,7 @@ static void pci_track_mmap_page_range(struct vm_area_struct *vma)
 static struct vm_operations_struct pci_mmap_ops = {
 	.open  = pci_track_mmap_page_range,
 	.close = pci_unmap_page_range,
+	.access = generic_access_phys,
 };
 
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index c2dba82eb5f..672b08e694d 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -327,7 +327,10 @@ static void mmap_mem_close(struct vm_area_struct *vma)
 
 static struct vm_operations_struct mmap_mem_ops = {
 	.open  = mmap_mem_open,
-	.close = mmap_mem_close
+	.close = mmap_mem_close,
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+	.access = generic_access_phys
+#endif
 };
 
 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
-- 
GitLab


From a1f242ff460e4b50a045fa237c3c56cce9eabf83 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 23 Jul 2008 21:27:08 -0700
Subject: [PATCH 138/853] powerpc ioremap_prot

This adds ioremap_prot and pte_pgprot() so that one can extract protection
bits from a PTE and use them to ioremap_prot() (in order to support ptrace
of VM_IO | VM_PFNMAP as per Rik's patch).

This moves a couple of flag checks around in the ioremap implementations
of arch/powerpc.  There's a side effect of allowing non-cacheable and
non-guarded mappings on ppc32 which before would always have _PAGE_GUARDED
set whenever _PAGE_NO_CACHE is.

(standard ioremap will still set _PAGE_GUARDED, but ioremap_prot will be
capable of setting such a non guarded mapping).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/Kconfig                |  1 +
 arch/powerpc/mm/pgtable_32.c        | 22 ++++++++++++++++------
 arch/powerpc/mm/pgtable_64.c        | 16 ++++++++++++++++
 include/asm-powerpc/io.h            |  5 ++++-
 include/asm-powerpc/pgtable-4k.h    |  3 +++
 include/asm-powerpc/pgtable-ppc32.h | 16 ++++++++++++++++
 include/asm-powerpc/pgtable-ppc64.h |  8 ++++++++
 7 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4d7e2ba10ba..a487671c282 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -111,6 +111,7 @@ config PPC
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE
 	select HAVE_IDE
+	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index c7584072dfc..2001abdb191 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -145,13 +145,20 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 void __iomem *
 ioremap(phys_addr_t addr, unsigned long size)
 {
-	return __ioremap(addr, size, _PAGE_NO_CACHE);
+	return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
 }
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *
 ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
 {
+	/* writeable implies dirty for kernel addresses */
+	if (flags & _PAGE_RW)
+		flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
+
+	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
+	flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC);
+
 	return __ioremap(addr, size, flags);
 }
 EXPORT_SYMBOL(ioremap_flags);
@@ -163,6 +170,14 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
 	phys_addr_t p;
 	int err;
 
+	/* Make sure we have the base flags */
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= _PAGE_KERNEL;
+
+	/* Non-cacheable page cannot be coherent */
+	if (flags & _PAGE_NO_CACHE)
+		flags &= ~_PAGE_COHERENT;
+
 	/*
 	 * Choose an address to map it to.
 	 * Once the vmalloc system is running, we use it.
@@ -219,11 +234,6 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
 		v = (ioremap_bot -= size);
 	}
 
-	if ((flags & _PAGE_PRESENT) == 0)
-		flags |= _PAGE_KERNEL;
-	if (flags & _PAGE_NO_CACHE)
-		flags |= _PAGE_GUARDED;
-
 	/*
 	 * Should check if it is a candidate for a BAT mapping
 	 */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3ef0ad2f9ca..365e61ae5db 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -107,9 +107,18 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
 {
 	unsigned long i;
 
+	/* Make sure we have the base flags */
 	if ((flags & _PAGE_PRESENT) == 0)
 		flags |= pgprot_val(PAGE_KERNEL);
 
+	/* Non-cacheable page cannot be coherent */
+	if (flags & _PAGE_NO_CACHE)
+		flags &= ~_PAGE_COHERENT;
+
+	/* We don't support the 4K PFN hack with ioremap */
+	if (flags & _PAGE_4K_PFN)
+		return NULL;
+
 	WARN_ON(pa & ~PAGE_MASK);
 	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
 	WARN_ON(size & ~PAGE_MASK);
@@ -190,6 +199,13 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
 			     unsigned long flags)
 {
+	/* writeable implies dirty for kernel addresses */
+	if (flags & _PAGE_RW)
+		flags |= _PAGE_DIRTY;
+
+	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
+	flags &= ~(_PAGE_USER | _PAGE_EXEC);
+
 	if (ppc_md.ioremap)
 		return ppc_md.ioremap(addr, size, flags);
 	return __ioremap(addr, size, flags);
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 8b627823f5f..77c7fa025e6 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -617,7 +617,8 @@ static inline void iosync(void)
  *   and can be hooked by the platform via ppc_md
  *
  * * ioremap_flags allows to specify the page flags as an argument and can
- *   also be hooked by the platform via ppc_md
+ *   also be hooked by the platform via ppc_md. ioremap_prot is the exact
+ *   same thing as ioremap_flags.
  *
  * * ioremap_nocache is identical to ioremap
  *
@@ -639,6 +640,8 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
 extern void __iomem *ioremap_flags(phys_addr_t address, unsigned long size,
 				   unsigned long flags);
 #define ioremap_nocache(addr, size)	ioremap((addr), (size))
+#define ioremap_prot(addr, size, prot)	ioremap_flags((addr), (size), (prot))
+
 extern void iounmap(volatile void __iomem *addr);
 
 extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h
index fd2090dc1dc..c9601dfb4a1 100644
--- a/include/asm-powerpc/pgtable-4k.h
+++ b/include/asm-powerpc/pgtable-4k.h
@@ -51,6 +51,9 @@
 #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
 			 _PAGE_SECONDARY | _PAGE_GROUP_IX)
 
+/* There is no 4K PFN hack on 4K pages */
+#define _PAGE_4K_PFN	0
+
 /* PAGE_MASK gives the right answer below, but only by accident */
 /* It should be preserving the high 48 bits and then specifically */
 /* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
diff --git a/include/asm-powerpc/pgtable-ppc32.h b/include/asm-powerpc/pgtable-ppc32.h
index 3a96d001cb7..bdbab72f3eb 100644
--- a/include/asm-powerpc/pgtable-ppc32.h
+++ b/include/asm-powerpc/pgtable-ppc32.h
@@ -395,6 +395,12 @@ extern int icache_44x_need_flush;
 #ifndef _PAGE_EXEC
 #define _PAGE_EXEC	0
 #endif
+#ifndef _PAGE_ENDIAN
+#define _PAGE_ENDIAN	0
+#endif
+#ifndef _PAGE_COHERENT
+#define _PAGE_COHERENT	0
+#endif
 #ifndef _PMD_PRESENT_MASK
 #define _PMD_PRESENT_MASK	_PMD_PRESENT
 #endif
@@ -405,6 +411,12 @@ extern int icache_44x_need_flush;
 
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 
+
+#define PAGE_PROT_BITS	__pgprot(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
+				 _PAGE_WRITETHRU | _PAGE_ENDIAN | \
+				 _PAGE_USER | _PAGE_ACCESSED | \
+				 _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | \
+				 _PAGE_EXEC | _PAGE_HWEXEC)
 /*
  * Note: the _PAGE_COHERENT bit automatically gets set in the hardware
  * PTE if CONFIG_SMP is defined (hash_page does this); there is no need
@@ -538,6 +550,10 @@ static inline pte_t pte_mkyoung(pte_t pte) {
 	pte_val(pte) |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkspecial(pte_t pte) {
 	return pte; }
+static inline unsigned long pte_pgprot(pte_t pte)
+{
+	return __pgprot(pte_val(pte)) & PAGE_PROT_BITS;
+}
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h
index ab98a9c80b2..ba8000352b9 100644
--- a/include/asm-powerpc/pgtable-ppc64.h
+++ b/include/asm-powerpc/pgtable-ppc64.h
@@ -117,6 +117,10 @@
 #define PAGE_AGP	__pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE)
 #define HAVE_PAGE_AGP
 
+#define PAGE_PROT_BITS	__pgprot(_PAGE_GUARDED | _PAGE_COHERENT | \
+				 _PAGE_NO_CACHE | _PAGE_WRITETHRU | \
+				 _PAGE_4K_PFN | _PAGE_RW | _PAGE_USER | \
+ 				 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_EXEC)
 /* PTEIDX nibble */
 #define _PTEIDX_SECONDARY	0x8
 #define _PTEIDX_GROUP_IX	0x7
@@ -262,6 +266,10 @@ static inline pte_t pte_mkhuge(pte_t pte) {
 	return pte; }
 static inline pte_t pte_mkspecial(pte_t pte) {
 	return pte; }
+static inline unsigned long pte_pgprot(pte_t pte)
+{
+	return __pgprot(pte_val(pte)) & PAGE_PROT_BITS;
+}
 
 /* Atomic PTE updates */
 static inline unsigned long pte_update(struct mm_struct *mm,
-- 
GitLab


From a352894d07059649398c4769dc8b645e1a1dad88 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 23 Jul 2008 21:27:09 -0700
Subject: [PATCH 139/853] spufs: use new vm_ops->access to allow local state
 access from gdb

This uses the new vm_ops->access to allow gdb to access the SPU local
store.  We currently prevent access to problem state registers; allowing
it can be done later if really needed, but it's safer not to.

[akpm@linux-foundation.org: fix typo]
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/spufs/file.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 99c73066b82..010a51f5979 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -288,9 +288,32 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return VM_FAULT_NOPAGE;
 }
 
+static int spufs_mem_mmap_access(struct vm_area_struct *vma,
+				unsigned long address,
+				void *buf, int len, int write)
+{
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = address - vma->vm_start;
+	char *local_store;
+
+	if (write && !(vma->vm_flags & VM_WRITE))
+		return -EACCES;
+	if (spu_acquire(ctx))
+		return -EINTR;
+	if ((offset + len) > vma->vm_end)
+		len = vma->vm_end - offset;
+	local_store = ctx->ops->get_ls(ctx);
+	if (write)
+		memcpy_toio(local_store + offset, buf, len);
+	else
+		memcpy_fromio(buf, local_store + offset, len);
+	spu_release(ctx);
+	return len;
+}
 
 static struct vm_operations_struct spufs_mem_mmap_vmops = {
 	.fault = spufs_mem_mmap_fault,
+	.access = spufs_mem_mmap_access,
 };
 
 static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
-- 
GitLab


From 42b7772812d15b86543a23b82bd6070eef9a08b1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 23 Jul 2008 21:27:10 -0700
Subject: [PATCH 140/853] mm: remove double indirection on tlb parameter to
 free_pgd_range() & Co

The double indirection here is not needed anywhere and hence (at least)
confusing.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/hugetlbpage.c    |  2 +-
 arch/powerpc/mm/hugetlbpage.c |  8 ++++----
 fs/exec.c                     |  4 ++--
 include/asm-ia64/hugetlb.h    |  2 +-
 include/asm-powerpc/hugetlb.h |  2 +-
 include/asm-sh/hugetlb.h      |  2 +-
 include/asm-sparc/hugetlb.h   |  2 +-
 include/asm-x86/hugetlb.h     |  2 +-
 include/linux/mm.h            |  4 +---
 mm/internal.h                 |  3 +++
 mm/memory.c                   | 10 ++++++----
 mm/mmap.c                     |  6 ++++--
 12 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index d3ce8f3bcaa..cd49e2860ee 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -112,7 +112,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int wri
 	return NULL;
 }
 
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			unsigned long addr, unsigned long end,
 			unsigned long floor, unsigned long ceiling)
 {
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0d12fba31bc..1a96cc891cf 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -255,7 +255,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  *
  * Must be called with pagetable lock held.
  */
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			    unsigned long addr, unsigned long end,
 			    unsigned long floor, unsigned long ceiling)
 {
@@ -315,13 +315,13 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
 		return;
 
 	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
+	pgd = pgd_offset(tlb->mm, addr);
 	do {
-		BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize);
+		BUG_ON(get_slice_psize(tlb->mm, addr) != mmu_huge_psize);
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8..190ed1f9277 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -541,7 +541,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		/*
 		 * when the old and new regions overlap clear from new_end.
 		 */
-		free_pgd_range(&tlb, new_end, old_end, new_end,
+		free_pgd_range(tlb, new_end, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	} else {
 		/*
@@ -550,7 +550,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		 * have constraints on va-space that make this illegal (IA64) -
 		 * for the others its just a little faster.
 		 */
-		free_pgd_range(&tlb, old_start, old_end, new_end,
+		free_pgd_range(tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	}
 	tlb_finish_mmu(tlb, new_end, old_end);
diff --git a/include/asm-ia64/hugetlb.h b/include/asm-ia64/hugetlb.h
index f28a9701f1c..e9d1e5e2382 100644
--- a/include/asm-ia64/hugetlb.h
+++ b/include/asm-ia64/hugetlb.h
@@ -4,7 +4,7 @@
 #include <asm/page.h>
 
 
-void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index be32ff02f4a..0a37aa5ecaa 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -7,7 +7,7 @@
 int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len);
 
-void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 
diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h
index 02402303d89..fb30018938c 100644
--- a/include/asm-sh/hugetlb.h
+++ b/include/asm-sh/hugetlb.h
@@ -26,7 +26,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) {
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
 					  unsigned long floor,
 					  unsigned long ceiling)
diff --git a/include/asm-sparc/hugetlb.h b/include/asm-sparc/hugetlb.h
index 412af58926a..aeb92374ca3 100644
--- a/include/asm-sparc/hugetlb.h
+++ b/include/asm-sparc/hugetlb.h
@@ -31,7 +31,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
 					  unsigned long floor,
 					  unsigned long ceiling)
diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 14171a4924f..7eed6e0883b 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -26,7 +26,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) {
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
 					  unsigned long floor,
 					  unsigned long ceiling)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5c7f8f64f70..f8071097302 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -769,10 +769,8 @@ struct mm_walk {
 
 int walk_page_range(unsigned long addr, unsigned long end,
 		struct mm_walk *walk);
-void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
-void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
-		unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
diff --git a/mm/internal.h b/mm/internal.h
index 50807e12490..858ad01864d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -13,6 +13,9 @@
 
 #include <linux/mm.h>
 
+void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
+		unsigned long floor, unsigned long ceiling);
+
 static inline void set_page_count(struct page *page, int v)
 {
 	atomic_set(&page->_count, v);
diff --git a/mm/memory.c b/mm/memory.c
index 87350321e66..82f3f1c5cf1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -61,6 +61,8 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
+#include "internal.h"
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -211,7 +213,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  *
  * Must be called with pagetable lock held.
  */
-void free_pgd_range(struct mmu_gather **tlb,
+void free_pgd_range(struct mmu_gather *tlb,
 			unsigned long addr, unsigned long end,
 			unsigned long floor, unsigned long ceiling)
 {
@@ -262,16 +264,16 @@ void free_pgd_range(struct mmu_gather **tlb,
 		return;
 
 	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
+	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
-void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		unsigned long floor, unsigned long ceiling)
 {
 	while (vma) {
diff --git a/mm/mmap.c b/mm/mmap.c
index 1d102b956fd..75e0d0673d7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -32,6 +32,8 @@
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
@@ -1763,7 +1765,7 @@ static void unmap_region(struct mm_struct *mm,
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
 				 next? next->vm_start: 0);
 	tlb_finish_mmu(tlb, start, end);
 }
@@ -2063,7 +2065,7 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
 	/*
-- 
GitLab


From 3c82d0ce2c4f642b2f24ef98707a030543b06b90 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:11 -0700
Subject: [PATCH 141/853] buddy: clarify comments describing buddy merge

In __free_one_page(), the comment "Move the buddy up one level" appears
attached to the break and by implication when the break is taken we are
moving it up one level:

	if (!page_is_buddy(page, buddy, order))
		break;          /* Move the buddy up one level. */

In reality the inverse is true, we break out when we can no longer merge
this page with its buddy.  Looking back into pre-history (into the full
git history) it appears that these two lines accidentally got joined as
part of another change.

Move the comment down where it belongs below the if and clarify its
language.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 35b1347d81b..24aa3d1b9d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -432,8 +432,9 @@ static inline void __free_one_page(struct page *page,
 
 		buddy = __page_find_buddy(page, page_idx, order);
 		if (!page_is_buddy(page, buddy, order))
-			break;		/* Move the buddy up one level. */
+			break;
 
+		/* Our buddy is free, merge with it and move up one order. */
 		list_del(&buddy->lru);
 		zone->free_area[order].nr_free--;
 		rmv_page_order(buddy);
-- 
GitLab


From da3bbdd4632c0171406b2677e31494afa5bde2f8 Mon Sep 17 00:00:00 2001
From: Kentaro Makita <k-makita@np.css.fujitsu.com>
Date: Wed, 23 Jul 2008 21:27:13 -0700
Subject: [PATCH 142/853] fix soft lock up at NFS mount via per-SB LRU-list of
 unused dentries

[Summary]

 Split LRU-list of unused dentries to one per superblock to avoid soft
 lock up during NFS mounts and remounting of any filesystem.

 Previously I posted here:
 http://lkml.org/lkml/2008/3/5/590

[Descriptions]

- background

  dentry_unused is a list of dentries which are not referenced.
  dentry_unused grows up when references on directories or files are
  released.  This list can be very long if there is huge free memory.

- the problem

  When shrink_dcache_sb() is called, it scans all of dentry_unused linearly
  under spin_lock(), and if dentry->d_sb is different from the given
  superblock, it moves on to the next dentry.  This scan is very costly if
  there are many entries, and very inefficient if there are many superblocks.

  IOW, when we need to shrink the unused dentries of one superblock, we
  scan the unused dentries of all superblocks in the system.  For example,
  we need to scan 500 dentries to unmount a filesystem, but we also scan
  1,000,000 or more unused dentries on other superblocks.

  In our case, when mounting NFS*, shrink_dcache_sb() is called to shrink
  unused dentries on NFS, but it scans 100,000,000 unused dentries on
  superblocks in the system such as local ext3 filesystems.  I hear that
  NFS mounting took 1 min on some system in use.

* : NFS uses virtual filesystem in rpc layer, so NFS is affected by
  this problem.

  100,000,000 is possible number on large systems.

  A per-superblock LRU of unused dentries can reduce the cost in a
  reasonable manner.

- How to fix

  I found that this problem is solved by David Chinner's "Per-superblock
  unused dentry LRU lists V3"(1), so I rebased it and added a fix to
  reclaim with fairness, as suggested in Andrew Morton's comments(2).

  1) http://lkml.org/lkml/2006/5/25/318
  2) http://lkml.org/lkml/2006/5/25/320

  Split the LRU list of unused dentries into one list per superblock.
  Then NFS mounting will check only the dentries under one superblock
  instead of all of them.  But this splitting breaks the global LRU
  ordering of dentry_unused.  So I've attempted to reclaim unused dentries
  with fairness by calculating the number of dentries to scan on each sb
  in the following way:

  number of dentries to scan on this sb =
  count * (number of dentries on this sb / number of dentries in the machine)

- ToDo
 - I have to measure performance numbers and do stress tests.

 - When an unmount occurs during prune_dcache() while it is scanning the
  same superblock, it is unable to reach the next superblock because the
  current one has gone away.  We restart scanning the superblocks from the
  first one, which causes unfairness in reclaiming unused dentries on the
  first superblock.  But I think this happens very rarely.

- Test Results

  Result on 6GB boxes with excessive unused dentries.

Without patch:

$ cat /proc/sys/fs/dentry-state
10181835        10180203        45      0       0       0
# mount -t nfs 10.124.60.70:/work/kernel-src nfs
real    0m1.830s
user    0m0.001s
sys     0m1.653s

 With this patch:
$ cat /proc/sys/fs/dentry-state
10236610        10234751        45      0       0       0
# mount -t nfs 10.124.60.70:/work/kernel-src nfs
real    0m0.106s
user    0m0.002s
sys     0m0.032s

[akpm@linux-foundation.org: fix comments]
Signed-off-by: Kentaro Makita <k-makita@np.css.fujitsu.com>
Cc: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: David Chinner <dgc@sgi.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c        | 335 ++++++++++++++++++++++++---------------------
 fs/super.c         |   1 +
 include/linux/fs.h |   4 +
 3 files changed, 185 insertions(+), 155 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 6068c25b393..3818d6ab76c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
 static struct hlist_head *dentry_hashtable __read_mostly;
-static LIST_HEAD(dentry_unused);
 
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
 		call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
-static void dentry_lru_remove(struct dentry *dentry)
-{
-	if (!list_empty(&dentry->d_lru)) {
-		list_del_init(&dentry->d_lru);
-		dentry_stat.nr_unused--;
-	}
-}
-
 /*
  * Release the dentry's inode, using the filesystem
  * d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
 	}
 }
 
+/*
+ * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ */
+static void dentry_lru_add(struct dentry *dentry)
+{
+	list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+	dentry->d_sb->s_nr_dentry_unused++;
+	dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_add_tail(struct dentry *dentry)
+{
+	list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+	dentry->d_sb->s_nr_dentry_unused++;
+	dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_del(struct dentry *dentry)
+{
+	if (!list_empty(&dentry->d_lru)) {
+		list_del(&dentry->d_lru);
+		dentry->d_sb->s_nr_dentry_unused--;
+		dentry_stat.nr_unused--;
+	}
+}
+
+static void dentry_lru_del_init(struct dentry *dentry)
+{
+	if (likely(!list_empty(&dentry->d_lru))) {
+		list_del_init(&dentry->d_lru);
+		dentry->d_sb->s_nr_dentry_unused--;
+		dentry_stat.nr_unused--;
+	}
+}
+
 /**
  * d_kill - kill dentry and return parent
  * @dentry: dentry to kill
@@ -212,8 +238,7 @@ repeat:
 		goto kill_it;
   	if (list_empty(&dentry->d_lru)) {
   		dentry->d_flags |= DCACHE_REFERENCED;
-  		list_add(&dentry->d_lru, &dentry_unused);
-  		dentry_stat.nr_unused++;
+		dentry_lru_add(dentry);
   	}
  	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
 unhash_it:
 	__d_drop(dentry);
 kill_it:
-	dentry_lru_remove(dentry);
+	/* if dentry was on the d_lru list delete it from there */
+	dentry_lru_del(dentry);
 	dentry = d_kill(dentry);
 	if (dentry)
 		goto repeat;
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
 	atomic_inc(&dentry->d_count);
-	dentry_lru_remove(dentry);
+	dentry_lru_del_init(dentry);
 	return dentry;
 }
 
@@ -406,133 +432,167 @@ static void prune_one_dentry(struct dentry * dentry)
 
 		if (dentry->d_op && dentry->d_op->d_delete)
 			dentry->d_op->d_delete(dentry);
-		dentry_lru_remove(dentry);
+		dentry_lru_del_init(dentry);
 		__d_drop(dentry);
 		dentry = d_kill(dentry);
 		spin_lock(&dcache_lock);
 	}
 }
 
-/**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try and free
- * @sb: if given, ignore dentries for other superblocks
- *         which are being unmounted.
- *
- * Shrink the dcache. This is done when we need
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
- *
- * This function may fail to free any resources if
- * all the dentries are in use.
+/*
+ * Shrink the dentry LRU on a given superblock.
+ * @sb   : superblock to shrink dentry LRU.
+ * @count: If count is NULL, we prune all dentries on superblock.
+ * @flags: If flags is non-zero, we need to do special processing based on
+ * which flags are set. This means we don't need to maintain multiple
+ * similar copies of this loop.
  */
- 
-static void prune_dcache(int count, struct super_block *sb)
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
 {
-	spin_lock(&dcache_lock);
-	for (; count ; count--) {
-		struct dentry *dentry;
-		struct list_head *tmp;
-		struct rw_semaphore *s_umount;
-
-		cond_resched_lock(&dcache_lock);
+	LIST_HEAD(referenced);
+	LIST_HEAD(tmp);
+	struct dentry *dentry;
+	int cnt = 0;
 
-		tmp = dentry_unused.prev;
-		if (sb) {
-			/* Try to find a dentry for this sb, but don't try
-			 * too hard, if they aren't near the tail they will
-			 * be moved down again soon
+	BUG_ON(!sb);
+	BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
+	spin_lock(&dcache_lock);
+	if (count != NULL)
+		/* called from prune_dcache() and shrink_dcache_parent() */
+		cnt = *count;
+restart:
+	if (count == NULL)
+		list_splice_init(&sb->s_dentry_lru, &tmp);
+	else {
+		while (!list_empty(&sb->s_dentry_lru)) {
+			dentry = list_entry(sb->s_dentry_lru.prev,
+					struct dentry, d_lru);
+			BUG_ON(dentry->d_sb != sb);
+
+			spin_lock(&dentry->d_lock);
+			/*
+			 * If we are honouring the DCACHE_REFERENCED flag and
+			 * the dentry has this flag set, don't free it. Clear
+			 * the flag and put it back on the LRU.
 			 */
-			int skip = count;
-			while (skip && tmp != &dentry_unused &&
-			    list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
-				skip--;
-				tmp = tmp->prev;
+			if ((flags & DCACHE_REFERENCED)
+				&& (dentry->d_flags & DCACHE_REFERENCED)) {
+				dentry->d_flags &= ~DCACHE_REFERENCED;
+				list_move_tail(&dentry->d_lru, &referenced);
+				spin_unlock(&dentry->d_lock);
+			} else {
+				list_move_tail(&dentry->d_lru, &tmp);
+				spin_unlock(&dentry->d_lock);
+				cnt--;
+				if (!cnt)
+					break;
 			}
 		}
-		if (tmp == &dentry_unused)
-			break;
-		list_del_init(tmp);
-		prefetch(dentry_unused.prev);
- 		dentry_stat.nr_unused--;
-		dentry = list_entry(tmp, struct dentry, d_lru);
-
- 		spin_lock(&dentry->d_lock);
+	}
+	while (!list_empty(&tmp)) {
+		dentry = list_entry(tmp.prev, struct dentry, d_lru);
+		dentry_lru_del_init(dentry);
+		spin_lock(&dentry->d_lock);
 		/*
 		 * We found an inuse dentry which was not removed from
-		 * dentry_unused because of laziness during lookup.  Do not free
-		 * it - just keep it off the dentry_unused list.
+		 * the LRU because of laziness during lookup.  Do not free
+		 * it - just keep it off the LRU list.
 		 */
- 		if (atomic_read(&dentry->d_count)) {
- 			spin_unlock(&dentry->d_lock);
+		if (atomic_read(&dentry->d_count)) {
+			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		/* If the dentry was recently referenced, don't free it. */
-		if (dentry->d_flags & DCACHE_REFERENCED) {
-			dentry->d_flags &= ~DCACHE_REFERENCED;
- 			list_add(&dentry->d_lru, &dentry_unused);
- 			dentry_stat.nr_unused++;
- 			spin_unlock(&dentry->d_lock);
+		prune_one_dentry(dentry);
+		/* dentry->d_lock was dropped in prune_one_dentry() */
+		cond_resched_lock(&dcache_lock);
+	}
+	if (count == NULL && !list_empty(&sb->s_dentry_lru))
+		goto restart;
+	if (count != NULL)
+		*count = cnt;
+	if (!list_empty(&referenced))
+		list_splice(&referenced, &sb->s_dentry_lru);
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * prune_dcache - shrink the dcache
+ * @count: number of entries to try to free
+ *
+ * Shrink the dcache. This is done when we need more memory, or simply when we
+ * need to unmount something (at which point we need to unuse all dentries).
+ *
+ * This function may fail to free any resources if all the dentries are in use.
+ */
+static void prune_dcache(int count)
+{
+	struct super_block *sb;
+	int w_count;
+	int unused = dentry_stat.nr_unused;
+	int prune_ratio;
+	int pruned;
+
+	if (unused == 0 || count == 0)
+		return;
+	spin_lock(&dcache_lock);
+restart:
+	if (count >= unused)
+		prune_ratio = 1;
+	else
+		prune_ratio = unused / count;
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_nr_dentry_unused == 0)
 			continue;
-		}
-		/*
-		 * If the dentry is not DCACHED_REFERENCED, it is time
-		 * to remove it from the dcache, provided the super block is
-		 * NULL (which means we are trying to reclaim memory)
-		 * or this dentry belongs to the same super block that
-		 * we want to shrink.
-		 */
-		/*
-		 * If this dentry is for "my" filesystem, then I can prune it
-		 * without taking the s_umount lock (I already hold it).
+		sb->s_count++;
+		/* Now, we reclaim unused dentrins with fairness.
+		 * We reclaim them same percentage from each superblock.
+		 * We calculate number of dentries to scan on this sb
+		 * as follows, but the implementation is arranged to avoid
+		 * overflows:
+		 * number of dentries to scan on this sb =
+		 * count * (number of dentries on this sb /
+		 * number of dentries in the machine)
 		 */
-		if (sb && dentry->d_sb == sb) {
-			prune_one_dentry(dentry);
-			continue;
-		}
+		spin_unlock(&sb_lock);
+		if (prune_ratio != 1)
+			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
+		else
+			w_count = sb->s_nr_dentry_unused;
+		pruned = w_count;
 		/*
-		 * ...otherwise we need to be sure this filesystem isn't being
-		 * unmounted, otherwise we could race with
-		 * generic_shutdown_super(), and end up holding a reference to
-		 * an inode while the filesystem is unmounted.
-		 * So we try to get s_umount, and make sure s_root isn't NULL.
-		 * (Take a local copy of s_umount to avoid a use-after-free of
-		 * `dentry').
+		 * We need to be sure this filesystem isn't being unmounted,
+		 * otherwise we could race with generic_shutdown_super(), and
+		 * end up holding a reference to an inode while the filesystem
+		 * is unmounted.  So we try to get s_umount, and make sure
+		 * s_root isn't NULL.
 		 */
-		s_umount = &dentry->d_sb->s_umount;
-		if (down_read_trylock(s_umount)) {
-			if (dentry->d_sb->s_root != NULL) {
-				prune_one_dentry(dentry);
-				up_read(s_umount);
-				continue;
+		if (down_read_trylock(&sb->s_umount)) {
+			if ((sb->s_root != NULL) &&
+			    (!list_empty(&sb->s_dentry_lru))) {
+				spin_unlock(&dcache_lock);
+				__shrink_dcache_sb(sb, &w_count,
+						DCACHE_REFERENCED);
+				pruned -= w_count;
+				spin_lock(&dcache_lock);
 			}
-			up_read(s_umount);
+			up_read(&sb->s_umount);
 		}
-		spin_unlock(&dentry->d_lock);
+		spin_lock(&sb_lock);
+		count -= pruned;
 		/*
-		 * Insert dentry at the head of the list as inserting at the
-		 * tail leads to a cycle.
+		 * restart only when sb is no longer on the list and
+		 * we have more work to do.
 		 */
- 		list_add(&dentry->d_lru, &dentry_unused);
-		dentry_stat.nr_unused++;
+		if (__put_super_and_need_restart(sb) && count > 0) {
+			spin_unlock(&sb_lock);
+			goto restart;
+		}
 	}
+	spin_unlock(&sb_lock);
 	spin_unlock(&dcache_lock);
 }
 
-/*
- * Shrink the dcache for the specified super block.
- * This allows us to unmount a device without disturbing
- * the dcache for the other devices.
- *
- * This implementation makes just two traversals of the
- * unused list.  On the first pass we move the selected
- * dentries to the most recent end, and on the second
- * pass we free them.  The second pass must restart after
- * each dput(), but since the target dentries are all at
- * the end, it's really just a single traversal.
- */
-
 /**
  * shrink_dcache_sb - shrink dcache for a superblock
  * @sb: superblock
@@ -541,44 +601,9 @@ static void prune_dcache(int count, struct super_block *sb)
  * is used to free the dcache before unmounting a file
  * system
  */
-
 void shrink_dcache_sb(struct super_block * sb)
 {
-	struct list_head *tmp, *next;
-	struct dentry *dentry;
-
-	/*
-	 * Pass one ... move the dentries for the specified
-	 * superblock to the most recent end of the unused list.
-	 */
-	spin_lock(&dcache_lock);
-	list_for_each_prev_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		list_move_tail(tmp, &dentry_unused);
-	}
-
-	/*
-	 * Pass two ... free the dentries for this superblock.
-	 */
-repeat:
-	list_for_each_prev_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		dentry_stat.nr_unused--;
-		list_del_init(tmp);
-		spin_lock(&dentry->d_lock);
-		if (atomic_read(&dentry->d_count)) {
-			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-		prune_one_dentry(dentry);
-		cond_resched_lock(&dcache_lock);
-		goto repeat;
-	}
-	spin_unlock(&dcache_lock);
+	__shrink_dcache_sb(sb, NULL, 0);
 }
 
 /*
@@ -595,7 +620,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 
 	/* detach this root from the system */
 	spin_lock(&dcache_lock);
-	dentry_lru_remove(dentry);
+	dentry_lru_del_init(dentry);
 	__d_drop(dentry);
 	spin_unlock(&dcache_lock);
 
@@ -609,7 +634,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 			spin_lock(&dcache_lock);
 			list_for_each_entry(loop, &dentry->d_subdirs,
 					    d_u.d_child) {
-				dentry_lru_remove(loop);
+				dentry_lru_del_init(loop);
 				__d_drop(loop);
 				cond_resched_lock(&dcache_lock);
 			}
@@ -791,14 +816,13 @@ resume:
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 
-		dentry_lru_remove(dentry);
+		dentry_lru_del_init(dentry);
 		/* 
 		 * move only zero ref count dentries to the end 
 		 * of the unused list for prune_dcache
 		 */
 		if (!atomic_read(&dentry->d_count)) {
-			list_add_tail(&dentry->d_lru, &dentry_unused);
-			dentry_stat.nr_unused++;
+			dentry_lru_add_tail(dentry);
 			found++;
 		}
 
@@ -840,10 +864,11 @@ out:
  
 void shrink_dcache_parent(struct dentry * parent)
 {
+	struct super_block *sb = parent->d_sb;
 	int found;
 
 	while ((found = select_parent(parent)) != 0)
-		prune_dcache(found, parent->d_sb);
+		__shrink_dcache_sb(sb, &found, 0);
 }
 
 /*
@@ -863,7 +888,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
-		prune_dcache(nr, NULL);
+		prune_dcache(nr);
 	}
 	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
@@ -1215,7 +1240,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
  * lookup is going on.
  *
- * dentry_unused list is not updated even if lookup finds the required dentry
+ * The dentry unused LRU is not updated even if lookup finds the required dentry
  * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
  * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
  * acquisition.
diff --git a/fs/super.c b/fs/super.c
index 453877c5697..e931ae9511f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
+		INIT_LIST_HEAD(&s->s_dentry_lru);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
 		lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ff54ae4933f..e5e6a244096 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1025,6 +1025,7 @@ extern int send_sigurg(struct fown_struct *fown);
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
+#define sb_entry(list)  list_entry((list), struct super_block, s_list)
 #define S_BIAS (1<<30)
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
@@ -1058,6 +1059,9 @@ struct super_block {
 	struct list_head	s_more_io;	/* parked for more writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
+	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
+	struct list_head	s_dentry_lru;	/* unused dentry lru */
+	int			s_nr_dentry_unused;	/* # of dentry on lru */
 
 	struct block_device	*s_bdev;
 	struct mtd_info		*s_mtd;
-- 
GitLab


From 0cad47cf13bc2e9142d3a11d9f50523797d0d4ea Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:16 -0700
Subject: [PATCH 143/853] page-flags: record page flag overlays explicitly

With the recent page flag reorganisation we have a single enum which
defines the valid page flags and their values, nice and clear.  However
there are a number of bits which are overloaded by different subsystems.
Firstly there is PG_owner_priv_1 which is used by filesystems and by XEN.
Secondly both SLOB and SLUB use a couple of extra page bits to manage
internal state for pages they own; both overlay other bits.  All of these
"aliases" are scattered about the source making it very hard for a reader
to know if the bits are safe to rely on in all contexts; confusion here is
bad.

As we now have a single place where the bits are clearly assigned it makes
sense to clarify the reuse of bits by making the aliases explicit and
visible with the original bit assignments.  This patch creates explicit
aliases within the enum itself for the overloaded bits, creates standard
bit accessors PageFoo etc.  and uses those throughout.

This version pulls the bit manipulation out to standard named page bit
accessors as suggested by Christoph, it retains the explicit mapping to
the overlayed bits.  A fusion of both ideas.  SLUB and SLOB have been
compile tested on x86_64 only, and SLUB boot tested.  If people
feel this is worth doing then I can run a fuller set of testing.

This patch:

Some page flags are used for more than one purpose, for example
PG_owner_priv_1.  Currently there are individual accessors for each user,
each built using the common flag name far away from the bit definitions.
This makes it hard to see all possible uses of these bits.

Now that we have a single enum to generate the bit orders it makes sense
to express overlays in the same place.  So create per use aliases for this
bit in the main page-flags enum and use those in the accessors.

[akpm@linux-foundation.org: fix xen]
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0d2a4e7012a..7d8db1233e4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -96,7 +96,14 @@ enum pageflags {
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
 #endif
-	__NR_PAGEFLAGS
+	__NR_PAGEFLAGS,
+
+	/* Filesystems */
+	PG_checked = PG_owner_priv_1,
+
+	/* XEN */
+	PG_pinned = PG_owner_priv_1,
+	PG_savepinned = PG_dirty,
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -155,9 +162,9 @@ PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
-PAGEFLAG(Checked, owner_priv_1)		/* Used by some filesystems */
-PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */
-PAGEFLAG(SavePinned, dirty);					/* Xen */
+PAGEFLAG(Checked, checked)		/* Used by some filesystems */
+PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
+PAGEFLAG(SavePinned, savepinned);			/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
-- 
GitLab


From 8a38082d21cbc5ec961da7dda195e98a9a064dcf Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:18 -0700
Subject: [PATCH 144/853] slub: record page flag overlays explicitly

SLUB reuses two page bits for internal purposes, it overlays PG_active and
PG_error.  This is hidden away in slub.c.  Document these overlays
explicitly in the main page-flags enum along with all the others.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h |  7 ++++
 mm/slub.c                  | 65 ++++++++++----------------------------
 2 files changed, 24 insertions(+), 48 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 7d8db1233e4..3fc586b7b90 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -104,6 +104,10 @@ enum pageflags {
 	/* XEN */
 	PG_pinned = PG_owner_priv_1,
 	PG_savepinned = PG_dirty,
+
+	/* SLUB */
+	PG_slub_frozen = PG_active,
+	PG_slub_debug = PG_error,
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -169,6 +173,9 @@ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
 
+__PAGEFLAG(SlubFrozen, slub_frozen)
+__PAGEFLAG(SlubDebug, slub_debug)
+
 /*
  * Only test-and-set exist for PG_writeback.  The unconditional operators are
  * risky: they bypass page accounting.
diff --git a/mm/slub.c b/mm/slub.c
index 6d4a49c1ff2..77c21cf53ff 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -102,44 +102,12 @@
  * 			the fast path and disables lockless freelists.
  */
 
-#define FROZEN (1 << PG_active)
-
 #ifdef CONFIG_SLUB_DEBUG
-#define SLABDEBUG (1 << PG_error)
+#define SLABDEBUG 1
 #else
 #define SLABDEBUG 0
 #endif
 
-static inline int SlabFrozen(struct page *page)
-{
-	return page->flags & FROZEN;
-}
-
-static inline void SetSlabFrozen(struct page *page)
-{
-	page->flags |= FROZEN;
-}
-
-static inline void ClearSlabFrozen(struct page *page)
-{
-	page->flags &= ~FROZEN;
-}
-
-static inline int SlabDebug(struct page *page)
-{
-	return page->flags & SLABDEBUG;
-}
-
-static inline void SetSlabDebug(struct page *page)
-{
-	page->flags |= SLABDEBUG;
-}
-
-static inline void ClearSlabDebug(struct page *page)
-{
-	page->flags &= ~SLABDEBUG;
-}
-
 /*
  * Issues still to be resolved:
  *
@@ -971,7 +939,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
 	}
 
 	/* Special debug activities for freeing objects */
-	if (!SlabFrozen(page) && !page->freelist)
+	if (!PageSlubFrozen(page) && !page->freelist)
 		remove_full(s, page);
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
@@ -1157,7 +1125,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	page->flags |= 1 << PG_slab;
 	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
 			SLAB_STORE_USER | SLAB_TRACE))
-		SetSlabDebug(page);
+		__SetPageSlubDebug(page);
 
 	start = page_address(page);
 
@@ -1184,14 +1152,14 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	int order = compound_order(page);
 	int pages = 1 << order;
 
-	if (unlikely(SlabDebug(page))) {
+	if (unlikely(SLABDEBUG && PageSlubDebug(page))) {
 		void *p;
 
 		slab_pad_check(s, page);
 		for_each_object(p, s, page_address(page),
 						page->objects)
 			check_object(s, page, p, 0);
-		ClearSlabDebug(page);
+		__ClearPageSlubDebug(page);
 	}
 
 	mod_zone_page_state(page_zone(page),
@@ -1288,7 +1256,7 @@ static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
 	if (slab_trylock(page)) {
 		list_del(&page->lru);
 		n->nr_partial--;
-		SetSlabFrozen(page);
+		__SetPageSlubFrozen(page);
 		return 1;
 	}
 	return 0;
@@ -1398,7 +1366,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
 
-	ClearSlabFrozen(page);
+	__ClearPageSlubFrozen(page);
 	if (page->inuse) {
 
 		if (page->freelist) {
@@ -1406,7 +1374,8 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
 		} else {
 			stat(c, DEACTIVATE_FULL);
-			if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
+			if (SLABDEBUG && PageSlubDebug(page) &&
+						(s->flags & SLAB_STORE_USER))
 				add_full(n, page);
 		}
 		slab_unlock(page);
@@ -1551,7 +1520,7 @@ load_freelist:
 	object = c->page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
-	if (unlikely(SlabDebug(c->page)))
+	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
 		goto debug;
 
 	c->freelist = object[c->offset];
@@ -1588,7 +1557,7 @@ new_slab:
 		if (c->page)
 			flush_slab(s, c);
 		slab_lock(new);
-		SetSlabFrozen(new);
+		__SetPageSlubFrozen(new);
 		c->page = new;
 		goto load_freelist;
 	}
@@ -1674,7 +1643,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
 
-	if (unlikely(SlabDebug(page)))
+	if (unlikely(SLABDEBUG && PageSlubDebug(page)))
 		goto debug;
 
 checks_ok:
@@ -1682,7 +1651,7 @@ checks_ok:
 	page->freelist = object;
 	page->inuse--;
 
-	if (unlikely(SlabFrozen(page))) {
+	if (unlikely(PageSlubFrozen(page))) {
 		stat(c, FREE_FROZEN);
 		goto out_unlock;
 	}
@@ -3317,12 +3286,12 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page,
 			s->name, page);
 
 	if (s->flags & DEBUG_DEFAULT_FLAGS) {
-		if (!SlabDebug(page))
-			printk(KERN_ERR "SLUB %s: SlabDebug not set "
+		if (!PageSlubDebug(page))
+			printk(KERN_ERR "SLUB %s: SlubDebug not set "
 				"on slab 0x%p\n", s->name, page);
 	} else {
-		if (SlabDebug(page))
-			printk(KERN_ERR "SLUB %s: SlabDebug set on "
+		if (PageSlubDebug(page))
+			printk(KERN_ERR "SLUB %s: SlubDebug set on "
 				"slab 0x%p\n", s->name, page);
 	}
 }
-- 
GitLab


From 9023cb7e8564d95a1893f8cb6895a293be9a71fe Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:19 -0700
Subject: [PATCH 145/853] slob: record page flag overlays explicitly

SLOB reuses two page bits for internal purposes, it overlays PG_active and
PG_private.  This is hidden away in slob.c.  Document these overlays
explicitly in the main page-flags enum along with all the others.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h |  7 +++++++
 mm/slob.c                  | 12 ++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3fc586b7b90..54590a9a103 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -105,6 +105,10 @@ enum pageflags {
 	PG_pinned = PG_owner_priv_1,
 	PG_savepinned = PG_dirty,
 
+	/* SLOB */
+	PG_slob_page = PG_active,
+	PG_slob_free = PG_private,
+
 	/* SLUB */
 	PG_slub_frozen = PG_active,
 	PG_slub_debug = PG_error,
@@ -173,6 +177,9 @@ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
 
+__PAGEFLAG(SlobPage, slob_page)
+__PAGEFLAG(SlobFree, slob_free)
+
 __PAGEFLAG(SlubFrozen, slub_frozen)
 __PAGEFLAG(SlubDebug, slub_debug)
 
diff --git a/mm/slob.c b/mm/slob.c
index a3ad6671adf..de268eb7ac7 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -130,17 +130,17 @@ static LIST_HEAD(free_slob_large);
  */
 static inline int slob_page(struct slob_page *sp)
 {
-	return test_bit(PG_active, &sp->flags);
+	return PageSlobPage((struct page *)sp);
 }
 
 static inline void set_slob_page(struct slob_page *sp)
 {
-	__set_bit(PG_active, &sp->flags);
+	__SetPageSlobPage((struct page *)sp);
 }
 
 static inline void clear_slob_page(struct slob_page *sp)
 {
-	__clear_bit(PG_active, &sp->flags);
+	__ClearPageSlobPage((struct page *)sp);
 }
 
 /*
@@ -148,19 +148,19 @@ static inline void clear_slob_page(struct slob_page *sp)
  */
 static inline int slob_page_free(struct slob_page *sp)
 {
-	return test_bit(PG_private, &sp->flags);
+	return PageSlobFree((struct page *)sp);
 }
 
 static void set_slob_page_free(struct slob_page *sp, struct list_head *list)
 {
 	list_add(&sp->list, list);
-	__set_bit(PG_private, &sp->flags);
+	__SetPageSlobFree((struct page *)sp);
 }
 
 static inline void clear_slob_page_free(struct slob_page *sp)
 {
 	list_del(&sp->list);
-	__clear_bit(PG_private, &sp->flags);
+	__ClearPageSlobFree((struct page *)sp);
 }
 
 #define SLOB_UNIT sizeof(slob_t)
-- 
GitLab


From 2185e69f680ae8c8496b6fc15e20c889d5b39b67 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 23 Jul 2008 21:27:19 -0700
Subject: [PATCH 146/853] mapping_set_error: add unlikely()

This is called on a per-page basis and in the vast majority of cases
`error' is zero.

Cc: Guillaume Chazarain <guichaz@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d2fca802f80..ee1ec2c7723 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -22,7 +22,7 @@
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
 {
-	if (error) {
+	if (unlikely(error)) {
 		if (error == -ENOSPC)
 			set_bit(AS_ENOSPC, &mapping->flags);
 		else
-- 
GitLab


From 9109fb7b3520de187ebc3646c209d66a233f7169 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:27:20 -0700
Subject: [PATCH 147/853] mm: drop unneeded pgdat argument from
 free_area_init_node()

free_area_init_node() gets passed in the node id as well as the node
descriptor.  This is redundant as the function can trivially get the node
descriptor itself by means of NODE_DATA() and the node's id.

I checked all the users and NODE_DATA() seems to be usable everywhere
from where this function is called.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/mm/numa.c         |  2 +-
 arch/arm/mm/init.c           |  2 +-
 arch/avr32/mm/init.c         |  2 +-
 arch/cris/arch-v10/mm/init.c |  2 +-
 arch/cris/arch-v32/mm/init.c |  2 +-
 arch/m32r/mm/discontig.c     |  3 +--
 arch/m32r/mm/init.c          |  2 +-
 arch/m68k/mm/motorola.c      |  2 +-
 arch/m68k/mm/sun3mmu.c       |  2 +-
 arch/parisc/mm/init.c        |  2 +-
 arch/sparc/mm/srmmu.c        |  3 +--
 arch/sparc/mm/sun4c.c        |  3 +--
 arch/v850/kernel/setup.c     |  3 +--
 include/linux/mm.h           |  5 ++---
 mm/memory_hotplug.c          |  2 +-
 mm/page_alloc.c              | 11 ++++++-----
 16 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index a53fda0481c..def0c74a78a 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -313,7 +313,7 @@ void __init paging_init(void)
 			zones_size[ZONE_DMA] = dma_local_pfn;
 			zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn;
 		}
-		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, NULL);
+		free_area_init_node(nid, zones_size, start_pfn, NULL);
 	}
 
 	/* Initialize the kernel's ZERO_PGE. */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b657f1719af..e6352946dde 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -284,7 +284,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	 */
 	arch_adjust_zones(node, zone_size, zhole_size);
 
-	free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size);
+	free_area_init_node(node, zone_size, start_pfn, zhole_size);
 
 	return end_pfn;
 }
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index 3f90a87527b..786de88a82a 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -129,7 +129,7 @@ void __init paging_init(void)
 		printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
 		       nid, start_pfn, low);
 
-		free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL);
+		free_area_init_node(nid, zones_size, start_pfn, NULL);
 
 		printk("Node %u: mem_map starts at %p\n",
 		       pgdat->node_id, pgdat->node_mem_map);
diff --git a/arch/cris/arch-v10/mm/init.c b/arch/cris/arch-v10/mm/init.c
index e0fcd1a9bfd..742fd1974c2 100644
--- a/arch/cris/arch-v10/mm/init.c
+++ b/arch/cris/arch-v10/mm/init.c
@@ -182,7 +182,7 @@ paging_init(void)
 	 * mem_map page array.
 	 */
 
-	free_area_init_node(0, &contig_page_data, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
+	free_area_init_node(0, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
 }
 
 /* Initialize remaps of some I/O-ports. It is important that this
diff --git a/arch/cris/arch-v32/mm/init.c b/arch/cris/arch-v32/mm/init.c
index 5a9ac583464..8a34b8b7429 100644
--- a/arch/cris/arch-v32/mm/init.c
+++ b/arch/cris/arch-v32/mm/init.c
@@ -162,7 +162,7 @@ paging_init(void)
 	 * substantially higher than 0, like us (we start at PAGE_OFFSET). This
 	 * saves space in the mem_map page array.
 	 */
-	free_area_init_node(0, &contig_page_data, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
+	free_area_init_node(0, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
 
 	mem_map = contig_page_data.node_mem_map;
 }
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index aa9145ef6cc..cc23934bc41 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -147,8 +147,7 @@ unsigned long __init zone_sizes_init(void)
 		zholes_size[ZONE_DMA] = mp->holes;
 		holes += zholes_size[ZONE_DMA];
 
-		free_area_init_node(nid, NODE_DATA(nid), zones_size,
-			start_pfn, zholes_size);
+		free_area_init_node(nid, zones_size, start_pfn, zholes_size);
 	}
 
 	/*
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index bbd97c85bc5..28799af15e9 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -123,7 +123,7 @@ unsigned long __init zone_sizes_init(void)
 	start_pfn = __MEMORY_START >> PAGE_SHIFT;
 #endif /* CONFIG_MMU */
 
-	free_area_init_node(0, NODE_DATA(0), zones_size, start_pfn, 0);
+	free_area_init_node(0, zones_size, start_pfn, 0);
 
 	return 0;
 }
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 226795bdf35..c5dbb9bdb32 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -296,7 +296,7 @@ void __init paging_init(void)
 #endif
 	for (i = 0; i < m68k_num_memory; i++) {
 		zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
-		free_area_init_node(i, pg_data_map + i, zones_size,
+		free_area_init_node(i, zones_size,
 				    m68k_memory[i].addr >> PAGE_SHIFT, NULL);
 	}
 }
diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index edceefc1887..1b902dbd437 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -94,7 +94,7 @@ void __init paging_init(void)
 
 	/* I really wish I knew why the following change made things better...  -- Sam */
 /*	free_area_init(zones_size); */
-	free_area_init_node(0, NODE_DATA(0), zones_size,
+	free_area_init_node(0, zones_size,
 			    (__pa(PAGE_OFFSET) >> PAGE_SHIFT) + 1, NULL);
 
 
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 0ddf4904640..7c155c254e7 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -887,7 +887,7 @@ void __init paging_init(void)
 		}
 #endif
 
-		free_area_init_node(i, NODE_DATA(i), zones_size,
+		free_area_init_node(i, zones_size,
 				pmem_ranges[i].start_pfn, NULL);
 	}
 }
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index c624e04ff03..ee30462598f 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1352,8 +1352,7 @@ void __init srmmu_paging_init(void)
 		zones_size[ZONE_HIGHMEM] = npages;
 		zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
 
-		free_area_init_node(0, &contig_page_data, zones_size,
-				    pfn_base, zholes_size);
+		free_area_init_node(0, zones_size, pfn_base, zholes_size);
 	}
 }
 
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 2375fe9dc31..d1782f6368b 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -2123,8 +2123,7 @@ void __init sun4c_paging_init(void)
 		zones_size[ZONE_HIGHMEM] = npages;
 		zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
 
-		free_area_init_node(0, &contig_page_data, zones_size,
-				    pfn_base, zholes_size);
+		free_area_init_node(0, zones_size, pfn_base, zholes_size);
 	}
 
 	cnt = 0;
diff --git a/arch/v850/kernel/setup.c b/arch/v850/kernel/setup.c
index a0a8456a843..10335cecf7b 100644
--- a/arch/v850/kernel/setup.c
+++ b/arch/v850/kernel/setup.c
@@ -295,8 +295,7 @@ init_mem_alloc (unsigned long ram_start, unsigned long ram_len)
 #error MAX_ORDER is too large for given PAGE_OFFSET (use CONFIG_FORCE_MAX_ZONEORDER to change it)
 #endif
 	NODE_DATA(0)->node_mem_map = NULL;
-	free_area_init_node (0, NODE_DATA(0), zones_size,
-			     ADDR_TO_PAGE (PAGE_OFFSET), 0);
+	free_area_init_node(0, zones_size, ADDR_TO_PAGE (PAGE_OFFSET), 0);
 }
 
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f8071097302..196924b657b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -962,9 +962,8 @@ static inline void pgtable_page_dtor(struct page *page)
 		NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, pg_data_t *pgdat,
-	unsigned long * zones_size, unsigned long zone_start_pfn, 
-	unsigned long *zholes_size);
+extern void free_area_init_node(int nid, unsigned long * zones_size,
+		unsigned long zone_start_pfn, unsigned long *zholes_size);
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 /*
  * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 833f854eabe..6e26adc08f1 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -455,7 +455,7 @@ static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
 	/* we can use NODE_DATA(nid) from here */
 
 	/* init node's zones as empty zones, we don't have any present pages.*/
-	free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
+	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
 
 	return pgdat;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 24aa3d1b9d9..e43aae135b3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3461,10 +3461,11 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 }
 
-void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat,
-		unsigned long *zones_size, unsigned long node_start_pfn,
-		unsigned long *zholes_size)
+void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
+		unsigned long node_start_pfn, unsigned long *zholes_size)
 {
+	pg_data_t *pgdat = NODE_DATA(nid);
+
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 	calculate_node_totalpages(pgdat, zones_size, zholes_size);
@@ -3961,7 +3962,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	setup_nr_node_ids();
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
-		free_area_init_node(nid, pgdat, NULL,
+		free_area_init_node(nid, NULL,
 				find_min_pfn_for_node(nid), NULL);
 
 		/* Any memory on that node */
@@ -4032,7 +4033,7 @@ EXPORT_SYMBOL(contig_page_data);
 
 void __init free_area_init(unsigned long *zones_size)
 {
-	free_area_init_node(0, NODE_DATA(0), zones_size,
+	free_area_init_node(0, zones_size,
 			__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
 }
 
-- 
GitLab


From fc1b8a73dd71226902a11928dd5500326e101df9 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:27:22 -0700
Subject: [PATCH 148/853] hugetlb: move hugetlb_acct_memory()

This is a patchset to give reliable behaviour to a process that
successfully calls mmap(MAP_PRIVATE) on a hugetlbfs file.  Currently, it
is possible for the process to be killed due to a small hugepage pool size
even if it calls mlock().

MAP_SHARED mappings on hugetlbfs reserve huge pages at mmap() time.  This
guarantees all future faults against the mapping will succeed.  This
allows local allocations at first use improving NUMA locality whilst
retaining reliability.

MAP_PRIVATE mappings do not reserve pages.  This can result in an
application being SIGKILLed later if a huge page is not available at fault
time.  This makes huge pages usage very ill-advised in some cases as the
unexpected application failure cannot be detected and handled as it is
immediately fatal.  Although an application may force instantiation of the
pages using mlock(), this may lead to poor memory placement and the
process may still be killed when performing COW.

This patchset introduces a reliability guarantee for the process which
creates a private mapping, i.e.  the process that calls mmap() on a
hugetlbfs file successfully.  The first patch of the set is purely
mechanical code move to make later diffs easier to read.  The second patch
will guarantee faults up until the process calls fork().  After patch two,
as long as the child keeps the mappings, the parent is no longer
guaranteed to be reliable.  Patch 3 guarantees that the parent will always
successfully COW by unmapping the pages from the child in the event there
are insufficient pages in the hugepage pool to allocate a new page, be it
via a static or dynamic pool.

Existing hugepage-aware applications are unlikely to be affected by this
change.  For much of hugetlbfs's history, pages were pre-faulted at mmap()
time or mmap() failed which acts in a reserve-like manner.  If the pool is
sized correctly already so that parent and child can fault reliably, the
application will not even notice the reserves.  It's only when the pool is
too small for the application to function perfectly reliably that the
reserves come into play.

Credit goes to Andy Whitcroft for cleaning up a number of mistakes during
review before the patches were released.

This patch:

A later patch in this set needs to call hugetlb_acct_memory() before it is
defined.  This patch moves the function without modification.  This makes
later diffs easier to read.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 82 ++++++++++++++++++++++++++--------------------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2c5c9ee4220..a4dbba8965f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -716,6 +716,47 @@ unsigned long hugetlb_total_pages(void)
 	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
 }
 
+static int hugetlb_acct_memory(long delta)
+{
+	int ret = -ENOMEM;
+
+	spin_lock(&hugetlb_lock);
+	/*
+	 * When cpuset is configured, it breaks the strict hugetlb page
+	 * reservation as the accounting is done on a global variable. Such
+	 * reservation is completely rubbish in the presence of cpuset because
+	 * the reservation is not checked against page availability for the
+	 * current cpuset. Application can still potentially OOM'ed by kernel
+	 * with lack of free htlb page in cpuset that the task is in.
+	 * Attempt to enforce strict accounting with cpuset is almost
+	 * impossible (or too ugly) because cpuset is too fluid that
+	 * task or memory node can be dynamically moved between cpusets.
+	 *
+	 * The change of semantics for shared hugetlb mapping with cpuset is
+	 * undesirable. However, in order to preserve some of the semantics,
+	 * we fall back to check against current free page availability as
+	 * a best attempt and hopefully to minimize the impact of changing
+	 * semantics that cpuset has.
+	 */
+	if (delta > 0) {
+		if (gather_surplus_pages(delta) < 0)
+			goto out;
+
+		if (delta > cpuset_mems_nr(free_huge_pages_node)) {
+			return_unused_surplus_pages(delta);
+			goto out;
+		}
+	}
+
+	ret = 0;
+	if (delta < 0)
+		return_unused_surplus_pages((unsigned long) -delta);
+
+out:
+	spin_unlock(&hugetlb_lock);
+	return ret;
+}
+
 /*
  * We cannot handle pagefaults against hugetlb pages at all.  They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -1248,47 +1289,6 @@ static long region_truncate(struct list_head *head, long end)
 	return chg;
 }
 
-static int hugetlb_acct_memory(long delta)
-{
-	int ret = -ENOMEM;
-
-	spin_lock(&hugetlb_lock);
-	/*
-	 * When cpuset is configured, it breaks the strict hugetlb page
-	 * reservation as the accounting is done on a global variable. Such
-	 * reservation is completely rubbish in the presence of cpuset because
-	 * the reservation is not checked against page availability for the
-	 * current cpuset. Application can still potentially OOM'ed by kernel
-	 * with lack of free htlb page in cpuset that the task is in.
-	 * Attempt to enforce strict accounting with cpuset is almost
-	 * impossible (or too ugly) because cpuset is too fluid that
-	 * task or memory node can be dynamically moved between cpusets.
-	 *
-	 * The change of semantics for shared hugetlb mapping with cpuset is
-	 * undesirable. However, in order to preserve some of the semantics,
-	 * we fall back to check against current free page availability as
-	 * a best attempt and hopefully to minimize the impact of changing
-	 * semantics that cpuset has.
-	 */
-	if (delta > 0) {
-		if (gather_surplus_pages(delta) < 0)
-			goto out;
-
-		if (delta > cpuset_mems_nr(free_huge_pages_node)) {
-			return_unused_surplus_pages(delta);
-			goto out;
-		}
-	}
-
-	ret = 0;
-	if (delta < 0)
-		return_unused_surplus_pages((unsigned long) -delta);
-
-out:
-	spin_unlock(&hugetlb_lock);
-	return ret;
-}
-
 int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 {
 	long ret, chg;
-- 
GitLab


From a1e78772d72b2616ed20e54896e68e0e7044854e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:27:23 -0700
Subject: [PATCH 149/853] hugetlb: reserve huge pages for reliable MAP_PRIVATE
 hugetlbfs mappings until fork()

This patch reserves huge pages at mmap() time for MAP_PRIVATE mappings in
a similar manner to the reservations taken for MAP_SHARED mappings.  The
reserve count is accounted both globally and on a per-VMA basis for
private mappings.  This guarantees that a process that successfully calls
mmap() will successfully fault all pages in the future unless fork() is
called.

The behaviour of private mappings of hugetlbfs files after this patch is
as follows:

1. The process calling mmap() is guaranteed to succeed all future faults until
   it forks().
2. On fork(), the parent may die due to SIGKILL on writes to the private
   mapping if enough pages are not available for the COW. For reasonably
   reliable behaviour in the face of a small huge page pool, children of
   hugepage-aware processes should not reference the mappings; such as
   might occur when fork()ing to exec().
3. On fork(), the child VMAs inherit no reserves. Reads on pages already
   faulted by the parent will succeed. Successful writes will depend on enough
   huge pages being free in the pool.
4. Quotas of the hugetlbfs mount are checked at reserve time for the mapper
   and at fault time otherwise.

Before this patch, all reads or writes in the child potentially need page
allocations that can later lead to the death of the parent.  This applies
to reads and writes of uninstantiated pages as well as COW.  After the
patch it is only a write to an instantiated page that causes problems.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    |   8 +-
 include/linux/hugetlb.h |   9 ++-
 kernel/fork.c           |   9 +++
 mm/hugetlb.c            | 158 ++++++++++++++++++++++++++++++----------
 4 files changed, 140 insertions(+), 44 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a..1576bbecd08 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -103,9 +103,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	ret = -ENOMEM;
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
-				  len >> HPAGE_SHIFT))
+	if (hugetlb_reserve_pages(inode,
+				vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
+				len >> HPAGE_SHIFT, vma))
 		goto out;
 
 	ret = 0;
@@ -942,7 +942,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 		goto out_dentry;
 
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
+	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a79e80b689d..185b14c9f02 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -17,6 +17,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_HUGETLB;
 }
 
+void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
@@ -30,7 +31,8 @@ int hugetlb_report_node_meminfo(int, char *);
 unsigned long hugetlb_total_pages(void);
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, int write_access);
-int hugetlb_reserve_pages(struct inode *inode, long from, long to);
+int hugetlb_reserve_pages(struct inode *inode, long from, long to,
+						struct vm_area_struct *vma);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
 extern unsigned long max_huge_pages;
@@ -58,6 +60,11 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
 	return 0;
 }
+
+static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+}
+
 static inline unsigned long hugetlb_total_pages(void)
 {
 	return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f2..552c8d8e77a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,6 +33,7 @@
 #include <linux/cpu.h>
 #include <linux/cgroup.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -306,6 +307,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			spin_unlock(&file->f_mapping->i_mmap_lock);
 		}
 
+		/*
+		 * Clear hugetlb-related page reserves for children. This only
+		 * affects MAP_PRIVATE mappings. Faults generated by the child
+		 * are not guaranteed to succeed, even if read-only
+		 */
+		if (is_vm_hugetlb_page(tmp))
+			reset_vma_resv_huge_pages(tmp);
+
 		/*
 		 * Link in the new vma and copy the page table entries.
 		 */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a4dbba8965f..0af500db363 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -40,6 +40,69 @@ static int hugetlb_next_nid;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+/*
+ * These helpers are used to track how many pages are reserved for
+ * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
+ * is guaranteed to have their future faults succeed.
+ *
+ * With the exception of reset_vma_resv_huge_pages() which is called at fork(),
+ * the reserve counters are updated with the hugetlb_lock held. It is safe
+ * to reset the VMA at fork() time as it is not in use yet and there is no
+ * chance of the global counters getting corrupted as a result of the values.
+ */
+static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	if (!(vma->vm_flags & VM_SHARED))
+		return (unsigned long)vma->vm_private_data;
+	return 0;
+}
+
+static void set_vma_resv_huge_pages(struct vm_area_struct *vma,
+							unsigned long reserve)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	VM_BUG_ON(vma->vm_flags & VM_SHARED);
+
+	vma->vm_private_data = (void *)reserve;
+}
+
+/* Decrement the reserved pages in the hugepage pool by one */
+static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED) {
+		/* Shared mappings always use reserves */
+		resv_huge_pages--;
+	} else {
+		/*
+		 * Only the process that called mmap() has reserves for
+		 * private mappings.
+		 */
+		if (vma_resv_huge_pages(vma)) {
+			resv_huge_pages--;
+			reserve = (unsigned long)vma->vm_private_data - 1;
+			vma->vm_private_data = (void *)reserve;
+		}
+	}
+}
+
+void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	if (!(vma->vm_flags & VM_SHARED))
+		vma->vm_private_data = (void *)0;
+}
+
+/* Returns true if the VMA has associated reserve pages */
+static int vma_has_private_reserves(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED)
+		return 0;
+	if (!vma_resv_huge_pages(vma))
+		return 0;
+	return 1;
+}
+
 static void clear_huge_page(struct page *page, unsigned long addr)
 {
 	int i;
@@ -101,6 +164,15 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	struct zone *zone;
 	struct zoneref *z;
 
+	/*
+	 * A child process with MAP_PRIVATE mappings created by their parent
+	 * have no page reserves. This check ensures that reservations are
+	 * not "stolen". The child may still get SIGKILLed
+	 */
+	if (!vma_has_private_reserves(vma) &&
+			free_huge_pages - resv_huge_pages == 0)
+		return NULL;
+
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
 		nid = zone_to_nid(zone);
@@ -111,8 +183,8 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			list_del(&page->lru);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
-			if (vma && vma->vm_flags & VM_MAYSHARE)
-				resv_huge_pages--;
+			decrement_hugepage_resv_vma(vma);
+
 			break;
 		}
 	}
@@ -461,55 +533,40 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 	}
 }
 
-
-static struct page *alloc_huge_page_shared(struct vm_area_struct *vma,
-						unsigned long addr)
+static struct page *alloc_huge_page(struct vm_area_struct *vma,
+				    unsigned long addr)
 {
 	struct page *page;
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned int chg = 0;
+
+	/*
+	 * Processes that did not create the mapping will have no reserves and
+	 * will not have accounted against quota. Check that the quota can be
+	 * made before satisfying the allocation
+	 */
+	if (!vma_has_private_reserves(vma)) {
+		chg = 1;
+		if (hugetlb_get_quota(inode->i_mapping, chg))
+			return ERR_PTR(-ENOSPC);
+	}
 
 	spin_lock(&hugetlb_lock);
 	page = dequeue_huge_page_vma(vma, addr);
 	spin_unlock(&hugetlb_lock);
-	return page ? page : ERR_PTR(-VM_FAULT_OOM);
-}
 
-static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
-						unsigned long addr)
-{
-	struct page *page = NULL;
-
-	if (hugetlb_get_quota(vma->vm_file->f_mapping, 1))
-		return ERR_PTR(-VM_FAULT_SIGBUS);
-
-	spin_lock(&hugetlb_lock);
-	if (free_huge_pages > resv_huge_pages)
-		page = dequeue_huge_page_vma(vma, addr);
-	spin_unlock(&hugetlb_lock);
 	if (!page) {
 		page = alloc_buddy_huge_page(vma, addr);
 		if (!page) {
-			hugetlb_put_quota(vma->vm_file->f_mapping, 1);
+			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_OOM);
 		}
 	}
-	return page;
-}
 
-static struct page *alloc_huge_page(struct vm_area_struct *vma,
-				    unsigned long addr)
-{
-	struct page *page;
-	struct address_space *mapping = vma->vm_file->f_mapping;
-
-	if (vma->vm_flags & VM_MAYSHARE)
-		page = alloc_huge_page_shared(vma, addr);
-	else
-		page = alloc_huge_page_private(vma, addr);
+	set_page_refcounted(page);
+	set_page_private(page, (unsigned long) mapping);
 
-	if (!IS_ERR(page)) {
-		set_page_refcounted(page);
-		set_page_private(page, (unsigned long) mapping);
-	}
 	return page;
 }
 
@@ -757,6 +814,13 @@ out:
 	return ret;
 }
 
+static void hugetlb_vm_op_close(struct vm_area_struct *vma)
+{
+	unsigned long reserve = vma_resv_huge_pages(vma);
+	if (reserve)
+		hugetlb_acct_memory(-reserve);
+}
+
 /*
  * We cannot handle pagefaults against hugetlb pages at all.  They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -771,6 +835,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 struct vm_operations_struct hugetlb_vm_ops = {
 	.fault = hugetlb_vm_op_fault,
+	.close = hugetlb_vm_op_close,
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
@@ -1289,11 +1354,25 @@ static long region_truncate(struct list_head *head, long end)
 	return chg;
 }
 
-int hugetlb_reserve_pages(struct inode *inode, long from, long to)
+int hugetlb_reserve_pages(struct inode *inode,
+					long from, long to,
+					struct vm_area_struct *vma)
 {
 	long ret, chg;
 
-	chg = region_chg(&inode->i_mapping->private_list, from, to);
+	/*
+	 * Shared mappings base their reservation on the number of pages that
+	 * are already allocated on behalf of the file. Private mappings need
+	 * to reserve the full area even if read-only as mprotect() may be
+	 * called to make the mapping read-write. Assume !vma is a shm mapping
+	 */
+	if (!vma || vma->vm_flags & VM_SHARED)
+		chg = region_chg(&inode->i_mapping->private_list, from, to);
+	else {
+		chg = to - from;
+		set_vma_resv_huge_pages(vma, chg);
+	}
+
 	if (chg < 0)
 		return chg;
 
@@ -1304,7 +1383,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 		hugetlb_put_quota(inode->i_mapping, chg);
 		return ret;
 	}
-	region_add(&inode->i_mapping->private_list, from, to);
+	if (!vma || vma->vm_flags & VM_SHARED)
+		region_add(&inode->i_mapping->private_list, from, to);
 	return 0;
 }
 
-- 
GitLab


From 04f2cbe35699d22dbf428373682ead85ca1240f5 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:27:25 -0700
Subject: [PATCH 150/853] hugetlb: guarantee that COW faults for a process that
 called mmap(MAP_PRIVATE) on hugetlbfs will succeed

After patch 2 in this series, a process that successfully calls mmap() for
a MAP_PRIVATE mapping will be guaranteed to successfully fault until a
process calls fork().  At that point, the next write fault from the parent
could fail due to COW if the child still has a reference.

We only reserve pages for the parent but a copy must be made to avoid
leaking data from the parent to the child after fork().  Reserves could be
taken for both parent and child at fork time to guarantee faults but if
the mapping is large it is highly likely we will not have sufficient pages
for the reservation, and it is common to fork only to exec() immediately
after.  A failure here would be very undesirable.

Note that the current behaviour of mainline with MAP_PRIVATE pages is
pretty bad.  The following situation is allowed to occur today.

1. Process calls mmap(MAP_PRIVATE)
2. Process calls mlock() to fault all pages and makes sure it succeeds
3. Process forks()
4. Process writes to MAP_PRIVATE mapping while child still exists
5. If the COW fails at this point, the process gets SIGKILLed even though it
   had taken care to ensure the pages existed

This patch improves the situation by guaranteeing the reliability of the
process that successfully calls mmap().  When the parent performs COW, it
will try to satisfy the allocation without using reserves.  If that fails
the parent will steal the page leaving any children without a page.
Faults from the child after that point will result in failure.  If the
child COW happens first, an attempt will be made to allocate the page
without reserves and the child will get SIGKILLed on failure.

To summarise the new behaviour:

1. If the original mapper performs COW on a private mapping with multiple
   references, it will attempt to allocate a hugepage from the pool or
   the buddy allocator without using the existing reserves. On fail, VMAs
   mapping the same area are traversed and the page being COW'd is unmapped
   where found. It will then steal the original page as the last mapper in
   the normal way.

2. The VMAs the pages were unmapped from are flagged to note that pages
   with data no longer exist. Future no-page faults on those VMAs will
   terminate the process as otherwise it would appear that data was corrupted.
   A warning is printed to the console that this situation occurred.

3. If the child performs COW first, it will attempt to satisfy the COW
   from the pool if there are enough pages or via the buddy allocator if
   overcommit is allowed and the buddy allocator can satisfy the request. If
   it fails, the child will be killed.

If the pool is large enough, existing applications will not notice that
the reserves were a factor.  Existing applications that depend on no
reserves being set are unlikely to exist, as for much of the history of
hugetlbfs, pages were prefaulted at mmap(), allocating the pages at that
point or failing the mmap().

[npiggin@suse.de: fix CONFIG_HUGETLB=n build]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    |   2 +-
 include/linux/hugetlb.h |   8 +-
 mm/hugetlb.c            | 201 ++++++++++++++++++++++++++++++++++++----
 mm/memory.c             |   2 +-
 4 files changed, 190 insertions(+), 23 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1576bbecd08..428eff5b73f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -441,7 +441,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
 			v_offset = 0;
 
 		__unmap_hugepage_range(vma,
-				vma->vm_start + v_offset, vma->vm_end);
+				vma->vm_start + v_offset, vma->vm_end, NULL);
 	}
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 185b14c9f02..abbc187193a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -23,8 +23,10 @@ int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __us
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
-void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
-void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
+void unmap_hugepage_range(struct vm_area_struct *,
+			unsigned long, unsigned long, struct page *);
+void __unmap_hugepage_range(struct vm_area_struct *,
+			unsigned long, unsigned long, struct page *);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 int hugetlb_report_meminfo(char *);
 int hugetlb_report_node_meminfo(int, char *);
@@ -74,7 +76,7 @@ static inline unsigned long hugetlb_total_pages(void)
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)		({ BUG(); 0; })
-#define unmap_hugepage_range(vma, start, end)	BUG()
+#define unmap_hugepage_range(vma, start, end, page)	BUG()
 #define hugetlb_report_meminfo(buf)		0
 #define hugetlb_report_node_meminfo(n, buf)	0
 #define follow_huge_pmd(mm, addr, pmd, write)	NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0af500db363..a2d29b84501 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -40,6 +40,9 @@ static int hugetlb_next_nid;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+#define HPAGE_RESV_OWNER    (1UL << (BITS_PER_LONG - 1))
+#define HPAGE_RESV_UNMAPPED (1UL << (BITS_PER_LONG - 2))
+#define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED)
 /*
  * These helpers are used to track how many pages are reserved for
  * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
@@ -54,17 +57,32 @@ static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
 	if (!(vma->vm_flags & VM_SHARED))
-		return (unsigned long)vma->vm_private_data;
+		return (unsigned long)vma->vm_private_data & ~HPAGE_RESV_MASK;
 	return 0;
 }
 
 static void set_vma_resv_huge_pages(struct vm_area_struct *vma,
 							unsigned long reserve)
 {
+	unsigned long flags;
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
 	VM_BUG_ON(vma->vm_flags & VM_SHARED);
 
-	vma->vm_private_data = (void *)reserve;
+	flags = (unsigned long)vma->vm_private_data & HPAGE_RESV_MASK;
+	vma->vm_private_data = (void *)(reserve | flags);
+}
+
+static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
+{
+	unsigned long reserveflags = (unsigned long)vma->vm_private_data;
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	vma->vm_private_data = (void *)(reserveflags | flags);
+}
+
+static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	return ((unsigned long)vma->vm_private_data & flag) != 0;
 }
 
 /* Decrement the reserved pages in the hugepage pool by one */
@@ -78,14 +96,18 @@ static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
 		 * Only the process that called mmap() has reserves for
 		 * private mappings.
 		 */
-		if (vma_resv_huge_pages(vma)) {
+		if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+			unsigned long flags, reserve;
 			resv_huge_pages--;
+			flags = (unsigned long)vma->vm_private_data &
+							HPAGE_RESV_MASK;
 			reserve = (unsigned long)vma->vm_private_data - 1;
-			vma->vm_private_data = (void *)reserve;
+			vma->vm_private_data = (void *)(reserve | flags);
 		}
 	}
 }
 
+/* Reset counters to 0 and clear all HPAGE_RESV_* flags */
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
@@ -153,7 +175,7 @@ static struct page *dequeue_huge_page(void)
 }
 
 static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
-				unsigned long address)
+				unsigned long address, int avoid_reserve)
 {
 	int nid;
 	struct page *page = NULL;
@@ -173,6 +195,10 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			free_huge_pages - resv_huge_pages == 0)
 		return NULL;
 
+	/* If reserves cannot be used, ensure enough pages are in the pool */
+	if (avoid_reserve && free_huge_pages - resv_huge_pages == 0)
+		return NULL;
+
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
 		nid = zone_to_nid(zone);
@@ -183,7 +209,9 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			list_del(&page->lru);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
-			decrement_hugepage_resv_vma(vma);
+
+			if (!avoid_reserve)
+				decrement_hugepage_resv_vma(vma);
 
 			break;
 		}
@@ -534,7 +562,7 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 }
 
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
-				    unsigned long addr)
+				    unsigned long addr, int avoid_reserve)
 {
 	struct page *page;
 	struct address_space *mapping = vma->vm_file->f_mapping;
@@ -546,14 +574,15 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	 * will not have accounted against quota. Check that the quota can be
 	 * made before satisfying the allocation
 	 */
-	if (!vma_has_private_reserves(vma)) {
+	if (!(vma->vm_flags & VM_SHARED) &&
+			!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
 		chg = 1;
 		if (hugetlb_get_quota(inode->i_mapping, chg))
 			return ERR_PTR(-ENOSPC);
 	}
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page_vma(vma, addr);
+	page = dequeue_huge_page_vma(vma, addr, avoid_reserve);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page) {
@@ -909,7 +938,7 @@ nomem:
 }
 
 void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end)
+			    unsigned long end, struct page *ref_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -937,6 +966,27 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 		if (huge_pmd_unshare(mm, &address, ptep))
 			continue;
 
+		/*
+		 * If a reference page is supplied, it is because a specific
+		 * page is being unmapped, not a range. Ensure the page we
+		 * are about to unmap is the actual page of interest.
+		 */
+		if (ref_page) {
+			pte = huge_ptep_get(ptep);
+			if (huge_pte_none(pte))
+				continue;
+			page = pte_page(pte);
+			if (page != ref_page)
+				continue;
+
+			/*
+			 * Mark the VMA as having unmapped its page so that
+			 * future faults in this VMA will fail rather than
+			 * looking like data was lost
+			 */
+			set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED);
+		}
+
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		if (huge_pte_none(pte))
 			continue;
@@ -955,7 +1005,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-			  unsigned long end)
+			  unsigned long end, struct page *ref_page)
 {
 	/*
 	 * It is undesirable to test vma->vm_file as it should be non-null
@@ -967,19 +1017,68 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	 */
 	if (vma->vm_file) {
 		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
-		__unmap_hugepage_range(vma, start, end);
+		__unmap_hugepage_range(vma, start, end, ref_page);
 		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
 	}
 }
 
+/*
+ * This is called when the original mapper is failing to COW a MAP_PRIVATE
+ * mappping it owns the reserve page for. The intention is to unmap the page
+ * from other VMAs and let the children be SIGKILLed if they are faulting the
+ * same region.
+ */
+int unmap_ref_private(struct mm_struct *mm,
+					struct vm_area_struct *vma,
+					struct page *page,
+					unsigned long address)
+{
+	struct vm_area_struct *iter_vma;
+	struct address_space *mapping;
+	struct prio_tree_iter iter;
+	pgoff_t pgoff;
+
+	/*
+	 * vm_pgoff is in PAGE_SIZE units, hence the different calculation
+	 * from page cache lookup which is in HPAGE_SIZE units.
+	 */
+	address = address & huge_page_mask(hstate_vma(vma));
+	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
+		+ (vma->vm_pgoff >> PAGE_SHIFT);
+	mapping = (struct address_space *)page_private(page);
+
+	vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		/* Do not unmap the current VMA */
+		if (iter_vma == vma)
+			continue;
+
+		/*
+		 * Unmap the page from other VMAs without their own reserves.
+		 * They get marked to be SIGKILLed if they fault in these
+		 * areas. This is because a future no-page fault on this VMA
+		 * could insert a zeroed page instead of the data existing
+		 * from the time of fork. This would look like data corruption
+		 */
+		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
+			unmap_hugepage_range(iter_vma,
+				address, address + HPAGE_SIZE,
+				page);
+	}
+
+	return 1;
+}
+
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pte_t *ptep, pte_t pte)
+			unsigned long address, pte_t *ptep, pte_t pte,
+			struct page *pagecache_page)
 {
 	struct page *old_page, *new_page;
 	int avoidcopy;
+	int outside_reserve = 0;
 
 	old_page = pte_page(pte);
 
+retry_avoidcopy:
 	/* If no-one else is actually using this page, avoid the copy
 	 * and just make the page writable */
 	avoidcopy = (page_count(old_page) == 1);
@@ -988,11 +1087,43 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
+	/*
+	 * If the process that created a MAP_PRIVATE mapping is about to
+	 * perform a COW due to a shared page count, attempt to satisfy
+	 * the allocation without using the existing reserves. The pagecache
+	 * page is used to determine if the reserve at this address was
+	 * consumed or not. If reserves were used, a partial faulted mapping
+	 * at the time of fork() could consume its reserves on COW instead
+	 * of the full address range.
+	 */
+	if (!(vma->vm_flags & VM_SHARED) &&
+			is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
+			old_page != pagecache_page)
+		outside_reserve = 1;
+
 	page_cache_get(old_page);
-	new_page = alloc_huge_page(vma, address);
+	new_page = alloc_huge_page(vma, address, outside_reserve);
 
 	if (IS_ERR(new_page)) {
 		page_cache_release(old_page);
+
+		/*
+		 * If a process owning a MAP_PRIVATE mapping fails to COW,
+		 * it is due to references held by a child and an insufficient
+		 * huge page pool. To guarantee the original mappers
+		 * reliability, unmap the page from child processes. The child
+		 * may get SIGKILLed if it later faults.
+		 */
+		if (outside_reserve) {
+			BUG_ON(huge_pte_none(pte));
+			if (unmap_ref_private(mm, vma, old_page, address)) {
+				BUG_ON(page_count(old_page) != 1);
+				BUG_ON(huge_pte_none(pte));
+				goto retry_avoidcopy;
+			}
+			WARN_ON_ONCE(1);
+		}
+
 		return -PTR_ERR(new_page);
 	}
 
@@ -1015,6 +1146,20 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	return 0;
 }
 
+/* Return the pagecache page at a given address within a VMA */
+static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
+			unsigned long address)
+{
+	struct address_space *mapping;
+	unsigned long idx;
+
+	mapping = vma->vm_file->f_mapping;
+	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
+		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+
+	return find_lock_page(mapping, idx);
+}
+
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, int write_access)
 {
@@ -1025,6 +1170,18 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct address_space *mapping;
 	pte_t new_pte;
 
+	/*
+	 * Currently, we are forced to kill the process in the event the
+	 * original mapper has unmapped pages from the child due to a failed
+	 * COW. Warn that such a situation has occured as it may not be obvious
+	 */
+	if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
+		printk(KERN_WARNING
+			"PID %d killed due to inadequate hugepage pool\n",
+			current->pid);
+		return ret;
+	}
+
 	mapping = vma->vm_file->f_mapping;
 	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
 		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
@@ -1039,7 +1196,7 @@ retry:
 		size = i_size_read(mapping->host) >> HPAGE_SHIFT;
 		if (idx >= size)
 			goto out;
-		page = alloc_huge_page(vma, address);
+		page = alloc_huge_page(vma, address, 0);
 		if (IS_ERR(page)) {
 			ret = -PTR_ERR(page);
 			goto out;
@@ -1081,7 +1238,7 @@ retry:
 
 	if (write_access && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
+		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
 	}
 
 	spin_unlock(&mm->page_table_lock);
@@ -1126,8 +1283,15 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (likely(pte_same(entry, huge_ptep_get(ptep))))
-		if (write_access && !pte_write(entry))
-			ret = hugetlb_cow(mm, vma, address, ptep, entry);
+		if (write_access && !pte_write(entry)) {
+			struct page *page;
+			page = hugetlbfs_pagecache_page(vma, address);
+			ret = hugetlb_cow(mm, vma, address, ptep, entry, page);
+			if (page) {
+				unlock_page(page);
+				put_page(page);
+			}
+		}
 	spin_unlock(&mm->page_table_lock);
 	mutex_unlock(&hugetlb_instantiation_mutex);
 
@@ -1371,6 +1535,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	else {
 		chg = to - from;
 		set_vma_resv_huge_pages(vma, chg);
+		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
 	}
 
 	if (chg < 0)
diff --git a/mm/memory.c b/mm/memory.c
index 82f3f1c5cf1..72932489a08 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -901,7 +901,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			}
 
 			if (unlikely(is_vm_hugetlb_page(vma))) {
-				unmap_hugepage_range(vma, start, end);
+				unmap_hugepage_range(vma, start, end, NULL);
 				zap_work -= (end - start) /
 						(HPAGE_SIZE / PAGE_SIZE);
 				start = end;
-- 
GitLab


From e7c4b0bfd025f71cf7624b7c1be174f63caade33 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:26 -0700
Subject: [PATCH 151/853] huge page private reservation review cleanups

Create some new accessors for vma private data to cut down on and contain
the casts.  Encapsulates the huge and small page offset calculations.
Also adds a couple of VM_BUG_ONs for consistency.

[akpm@linux-foundation.org: Make things static]
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 58 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a2d29b84501..3e873f0101f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -40,6 +40,28 @@ static int hugetlb_next_nid;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+/*
+ * Convert the address within this vma to the page offset within
+ * the mapping, in base page units.
+ */
+static pgoff_t vma_page_offset(struct vm_area_struct *vma,
+				unsigned long address)
+{
+	return ((address - vma->vm_start) >> PAGE_SHIFT) +
+					(vma->vm_pgoff >> PAGE_SHIFT);
+}
+
+/*
+ * Convert the address within this vma to the page offset within
+ * the mapping, in pagecache page units; huge pages here.
+ */
+static pgoff_t vma_pagecache_offset(struct vm_area_struct *vma,
+					unsigned long address)
+{
+	return ((address - vma->vm_start) >> HPAGE_SHIFT) +
+			(vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+}
+
 #define HPAGE_RESV_OWNER    (1UL << (BITS_PER_LONG - 1))
 #define HPAGE_RESV_UNMAPPED (1UL << (BITS_PER_LONG - 2))
 #define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED)
@@ -53,36 +75,48 @@ static DEFINE_SPINLOCK(hugetlb_lock);
  * to reset the VMA at fork() time as it is not in use yet and there is no
  * chance of the global counters getting corrupted as a result of the values.
  */
+static unsigned long get_vma_private_data(struct vm_area_struct *vma)
+{
+	return (unsigned long)vma->vm_private_data;
+}
+
+static void set_vma_private_data(struct vm_area_struct *vma,
+							unsigned long value)
+{
+	vma->vm_private_data = (void *)value;
+}
+
 static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
 	if (!(vma->vm_flags & VM_SHARED))
-		return (unsigned long)vma->vm_private_data & ~HPAGE_RESV_MASK;
+		return get_vma_private_data(vma) & ~HPAGE_RESV_MASK;
 	return 0;
 }
 
 static void set_vma_resv_huge_pages(struct vm_area_struct *vma,
 							unsigned long reserve)
 {
-	unsigned long flags;
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
 	VM_BUG_ON(vma->vm_flags & VM_SHARED);
 
-	flags = (unsigned long)vma->vm_private_data & HPAGE_RESV_MASK;
-	vma->vm_private_data = (void *)(reserve | flags);
+	set_vma_private_data(vma,
+		(get_vma_private_data(vma) & HPAGE_RESV_MASK) | reserve);
 }
 
 static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
 {
-	unsigned long reserveflags = (unsigned long)vma->vm_private_data;
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	vma->vm_private_data = (void *)(reserveflags | flags);
+	VM_BUG_ON(vma->vm_flags & VM_SHARED);
+
+	set_vma_private_data(vma, get_vma_private_data(vma) | flags);
 }
 
 static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	return ((unsigned long)vma->vm_private_data & flag) != 0;
+
+	return (get_vma_private_data(vma) & flag) != 0;
 }
 
 /* Decrement the reserved pages in the hugepage pool by one */
@@ -1151,11 +1185,10 @@ static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
 			unsigned long address)
 {
 	struct address_space *mapping;
-	unsigned long idx;
+	pgoff_t idx;
 
 	mapping = vma->vm_file->f_mapping;
-	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
-		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+	idx = vma_pagecache_offset(vma, address);
 
 	return find_lock_page(mapping, idx);
 }
@@ -1164,7 +1197,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, int write_access)
 {
 	int ret = VM_FAULT_SIGBUS;
-	unsigned long idx;
+	pgoff_t idx;
 	unsigned long size;
 	struct page *page;
 	struct address_space *mapping;
@@ -1183,8 +1216,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	mapping = vma->vm_file->f_mapping;
-	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
-		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+	idx = vma_pagecache_offset(vma, address);
 
 	/*
 	 * Use page lock to guard against racing truncation
-- 
GitLab


From cdfd4325c0d878679bd6a3ba8285b71d9980e3c0 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:28 -0700
Subject: [PATCH 152/853] mm: record MAP_NORESERVE status on vmas and fix small
 page mprotect reservations

With Mel's hugetlb private reservation support patches applied, strict
overcommit semantics are applied to both shared and private huge page
mappings.  This can be a problem if an application relied on unlimited
overcommit semantics for private mappings.  An example of this would be an
application which maps a huge area with the intention of using it very
sparsely.  These applications would benefit from being able to opt-out of
the strict overcommit.  It should be noted that prior to hugetlb
supporting demand faulting all mappings were fully populated and so
applications of this type should be rare.

This patch stack implements the MAP_NORESERVE mmap() flag for huge page
mappings.  This flag has the same meaning as for small page mappings,
suppressing reservations for that mapping.

Thanks to Mel Gorman for reviewing a number of early versions of these
patches.

This patch:

When a small page mapping is created with mmap() reservations are created
by default for any memory pages required.  When the region is read/write
the reservation is increased for every page, no reservation is needed for
read-only regions (as they implicitly share the zero page).  Reservations
are tracked via the VM_ACCOUNT vma flag which is present when the region
has reservation backing it.  When we convert a region from read-only to
read-write new reservations are acquired and VM_ACCOUNT is set.  However,
when a read-only map is created with MAP_NORESERVE it is indistinguishable
from a normal mapping.  When we then convert that to read/write we are
forced to incorrectly create reservations for it as we have no record of
the original MAP_NORESERVE.

This patch introduces a new vma flag VM_NORESERVE which records the
presence of the original MAP_NORESERVE flag.  This allows us to
distinguish these two circumstances and correctly account the reserve.

As well as fixing this FIXME in the code, this makes it much easier to
introduce MAP_NORESERVE support for huge pages as this flag is available
consistently for the life of the mapping.  VM_ACCOUNT on the other hand is
heavily used at the generic level in association with small pages.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 mm/mmap.c          | 3 +++
 mm/mprotect.c      | 6 ++----
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 196924b657b..df322fb4df3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -100,6 +100,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
 #define VM_RESERVED	0x00080000	/* Count as reserved_vm like IO */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
+#define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
diff --git a/mm/mmap.c b/mm/mmap.c
index 75e0d0673d7..57d3b6097de 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1110,6 +1110,9 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
+	if (flags & MAP_NORESERVE)
+		vm_flags |= VM_NORESERVE;
+
 	if (accountable && (!(flags & MAP_NORESERVE) ||
 			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
 		if (vm_flags & VM_SHARED) {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 360d9cc8b38..abd645a3b0a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -153,12 +153,10 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
 	 * make it unwritable again.
-	 *
-	 * FIXME? We haven't defined a VM_NORESERVE flag, so mprotecting
-	 * a MAP_NORESERVE private mapping to writable will now reserve.
 	 */
 	if (newflags & VM_WRITE) {
-		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
+		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|
+						VM_SHARED|VM_NORESERVE))) {
 			charged = nrpages;
 			if (security_vm_enough_memory(charged))
 				return -ENOMEM;
-- 
GitLab


From 9682290484370ce68ba23cd2ec2838e301934199 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:29 -0700
Subject: [PATCH 153/853] hugetlb: move reservation region support earlier

The following patch will require use of the reservation regions support.
Move this earlier in the file.  No changes have been made to this code.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 246 ++++++++++++++++++++++++++-------------------------
 1 file changed, 125 insertions(+), 121 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3e873f0101f..05bc9af4fca 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -40,6 +40,131 @@ static int hugetlb_next_nid;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+/*
+ * Region tracking -- allows tracking of reservations and instantiated pages
+ *                    across the pages in a mapping.
+ */
+struct file_region {
+	struct list_head link;
+	long from;
+	long to;
+};
+
+static long region_add(struct list_head *head, long f, long t)
+{
+	struct file_region *rg, *nrg, *trg;
+
+	/* Locate the region we are either in or before. */
+	list_for_each_entry(rg, head, link)
+		if (f <= rg->to)
+			break;
+
+	/* Round our left edge to the current segment if it encloses us. */
+	if (f > rg->from)
+		f = rg->from;
+
+	/* Check for and consume any regions we now overlap with. */
+	nrg = rg;
+	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
+		if (&rg->link == head)
+			break;
+		if (rg->from > t)
+			break;
+
+		/* If this area reaches higher then extend our area to
+		 * include it completely.  If this is not the first area
+		 * which we intend to reuse, free it. */
+		if (rg->to > t)
+			t = rg->to;
+		if (rg != nrg) {
+			list_del(&rg->link);
+			kfree(rg);
+		}
+	}
+	nrg->from = f;
+	nrg->to = t;
+	return 0;
+}
+
+static long region_chg(struct list_head *head, long f, long t)
+{
+	struct file_region *rg, *nrg;
+	long chg = 0;
+
+	/* Locate the region we are before or in. */
+	list_for_each_entry(rg, head, link)
+		if (f <= rg->to)
+			break;
+
+	/* If we are below the current region then a new region is required.
+	 * Subtle, allocate a new region at the position but make it zero
+	 * size such that we can guarantee to record the reservation. */
+	if (&rg->link == head || t < rg->from) {
+		nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
+		if (!nrg)
+			return -ENOMEM;
+		nrg->from = f;
+		nrg->to   = f;
+		INIT_LIST_HEAD(&nrg->link);
+		list_add(&nrg->link, rg->link.prev);
+
+		return t - f;
+	}
+
+	/* Round our left edge to the current segment if it encloses us. */
+	if (f > rg->from)
+		f = rg->from;
+	chg = t - f;
+
+	/* Check for and consume any regions we now overlap with. */
+	list_for_each_entry(rg, rg->link.prev, link) {
+		if (&rg->link == head)
+			break;
+		if (rg->from > t)
+			return chg;
+
+		/* We overlap with this area, if it extends futher than
+		 * us then we must extend ourselves.  Account for its
+		 * existing reservation. */
+		if (rg->to > t) {
+			chg += rg->to - t;
+			t = rg->to;
+		}
+		chg -= rg->to - rg->from;
+	}
+	return chg;
+}
+
+static long region_truncate(struct list_head *head, long end)
+{
+	struct file_region *rg, *trg;
+	long chg = 0;
+
+	/* Locate the region we are either in or before. */
+	list_for_each_entry(rg, head, link)
+		if (end <= rg->to)
+			break;
+	if (&rg->link == head)
+		return 0;
+
+	/* If we are in the middle of a region then adjust it. */
+	if (end > rg->from) {
+		chg = rg->to - end;
+		rg->to = end;
+		rg = list_entry(rg->link.next, typeof(*rg), link);
+	}
+
+	/* Drop any remaining regions. */
+	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
+		if (&rg->link == head)
+			break;
+		chg += rg->to - rg->from;
+		list_del(&rg->link);
+		kfree(rg);
+	}
+	return chg;
+}
+
 /*
  * Convert the address within this vma to the page offset within
  * the mapping, in base page units.
@@ -1429,127 +1554,6 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	flush_tlb_range(vma, start, end);
 }
 
-struct file_region {
-	struct list_head link;
-	long from;
-	long to;
-};
-
-static long region_add(struct list_head *head, long f, long t)
-{
-	struct file_region *rg, *nrg, *trg;
-
-	/* Locate the region we are either in or before. */
-	list_for_each_entry(rg, head, link)
-		if (f <= rg->to)
-			break;
-
-	/* Round our left edge to the current segment if it encloses us. */
-	if (f > rg->from)
-		f = rg->from;
-
-	/* Check for and consume any regions we now overlap with. */
-	nrg = rg;
-	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
-		if (&rg->link == head)
-			break;
-		if (rg->from > t)
-			break;
-
-		/* If this area reaches higher then extend our area to
-		 * include it completely.  If this is not the first area
-		 * which we intend to reuse, free it. */
-		if (rg->to > t)
-			t = rg->to;
-		if (rg != nrg) {
-			list_del(&rg->link);
-			kfree(rg);
-		}
-	}
-	nrg->from = f;
-	nrg->to = t;
-	return 0;
-}
-
-static long region_chg(struct list_head *head, long f, long t)
-{
-	struct file_region *rg, *nrg;
-	long chg = 0;
-
-	/* Locate the region we are before or in. */
-	list_for_each_entry(rg, head, link)
-		if (f <= rg->to)
-			break;
-
-	/* If we are below the current region then a new region is required.
-	 * Subtle, allocate a new region at the position but make it zero
-	 * size such that we can guarantee to record the reservation. */
-	if (&rg->link == head || t < rg->from) {
-		nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
-		if (!nrg)
-			return -ENOMEM;
-		nrg->from = f;
-		nrg->to   = f;
-		INIT_LIST_HEAD(&nrg->link);
-		list_add(&nrg->link, rg->link.prev);
-
-		return t - f;
-	}
-
-	/* Round our left edge to the current segment if it encloses us. */
-	if (f > rg->from)
-		f = rg->from;
-	chg = t - f;
-
-	/* Check for and consume any regions we now overlap with. */
-	list_for_each_entry(rg, rg->link.prev, link) {
-		if (&rg->link == head)
-			break;
-		if (rg->from > t)
-			return chg;
-
-		/* We overlap with this area, if it extends futher than
-		 * us then we must extend ourselves.  Account for its
-		 * existing reservation. */
-		if (rg->to > t) {
-			chg += rg->to - t;
-			t = rg->to;
-		}
-		chg -= rg->to - rg->from;
-	}
-	return chg;
-}
-
-static long region_truncate(struct list_head *head, long end)
-{
-	struct file_region *rg, *trg;
-	long chg = 0;
-
-	/* Locate the region we are either in or before. */
-	list_for_each_entry(rg, head, link)
-		if (end <= rg->to)
-			break;
-	if (&rg->link == head)
-		return 0;
-
-	/* If we are in the middle of a region then adjust it. */
-	if (end > rg->from) {
-		chg = rg->to - end;
-		rg->to = end;
-		rg = list_entry(rg->link.next, typeof(*rg), link);
-	}
-
-	/* Drop any remaining regions. */
-	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
-		if (&rg->link == head)
-			break;
-		chg += rg->to - rg->from;
-		list_del(&rg->link);
-		kfree(rg);
-	}
-	return chg;
-}
-
 int hugetlb_reserve_pages(struct inode *inode,
 					long from, long to,
 					struct vm_area_struct *vma)
-- 
GitLab


From c37f9fb11c976ffc08200d631dada6dcbfd07ea4 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:30 -0700
Subject: [PATCH 154/853] hugetlb: allow huge page mappings to be created
 without reservations

By default all shared mappings and most private mappings now have
reservations associated with them.  This improves semantics by providing
allocation guarantees to the mapper.  However a small number of
applications may attempt to make very large sparse mappings, with these
strict reservations the system will never be able to honour the mapping.

This patch set brings MAP_NORESERVE support to hugetlb files.  This allows
new mappings to be made to hugetlbfs files without an associated
reservation, for both shared and private mappings.  This allows
applications which want to create very sparse mappings to opt-out of the
reservation system.  Obviously as there is no reservation they are liable
to fault at runtime if the huge page pool becomes exhausted; buyer beware.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 05bc9af4fca..72acbb29d2c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -247,6 +247,9 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 /* Decrement the reserved pages in the hugepage pool by one */
 static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
 {
+	if (vma->vm_flags & VM_NORESERVE)
+		return;
+
 	if (vma->vm_flags & VM_SHARED) {
 		/* Shared mappings always use reserves */
 		resv_huge_pages--;
@@ -720,25 +723,65 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 	}
 }
 
+/*
+ * Determine if the huge page at addr within the vma has an associated
+ * reservation.  Where it does not we will need to logically increase
+ * reservation and actually increase quota before an allocation can occur.
+ * Where any new reservation would be required the reservation change is
+ * prepared, but not committed.  Once the page has been quota'd allocated
+ * an instantiated the change should be committed via vma_commit_reservation.
+ * No action is required on failure.
+ */
+static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+
+	if (vma->vm_flags & VM_SHARED) {
+		pgoff_t idx = vma_pagecache_offset(vma, addr);
+		return region_chg(&inode->i_mapping->private_list,
+							idx, idx + 1);
+
+	} else {
+		if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+			return 1;
+	}
+
+	return 0;
+}
+static void vma_commit_reservation(struct vm_area_struct *vma,
+							unsigned long addr)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+
+	if (vma->vm_flags & VM_SHARED) {
+		pgoff_t idx = vma_pagecache_offset(vma, addr);
+		region_add(&inode->i_mapping->private_list, idx, idx + 1);
+	}
+}
+
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
 	struct page *page;
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
-	unsigned int chg = 0;
+	unsigned int chg;
 
 	/*
 	 * Processes that did not create the mapping will have no reserves and
 	 * will not have accounted against quota. Check that the quota can be
 	 * made before satisfying the allocation
+	 * MAP_NORESERVE mappings may also need pages and quota allocated
+	 * if no reserve mapping overlaps.
 	 */
-	if (!(vma->vm_flags & VM_SHARED) &&
-			!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
-		chg = 1;
+	chg = vma_needs_reservation(vma, addr);
+	if (chg < 0)
+		return ERR_PTR(chg);
+	if (chg)
 		if (hugetlb_get_quota(inode->i_mapping, chg))
 			return ERR_PTR(-ENOSPC);
-	}
 
 	spin_lock(&hugetlb_lock);
 	page = dequeue_huge_page_vma(vma, addr, avoid_reserve);
@@ -755,6 +798,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	set_page_refcounted(page);
 	set_page_private(page, (unsigned long) mapping);
 
+	vma_commit_reservation(vma, addr);
+
 	return page;
 }
 
@@ -1560,6 +1605,9 @@ int hugetlb_reserve_pages(struct inode *inode,
 {
 	long ret, chg;
 
+	if (vma && vma->vm_flags & VM_NORESERVE)
+		return 0;
+
 	/*
 	 * Shared mappings base their reservation on the number of pages that
 	 * are already allocated on behalf of the file. Private mappings need
-- 
GitLab


From 84afd99b8398c9d73af8238aa3cd835858e3097a Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:32 -0700
Subject: [PATCH 155/853] hugetlb reservations: fix hugetlb MAP_PRIVATE
 reservations across vma splits

When a hugetlb mapping with a reservation is split, a new VMA is cloned
from the original.  This new VMA is a direct copy of the original
including the reservation count.  When this pair of VMAs are unmapped we
will incorrectly double account the unused reservation and the overall
reservation count will be incorrect, in extreme cases it will wrap.

The problem occurs when we split an existing VMA say to unmap a page in
the middle.  split_vma() will create a new VMA copying all fields from the
original.  As we are storing our reservation count in vm_private_data this
is also copied, endowing the new VMA with a duplicate of the original
VMA's reservation.  Neither of the new VMAs can exhaust these reservations
as they are too small, but when we unmap and close these VMAs we will
incorrectly credit the remainder twice and resv_huge_pages will become out
of sync.  This can lead to allocation failures on mappings with
reservations and even to resv_huge_pages wrapping which prevents all
subsequent hugepage allocations.

The simple fix would be to correctly apportion the remaining reservation
count when the split is made.  However the only hook we have, vm_ops->open,
only has the new VMA; we do not know the identity of the preceding VMA.
Also even if we did have that VMA to hand we do not know how much of the
reservation was consumed each side of the split.

This patch therefore takes a different tack.  We know that the whole of
any private mapping (which has a reservation) has a reservation over its
whole size.  Any present pages represent consumed reservation.  Therefore
if we track the instantiated pages we can calculate the remaining
reservation.

This patch reuses the existing regions code to track the regions for which
we have consumed reservation (ie.  the instantiated pages), as each page
is faulted in we record the consumption of reservation for the new page.
When we need to return unused reservations at unmap time we simply count
the consumed reservation region subtracting that from the whole of the
map.  During a VMA split the newly opened VMA will point to the same
region map, as this map is offset oriented it remains valid for both of
the split VMAs.  This map is referenced counted so that it is removed when
all VMAs which are part of the mmap are gone.

Thanks to Adam Litke and Mel Gorman for their review feedback.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Jon Tollefson <kniht@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 172 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 145 insertions(+), 27 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 72acbb29d2c..65616941a38 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -43,6 +43,16 @@ static DEFINE_SPINLOCK(hugetlb_lock);
 /*
  * Region tracking -- allows tracking of reservations and instantiated pages
  *                    across the pages in a mapping.
+ *
+ * The region data structures are protected by a combination of the mmap_sem
+ * and the hugetlb_instantion_mutex.  To access or modify a region the caller
+ * must either hold the mmap_sem for write, or the mmap_sem for read and
+ * the hugetlb_instantiation mutex:
+ *
+ * 	down_write(&mm->mmap_sem);
+ * or
+ * 	down_read(&mm->mmap_sem);
+ * 	mutex_lock(&hugetlb_instantiation_mutex);
  */
 struct file_region {
 	struct list_head link;
@@ -165,6 +175,30 @@ static long region_truncate(struct list_head *head, long end)
 	return chg;
 }
 
+static long region_count(struct list_head *head, long f, long t)
+{
+	struct file_region *rg;
+	long chg = 0;
+
+	/* Locate each segment we overlap with, and count that overlap. */
+	list_for_each_entry(rg, head, link) {
+		int seg_from;
+		int seg_to;
+
+		if (rg->to <= f)
+			continue;
+		if (rg->from >= t)
+			break;
+
+		seg_from = max(rg->from, f);
+		seg_to = min(rg->to, t);
+
+		chg += seg_to - seg_from;
+	}
+
+	return chg;
+}
+
 /*
  * Convert the address within this vma to the page offset within
  * the mapping, in base page units.
@@ -187,9 +221,15 @@ static pgoff_t vma_pagecache_offset(struct vm_area_struct *vma,
 			(vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
 }
 
-#define HPAGE_RESV_OWNER    (1UL << (BITS_PER_LONG - 1))
-#define HPAGE_RESV_UNMAPPED (1UL << (BITS_PER_LONG - 2))
+/*
+ * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
+ * bits of the reservation map pointer, which are always clear due to
+ * alignment.
+ */
+#define HPAGE_RESV_OWNER    (1UL << 0)
+#define HPAGE_RESV_UNMAPPED (1UL << 1)
 #define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED)
+
 /*
  * These helpers are used to track how many pages are reserved for
  * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
@@ -199,6 +239,15 @@ static pgoff_t vma_pagecache_offset(struct vm_area_struct *vma,
  * the reserve counters are updated with the hugetlb_lock held. It is safe
  * to reset the VMA at fork() time as it is not in use yet and there is no
  * chance of the global counters getting corrupted as a result of the values.
+ *
+ * The private mapping reservation is represented in a subtly different
+ * manner to a shared mapping.  A shared mapping has a region map associated
+ * with the underlying file, this region map represents the backing file
+ * pages which have ever had a reservation assigned which this persists even
+ * after the page is instantiated.  A private mapping has a region map
+ * associated with the original mmap which is attached to all VMAs which
+ * reference it, this region map represents those offsets which have consumed
+ * reservation ie. where pages have been instantiated.
  */
 static unsigned long get_vma_private_data(struct vm_area_struct *vma)
 {
@@ -211,22 +260,48 @@ static void set_vma_private_data(struct vm_area_struct *vma,
 	vma->vm_private_data = (void *)value;
 }
 
-static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma)
+struct resv_map {
+	struct kref refs;
+	struct list_head regions;
+};
+
+struct resv_map *resv_map_alloc(void)
+{
+	struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
+	if (!resv_map)
+		return NULL;
+
+	kref_init(&resv_map->refs);
+	INIT_LIST_HEAD(&resv_map->regions);
+
+	return resv_map;
+}
+
+void resv_map_release(struct kref *ref)
+{
+	struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
+
+	/* Clear out any active regions before we release the map. */
+	region_truncate(&resv_map->regions, 0);
+	kfree(resv_map);
+}
+
+static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
 	if (!(vma->vm_flags & VM_SHARED))
-		return get_vma_private_data(vma) & ~HPAGE_RESV_MASK;
+		return (struct resv_map *)(get_vma_private_data(vma) &
+							~HPAGE_RESV_MASK);
 	return 0;
 }
 
-static void set_vma_resv_huge_pages(struct vm_area_struct *vma,
-							unsigned long reserve)
+static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
 	VM_BUG_ON(vma->vm_flags & VM_SHARED);
 
-	set_vma_private_data(vma,
-		(get_vma_private_data(vma) & HPAGE_RESV_MASK) | reserve);
+	set_vma_private_data(vma, (get_vma_private_data(vma) &
+				HPAGE_RESV_MASK) | (unsigned long)map);
 }
 
 static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
@@ -253,19 +328,12 @@ static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
 	if (vma->vm_flags & VM_SHARED) {
 		/* Shared mappings always use reserves */
 		resv_huge_pages--;
-	} else {
+	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
 		/*
 		 * Only the process that called mmap() has reserves for
 		 * private mappings.
 		 */
-		if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
-			unsigned long flags, reserve;
-			resv_huge_pages--;
-			flags = (unsigned long)vma->vm_private_data &
-							HPAGE_RESV_MASK;
-			reserve = (unsigned long)vma->vm_private_data - 1;
-			vma->vm_private_data = (void *)(reserve | flags);
-		}
+		resv_huge_pages--;
 	}
 }
 
@@ -282,7 +350,7 @@ static int vma_has_private_reserves(struct vm_area_struct *vma)
 {
 	if (vma->vm_flags & VM_SHARED)
 		return 0;
-	if (!vma_resv_huge_pages(vma))
+	if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER))
 		return 0;
 	return 1;
 }
@@ -742,12 +810,19 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
 		return region_chg(&inode->i_mapping->private_list,
 							idx, idx + 1);
 
-	} else {
-		if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER))
-			return 1;
-	}
+	} else if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+		return 1;
 
-	return 0;
+	} else  {
+		int err;
+		pgoff_t idx = vma_pagecache_offset(vma, addr);
+		struct resv_map *reservations = vma_resv_map(vma);
+
+		err = region_chg(&reservations->regions, idx, idx + 1);
+		if (err < 0)
+			return err;
+		return 0;
+	}
 }
 static void vma_commit_reservation(struct vm_area_struct *vma,
 							unsigned long addr)
@@ -758,6 +833,13 @@ static void vma_commit_reservation(struct vm_area_struct *vma,
 	if (vma->vm_flags & VM_SHARED) {
 		pgoff_t idx = vma_pagecache_offset(vma, addr);
 		region_add(&inode->i_mapping->private_list, idx, idx + 1);
+
+	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+		pgoff_t idx = vma_pagecache_offset(vma, addr);
+		struct resv_map *reservations = vma_resv_map(vma);
+
+		/* Mark this page used in the map. */
+		region_add(&reservations->regions, idx, idx + 1);
 	}
 }
 
@@ -1047,11 +1129,41 @@ out:
 	return ret;
 }
 
+static void hugetlb_vm_op_open(struct vm_area_struct *vma)
+{
+	struct resv_map *reservations = vma_resv_map(vma);
+
+	/*
+	 * This new VMA should share its siblings reservation map if present.
+	 * The VMA will only ever have a valid reservation map pointer where
+	 * it is being copied for another still existing VMA.  As that VMA
+	 * has a reference to the reservation map it cannot dissappear until
+	 * after this open call completes.  It is therefore safe to take a
+	 * new reference here without additional locking.
+	 */
+	if (reservations)
+		kref_get(&reservations->refs);
+}
+
 static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 {
-	unsigned long reserve = vma_resv_huge_pages(vma);
-	if (reserve)
-		hugetlb_acct_memory(-reserve);
+	struct resv_map *reservations = vma_resv_map(vma);
+	unsigned long reserve;
+	unsigned long start;
+	unsigned long end;
+
+	if (reservations) {
+		start = vma_pagecache_offset(vma, vma->vm_start);
+		end = vma_pagecache_offset(vma, vma->vm_end);
+
+		reserve = (end - start) -
+			region_count(&reservations->regions, start, end);
+
+		kref_put(&reservations->refs, resv_map_release);
+
+		if (reserve)
+			hugetlb_acct_memory(-reserve);
+	}
 }
 
 /*
@@ -1068,6 +1180,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 struct vm_operations_struct hugetlb_vm_ops = {
 	.fault = hugetlb_vm_op_fault,
+	.open = hugetlb_vm_op_open,
 	.close = hugetlb_vm_op_close,
 };
 
@@ -1617,8 +1730,13 @@ int hugetlb_reserve_pages(struct inode *inode,
 	if (!vma || vma->vm_flags & VM_SHARED)
 		chg = region_chg(&inode->i_mapping->private_list, from, to);
 	else {
+		struct resv_map *resv_map = resv_map_alloc();
+		if (!resv_map)
+			return -ENOMEM;
+
 		chg = to - from;
-		set_vma_resv_huge_pages(vma, chg);
+
+		set_vma_resv_map(vma, resv_map);
 		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
 	}
 
-- 
GitLab


From a858f7b2e9bb4eb665176dde5cf32eeaaf90f153 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:27:33 -0700
Subject: [PATCH 156/853] vma_page_offset() has no callers: drop it

Hugh adds: vma_pagecache_offset() has a dangerously misleading name, since
it's using hugepage units: rename it to vma_hugecache_offset().

[apw@shadowen.org: restack onto fixed MAP_PRIVATE reservations]
[akpm@linux-foundation.org: vma_split conversion]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 65616941a38..eda9642254a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -199,22 +199,11 @@ static long region_count(struct list_head *head, long f, long t)
 	return chg;
 }
 
-/*
- * Convert the address within this vma to the page offset within
- * the mapping, in base page units.
- */
-static pgoff_t vma_page_offset(struct vm_area_struct *vma,
-				unsigned long address)
-{
-	return ((address - vma->vm_start) >> PAGE_SHIFT) +
-					(vma->vm_pgoff >> PAGE_SHIFT);
-}
-
 /*
  * Convert the address within this vma to the page offset within
  * the mapping, in pagecache page units; huge pages here.
  */
-static pgoff_t vma_pagecache_offset(struct vm_area_struct *vma,
+static pgoff_t vma_hugecache_offset(struct vm_area_struct *vma,
 					unsigned long address)
 {
 	return ((address - vma->vm_start) >> HPAGE_SHIFT) +
@@ -806,7 +795,7 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
 	struct inode *inode = mapping->host;
 
 	if (vma->vm_flags & VM_SHARED) {
-		pgoff_t idx = vma_pagecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(vma, addr);
 		return region_chg(&inode->i_mapping->private_list,
 							idx, idx + 1);
 
@@ -815,7 +804,7 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
 
 	} else  {
 		int err;
-		pgoff_t idx = vma_pagecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(vma, addr);
 		struct resv_map *reservations = vma_resv_map(vma);
 
 		err = region_chg(&reservations->regions, idx, idx + 1);
@@ -831,11 +820,11 @@ static void vma_commit_reservation(struct vm_area_struct *vma,
 	struct inode *inode = mapping->host;
 
 	if (vma->vm_flags & VM_SHARED) {
-		pgoff_t idx = vma_pagecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(vma, addr);
 		region_add(&inode->i_mapping->private_list, idx, idx + 1);
 
 	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
-		pgoff_t idx = vma_pagecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(vma, addr);
 		struct resv_map *reservations = vma_resv_map(vma);
 
 		/* Mark this page used in the map. */
@@ -1153,8 +1142,8 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 	unsigned long end;
 
 	if (reservations) {
-		start = vma_pagecache_offset(vma, vma->vm_start);
-		end = vma_pagecache_offset(vma, vma->vm_end);
+		start = vma_hugecache_offset(vma, vma->vm_start);
+		end = vma_hugecache_offset(vma, vma->vm_end);
 
 		reserve = (end - start) -
 			region_count(&reservations->regions, start, end);
@@ -1471,7 +1460,7 @@ static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
 	pgoff_t idx;
 
 	mapping = vma->vm_file->f_mapping;
-	idx = vma_pagecache_offset(vma, address);
+	idx = vma_hugecache_offset(vma, address);
 
 	return find_lock_page(mapping, idx);
 }
@@ -1499,7 +1488,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	mapping = vma->vm_file->f_mapping;
-	idx = vma_pagecache_offset(vma, address);
+	idx = vma_hugecache_offset(vma, address);
 
 	/*
 	 * Use page lock to guard against racing truncation
-- 
GitLab


From 11fa977ecde652ab324dd79c179deb52e82a8df1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 23 Jul 2008 21:27:34 -0700
Subject: [PATCH 157/853] generic_file_aio_read() cleanups

As akpm points out, there's really no need for generic_file_aio_read to
make a special case of count 0: just loop through nr_segs doing nothing.
And as Harvey Harrison points out, there's no need to reset retval to 0
where it's already 0.

Setting count (or ocount) to 0 before calling generic_segment_checks is
unnecessary too; but reluctantly I'll leave that removal to someone with a
wider range of gcc versions to hand - 4.1.2 and 4.2.1 don't warn about it,
but perhaps others do - I forget which are the warniest versions.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Tested-by: Lawrence Greenfield <leg@google.com>
Cc: Christoph Rohland <hans-christoph.rohland@sap.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c | 42 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 6343f3c841b..7675b91f4f6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1197,7 +1197,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 
 		mapping = filp->f_mapping;
 		inode = mapping->host;
-		retval = 0;
 		if (!count)
 			goto out; /* skip atime */
 		size = i_size_read(inode);
@@ -1209,33 +1208,30 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 			}
 			if (retval > 0)
 				*ppos = pos + retval;
-		}
-		if (likely(retval != 0)) {
-			file_accessed(filp);
-			goto out;
+			if (retval) {
+				file_accessed(filp);
+				goto out;
+			}
 		}
 	}
 
-	retval = 0;
-	if (count) {
-		for (seg = 0; seg < nr_segs; seg++) {
-			read_descriptor_t desc;
+	for (seg = 0; seg < nr_segs; seg++) {
+		read_descriptor_t desc;
 
-			desc.written = 0;
-			desc.arg.buf = iov[seg].iov_base;
-			desc.count = iov[seg].iov_len;
-			if (desc.count == 0)
-				continue;
-			desc.error = 0;
-			do_generic_file_read(filp,ppos,&desc,file_read_actor);
-			retval += desc.written;
-			if (desc.error) {
-				retval = retval ?: desc.error;
-				break;
-			}
-			if (desc.count > 0)
-				break;
+		desc.written = 0;
+		desc.arg.buf = iov[seg].iov_base;
+		desc.count = iov[seg].iov_len;
+		if (desc.count == 0)
+			continue;
+		desc.error = 0;
+		do_generic_file_read(filp, ppos, &desc, file_read_actor);
+		retval += desc.written;
+		if (desc.error) {
+			retval = retval ?: desc.error;
+			break;
 		}
+		if (desc.count > 0)
+			break;
 	}
 out:
 	return retval;
-- 
GitLab


From bcd78e49613c41b5bed96fa288e983876f286a59 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 23 Jul 2008 21:27:35 -0700
Subject: [PATCH 158/853] tmpfs: support aio

We have a request for tmpfs to support the AIO interface: easily done, no
more than replacing the old shmem_file_read by shmem_file_aio_read,
cribbed from generic_file_aio_read.  (In 2.6.25 its write side was already
changed to use generic_file_aio_write.)

Incorporate cleanups from Andrew Morton and Harvey Harrison.

Tests out fine with LTP's ltp-aiodio.sh, given hacks (not included) to
support O_DIRECT.  tmpfs cannot honestly support O_DIRECT: its
cache-avoiding-IO nature is at odds with direct IO-avoiding-cache.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Tested-by: Lawrence Greenfield <leg@google.com>
Cc: Christoph Rohland <hans-christoph.rohland@sap.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 55 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index e2a6ae1a44e..9ffbea9b79e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1690,26 +1690,38 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
 	file_accessed(filp);
 }
 
-static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
-	read_descriptor_t desc;
-
-	if ((ssize_t) count < 0)
-		return -EINVAL;
-	if (!access_ok(VERIFY_WRITE, buf, count))
-		return -EFAULT;
-	if (!count)
-		return 0;
-
-	desc.written = 0;
-	desc.count = count;
-	desc.arg.buf = buf;
-	desc.error = 0;
-
-	do_shmem_file_read(filp, ppos, &desc, file_read_actor);
-	if (desc.written)
-		return desc.written;
-	return desc.error;
+static ssize_t shmem_file_aio_read(struct kiocb *iocb,
+		const struct iovec *iov, unsigned long nr_segs, loff_t pos)
+{
+	struct file *filp = iocb->ki_filp;
+	ssize_t retval;
+	unsigned long seg;
+	size_t count;
+	loff_t *ppos = &iocb->ki_pos;
+
+	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
+	if (retval)
+		return retval;
+
+	for (seg = 0; seg < nr_segs; seg++) {
+		read_descriptor_t desc;
+
+		desc.written = 0;
+		desc.arg.buf = iov[seg].iov_base;
+		desc.count = iov[seg].iov_len;
+		if (desc.count == 0)
+			continue;
+		desc.error = 0;
+		do_shmem_file_read(filp, ppos, &desc, file_read_actor);
+		retval += desc.written;
+		if (desc.error) {
+			retval = retval ?: desc.error;
+			break;
+		}
+		if (desc.count > 0)
+			break;
+	}
+	return retval;
 }
 
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -2369,8 +2381,9 @@ static const struct file_operations shmem_file_operations = {
 	.mmap		= shmem_mmap,
 #ifdef CONFIG_TMPFS
 	.llseek		= generic_file_llseek,
-	.read		= shmem_file_read,
+	.read		= do_sync_read,
 	.write		= do_sync_write,
+	.aio_read	= shmem_file_aio_read,
 	.aio_write	= generic_file_aio_write,
 	.fsync		= simple_sync_file,
 	.splice_read	= generic_file_splice_read,
-- 
GitLab


From cce770815869e9209171d819dfde89bcc738ab62 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Wed, 23 Jul 2008 21:27:36 -0700
Subject: [PATCH 159/853] SYNC_FILE_RANGE_WRITE may and will block. Document
 that.

[akpm@linux-foundation.org: fix comment text]
Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/sync.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/sync.c b/fs/sync.c
index 228e17b5e9e..2967562d416 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -139,7 +139,8 @@ asmlinkage long sys_fdatasync(unsigned int fd)
  * before performing the write.
  *
  * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
- * range which are not presently under writeback.
+ * range which are not presently under writeback. Note that this may block for
+ * significant periods due to exhaustion of disk request structures.
  *
  * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
  * after performing the write.
-- 
GitLab


From a47a126ad5ea072aca3e611ed8f8dc6adad24bab Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 23 Jul 2008 21:27:38 -0700
Subject: [PATCH 160/853] vmallocinfo: add NUMA information

Christoph recently added /proc/vmallocinfo file to get information about
vmalloc allocations.

This patch adds NUMA specific information, giving number of pages
allocated on each memory node.

This should help to check that vmalloc() is able to respect NUMA policies.

Example of output on a four-node machine (one cpu per node)

1) network hash tables are evenly spread across four nodes (OK) (Same
   point for inodes and dentries hash tables)

2) iptables tables (x_tables) are correctly allocated on each cpu node
   (OK).

3) sys_swapon() allocates its memory from one node only.

4) each loaded module is using memory on one node.

Sysadmins could tune their setup to change points 3) and 4) if necessary.

grep "pages="  /proc/vmallocinfo
0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204/0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128
0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204/0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
0xffffc2000031a000-0xffffc2000031d000   12288 alloc_large_system_hash+0x204/0x2c0 pages=2 vmalloc N1=1 N2=1
0xffffc2000031f000-0xffffc2000032b000   49152 cramfs_uncompress_init+0x2e/0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3
0xffffc2000033e000-0xffffc20000341000   12288 sys_swapon+0x640/0xac0 pages=2 vmalloc N0=2
0xffffc20000341000-0xffffc20000344000   12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N0=2
0xffffc20000344000-0xffffc20000347000   12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N1=2
0xffffc20000347000-0xffffc2000034a000   12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N2=2
0xffffc2000034a000-0xffffc2000034d000   12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N3=2
0xffffc20004381000-0xffffc20004402000  528384 alloc_large_system_hash+0x204/0x2c0 pages=128 vmalloc N0=32 N1=32 N2=32 N3=32
0xffffc20004402000-0xffffc20004803000 4198400 alloc_large_system_hash+0x204/0x2c0 pages=1024 vmalloc vpages N0=256 N1=256 N2=256 N3=256
0xffffc20004803000-0xffffc20004904000 1052672 alloc_large_system_hash+0x204/0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
0xffffc20004904000-0xffffc20004bec000 3047424 sys_swapon+0x640/0xac0 pages=743 vmalloc vpages N0=743
0xffffffffa0000000-0xffffffffa000f000   61440 sys_init_module+0xc27/0x1d00 pages=14 vmalloc N1=14
0xffffffffa000f000-0xffffffffa0014000   20480 sys_init_module+0xc27/0x1d00 pages=4 vmalloc N0=4
0xffffffffa0014000-0xffffffffa0017000   12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N0=2
0xffffffffa0017000-0xffffffffa0022000   45056 sys_init_module+0xc27/0x1d00 pages=10 vmalloc N1=10
0xffffffffa0022000-0xffffffffa0028000   24576 sys_init_module+0xc27/0x1d00 pages=5 vmalloc N3=5
0xffffffffa0028000-0xffffffffa0050000  163840 sys_init_module+0xc27/0x1d00 pages=39 vmalloc N1=39
0xffffffffa0050000-0xffffffffa0052000    8192 sys_init_module+0xc27/0x1d00 pages=1 vmalloc N1=1
0xffffffffa0052000-0xffffffffa0056000   16384 sys_init_module+0xc27/0x1d00 pages=3 vmalloc N1=3
0xffffffffa0056000-0xffffffffa0081000  176128 sys_init_module+0xc27/0x1d00 pages=42 vmalloc N3=42
0xffffffffa0081000-0xffffffffa00ae000  184320 sys_init_module+0xc27/0x1d00 pages=44 vmalloc N3=44
0xffffffffa00ae000-0xffffffffa00b1000   12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2
0xffffffffa00b1000-0xffffffffa00b9000   32768 sys_init_module+0xc27/0x1d00 pages=7 vmalloc N0=7
0xffffffffa00b9000-0xffffffffa00c4000   45056 sys_init_module+0xc27/0x1d00 pages=10 vmalloc N3=10
0xffffffffa00c6000-0xffffffffa00e0000  106496 sys_init_module+0xc27/0x1d00 pages=25 vmalloc N2=25
0xffffffffa00e0000-0xffffffffa00f1000   69632 sys_init_module+0xc27/0x1d00 pages=16 vmalloc N2=16
0xffffffffa00f1000-0xffffffffa00f4000   12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2
0xffffffffa00f4000-0xffffffffa00f7000   12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2

[akpm@linux-foundation.org: fix comment]
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 44 ++++++++++++++++++++++++++++++
 fs/proc/proc_misc.c                | 15 ++++++++--
 mm/vmalloc.c                       | 20 ++++++++++++++
 3 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 7f268f327d7..8c6384bdfed 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -296,6 +296,7 @@ Table 1-4: Kernel info in /proc
  uptime      System uptime                                     
  version     Kernel version                                    
  video	     bttv info of video resources			(2.4)
+ vmallocinfo Show vmalloced areas
 ..............................................................................
 
 You can,  for  example,  check  which interrupts are currently in use and what
@@ -557,6 +558,49 @@ VmallocTotal: total size of vmalloc memory area
  VmallocUsed: amount of vmalloc area which is used
 VmallocChunk: largest contigious block of vmalloc area which is free
 
+..............................................................................
+
+vmallocinfo:
+
+Provides information about vmalloced/vmapped areas. One line per area,
+containing the virtual address range of the area, size in bytes,
+caller information of the creator, and optional information depending
+on the kind of area:
+
+ pages=nr    number of pages
+ phys=addr   if a physical address was specified
+ ioremap     I/O mapping (ioremap() and friends)
+ vmalloc     vmalloc() area
+ vmap        vmap()ed pages
+ user        VM_USERMAP area
+ vpages      buffer for page pointers was vmalloced (huge area)
+ N<node>=nr  (Only on NUMA kernels)
+             Number of pages allocated on memory node <node>
+
+> cat /proc/vmallocinfo
+0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ...
+  /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128
+0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ...
+  /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
+0xffffc20000302000-0xffffc20000304000    8192 acpi_tb_verify_table+0x21/0x4f...
+  phys=7fee8000 ioremap
+0xffffc20000304000-0xffffc20000307000   12288 acpi_tb_verify_table+0x21/0x4f...
+  phys=7fee7000 ioremap
+0xffffc2000031d000-0xffffc2000031f000    8192 init_vdso_vars+0x112/0x210
+0xffffc2000031f000-0xffffc2000032b000   49152 cramfs_uncompress_init+0x2e ...
+  /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3
+0xffffc2000033a000-0xffffc2000033d000   12288 sys_swapon+0x640/0xac0      ...
+  pages=2 vmalloc N1=2
+0xffffc20000347000-0xffffc2000034c000   20480 xt_alloc_table_info+0xfe ...
+  /0x130 [x_tables] pages=4 vmalloc N0=4
+0xffffffffa0000000-0xffffffffa000f000   61440 sys_init_module+0xc27/0x1d00 ...
+   pages=14 vmalloc N2=14
+0xffffffffa000f000-0xffffffffa0014000   20480 sys_init_module+0xc27/0x1d00 ...
+   pages=4 vmalloc N1=4
+0xffffffffa0014000-0xffffffffa0017000   12288 sys_init_module+0xc27/0x1d00 ...
+   pages=2 vmalloc N1=2
+0xffffffffa0017000-0xffffffffa0022000   45056 sys_init_module+0xc27/0x1d00 ...
+   pages=10 vmalloc N0=10
 
 1.3 IDE devices in /proc/ide
 ----------------------------
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index b14f43d25e9..ded96986296 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -464,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = {
 #ifdef CONFIG_MMU
 static int vmalloc_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &vmalloc_op);
+	unsigned int *ptr = NULL;
+	int ret;
+
+	if (NUMA_BUILD)
+		ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
+	ret = seq_open(file, &vmalloc_op);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = ptr;
+	} else
+		kfree(ptr);
+	return ret;
 }
 
 static const struct file_operations proc_vmalloc_operations = {
 	.open		= vmalloc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release,
+	.release	= seq_release_private,
 };
 #endif
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6e45b0f3d12..35f29381629 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -931,6 +931,25 @@ static void s_stop(struct seq_file *m, void *p)
 	read_unlock(&vmlist_lock);
 }
 
+static void show_numa_info(struct seq_file *m, struct vm_struct *v)
+{
+	if (NUMA_BUILD) {
+		unsigned int nr, *counters = m->private;
+
+		if (!counters)
+			return;
+
+		memset(counters, 0, nr_node_ids * sizeof(unsigned int));
+
+		for (nr = 0; nr < v->nr_pages; nr++)
+			counters[page_to_nid(v->pages[nr])]++;
+
+		for_each_node_state(nr, N_HIGH_MEMORY)
+			if (counters[nr])
+				seq_printf(m, " N%u=%u", nr, counters[nr]);
+	}
+}
+
 static int s_show(struct seq_file *m, void *p)
 {
 	struct vm_struct *v = p;
@@ -967,6 +986,7 @@ static int s_show(struct seq_file *m, void *p)
 	if (v->flags & VM_VPAGES)
 		seq_printf(m, " vpages");
 
+	show_numa_info(m, v);
 	seq_putc(m, '\n');
 	return 0;
 }
-- 
GitLab


From 5e9426abe209cf134adbbd62c5e73ef534eb73e9 Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 23 Jul 2008 21:27:39 -0700
Subject: [PATCH 161/853] mm: remove mm_init compilation dependency on
 CONFIG_DEBUG_MEMORY_INIT

Towards the end of putting all core mm initialization in mm_init.c, I
plan on putting the creation of a mm kobject in a function in that file.
However, the file is currently only compiled if CONFIG_DEBUG_MEMORY_INIT
is set. Remove this dependency, but put the code under an #ifdef on the
same config option. This should result in no functional changes.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/Makefile  | 3 +--
 mm/mm_init.c | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/Makefile b/mm/Makefile
index 4bbc8f094ff..06ca2381fef 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o $(mmu-y)
+			   page_isolation.o mm_init.o $(mmu-y)
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
@@ -26,7 +26,6 @@ obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_SLAB) += slab.o
-obj-$(CONFIG_DEBUG_MEMORY_INIT) += mm_init.o
 obj-$(CONFIG_SLUB) += slub.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff --git a/mm/mm_init.c b/mm/mm_init.c
index ce445ca097e..eaf0d3b4709 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include "internal.h"
 
+#ifdef CONFIG_DEBUG_MEMORY_INIT
 int __meminitdata mminit_loglevel;
 
 /* The zonelists are simply reported, validation is manual. */
@@ -132,3 +133,4 @@ static __init int set_mminit_loglevel(char *str)
 	return 0;
 }
 early_param("mminit_loglevel", set_mminit_loglevel);
+#endif /* CONFIG_DEBUG_MEMORY_INIT */
-- 
GitLab


From ff7ea79cf7c3a481851bd4b2185fdeb6ce4afa29 Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 23 Jul 2008 21:27:39 -0700
Subject: [PATCH 162/853] mm: create /sys/kernel/mm

Add a kobject to create /sys/kernel/mm when sysfs is mounted.  The kobject
will exist regardless.  This will allow for the hugepage related sysfs
directories to exist under the mm "subsystem" directory.  Add an ABI file
appropriately.

[kosaki.motohiro@jp.fujitsu.com: fix build]
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-kernel-mm |  6 ++++++
 include/linux/kobject.h                   |  2 ++
 mm/mm_init.c                              | 16 ++++++++++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm b/Documentation/ABI/testing/sysfs-kernel-mm
new file mode 100644
index 00000000000..190d523ac15
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm
@@ -0,0 +1,6 @@
+What:		/sys/kernel/mm
+Date:		July 2008
+Contact:	Nishanth Aravamudan <nacc@us.ibm.com>, VM maintainers
+Description:
+		/sys/kernel/mm/ should contain any and all VM
+		related information in /sys/kernel/.
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 60f0d418ae3..5437ac0276e 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -186,6 +186,8 @@ extern struct kobject *kset_find_obj(struct kset *, const char *);
 
 /* The global /sys/kernel/ kobject for people to chain off of */
 extern struct kobject *kernel_kobj;
+/* The global /sys/kernel/mm/ kobject for people to chain off of */
+extern struct kobject *mm_kobj;
 /* The global /sys/hypervisor/ kobject for people to chain off of */
 extern struct kobject *hypervisor_kobj;
 /* The global /sys/power/ kobject for people to chain off of */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index eaf0d3b4709..c6af41ea999 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -7,6 +7,8 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
@@ -134,3 +136,17 @@ static __init int set_mminit_loglevel(char *str)
 }
 early_param("mminit_loglevel", set_mminit_loglevel);
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
+
+struct kobject *mm_kobj;
+EXPORT_SYMBOL_GPL(mm_kobj);
+
+static int __init mm_sysfs_init(void)
+{
+	mm_kobj = kobject_create_and_add("mm", kernel_kobj);
+	if (!mm_kobj)
+		return -ENOMEM;
+
+	return 0;
+}
+
+__initcall(mm_sysfs_init);
-- 
GitLab


From b7ba30c679ed1eb7ed3ed8f281f6493282042bd4 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:40 -0700
Subject: [PATCH 163/853] hugetlb: factor out prep_new_huge_page

Needed to avoid code duplication in follow up patches.

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index eda9642254a..32dff4290c6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -513,6 +513,16 @@ static int adjust_pool_surplus(int delta)
 	return ret;
 }
 
+static void prep_new_huge_page(struct page *page, int nid)
+{
+	set_compound_page_dtor(page, free_huge_page);
+	spin_lock(&hugetlb_lock);
+	nr_huge_pages++;
+	nr_huge_pages_node[nid]++;
+	spin_unlock(&hugetlb_lock);
+	put_page(page); /* free it into the hugepage allocator */
+}
+
 static struct page *alloc_fresh_huge_page_node(int nid)
 {
 	struct page *page;
@@ -526,12 +536,7 @@ static struct page *alloc_fresh_huge_page_node(int nid)
 			__free_pages(page, HUGETLB_PAGE_ORDER);
 			return NULL;
 		}
-		set_compound_page_dtor(page, free_huge_page);
-		spin_lock(&hugetlb_lock);
-		nr_huge_pages++;
-		nr_huge_pages_node[nid]++;
-		spin_unlock(&hugetlb_lock);
-		put_page(page); /* free it into the hugepage allocator */
+		prep_new_huge_page(page, nid);
 	}
 
 	return page;
-- 
GitLab


From a5516438959d90b071ff0a484ce4f3f523dc3152 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:41 -0700
Subject: [PATCH 164/853] hugetlb: modular state for hugetlb page size

The goal of this patchset is to support multiple hugetlb page sizes.  This
is achieved by introducing a new struct hstate structure, which
encapsulates the important hugetlb state and constants (eg.  huge page
size, number of huge pages currently allocated, etc).

The hstate structure is then passed to the code which requires these
fields, so that it does the right thing regardless of the exact hstate it
is operating on.

This patch adds the hstate structure, with a single global instance of it
(default_hstate), and does the basic work of converting hugetlb to use the
hstate.

Future patches will add more hstate structures to allow for different
hugetlbfs mounts to have different page sizes.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/hugetlbpage.c    |   7 +-
 arch/powerpc/mm/hugetlbpage.c |   3 +-
 arch/s390/mm/hugetlbpage.c    |   3 +-
 arch/sh/mm/hugetlbpage.c      |   3 +-
 arch/sparc64/mm/hugetlbpage.c |   5 +-
 arch/x86/mm/hugetlbpage.c     |   5 +-
 fs/hugetlbfs/inode.c          |  52 +++--
 include/asm-ia64/hugetlb.h    |   3 +-
 include/asm-powerpc/hugetlb.h |   3 +-
 include/asm-s390/hugetlb.h    |   3 +-
 include/asm-sh/hugetlb.h      |   3 +-
 include/asm-sparc/hugetlb.h   |   3 +-
 include/asm-x86/hugetlb.h     |   8 +-
 include/linux/hugetlb.h       |  88 +++++++-
 ipc/shm.c                     |   3 +-
 mm/hugetlb.c                  | 368 +++++++++++++++++++---------------
 mm/memory.c                   |   2 +-
 mm/mempolicy.c                |   9 +-
 mm/mmap.c                     |   3 +-
 19 files changed, 356 insertions(+), 218 deletions(-)

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index cd49e2860ee..6170f097d25 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
 unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
 
 pte_t *
-huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
+huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
 	unsigned long taddr = htlbpage_to_page(addr);
 	pgd_t *pgd;
@@ -75,7 +75,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
  * Don't actually need to do any preparation, but need to make sure
  * the address is in the right region.
  */
-int prepare_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
@@ -149,7 +150,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
 
 	/* Handle MAP_FIXED */
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a96cc891cf..c94dc71af98 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -128,7 +128,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return NULL;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pg;
 	pud_t *pu;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f4b6124fdb7..9162dc84f77 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -72,7 +72,8 @@ void arch_release_hugepage(struct page *page)
 	page[1].index = 0;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgdp;
 	pud_t *pudp;
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index ae8c321d6e2..2f9dbe0ef4a 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -22,7 +22,8 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index ebefd2a1437..1307b23f6a7 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -175,7 +175,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
@@ -195,7 +195,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 				pgoff, flags);
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 0b3d567e686..52476fde899 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 1;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -368,7 +369,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 428eff5b73f..516c581b537 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -80,6 +80,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	struct inode *inode = file->f_path.dentry->d_inode;
 	loff_t len, vma_len;
 	int ret;
+	struct hstate *h = hstate_file(file);
 
 	/*
 	 * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +93,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
 
-	if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
+	if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -104,8 +105,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
 	if (hugetlb_reserve_pages(inode,
-				vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
-				len >> HPAGE_SHIFT, vma))
+				vma->vm_pgoff >> huge_page_order(h),
+				len >> huge_page_shift(h), vma))
 		goto out;
 
 	ret = 0;
@@ -130,20 +131,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
+	struct hstate *h = hstate_file(file);
 
-	if (len & ~HPAGE_MASK)
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
 
 	if (addr) {
-		addr = ALIGN(addr, HPAGE_SIZE);
+		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
 		    (!vma || addr + len <= vma->vm_start))
@@ -156,7 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		start_addr = TASK_UNMAPPED_BASE;
 
 full_search:
-	addr = ALIGN(start_addr, HPAGE_SIZE);
+	addr = ALIGN(start_addr, huge_page_size(h));
 
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 		/* At this point:  (!vma || addr < vma->vm_end). */
@@ -174,7 +176,7 @@ full_search:
 
 		if (!vma || addr + len <= vma->vm_start)
 			return addr;
-		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+		addr = ALIGN(vma->vm_end, huge_page_size(h));
 	}
 }
 #endif
@@ -225,10 +227,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
 static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 			      size_t len, loff_t *ppos)
 {
+	struct hstate *h = hstate_file(filp);
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
-	unsigned long index = *ppos >> HPAGE_SHIFT;
-	unsigned long offset = *ppos & ~HPAGE_MASK;
+	unsigned long index = *ppos >> huge_page_shift(h);
+	unsigned long offset = *ppos & ~huge_page_mask(h);
 	unsigned long end_index;
 	loff_t isize;
 	ssize_t retval = 0;
@@ -243,17 +246,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 	if (!isize)
 		goto out;
 
-	end_index = (isize - 1) >> HPAGE_SHIFT;
+	end_index = (isize - 1) >> huge_page_shift(h);
 	for (;;) {
 		struct page *page;
-		int nr, ret;
+		unsigned long nr, ret;
 
 		/* nr is the maximum number of bytes to copy from this page */
-		nr = HPAGE_SIZE;
+		nr = huge_page_size(h);
 		if (index >= end_index) {
 			if (index > end_index)
 				goto out;
-			nr = ((isize - 1) & ~HPAGE_MASK) + 1;
+			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
 			if (nr <= offset) {
 				goto out;
 			}
@@ -287,8 +290,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 		offset += ret;
 		retval += ret;
 		len -= ret;
-		index += offset >> HPAGE_SHIFT;
-		offset &= ~HPAGE_MASK;
+		index += offset >> huge_page_shift(h);
+		offset &= ~huge_page_mask(h);
 
 		if (page)
 			page_cache_release(page);
@@ -298,7 +301,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 			break;
 	}
 out:
-	*ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
+	*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
 	mutex_unlock(&inode->i_mutex);
 	return retval;
 }
@@ -339,8 +342,9 @@ static void truncate_huge_page(struct page *page)
 
 static void truncate_hugepages(struct inode *inode, loff_t lstart)
 {
+	struct hstate *h = hstate_inode(inode);
 	struct address_space *mapping = &inode->i_data;
-	const pgoff_t start = lstart >> HPAGE_SHIFT;
+	const pgoff_t start = lstart >> huge_page_shift(h);
 	struct pagevec pvec;
 	pgoff_t next;
 	int i, freed = 0;
@@ -449,8 +453,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	pgoff_t pgoff;
 	struct address_space *mapping = inode->i_mapping;
+	struct hstate *h = hstate_inode(inode);
 
-	BUG_ON(offset & ~HPAGE_MASK);
+	BUG_ON(offset & ~huge_page_mask(h));
 	pgoff = offset >> PAGE_SHIFT;
 
 	i_size_write(inode, offset);
@@ -465,6 +470,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
+	struct hstate *h = hstate_inode(inode);
 	int error;
 	unsigned int ia_valid = attr->ia_valid;
 
@@ -476,7 +482,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	if (ia_valid & ATTR_SIZE) {
 		error = -EINVAL;
-		if (!(attr->ia_size & ~HPAGE_MASK))
+		if (!(attr->ia_size & ~huge_page_mask(h)))
 			error = hugetlb_vmtruncate(inode, attr->ia_size);
 		if (error)
 			goto out;
@@ -610,9 +616,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
+	struct hstate *h = hstate_inode(dentry->d_inode);
 
 	buf->f_type = HUGETLBFS_MAGIC;
-	buf->f_bsize = HPAGE_SIZE;
+	buf->f_bsize = huge_page_size(h);
 	if (sbinfo) {
 		spin_lock(&sbinfo->stat_lock);
 		/* If no limits set, just report 0 for max/free/used
@@ -942,7 +949,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 		goto out_dentry;
 
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL))
+	if (hugetlb_reserve_pages(inode, 0,
+			size >> huge_page_shift(hstate_inode(inode)), NULL))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
diff --git a/include/asm-ia64/hugetlb.h b/include/asm-ia64/hugetlb.h
index e9d1e5e2382..da55c63728e 100644
--- a/include/asm-ia64/hugetlb.h
+++ b/include/asm-ia64/hugetlb.h
@@ -8,7 +8,8 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 
-int prepare_hugepage_range(unsigned long addr, unsigned long len);
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len);
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index 0a37aa5ecaa..ca37c4af27b 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -21,7 +21,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-s390/hugetlb.h b/include/asm-s390/hugetlb.h
index 600a776f8f7..670a1d1745d 100644
--- a/include/asm-s390/hugetlb.h
+++ b/include/asm-s390/hugetlb.h
@@ -22,7 +22,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h
index fb30018938c..967068fb79a 100644
--- a/include/asm-sh/hugetlb.h
+++ b/include/asm-sh/hugetlb.h
@@ -14,7 +14,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-sparc/hugetlb.h b/include/asm-sparc/hugetlb.h
index aeb92374ca3..177061064ee 100644
--- a/include/asm-sparc/hugetlb.h
+++ b/include/asm-sparc/hugetlb.h
@@ -22,7 +22,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 7eed6e0883b..439a9acc132 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -14,11 +14,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index abbc187193a..ad2271e11f9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -8,7 +8,6 @@
 #include <linux/mempolicy.h>
 #include <linux/shm.h>
 #include <asm/tlbflush.h>
-#include <asm/hugetlb.h>
 
 struct ctl_table;
 
@@ -45,7 +44,8 @@ extern int sysctl_hugetlb_shm_group;
 
 /* arch callbacks */
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz);
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -80,7 +80,7 @@ static inline unsigned long hugetlb_total_pages(void)
 #define hugetlb_report_meminfo(buf)		0
 #define hugetlb_report_node_meminfo(n, buf)	0
 #define follow_huge_pmd(mm, addr, pmd, write)	NULL
-#define prepare_hugepage_range(addr,len)	(-EINVAL)
+#define prepare_hugepage_range(file, addr, len)	(-EINVAL)
 #define pmd_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
@@ -134,8 +134,6 @@ struct file *hugetlb_file_setup(const char *name, size_t);
 int hugetlb_get_quota(struct address_space *mapping, long delta);
 void hugetlb_put_quota(struct address_space *mapping, long delta);
 
-#define BLOCKS_PER_HUGEPAGE	(HPAGE_SIZE / 512)
-
 static inline int is_file_hugepages(struct file *file)
 {
 	if (file->f_op == &hugetlbfs_file_operations)
@@ -164,4 +162,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long flags);
 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
+#ifdef CONFIG_HUGETLB_PAGE
+
+/* Defines one hugetlb page size */
+struct hstate {
+	int hugetlb_next_nid;
+	unsigned int order;
+	unsigned long mask;
+	unsigned long max_huge_pages;
+	unsigned long nr_huge_pages;
+	unsigned long free_huge_pages;
+	unsigned long resv_huge_pages;
+	unsigned long surplus_huge_pages;
+	unsigned long nr_overcommit_huge_pages;
+	struct list_head hugepage_freelists[MAX_NUMNODES];
+	unsigned int nr_huge_pages_node[MAX_NUMNODES];
+	unsigned int free_huge_pages_node[MAX_NUMNODES];
+	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+};
+
+extern struct hstate default_hstate;
+
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+{
+	return &default_hstate;
+}
+
+static inline struct hstate *hstate_file(struct file *f)
+{
+	return &default_hstate;
+}
+
+static inline struct hstate *hstate_inode(struct inode *i)
+{
+	return &default_hstate;
+}
+
+static inline unsigned long huge_page_size(struct hstate *h)
+{
+	return (unsigned long)PAGE_SIZE << h->order;
+}
+
+static inline unsigned long huge_page_mask(struct hstate *h)
+{
+	return h->mask;
+}
+
+static inline unsigned int huge_page_order(struct hstate *h)
+{
+	return h->order;
+}
+
+static inline unsigned huge_page_shift(struct hstate *h)
+{
+	return h->order + PAGE_SHIFT;
+}
+
+static inline unsigned int pages_per_huge_page(struct hstate *h)
+{
+	return 1 << h->order;
+}
+
+static inline unsigned int blocks_per_huge_page(struct hstate *h)
+{
+	return huge_page_size(h) / 512;
+}
+
+#include <asm/hugetlb.h>
+
+#else
+struct hstate {};
+#define hstate_file(f) NULL
+#define hstate_vma(v) NULL
+#define hstate_inode(i) NULL
+#define huge_page_size(h) PAGE_SIZE
+#define huge_page_mask(h) PAGE_MASK
+#define huge_page_order(h) 0
+#define huge_page_shift(h) PAGE_SHIFT
+#define pages_per_huge_page(h) 1
+#endif
+
 #endif /* _LINUX_HUGETLB_H */
diff --git a/ipc/shm.c b/ipc/shm.c
index 790240cd067..a726aebce7d 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -577,7 +577,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 
 		if (is_file_hugepages(shp->shm_file)) {
 			struct address_space *mapping = inode->i_mapping;
-			*rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
+			struct hstate *h = hstate_file(shp->shm_file);
+			*rss += pages_per_huge_page(h) * mapping->nrpages;
 		} else {
 			struct shmem_inode_info *info = SHMEM_I(inode);
 			spin_lock(&info->lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 32dff4290c6..0d8153e25f0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,18 +22,12 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
-static unsigned long surplus_huge_pages;
-static unsigned long nr_overcommit_huge_pages;
 unsigned long max_huge_pages;
 unsigned long sysctl_overcommit_huge_pages;
-static struct list_head hugepage_freelists[MAX_NUMNODES];
-static unsigned int nr_huge_pages_node[MAX_NUMNODES];
-static unsigned int free_huge_pages_node[MAX_NUMNODES];
-static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
-static int hugetlb_next_nid;
+
+struct hstate default_hstate;
 
 /*
  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
@@ -203,11 +197,11 @@ static long region_count(struct list_head *head, long f, long t)
  * Convert the address within this vma to the page offset within
  * the mapping, in pagecache page units; huge pages here.
  */
-static pgoff_t vma_hugecache_offset(struct vm_area_struct *vma,
-					unsigned long address)
+static pgoff_t vma_hugecache_offset(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
 {
-	return ((address - vma->vm_start) >> HPAGE_SHIFT) +
-			(vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+	return ((address - vma->vm_start) >> huge_page_shift(h)) +
+			(vma->vm_pgoff >> huge_page_order(h));
 }
 
 /*
@@ -309,20 +303,21 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 }
 
 /* Decrement the reserved pages in the hugepage pool by one */
-static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
+static void decrement_hugepage_resv_vma(struct hstate *h,
+			struct vm_area_struct *vma)
 {
 	if (vma->vm_flags & VM_NORESERVE)
 		return;
 
 	if (vma->vm_flags & VM_SHARED) {
 		/* Shared mappings always use reserves */
-		resv_huge_pages--;
+		h->resv_huge_pages--;
 	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
 		/*
 		 * Only the process that called mmap() has reserves for
 		 * private mappings.
 		 */
-		resv_huge_pages--;
+		h->resv_huge_pages--;
 	}
 }
 
@@ -344,12 +339,13 @@ static int vma_has_private_reserves(struct vm_area_struct *vma)
 	return 1;
 }
 
-static void clear_huge_page(struct page *page, unsigned long addr)
+static void clear_huge_page(struct page *page,
+			unsigned long addr, unsigned long sz)
 {
 	int i;
 
 	might_sleep();
-	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
+	for (i = 0; i < sz/PAGE_SIZE; i++) {
 		cond_resched();
 		clear_user_highpage(page + i, addr + i * PAGE_SIZE);
 	}
@@ -359,41 +355,43 @@ static void copy_huge_page(struct page *dst, struct page *src,
 			   unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
+	struct hstate *h = hstate_vma(vma);
 
 	might_sleep();
-	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
+	for (i = 0; i < pages_per_huge_page(h); i++) {
 		cond_resched();
 		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
 	}
 }
 
-static void enqueue_huge_page(struct page *page)
+static void enqueue_huge_page(struct hstate *h, struct page *page)
 {
 	int nid = page_to_nid(page);
-	list_add(&page->lru, &hugepage_freelists[nid]);
-	free_huge_pages++;
-	free_huge_pages_node[nid]++;
+	list_add(&page->lru, &h->hugepage_freelists[nid]);
+	h->free_huge_pages++;
+	h->free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(void)
+static struct page *dequeue_huge_page(struct hstate *h)
 {
 	int nid;
 	struct page *page = NULL;
 
 	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
-		if (!list_empty(&hugepage_freelists[nid])) {
-			page = list_entry(hugepage_freelists[nid].next,
+		if (!list_empty(&h->hugepage_freelists[nid])) {
+			page = list_entry(h->hugepage_freelists[nid].next,
 					  struct page, lru);
 			list_del(&page->lru);
-			free_huge_pages--;
-			free_huge_pages_node[nid]--;
+			h->free_huge_pages--;
+			h->free_huge_pages_node[nid]--;
 			break;
 		}
 	}
 	return page;
 }
 
-static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
+static struct page *dequeue_huge_page_vma(struct hstate *h,
+				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
 	int nid;
@@ -411,26 +409,26 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	 * not "stolen". The child may still get SIGKILLed
 	 */
 	if (!vma_has_private_reserves(vma) &&
-			free_huge_pages - resv_huge_pages == 0)
+			h->free_huge_pages - h->resv_huge_pages == 0)
 		return NULL;
 
 	/* If reserves cannot be used, ensure enough pages are in the pool */
-	if (avoid_reserve && free_huge_pages - resv_huge_pages == 0)
+	if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
 		return NULL;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
 		nid = zone_to_nid(zone);
 		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
-		    !list_empty(&hugepage_freelists[nid])) {
-			page = list_entry(hugepage_freelists[nid].next,
+		    !list_empty(&h->hugepage_freelists[nid])) {
+			page = list_entry(h->hugepage_freelists[nid].next,
 					  struct page, lru);
 			list_del(&page->lru);
-			free_huge_pages--;
-			free_huge_pages_node[nid]--;
+			h->free_huge_pages--;
+			h->free_huge_pages_node[nid]--;
 
 			if (!avoid_reserve)
-				decrement_hugepage_resv_vma(vma);
+				decrement_hugepage_resv_vma(h, vma);
 
 			break;
 		}
@@ -439,12 +437,13 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	return page;
 }
 
-static void update_and_free_page(struct page *page)
+static void update_and_free_page(struct hstate *h, struct page *page)
 {
 	int i;
-	nr_huge_pages--;
-	nr_huge_pages_node[page_to_nid(page)]--;
-	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
+
+	h->nr_huge_pages--;
+	h->nr_huge_pages_node[page_to_nid(page)]--;
+	for (i = 0; i < pages_per_huge_page(h); i++) {
 		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
 				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
 				1 << PG_private | 1<< PG_writeback);
@@ -452,11 +451,16 @@ static void update_and_free_page(struct page *page)
 	set_compound_page_dtor(page, NULL);
 	set_page_refcounted(page);
 	arch_release_hugepage(page);
-	__free_pages(page, HUGETLB_PAGE_ORDER);
+	__free_pages(page, huge_page_order(h));
 }
 
 static void free_huge_page(struct page *page)
 {
+	/*
+	 * Can't pass hstate in here because it is called from the
+	 * compound page destructor.
+	 */
+	struct hstate *h = &default_hstate;
 	int nid = page_to_nid(page);
 	struct address_space *mapping;
 
@@ -466,12 +470,12 @@ static void free_huge_page(struct page *page)
 	INIT_LIST_HEAD(&page->lru);
 
 	spin_lock(&hugetlb_lock);
-	if (surplus_huge_pages_node[nid]) {
-		update_and_free_page(page);
-		surplus_huge_pages--;
-		surplus_huge_pages_node[nid]--;
+	if (h->surplus_huge_pages_node[nid]) {
+		update_and_free_page(h, page);
+		h->surplus_huge_pages--;
+		h->surplus_huge_pages_node[nid]--;
 	} else {
-		enqueue_huge_page(page);
+		enqueue_huge_page(h, page);
 	}
 	spin_unlock(&hugetlb_lock);
 	if (mapping)
@@ -483,7 +487,7 @@ static void free_huge_page(struct page *page)
  * balanced by operating on them in a round-robin fashion.
  * Returns 1 if an adjustment was made.
  */
-static int adjust_pool_surplus(int delta)
+static int adjust_pool_surplus(struct hstate *h, int delta)
 {
 	static int prev_nid;
 	int nid = prev_nid;
@@ -496,15 +500,15 @@ static int adjust_pool_surplus(int delta)
 			nid = first_node(node_online_map);
 
 		/* To shrink on this node, there must be a surplus page */
-		if (delta < 0 && !surplus_huge_pages_node[nid])
+		if (delta < 0 && !h->surplus_huge_pages_node[nid])
 			continue;
 		/* Surplus cannot exceed the total number of pages */
-		if (delta > 0 && surplus_huge_pages_node[nid] >=
-						nr_huge_pages_node[nid])
+		if (delta > 0 && h->surplus_huge_pages_node[nid] >=
+						h->nr_huge_pages_node[nid])
 			continue;
 
-		surplus_huge_pages += delta;
-		surplus_huge_pages_node[nid] += delta;
+		h->surplus_huge_pages += delta;
+		h->surplus_huge_pages_node[nid] += delta;
 		ret = 1;
 		break;
 	} while (nid != prev_nid);
@@ -513,46 +517,46 @@ static int adjust_pool_surplus(int delta)
 	return ret;
 }
 
-static void prep_new_huge_page(struct page *page, int nid)
+static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
 	set_compound_page_dtor(page, free_huge_page);
 	spin_lock(&hugetlb_lock);
-	nr_huge_pages++;
-	nr_huge_pages_node[nid]++;
+	h->nr_huge_pages++;
+	h->nr_huge_pages_node[nid]++;
 	spin_unlock(&hugetlb_lock);
 	put_page(page); /* free it into the hugepage allocator */
 }
 
-static struct page *alloc_fresh_huge_page_node(int nid)
+static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
 	struct page *page;
 
 	page = alloc_pages_node(nid,
 		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
 						__GFP_REPEAT|__GFP_NOWARN,
-		HUGETLB_PAGE_ORDER);
+		huge_page_order(h));
 	if (page) {
 		if (arch_prepare_hugepage(page)) {
-			__free_pages(page, HUGETLB_PAGE_ORDER);
+			__free_pages(page, huge_page_order(h));
 			return NULL;
 		}
-		prep_new_huge_page(page, nid);
+		prep_new_huge_page(h, page, nid);
 	}
 
 	return page;
 }
 
-static int alloc_fresh_huge_page(void)
+static int alloc_fresh_huge_page(struct hstate *h)
 {
 	struct page *page;
 	int start_nid;
 	int next_nid;
 	int ret = 0;
 
-	start_nid = hugetlb_next_nid;
+	start_nid = h->hugetlb_next_nid;
 
 	do {
-		page = alloc_fresh_huge_page_node(hugetlb_next_nid);
+		page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
 		if (page)
 			ret = 1;
 		/*
@@ -566,11 +570,11 @@ static int alloc_fresh_huge_page(void)
 		 * if we just successfully allocated a hugepage so that
 		 * the next caller gets hugepages on the next node.
 		 */
-		next_nid = next_node(hugetlb_next_nid, node_online_map);
+		next_nid = next_node(h->hugetlb_next_nid, node_online_map);
 		if (next_nid == MAX_NUMNODES)
 			next_nid = first_node(node_online_map);
-		hugetlb_next_nid = next_nid;
-	} while (!page && hugetlb_next_nid != start_nid);
+		h->hugetlb_next_nid = next_nid;
+	} while (!page && h->hugetlb_next_nid != start_nid);
 
 	if (ret)
 		count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -580,8 +584,8 @@ static int alloc_fresh_huge_page(void)
 	return ret;
 }
 
-static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
-						unsigned long address)
+static struct page *alloc_buddy_huge_page(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
 {
 	struct page *page;
 	unsigned int nid;
@@ -610,18 +614,18 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 	 * per-node value is checked there.
 	 */
 	spin_lock(&hugetlb_lock);
-	if (surplus_huge_pages >= nr_overcommit_huge_pages) {
+	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
 		spin_unlock(&hugetlb_lock);
 		return NULL;
 	} else {
-		nr_huge_pages++;
-		surplus_huge_pages++;
+		h->nr_huge_pages++;
+		h->surplus_huge_pages++;
 	}
 	spin_unlock(&hugetlb_lock);
 
 	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
 					__GFP_REPEAT|__GFP_NOWARN,
-					HUGETLB_PAGE_ORDER);
+					huge_page_order(h));
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
@@ -636,12 +640,12 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 		/*
 		 * We incremented the global counters already
 		 */
-		nr_huge_pages_node[nid]++;
-		surplus_huge_pages_node[nid]++;
+		h->nr_huge_pages_node[nid]++;
+		h->surplus_huge_pages_node[nid]++;
 		__count_vm_event(HTLB_BUDDY_PGALLOC);
 	} else {
-		nr_huge_pages--;
-		surplus_huge_pages--;
+		h->nr_huge_pages--;
+		h->surplus_huge_pages--;
 		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
 	}
 	spin_unlock(&hugetlb_lock);
@@ -653,16 +657,16 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
  * Increase the hugetlb pool such that it can accomodate a reservation
  * of size 'delta'.
  */
-static int gather_surplus_pages(int delta)
+static int gather_surplus_pages(struct hstate *h, int delta)
 {
 	struct list_head surplus_list;
 	struct page *page, *tmp;
 	int ret, i;
 	int needed, allocated;
 
-	needed = (resv_huge_pages + delta) - free_huge_pages;
+	needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
 	if (needed <= 0) {
-		resv_huge_pages += delta;
+		h->resv_huge_pages += delta;
 		return 0;
 	}
 
@@ -673,7 +677,7 @@ static int gather_surplus_pages(int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = alloc_buddy_huge_page(NULL, 0);
+		page = alloc_buddy_huge_page(h, NULL, 0);
 		if (!page) {
 			/*
 			 * We were not able to allocate enough pages to
@@ -694,7 +698,8 @@ retry:
 	 * because either resv_huge_pages or free_huge_pages may have changed.
 	 */
 	spin_lock(&hugetlb_lock);
-	needed = (resv_huge_pages + delta) - (free_huge_pages + allocated);
+	needed = (h->resv_huge_pages + delta) -
+			(h->free_huge_pages + allocated);
 	if (needed > 0)
 		goto retry;
 
@@ -707,7 +712,7 @@ retry:
 	 * before they are reserved.
 	 */
 	needed += allocated;
-	resv_huge_pages += delta;
+	h->resv_huge_pages += delta;
 	ret = 0;
 free:
 	/* Free the needed pages to the hugetlb pool */
@@ -715,7 +720,7 @@ free:
 		if ((--needed) < 0)
 			break;
 		list_del(&page->lru);
-		enqueue_huge_page(page);
+		enqueue_huge_page(h, page);
 	}
 
 	/* Free unnecessary surplus pages to the buddy allocator */
@@ -743,7 +748,8 @@ free:
  * allocated to satisfy the reservation must be explicitly freed if they were
  * never used.
  */
-static void return_unused_surplus_pages(unsigned long unused_resv_pages)
+static void return_unused_surplus_pages(struct hstate *h,
+					unsigned long unused_resv_pages)
 {
 	static int nid = -1;
 	struct page *page;
@@ -758,27 +764,27 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 	unsigned long remaining_iterations = num_online_nodes();
 
 	/* Uncommit the reservation */
-	resv_huge_pages -= unused_resv_pages;
+	h->resv_huge_pages -= unused_resv_pages;
 
-	nr_pages = min(unused_resv_pages, surplus_huge_pages);
+	nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
 
 	while (remaining_iterations-- && nr_pages) {
 		nid = next_node(nid, node_online_map);
 		if (nid == MAX_NUMNODES)
 			nid = first_node(node_online_map);
 
-		if (!surplus_huge_pages_node[nid])
+		if (!h->surplus_huge_pages_node[nid])
 			continue;
 
-		if (!list_empty(&hugepage_freelists[nid])) {
-			page = list_entry(hugepage_freelists[nid].next,
+		if (!list_empty(&h->hugepage_freelists[nid])) {
+			page = list_entry(h->hugepage_freelists[nid].next,
 					  struct page, lru);
 			list_del(&page->lru);
-			update_and_free_page(page);
-			free_huge_pages--;
-			free_huge_pages_node[nid]--;
-			surplus_huge_pages--;
-			surplus_huge_pages_node[nid]--;
+			update_and_free_page(h, page);
+			h->free_huge_pages--;
+			h->free_huge_pages_node[nid]--;
+			h->surplus_huge_pages--;
+			h->surplus_huge_pages_node[nid]--;
 			nr_pages--;
 			remaining_iterations = num_online_nodes();
 		}
@@ -794,13 +800,14 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
  * an instantiated the change should be committed via vma_commit_reservation.
  * No action is required on failure.
  */
-static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
+static int vma_needs_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
 
 	if (vma->vm_flags & VM_SHARED) {
-		pgoff_t idx = vma_hugecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		return region_chg(&inode->i_mapping->private_list,
 							idx, idx + 1);
 
@@ -809,7 +816,7 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
 
 	} else  {
 		int err;
-		pgoff_t idx = vma_hugecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		struct resv_map *reservations = vma_resv_map(vma);
 
 		err = region_chg(&reservations->regions, idx, idx + 1);
@@ -818,18 +825,18 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
 		return 0;
 	}
 }
-static void vma_commit_reservation(struct vm_area_struct *vma,
-							unsigned long addr)
+static void vma_commit_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
 
 	if (vma->vm_flags & VM_SHARED) {
-		pgoff_t idx = vma_hugecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		region_add(&inode->i_mapping->private_list, idx, idx + 1);
 
 	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
-		pgoff_t idx = vma_hugecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		struct resv_map *reservations = vma_resv_map(vma);
 
 		/* Mark this page used in the map. */
@@ -840,6 +847,7 @@ static void vma_commit_reservation(struct vm_area_struct *vma,
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
+	struct hstate *h = hstate_vma(vma);
 	struct page *page;
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
@@ -852,7 +860,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	 * MAP_NORESERVE mappings may also need pages and quota allocated
 	 * if no reserve mapping overlaps.
 	 */
-	chg = vma_needs_reservation(vma, addr);
+	chg = vma_needs_reservation(h, vma, addr);
 	if (chg < 0)
 		return ERR_PTR(chg);
 	if (chg)
@@ -860,11 +868,11 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 			return ERR_PTR(-ENOSPC);
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page_vma(vma, addr, avoid_reserve);
+	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page) {
-		page = alloc_buddy_huge_page(vma, addr);
+		page = alloc_buddy_huge_page(h, vma, addr);
 		if (!page) {
 			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_OOM);
@@ -874,7 +882,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	set_page_refcounted(page);
 	set_page_private(page, (unsigned long) mapping);
 
-	vma_commit_reservation(vma, addr);
+	vma_commit_reservation(h, vma, addr);
 
 	return page;
 }
@@ -882,21 +890,28 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 static int __init hugetlb_init(void)
 {
 	unsigned long i;
+	struct hstate *h = &default_hstate;
 
 	if (HPAGE_SHIFT == 0)
 		return 0;
 
+	if (!h->order) {
+		h->order = HPAGE_SHIFT - PAGE_SHIFT;
+		h->mask = HPAGE_MASK;
+	}
+
 	for (i = 0; i < MAX_NUMNODES; ++i)
-		INIT_LIST_HEAD(&hugepage_freelists[i]);
+		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
 
-	hugetlb_next_nid = first_node(node_online_map);
+	h->hugetlb_next_nid = first_node(node_online_map);
 
 	for (i = 0; i < max_huge_pages; ++i) {
-		if (!alloc_fresh_huge_page())
+		if (!alloc_fresh_huge_page(h))
 			break;
 	}
-	max_huge_pages = free_huge_pages = nr_huge_pages = i;
-	printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
+	max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
+	printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n",
+			h->free_huge_pages);
 	return 0;
 }
 module_init(hugetlb_init);
@@ -922,34 +937,36 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
-static void try_to_free_low(unsigned long count)
+static void try_to_free_low(struct hstate *h, unsigned long count)
 {
 	int i;
 
 	for (i = 0; i < MAX_NUMNODES; ++i) {
 		struct page *page, *next;
-		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
-			if (count >= nr_huge_pages)
+		struct list_head *freel = &h->hugepage_freelists[i];
+		list_for_each_entry_safe(page, next, freel, lru) {
+			if (count >= h->nr_huge_pages)
 				return;
 			if (PageHighMem(page))
 				continue;
 			list_del(&page->lru);
 			update_and_free_page(page);
-			free_huge_pages--;
-			free_huge_pages_node[page_to_nid(page)]--;
+			h->free_huge_pages--;
+			h->free_huge_pages_node[page_to_nid(page)]--;
 		}
 	}
 }
 #else
-static inline void try_to_free_low(unsigned long count)
+static inline void try_to_free_low(struct hstate *h, unsigned long count)
 {
 }
 #endif
 
-#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages)
+#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
 static unsigned long set_max_huge_pages(unsigned long count)
 {
 	unsigned long min_count, ret;
+	struct hstate *h = &default_hstate;
 
 	/*
 	 * Increase the pool size
@@ -963,19 +980,19 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	 * within all the constraints specified by the sysctls.
 	 */
 	spin_lock(&hugetlb_lock);
-	while (surplus_huge_pages && count > persistent_huge_pages) {
-		if (!adjust_pool_surplus(-1))
+	while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
+		if (!adjust_pool_surplus(h, -1))
 			break;
 	}
 
-	while (count > persistent_huge_pages) {
+	while (count > persistent_huge_pages(h)) {
 		/*
 		 * If this allocation races such that we no longer need the
 		 * page, free_huge_page will handle it by freeing the page
 		 * and reducing the surplus.
 		 */
 		spin_unlock(&hugetlb_lock);
-		ret = alloc_fresh_huge_page();
+		ret = alloc_fresh_huge_page(h);
 		spin_lock(&hugetlb_lock);
 		if (!ret)
 			goto out;
@@ -997,21 +1014,21 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	 * and won't grow the pool anywhere else. Not until one of the
 	 * sysctls are changed, or the surplus pages go out of use.
 	 */
-	min_count = resv_huge_pages + nr_huge_pages - free_huge_pages;
+	min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
 	min_count = max(count, min_count);
-	try_to_free_low(min_count);
-	while (min_count < persistent_huge_pages) {
-		struct page *page = dequeue_huge_page();
+	try_to_free_low(h, min_count);
+	while (min_count < persistent_huge_pages(h)) {
+		struct page *page = dequeue_huge_page(h);
 		if (!page)
 			break;
-		update_and_free_page(page);
+		update_and_free_page(h, page);
 	}
-	while (count < persistent_huge_pages) {
-		if (!adjust_pool_surplus(1))
+	while (count < persistent_huge_pages(h)) {
+		if (!adjust_pool_surplus(h, 1))
 			break;
 	}
 out:
-	ret = persistent_huge_pages;
+	ret = persistent_huge_pages(h);
 	spin_unlock(&hugetlb_lock);
 	return ret;
 }
@@ -1041,9 +1058,10 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 			struct file *file, void __user *buffer,
 			size_t *length, loff_t *ppos)
 {
+	struct hstate *h = &default_hstate;
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
 	spin_lock(&hugetlb_lock);
-	nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
+	h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
 	spin_unlock(&hugetlb_lock);
 	return 0;
 }
@@ -1052,37 +1070,40 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
 int hugetlb_report_meminfo(char *buf)
 {
+	struct hstate *h = &default_hstate;
 	return sprintf(buf,
 			"HugePages_Total: %5lu\n"
 			"HugePages_Free:  %5lu\n"
 			"HugePages_Rsvd:  %5lu\n"
 			"HugePages_Surp:  %5lu\n"
 			"Hugepagesize:    %5lu kB\n",
-			nr_huge_pages,
-			free_huge_pages,
-			resv_huge_pages,
-			surplus_huge_pages,
-			HPAGE_SIZE/1024);
+			h->nr_huge_pages,
+			h->free_huge_pages,
+			h->resv_huge_pages,
+			h->surplus_huge_pages,
+			1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
 }
 
 int hugetlb_report_node_meminfo(int nid, char *buf)
 {
+	struct hstate *h = &default_hstate;
 	return sprintf(buf,
 		"Node %d HugePages_Total: %5u\n"
 		"Node %d HugePages_Free:  %5u\n"
 		"Node %d HugePages_Surp:  %5u\n",
-		nid, nr_huge_pages_node[nid],
-		nid, free_huge_pages_node[nid],
-		nid, surplus_huge_pages_node[nid]);
+		nid, h->nr_huge_pages_node[nid],
+		nid, h->free_huge_pages_node[nid],
+		nid, h->surplus_huge_pages_node[nid]);
 }
 
 /* Return the number pages of memory we physically have, in PAGE_SIZE units. */
 unsigned long hugetlb_total_pages(void)
 {
-	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
+	struct hstate *h = &default_hstate;
+	return h->nr_huge_pages * pages_per_huge_page(h);
 }
 
-static int hugetlb_acct_memory(long delta)
+static int hugetlb_acct_memory(struct hstate *h, long delta)
 {
 	int ret = -ENOMEM;
 
@@ -1105,18 +1126,18 @@ static int hugetlb_acct_memory(long delta)
 	 * semantics that cpuset has.
 	 */
 	if (delta > 0) {
-		if (gather_surplus_pages(delta) < 0)
+		if (gather_surplus_pages(h, delta) < 0)
 			goto out;
 
-		if (delta > cpuset_mems_nr(free_huge_pages_node)) {
-			return_unused_surplus_pages(delta);
+		if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
+			return_unused_surplus_pages(h, delta);
 			goto out;
 		}
 	}
 
 	ret = 0;
 	if (delta < 0)
-		return_unused_surplus_pages((unsigned long) -delta);
+		return_unused_surplus_pages(h, (unsigned long) -delta);
 
 out:
 	spin_unlock(&hugetlb_lock);
@@ -1141,14 +1162,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
 
 static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 {
+	struct hstate *h = hstate_vma(vma);
 	struct resv_map *reservations = vma_resv_map(vma);
 	unsigned long reserve;
 	unsigned long start;
 	unsigned long end;
 
 	if (reservations) {
-		start = vma_hugecache_offset(vma, vma->vm_start);
-		end = vma_hugecache_offset(vma, vma->vm_end);
+		start = vma_hugecache_offset(h, vma, vma->vm_start);
+		end = vma_hugecache_offset(h, vma, vma->vm_end);
 
 		reserve = (end - start) -
 			region_count(&reservations->regions, start, end);
@@ -1156,7 +1178,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 		kref_put(&reservations->refs, resv_map_release);
 
 		if (reserve)
-			hugetlb_acct_memory(-reserve);
+			hugetlb_acct_memory(h, -reserve);
 	}
 }
 
@@ -1214,14 +1236,16 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	struct page *ptepage;
 	unsigned long addr;
 	int cow;
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
 
 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
 		src_pte = huge_pte_offset(src, addr);
 		if (!src_pte)
 			continue;
-		dst_pte = huge_pte_alloc(dst, addr);
+		dst_pte = huge_pte_alloc(dst, addr, sz);
 		if (!dst_pte)
 			goto nomem;
 
@@ -1257,6 +1281,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	pte_t pte;
 	struct page *page;
 	struct page *tmp;
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
+
 	/*
 	 * A page gathering list, protected by per file i_mmap_lock. The
 	 * lock is used to avoid list corruption from multiple unmapping
@@ -1265,11 +1292,11 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	LIST_HEAD(page_list);
 
 	WARN_ON(!is_vm_hugetlb_page(vma));
-	BUG_ON(start & ~HPAGE_MASK);
-	BUG_ON(end & ~HPAGE_MASK);
+	BUG_ON(start & ~huge_page_mask(h));
+	BUG_ON(end & ~huge_page_mask(h));
 
 	spin_lock(&mm->page_table_lock);
-	for (address = start; address < end; address += HPAGE_SIZE) {
+	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
@@ -1383,6 +1410,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
 			struct page *pagecache_page)
 {
+	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
 	int avoidcopy;
 	int outside_reserve = 0;
@@ -1443,7 +1471,7 @@ retry_avoidcopy:
 	__SetPageUptodate(new_page);
 	spin_lock(&mm->page_table_lock);
 
-	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
+	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		/* Break COW */
 		huge_ptep_clear_flush(vma, address, ptep);
@@ -1458,14 +1486,14 @@ retry_avoidcopy:
 }
 
 /* Return the pagecache page at a given address within a VMA */
-static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
-			unsigned long address)
+static struct page *hugetlbfs_pagecache_page(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
 {
 	struct address_space *mapping;
 	pgoff_t idx;
 
 	mapping = vma->vm_file->f_mapping;
-	idx = vma_hugecache_offset(vma, address);
+	idx = vma_hugecache_offset(h, vma, address);
 
 	return find_lock_page(mapping, idx);
 }
@@ -1473,6 +1501,7 @@ static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, int write_access)
 {
+	struct hstate *h = hstate_vma(vma);
 	int ret = VM_FAULT_SIGBUS;
 	pgoff_t idx;
 	unsigned long size;
@@ -1493,7 +1522,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	mapping = vma->vm_file->f_mapping;
-	idx = vma_hugecache_offset(vma, address);
+	idx = vma_hugecache_offset(h, vma, address);
 
 	/*
 	 * Use page lock to guard against racing truncation
@@ -1502,7 +1531,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 retry:
 	page = find_lock_page(mapping, idx);
 	if (!page) {
-		size = i_size_read(mapping->host) >> HPAGE_SHIFT;
+		size = i_size_read(mapping->host) >> huge_page_shift(h);
 		if (idx >= size)
 			goto out;
 		page = alloc_huge_page(vma, address, 0);
@@ -1510,7 +1539,7 @@ retry:
 			ret = -PTR_ERR(page);
 			goto out;
 		}
-		clear_huge_page(page, address);
+		clear_huge_page(page, address, huge_page_size(h));
 		__SetPageUptodate(page);
 
 		if (vma->vm_flags & VM_SHARED) {
@@ -1526,14 +1555,14 @@ retry:
 			}
 
 			spin_lock(&inode->i_lock);
-			inode->i_blocks += BLOCKS_PER_HUGEPAGE;
+			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
 		} else
 			lock_page(page);
 	}
 
 	spin_lock(&mm->page_table_lock);
-	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
+	size = i_size_read(mapping->host) >> huge_page_shift(h);
 	if (idx >= size)
 		goto backout;
 
@@ -1569,8 +1598,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t entry;
 	int ret;
 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
+	struct hstate *h = hstate_vma(vma);
 
-	ptep = huge_pte_alloc(mm, address);
+	ptep = huge_pte_alloc(mm, address, huge_page_size(h));
 	if (!ptep)
 		return VM_FAULT_OOM;
 
@@ -1594,7 +1624,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (likely(pte_same(entry, huge_ptep_get(ptep))))
 		if (write_access && !pte_write(entry)) {
 			struct page *page;
-			page = hugetlbfs_pagecache_page(vma, address);
+			page = hugetlbfs_pagecache_page(h, vma, address);
 			ret = hugetlb_cow(mm, vma, address, ptep, entry, page);
 			if (page) {
 				unlock_page(page);
@@ -1615,6 +1645,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long pfn_offset;
 	unsigned long vaddr = *position;
 	int remainder = *length;
+	struct hstate *h = hstate_vma(vma);
 
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
@@ -1626,7 +1657,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * each hugepage.  We have to make * sure we get the
 		 * first, for the page indexing below to work.
 		 */
-		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
+		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
 
 		if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
 		    (write && !pte_write(huge_ptep_get(pte)))) {
@@ -1644,7 +1675,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			break;
 		}
 
-		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
+		pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
 		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
@@ -1660,7 +1691,7 @@ same_page:
 		--remainder;
 		++i;
 		if (vaddr < vma->vm_end && remainder &&
-				pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
+				pfn_offset < pages_per_huge_page(h)) {
 			/*
 			 * We use pfn_offset to avoid touching the pageframes
 			 * of this compound page.
@@ -1682,13 +1713,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	unsigned long start = address;
 	pte_t *ptep;
 	pte_t pte;
+	struct hstate *h = hstate_vma(vma);
 
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
 
 	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
 	spin_lock(&mm->page_table_lock);
-	for (; address < end; address += HPAGE_SIZE) {
+	for (; address < end; address += huge_page_size(h)) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
@@ -1711,6 +1743,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 					struct vm_area_struct *vma)
 {
 	long ret, chg;
+	struct hstate *h = hstate_inode(inode);
 
 	if (vma && vma->vm_flags & VM_NORESERVE)
 		return 0;
@@ -1739,7 +1772,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 
 	if (hugetlb_get_quota(inode->i_mapping, chg))
 		return -ENOSPC;
-	ret = hugetlb_acct_memory(chg);
+	ret = hugetlb_acct_memory(h, chg);
 	if (ret < 0) {
 		hugetlb_put_quota(inode->i_mapping, chg);
 		return ret;
@@ -1751,12 +1784,13 @@ int hugetlb_reserve_pages(struct inode *inode,
 
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 {
+	struct hstate *h = hstate_inode(inode);
 	long chg = region_truncate(&inode->i_mapping->private_list, offset);
 
 	spin_lock(&inode->i_lock);
-	inode->i_blocks -= BLOCKS_PER_HUGEPAGE * freed;
+	inode->i_blocks -= blocks_per_huge_page(h);
 	spin_unlock(&inode->i_lock);
 
 	hugetlb_put_quota(inode->i_mapping, (chg - freed));
-	hugetlb_acct_memory(-(chg - freed));
+	hugetlb_acct_memory(h, -(chg - freed));
 }
diff --git a/mm/memory.c b/mm/memory.c
index 72932489a08..c1c1d6d8c22 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -903,7 +903,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			if (unlikely(is_vm_hugetlb_page(vma))) {
 				unmap_hugepage_range(vma, start, end, NULL);
 				zap_work -= (end - start) /
-						(HPAGE_SIZE / PAGE_SIZE);
+					pages_per_huge_page(hstate_vma(vma));
 				start = end;
 			} else
 				start = unmap_page_range(*tlbp, vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c94e58b192c..e550bec2058 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1481,7 +1481,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
 
 	if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) {
 		zl = node_zonelist(interleave_nid(*mpol, vma, addr,
-						HPAGE_SHIFT), gfp_flags);
+				huge_page_shift(hstate_vma(vma))), gfp_flags);
 	} else {
 		zl = policy_zonelist(gfp_flags, *mpol);
 		if ((*mpol)->mode == MPOL_BIND)
@@ -2220,9 +2220,12 @@ static void check_huge_range(struct vm_area_struct *vma,
 {
 	unsigned long addr;
 	struct page *page;
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
 
-	for (addr = start; addr < end; addr += HPAGE_SIZE) {
-		pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
+	for (addr = start; addr < end; addr += sz) {
+		pte_t *ptep = huge_pte_offset(vma->vm_mm,
+						addr & huge_page_mask(h));
 		pte_t pte;
 
 		if (!ptep)
diff --git a/mm/mmap.c b/mm/mmap.c
index 57d3b6097de..5e0cc99e9cd 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1812,7 +1812,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
 
-	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+	if (is_vm_hugetlb_page(vma) && (addr &
+					~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
 	if (mm->map_count >= sysctl_max_map_count)
-- 
GitLab


From e5ff215941d59f8ae6bf58f6428dc5c26745a612 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:42 -0700
Subject: [PATCH 165/853] hugetlb: multiple hstates for multiple page sizes

Add basic support for more than one hstate in hugetlbfs.  This is the key
to supporting multiple hugetlbfs page sizes at once.

- Rather than a single hstate, we now have an array, with an iterator
- default_hstate continues to be the struct hstate which we use by default
- Add functions for architectures to register new hstates

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  19 +++++-
 kernel/sysctl.c         |   8 ++-
 mm/hugetlb.c            | 148 ++++++++++++++++++++++++++++++++--------
 3 files changed, 142 insertions(+), 33 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ad2271e11f9..b75bdb4deba 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -36,8 +36,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						struct vm_area_struct *vma);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
-extern unsigned long max_huge_pages;
-extern unsigned long sysctl_overcommit_huge_pages;
 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
@@ -181,7 +179,17 @@ struct hstate {
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 };
 
-extern struct hstate default_hstate;
+void __init hugetlb_add_hstate(unsigned order);
+struct hstate *size_to_hstate(unsigned long size);
+
+#ifndef HUGE_MAX_HSTATE
+#define HUGE_MAX_HSTATE 1
+#endif
+
+extern struct hstate hstates[HUGE_MAX_HSTATE];
+extern unsigned int default_hstate_idx;
+
+#define default_hstate (hstates[default_hstate_idx])
 
 static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 {
@@ -230,6 +238,11 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
 
 #include <asm/hugetlb.h>
 
+static inline struct hstate *page_hstate(struct page *page)
+{
+	return size_to_hstate(PAGE_SIZE << compound_order(page));
+}
+
 #else
 struct hstate {};
 #define hstate_file(f) NULL
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1f7b3b76a16..1a8299d1fe5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -959,7 +959,7 @@ static struct ctl_table vm_table[] = {
 #ifdef CONFIG_HUGETLB_PAGE
 	 {
 		.procname	= "nr_hugepages",
-		.data		= &max_huge_pages,
+		.data		= NULL,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_sysctl_handler,
@@ -985,10 +985,12 @@ static struct ctl_table vm_table[] = {
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nr_overcommit_hugepages",
-		.data		= &sysctl_overcommit_huge_pages,
-		.maxlen		= sizeof(sysctl_overcommit_huge_pages),
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_overcommit_handler,
+		.extra1		= (void *)&hugetlb_zero,
+		.extra2		= (void *)&hugetlb_infinity,
 	},
 #endif
 	{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0d8153e25f0..82378d44a0c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,12 +22,19 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-unsigned long max_huge_pages;
-unsigned long sysctl_overcommit_huge_pages;
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
 
-struct hstate default_hstate;
+static int max_hstate;
+unsigned int default_hstate_idx;
+struct hstate hstates[HUGE_MAX_HSTATE];
+
+/* for command line parsing */
+static struct hstate * __initdata parsed_hstate;
+static unsigned long __initdata default_hstate_max_huge_pages;
+
+#define for_each_hstate(h) \
+	for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++)
 
 /*
  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
@@ -454,13 +461,24 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 	__free_pages(page, huge_page_order(h));
 }
 
+struct hstate *size_to_hstate(unsigned long size)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		if (huge_page_size(h) == size)
+			return h;
+	}
+	return NULL;
+}
+
 static void free_huge_page(struct page *page)
 {
 	/*
 	 * Can't pass hstate in here because it is called from the
 	 * compound page destructor.
 	 */
-	struct hstate *h = &default_hstate;
+	struct hstate *h = page_hstate(page);
 	int nid = page_to_nid(page);
 	struct address_space *mapping;
 
@@ -887,39 +905,94 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 }
 
-static int __init hugetlb_init(void)
+static void __init hugetlb_init_one_hstate(struct hstate *h)
 {
 	unsigned long i;
-	struct hstate *h = &default_hstate;
-
-	if (HPAGE_SHIFT == 0)
-		return 0;
-
-	if (!h->order) {
-		h->order = HPAGE_SHIFT - PAGE_SHIFT;
-		h->mask = HPAGE_MASK;
-	}
 
 	for (i = 0; i < MAX_NUMNODES; ++i)
 		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
 
 	h->hugetlb_next_nid = first_node(node_online_map);
 
-	for (i = 0; i < max_huge_pages; ++i) {
+	for (i = 0; i < h->max_huge_pages; ++i) {
 		if (!alloc_fresh_huge_page(h))
 			break;
 	}
-	max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
-	printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n",
-			h->free_huge_pages);
+	h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
+}
+
+static void __init hugetlb_init_hstates(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		hugetlb_init_one_hstate(h);
+	}
+}
+
+static void __init report_hugepages(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		printk(KERN_INFO "Total HugeTLB memory allocated, "
+				"%ld %dMB pages\n",
+				h->free_huge_pages,
+				1 << (h->order + PAGE_SHIFT - 20));
+	}
+}
+
+static int __init hugetlb_init(void)
+{
+	BUILD_BUG_ON(HPAGE_SHIFT == 0);
+
+	if (!size_to_hstate(HPAGE_SIZE)) {
+		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
+		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
+	}
+	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
+
+	hugetlb_init_hstates();
+
+	report_hugepages();
+
 	return 0;
 }
 module_init(hugetlb_init);
 
+/* Should be called on processing a hugepagesz=... option */
+void __init hugetlb_add_hstate(unsigned order)
+{
+	struct hstate *h;
+	if (size_to_hstate(PAGE_SIZE << order)) {
+		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
+		return;
+	}
+	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
+	BUG_ON(order == 0);
+	h = &hstates[max_hstate++];
+	h->order = order;
+	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+	hugetlb_init_one_hstate(h);
+	parsed_hstate = h;
+}
+
 static int __init hugetlb_setup(char *s)
 {
-	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
-		max_huge_pages = 0;
+	unsigned long *mhp;
+
+	/*
+	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
+	 * so this hugepages= parameter goes to the "default hstate".
+	 */
+	if (!max_hstate)
+		mhp = &default_hstate_max_huge_pages;
+	else
+		mhp = &parsed_hstate->max_huge_pages;
+
+	if (sscanf(s, "%lu", mhp) <= 0)
+		*mhp = 0;
+
 	return 1;
 }
 __setup("hugepages=", hugetlb_setup);
@@ -950,7 +1023,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
 			if (PageHighMem(page))
 				continue;
 			list_del(&page->lru);
-			update_and_free_page(page);
+			update_and_free_page(h, page);
 			h->free_huge_pages--;
 			h->free_huge_pages_node[page_to_nid(page)]--;
 		}
@@ -963,10 +1036,9 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
 #endif
 
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
-static unsigned long set_max_huge_pages(unsigned long count)
+static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 {
 	unsigned long min_count, ret;
-	struct hstate *h = &default_hstate;
 
 	/*
 	 * Increase the pool size
@@ -1037,8 +1109,19 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 			   struct file *file, void __user *buffer,
 			   size_t *length, loff_t *ppos)
 {
+	struct hstate *h = &default_hstate;
+	unsigned long tmp;
+
+	if (!write)
+		tmp = h->max_huge_pages;
+
+	table->data = &tmp;
+	table->maxlen = sizeof(unsigned long);
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
-	max_huge_pages = set_max_huge_pages(max_huge_pages);
+
+	if (write)
+		h->max_huge_pages = set_max_huge_pages(h, tmp);
+
 	return 0;
 }
 
@@ -1059,10 +1142,21 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 			size_t *length, loff_t *ppos)
 {
 	struct hstate *h = &default_hstate;
+	unsigned long tmp;
+
+	if (!write)
+		tmp = h->nr_overcommit_huge_pages;
+
+	table->data = &tmp;
+	table->maxlen = sizeof(unsigned long);
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
-	spin_lock(&hugetlb_lock);
-	h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
-	spin_unlock(&hugetlb_lock);
+
+	if (write) {
+		spin_lock(&hugetlb_lock);
+		h->nr_overcommit_huge_pages = tmp;
+		spin_unlock(&hugetlb_lock);
+	}
+
 	return 0;
 }
 
-- 
GitLab


From a137e1cc6d6e7d315fef03962a2a5a113348b13b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:43 -0700
Subject: [PATCH 166/853] hugetlbfs: per mount huge page sizes

Add the ability to configure the hugetlb hstate used on a per mount basis.

- Add a new pagesize= option to the hugetlbfs mount that allows setting
  the page size
- This option causes the mount code to find the hstate corresponding to the
  specified size, and sets up a pointer to the hstate in the mount's
  superblock.
- Change the hstate accessors to use this information rather than the
  global default_hstate they were using (requires a slight change in
  mm/memory.c so we don't NULL deref in the error-unmap path -- see comments).

[np: take hstate out of hugetlbfs inode and vma->vm_private_data]

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    | 45 ++++++++++++++++++++++++++++++++---------
 include/linux/hugetlb.h | 14 ++++++++-----
 mm/hugetlb.c            | 16 +++------------
 mm/memory.c             | 18 +++++++++++++++--
 4 files changed, 64 insertions(+), 29 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 516c581b537..dbd01d262ca 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
 enum {
 	Opt_size, Opt_nr_inodes,
 	Opt_mode, Opt_uid, Opt_gid,
+	Opt_pagesize,
 	Opt_err,
 };
 
@@ -62,6 +63,7 @@ static match_table_t tokens = {
 	{Opt_mode,	"mode=%o"},
 	{Opt_uid,	"uid=%u"},
 	{Opt_gid,	"gid=%u"},
+	{Opt_pagesize,	"pagesize=%s"},
 	{Opt_err,	NULL},
 };
 
@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 	char *p, *rest;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
+	unsigned long long size = 0;
+	enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
 
 	if (!options)
 		return 0;
@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 			break;
 
 		case Opt_size: {
- 			unsigned long long size;
 			/* memparse() will accept a K/M/G without a digit */
 			if (!isdigit(*args[0].from))
 				goto bad_val;
 			size = memparse(args[0].from, &rest);
-			if (*rest == '%') {
-				size <<= HPAGE_SHIFT;
-				size *= max_huge_pages;
-				do_div(size, 100);
-			}
-			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
+			setsize = SIZE_STD;
+			if (*rest == '%')
+				setsize = SIZE_PERCENT;
 			break;
 		}
 
@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 			pconfig->nr_inodes = memparse(args[0].from, &rest);
 			break;
 
+		case Opt_pagesize: {
+			unsigned long ps;
+			ps = memparse(args[0].from, &rest);
+			pconfig->hstate = size_to_hstate(ps);
+			if (!pconfig->hstate) {
+				printk(KERN_ERR
+				"hugetlbfs: Unsupported page size %lu MB\n",
+					ps >> 20);
+				return -EINVAL;
+			}
+			break;
+		}
+
 		default:
 			printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
 				 p);
@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 			break;
 		}
 	}
+
+	/* Do size after hstate is set up */
+	if (setsize > NO_SIZE) {
+		struct hstate *h = pconfig->hstate;
+		if (setsize == SIZE_PERCENT) {
+			size <<= huge_page_shift(h);
+			size *= h->max_huge_pages;
+			do_div(size, 100);
+		}
+		pconfig->nr_blocks = (size >> huge_page_shift(h));
+	}
+
 	return 0;
 
 bad_val:
@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	config.uid = current->fsuid;
 	config.gid = current->fsgid;
 	config.mode = 0755;
+	config.hstate = &default_hstate;
 	ret = hugetlbfs_parse_options(data, &config);
 	if (ret)
 		return ret;
@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbinfo)
 		return -ENOMEM;
 	sb->s_fs_info = sbinfo;
+	sbinfo->hstate = config.hstate;
 	spin_lock_init(&sbinfo->stat_lock);
 	sbinfo->max_blocks = config.nr_blocks;
 	sbinfo->free_blocks = config.nr_blocks;
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_blocksize = HPAGE_SIZE;
-	sb->s_blocksize_bits = HPAGE_SHIFT;
+	sb->s_blocksize = huge_page_size(config.hstate);
+	sb->s_blocksize_bits = huge_page_shift(config.hstate);
 	sb->s_magic = HUGETLBFS_MAGIC;
 	sb->s_op = &hugetlbfs_ops;
 	sb->s_time_gran = 1;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b75bdb4deba..ba9263e631b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -100,6 +100,7 @@ struct hugetlbfs_config {
 	umode_t mode;
 	long	nr_blocks;
 	long	nr_inodes;
+	struct hstate *hstate;
 };
 
 struct hugetlbfs_sb_info {
@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info {
 	long	max_inodes;   /* inodes allowed */
 	long	free_inodes;  /* inodes free */
 	spinlock_t	stat_lock;
+	struct hstate *hstate;
 };
 
 
@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx;
 
 #define default_hstate (hstates[default_hstate_idx])
 
-static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+static inline struct hstate *hstate_inode(struct inode *i)
 {
-	return &default_hstate;
+	struct hugetlbfs_sb_info *hsb;
+	hsb = HUGETLBFS_SB(i->i_sb);
+	return hsb->hstate;
 }
 
 static inline struct hstate *hstate_file(struct file *f)
 {
-	return &default_hstate;
+	return hstate_inode(f->f_dentry->d_inode);
 }
 
-static inline struct hstate *hstate_inode(struct inode *i)
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 {
-	return &default_hstate;
+	return hstate_file(vma->vm_file);
 }
 
 static inline unsigned long huge_page_size(struct hstate *h)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 82378d44a0c..4cf7a90e914 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1439,19 +1439,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	/*
-	 * It is undesirable to test vma->vm_file as it should be non-null
-	 * for valid hugetlb area. However, vm_file will be NULL in the error
-	 * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
-	 * do_mmap_pgoff() nullifies vma->vm_file before calling this function
-	 * to clean up. Since no pte has actually been setup, it is safe to
-	 * do nothing in this case.
-	 */
-	if (vma->vm_file) {
-		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
-		__unmap_hugepage_range(vma, start, end, ref_page);
-		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
-	}
+	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+	__unmap_hugepage_range(vma, start, end, ref_page);
+	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
 }
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index c1c1d6d8c22..02fc6b1047b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -901,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			}
 
 			if (unlikely(is_vm_hugetlb_page(vma))) {
-				unmap_hugepage_range(vma, start, end, NULL);
-				zap_work -= (end - start) /
+				/*
+				 * It is undesirable to test vma->vm_file as it
+				 * should be non-null for valid hugetlb area.
+				 * However, vm_file will be NULL in the error
+				 * cleanup path of do_mmap_pgoff. When
+				 * hugetlbfs ->mmap method fails,
+				 * do_mmap_pgoff() nullifies vma->vm_file
+				 * before calling this function to clean up.
+				 * Since no pte has actually been setup, it is
+				 * safe to do nothing in this case.
+				 */
+				if (vma->vm_file) {
+					unmap_hugepage_range(vma, start, end, NULL);
+					zap_work -= (end - start) /
 					pages_per_huge_page(hstate_vma(vma));
+				}
+
 				start = end;
 			} else
 				start = unmap_page_range(*tlbp, vma,
-- 
GitLab


From a3437870160cf2caaac6bdd76c7377a5a4145a8c Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 23 Jul 2008 21:27:44 -0700
Subject: [PATCH 167/853] hugetlb: new sysfs interface

Provide new hugepages user APIs that are more suited to multiple hstates
in sysfs.  There is a new directory, /sys/kernel/hugepages.  Underneath
that directory there will be a directory per-supported hugepage size,
e.g.:

/sys/kernel/hugepages/hugepages-64kB
/sys/kernel/hugepages/hugepages-16384kB
/sys/kernel/hugepages/hugepages-16777216kB

corresponding to 64k, 16m and 16g respectively.  Within each
hugepages-size directory there are a number of files, corresponding to the
tracked counters in the hstate, e.g.:

/sys/kernel/hugepages/hugepages-64kB/nr_hugepages
/sys/kernel/hugepages/hugepages-64kB/nr_overcommit_hugepages
/sys/kernel/hugepages/hugepages-64kB/free_hugepages
/sys/kernel/hugepages/hugepages-64kB/resv_hugepages
/sys/kernel/hugepages/hugepages-64kB/surplus_hugepages

Of these files, the first two are read-write and the latter three are
read-only.  The size of the hugepage being manipulated is trivially
deducible from the enclosing directory and is always expressed in kB (to
match meminfo).

[dave@linux.vnet.ibm.com: fix build]
[nacc@us.ibm.com: hugetlb: hang off of /sys/kernel/mm rather than /sys/kernel]
[nacc@us.ibm.com: hugetlb: remove CONFIG_SYSFS dependency]
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../ABI/testing/sysfs-kernel-mm-hugepages     |  15 +
 Documentation/vm/hugetlbpage.txt              |  23 ++
 include/linux/hugetlb.h                       |   2 +
 mm/hugetlb.c                                  | 288 ++++++++++++++----
 4 files changed, 262 insertions(+), 66 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm-hugepages

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
new file mode 100644
index 00000000000..e21c00571cf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -0,0 +1,15 @@
+What:		/sys/kernel/mm/hugepages/
+Date:		June 2008
+Contact:	Nishanth Aravamudan <nacc@us.ibm.com>, hugetlb maintainers
+Description:
+		/sys/kernel/mm/hugepages/ contains a number of subdirectories
+		of the form hugepages-<size>kB, where <size> is the page size
+		of the hugepages supported by the kernel/CPU combination.
+
+		Under these directories are a number of files:
+			nr_hugepages
+			nr_overcommit_hugepages
+			free_hugepages
+			surplus_hugepages
+			resv_hugepages
+		See Documentation/vm/hugetlbpage.txt for details.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 3102b81bef8..8a5b5763f0f 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -95,6 +95,29 @@ this condition holds, however, no more surplus huge pages will be
 allowed on the system until one of the two sysctls are increased
 sufficiently, or the surplus huge pages go out of use and are freed.
 
+With support for multiple hugepage pools at run-time available, much of
+the hugepage userspace interface has been duplicated in sysfs. The above
+information applies to the default hugepage size (which will be
+controlled by the proc interfaces for backwards compatibility). The root
+hugepage control directory is
+
+	/sys/kernel/mm/hugepages
+
+For each hugepage size supported by the running kernel, a subdirectory
+will exist, of the form
+
+	hugepages-${size}kB
+
+Inside each of these directories, the same set of files will exist:
+
+	nr_hugepages
+	nr_overcommit_hugepages
+	free_hugepages
+	resv_hugepages
+	surplus_hugepages
+
+which function as described above for the default hugepage-sized case.
+
 If the user applications are going to request hugepages using mmap system
 call, then it is required that system administrator mount a file system of
 type hugetlbfs:
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ba9263e631b..58c0de32e7f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -164,6 +164,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 #ifdef CONFIG_HUGETLB_PAGE
 
+#define HSTATE_NAME_LEN 32
 /* Defines one hugetlb page size */
 struct hstate {
 	int hugetlb_next_nid;
@@ -179,6 +180,7 @@ struct hstate {
 	unsigned int nr_huge_pages_node[MAX_NUMNODES];
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+	char name[HSTATE_NAME_LEN];
 };
 
 void __init hugetlb_add_hstate(unsigned order);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4cf7a90e914..bb49ce5d006 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -14,6 +14,7 @@
 #include <linux/mempolicy.h>
 #include <linux/cpuset.h>
 #include <linux/mutex.h>
+#include <linux/sysfs.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -942,72 +943,6 @@ static void __init report_hugepages(void)
 	}
 }
 
-static int __init hugetlb_init(void)
-{
-	BUILD_BUG_ON(HPAGE_SHIFT == 0);
-
-	if (!size_to_hstate(HPAGE_SIZE)) {
-		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
-		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
-	}
-	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
-
-	hugetlb_init_hstates();
-
-	report_hugepages();
-
-	return 0;
-}
-module_init(hugetlb_init);
-
-/* Should be called on processing a hugepagesz=... option */
-void __init hugetlb_add_hstate(unsigned order)
-{
-	struct hstate *h;
-	if (size_to_hstate(PAGE_SIZE << order)) {
-		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
-		return;
-	}
-	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
-	BUG_ON(order == 0);
-	h = &hstates[max_hstate++];
-	h->order = order;
-	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
-	hugetlb_init_one_hstate(h);
-	parsed_hstate = h;
-}
-
-static int __init hugetlb_setup(char *s)
-{
-	unsigned long *mhp;
-
-	/*
-	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
-	 * so this hugepages= parameter goes to the "default hstate".
-	 */
-	if (!max_hstate)
-		mhp = &default_hstate_max_huge_pages;
-	else
-		mhp = &parsed_hstate->max_huge_pages;
-
-	if (sscanf(s, "%lu", mhp) <= 0)
-		*mhp = 0;
-
-	return 1;
-}
-__setup("hugepages=", hugetlb_setup);
-
-static unsigned int cpuset_mems_nr(unsigned int *array)
-{
-	int node;
-	unsigned int nr = 0;
-
-	for_each_node_mask(node, cpuset_current_mems_allowed)
-		nr += array[node];
-
-	return nr;
-}
-
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(struct hstate *h, unsigned long count)
@@ -1105,6 +1040,227 @@ out:
 	return ret;
 }
 
+#define HSTATE_ATTR_RO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+#define HSTATE_ATTR(_name) \
+	static struct kobj_attribute _name##_attr = \
+		__ATTR(_name, 0644, _name##_show, _name##_store)
+
+static struct kobject *hugepages_kobj;
+static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
+
+static struct hstate *kobj_to_hstate(struct kobject *kobj)
+{
+	int i;
+	for (i = 0; i < HUGE_MAX_HSTATE; i++)
+		if (hstate_kobjs[i] == kobj)
+			return &hstates[i];
+	BUG();
+	return NULL;
+}
+
+static ssize_t nr_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->nr_huge_pages);
+}
+static ssize_t nr_hugepages_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int err;
+	unsigned long input;
+	struct hstate *h = kobj_to_hstate(kobj);
+
+	err = strict_strtoul(buf, 10, &input);
+	if (err)
+		return 0;
+
+	h->max_huge_pages = set_max_huge_pages(h, input);
+
+	return count;
+}
+HSTATE_ATTR(nr_hugepages);
+
+static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
+}
+static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int err;
+	unsigned long input;
+	struct hstate *h = kobj_to_hstate(kobj);
+
+	err = strict_strtoul(buf, 10, &input);
+	if (err)
+		return 0;
+
+	spin_lock(&hugetlb_lock);
+	h->nr_overcommit_huge_pages = input;
+	spin_unlock(&hugetlb_lock);
+
+	return count;
+}
+HSTATE_ATTR(nr_overcommit_hugepages);
+
+static ssize_t free_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->free_huge_pages);
+}
+HSTATE_ATTR_RO(free_hugepages);
+
+static ssize_t resv_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->resv_huge_pages);
+}
+HSTATE_ATTR_RO(resv_hugepages);
+
+static ssize_t surplus_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->surplus_huge_pages);
+}
+HSTATE_ATTR_RO(surplus_hugepages);
+
+static struct attribute *hstate_attrs[] = {
+	&nr_hugepages_attr.attr,
+	&nr_overcommit_hugepages_attr.attr,
+	&free_hugepages_attr.attr,
+	&resv_hugepages_attr.attr,
+	&surplus_hugepages_attr.attr,
+	NULL,
+};
+
+static struct attribute_group hstate_attr_group = {
+	.attrs = hstate_attrs,
+};
+
+static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
+{
+	int retval;
+
+	hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
+							hugepages_kobj);
+	if (!hstate_kobjs[h - hstates])
+		return -ENOMEM;
+
+	retval = sysfs_create_group(hstate_kobjs[h - hstates],
+							&hstate_attr_group);
+	if (retval)
+		kobject_put(hstate_kobjs[h - hstates]);
+
+	return retval;
+}
+
+static void __init hugetlb_sysfs_init(void)
+{
+	struct hstate *h;
+	int err;
+
+	hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj);
+	if (!hugepages_kobj)
+		return;
+
+	for_each_hstate(h) {
+		err = hugetlb_sysfs_add_hstate(h);
+		if (err)
+			printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
+								h->name);
+	}
+}
+
+static void __exit hugetlb_exit(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		kobject_put(hstate_kobjs[h - hstates]);
+	}
+
+	kobject_put(hugepages_kobj);
+}
+module_exit(hugetlb_exit);
+
+static int __init hugetlb_init(void)
+{
+	BUILD_BUG_ON(HPAGE_SHIFT == 0);
+
+	if (!size_to_hstate(HPAGE_SIZE)) {
+		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
+		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
+	}
+	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
+
+	hugetlb_init_hstates();
+
+	report_hugepages();
+
+	hugetlb_sysfs_init();
+
+	return 0;
+}
+module_init(hugetlb_init);
+
+/* Should be called on processing a hugepagesz=... option */
+void __init hugetlb_add_hstate(unsigned order)
+{
+	struct hstate *h;
+	if (size_to_hstate(PAGE_SIZE << order)) {
+		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
+		return;
+	}
+	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
+	BUG_ON(order == 0);
+	h = &hstates[max_hstate++];
+	h->order = order;
+	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
+					huge_page_size(h)/1024);
+	hugetlb_init_one_hstate(h);
+	parsed_hstate = h;
+}
+
+static int __init hugetlb_setup(char *s)
+{
+	unsigned long *mhp;
+
+	/*
+	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
+	 * so this hugepages= parameter goes to the "default hstate".
+	 */
+	if (!max_hstate)
+		mhp = &default_hstate_max_huge_pages;
+	else
+		mhp = &parsed_hstate->max_huge_pages;
+
+	if (sscanf(s, "%lu", mhp) <= 0)
+		*mhp = 0;
+
+	return 1;
+}
+__setup("hugepages=", hugetlb_setup);
+
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+	int node;
+	unsigned int nr = 0;
+
+	for_each_node_mask(node, cpuset_current_mems_allowed)
+		nr += array[node];
+
+	return nr;
+}
+
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 			   struct file *file, void __user *buffer,
 			   size_t *length, loff_t *ppos)
-- 
GitLab


From 5ced66c901f1cf0b684feb15c2cd8b126e263d07 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:45 -0700
Subject: [PATCH 168/853] hugetlb: abstract numa round robin selection

Need this as a separate function for a future patch.

No behaviour change.

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb49ce5d006..5e620e25cf0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -565,6 +565,27 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 	return page;
 }
 
+/*
+ * Use a helper variable to find the next node and then
+ * copy it back to hugetlb_next_nid afterwards:
+ * otherwise there's a window in which a racer might
+ * pass invalid nid MAX_NUMNODES to alloc_pages_node.
+ * But we don't need to use a spin_lock here: it really
+ * doesn't matter if occasionally a racer chooses the
+ * same nid as we do.  Move nid forward in the mask even
+ * if we just successfully allocated a hugepage so that
+ * the next caller gets hugepages on the next node.
+ */
+static int hstate_next_node(struct hstate *h)
+{
+	int next_nid;
+	next_nid = next_node(h->hugetlb_next_nid, node_online_map);
+	if (next_nid == MAX_NUMNODES)
+		next_nid = first_node(node_online_map);
+	h->hugetlb_next_nid = next_nid;
+	return next_nid;
+}
+
 static int alloc_fresh_huge_page(struct hstate *h)
 {
 	struct page *page;
@@ -578,21 +599,7 @@ static int alloc_fresh_huge_page(struct hstate *h)
 		page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
 		if (page)
 			ret = 1;
-		/*
-		 * Use a helper variable to find the next node and then
-		 * copy it back to hugetlb_next_nid afterwards:
-		 * otherwise there's a window in which a racer might
-		 * pass invalid nid MAX_NUMNODES to alloc_pages_node.
-		 * But we don't need to use a spin_lock here: it really
-		 * doesn't matter if occasionally a racer chooses the
-		 * same nid as we do.  Move nid forward in the mask even
-		 * if we just successfully allocated a hugepage so that
-		 * the next caller gets hugepages on the next node.
-		 */
-		next_nid = next_node(h->hugetlb_next_nid, node_online_map);
-		if (next_nid == MAX_NUMNODES)
-			next_nid = first_node(node_online_map);
-		h->hugetlb_next_nid = next_nid;
+		next_nid = hstate_next_node(h);
 	} while (!page && h->hugetlb_next_nid != start_nid);
 
 	if (ret)
-- 
GitLab


From b54bbf7b81170f03597c17dd0b559e3006bc9868 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:45 -0700
Subject: [PATCH 169/853] mm: introduce non panic alloc_bootmem

Straightforward variant of the existing __alloc_bootmem_node, only it
does not panic when the allocation fails.  It is needed by a subsequent
patch that allocates giant hugepages at boot -- we don't want to panic
if we can't allocate as many as the user asked for.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  4 ++++
 mm/bootmem.c            | 12 ++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index dd8fee6c46d..f352c5f125b 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -89,6 +89,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat,
 				  unsigned long size,
 				  unsigned long align,
 				  unsigned long goal);
+extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
 extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 				       unsigned long freepfn,
 				       unsigned long startpfn,
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 4bc6ae2fbaa..9ac972535ff 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -578,6 +578,18 @@ void * __init alloc_bootmem_section(unsigned long size,
 }
 #endif
 
+void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
+				   unsigned long align, unsigned long goal)
+{
+	void *ptr;
+
+	ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
+	if (ptr)
+		return ptr;
+
+	return __alloc_bootmem_nopanic(size, align, goal);
+}
+
 #ifndef ARCH_LOW_ADDRESS_LIMIT
 #define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
 #endif
-- 
GitLab


From 01ad1c0827db5b3695c53e296dbb2c1da16a0911 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:46 -0700
Subject: [PATCH 170/853] mm: export prep_compound_page to mm

hugetlb will need to get compound pages from bootmem to handle the case of
them being greater than or equal to MAX_ORDER.  Export the constructor
function needed for this.

Acked-by: Adam Litke <agl@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/internal.h   | 2 ++
 mm/page_alloc.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/internal.h b/mm/internal.h
index 858ad01864d..1f43f741697 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -16,6 +16,8 @@
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
+extern void prep_compound_page(struct page *page, unsigned long order);
+
 static inline void set_page_count(struct page *page, int v)
 {
 	atomic_set(&page->_count, v);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e43aae135b3..eaa86671ebb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -264,7 +264,7 @@ static void free_compound_page(struct page *page)
 	__free_pages_ok(page, compound_order(page));
 }
 
-static void prep_compound_page(struct page *page, unsigned long order)
+void prep_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
-- 
GitLab


From aa888a74977a8f2120ae9332376e179c39a6b07d Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:47 -0700
Subject: [PATCH 171/853] hugetlb: support larger than MAX_ORDER

This is needed on x86-64 to handle GB pages in hugetlbfs, because it is
not practical to enlarge MAX_ORDER to 1GB.

Instead the 1GB pages are only allocated at boot using the bootmem
allocator using the hugepages=...  option.

These 1G bootmem pages are never freed.  In theory it would be possible to
implement that with some complications, but since it would be a one-way
street (>= MAX_ORDER pages cannot be allocated later) I decided not to
currently.

The >= MAX_ORDER code is not ifdef'ed per architecture.  It is not very
big and the ifdef ugliness did not seem to be worth it.

Known problems: /proc/meminfo and "free" do not display the memory
allocated for gb pages in "Total".  This is a little confusing for the
user.

Acked-by: Andrew Hastings <abh@cray.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5e620e25cf0..1a6fe87555b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -14,6 +14,7 @@
 #include <linux/mempolicy.h>
 #include <linux/cpuset.h>
 #include <linux/mutex.h>
+#include <linux/bootmem.h>
 #include <linux/sysfs.h>
 
 #include <asm/page.h>
@@ -489,7 +490,7 @@ static void free_huge_page(struct page *page)
 	INIT_LIST_HEAD(&page->lru);
 
 	spin_lock(&hugetlb_lock);
-	if (h->surplus_huge_pages_node[nid]) {
+	if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
 		update_and_free_page(h, page);
 		h->surplus_huge_pages--;
 		h->surplus_huge_pages_node[nid]--;
@@ -550,6 +551,9 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
 	struct page *page;
 
+	if (h->order >= MAX_ORDER)
+		return NULL;
+
 	page = alloc_pages_node(nid,
 		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
 						__GFP_REPEAT|__GFP_NOWARN,
@@ -616,6 +620,9 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 	struct page *page;
 	unsigned int nid;
 
+	if (h->order >= MAX_ORDER)
+		return NULL;
+
 	/*
 	 * Assume we will successfully allocate the surplus page to
 	 * prevent racing processes from causing the surplus to exceed
@@ -792,6 +799,10 @@ static void return_unused_surplus_pages(struct hstate *h,
 	/* Uncommit the reservation */
 	h->resv_huge_pages -= unused_resv_pages;
 
+	/* Cannot return gigantic pages currently */
+	if (h->order >= MAX_ORDER)
+		return;
+
 	nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
 
 	while (remaining_iterations-- && nr_pages) {
@@ -913,6 +924,63 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 }
 
+static __initdata LIST_HEAD(huge_boot_pages);
+
+struct huge_bootmem_page {
+	struct list_head list;
+	struct hstate *hstate;
+};
+
+static int __init alloc_bootmem_huge_page(struct hstate *h)
+{
+	struct huge_bootmem_page *m;
+	int nr_nodes = nodes_weight(node_online_map);
+
+	while (nr_nodes) {
+		void *addr;
+
+		addr = __alloc_bootmem_node_nopanic(
+				NODE_DATA(h->hugetlb_next_nid),
+				huge_page_size(h), huge_page_size(h), 0);
+
+		if (addr) {
+			/*
+			 * Use the beginning of the huge page to store the
+			 * huge_bootmem_page struct (until gather_bootmem
+			 * puts them into the mem_map).
+			 */
+			m = addr;
+			if (m)
+				goto found;
+		}
+		hstate_next_node(h);
+		nr_nodes--;
+	}
+	return 0;
+
+found:
+	BUG_ON((unsigned long)virt_to_phys(m) & (huge_page_size(h) - 1));
+	/* Put them into a private list first because mem_map is not up yet */
+	list_add(&m->list, &huge_boot_pages);
+	m->hstate = h;
+	return 1;
+}
+
+/* Put bootmem huge pages into the standard lists after mem_map is up */
+static void __init gather_bootmem_prealloc(void)
+{
+	struct huge_bootmem_page *m;
+
+	list_for_each_entry(m, &huge_boot_pages, list) {
+		struct page *page = virt_to_page(m);
+		struct hstate *h = m->hstate;
+		__ClearPageReserved(page);
+		WARN_ON(page_count(page) != 1);
+		prep_compound_page(page, h->order);
+		prep_new_huge_page(h, page, page_to_nid(page));
+	}
+}
+
 static void __init hugetlb_init_one_hstate(struct hstate *h)
 {
 	unsigned long i;
@@ -923,7 +991,10 @@ static void __init hugetlb_init_one_hstate(struct hstate *h)
 	h->hugetlb_next_nid = first_node(node_online_map);
 
 	for (i = 0; i < h->max_huge_pages; ++i) {
-		if (!alloc_fresh_huge_page(h))
+		if (h->order >= MAX_ORDER) {
+			if (!alloc_bootmem_huge_page(h))
+				break;
+		} else if (!alloc_fresh_huge_page(h))
 			break;
 	}
 	h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
@@ -956,6 +1027,9 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
 {
 	int i;
 
+	if (h->order >= MAX_ORDER)
+		return;
+
 	for (i = 0; i < MAX_NUMNODES; ++i) {
 		struct page *page, *next;
 		struct list_head *freel = &h->hugepage_freelists[i];
@@ -982,6 +1056,9 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 {
 	unsigned long min_count, ret;
 
+	if (h->order >= MAX_ORDER)
+		return h->max_huge_pages;
+
 	/*
 	 * Increase the pool size
 	 * First take pages out of surplus state.  Then make up the
@@ -1210,6 +1287,8 @@ static int __init hugetlb_init(void)
 
 	hugetlb_init_hstates();
 
+	gather_bootmem_prealloc();
+
 	report_hugepages();
 
 	hugetlb_sysfs_init();
-- 
GitLab


From 8faa8b077b2cdc4e4646842fe50b07840955a013 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:48 -0700
Subject: [PATCH 172/853] hugetlb: support boot allocate different sizes

Make some infrastructure changes to allow boot-time allocation of
different hugepage page sizes.

- move all basic hstate initialisation into hugetlb_add_hstate
- create a new function hugetlb_hstate_alloc_pages() to do the
  actual initial page allocations. Call this function early in
  order to allocate giant pages from bootmem.
- Check for multiple hugepages= parameters

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Acked-by: Andrew Hastings <abh@cray.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1a6fe87555b..243a8684d18 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -981,15 +981,10 @@ static void __init gather_bootmem_prealloc(void)
 	}
 }
 
-static void __init hugetlb_init_one_hstate(struct hstate *h)
+static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
 	unsigned long i;
 
-	for (i = 0; i < MAX_NUMNODES; ++i)
-		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
-
-	h->hugetlb_next_nid = first_node(node_online_map);
-
 	for (i = 0; i < h->max_huge_pages; ++i) {
 		if (h->order >= MAX_ORDER) {
 			if (!alloc_bootmem_huge_page(h))
@@ -997,7 +992,7 @@ static void __init hugetlb_init_one_hstate(struct hstate *h)
 		} else if (!alloc_fresh_huge_page(h))
 			break;
 	}
-	h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
+	h->max_huge_pages = i;
 }
 
 static void __init hugetlb_init_hstates(void)
@@ -1005,7 +1000,9 @@ static void __init hugetlb_init_hstates(void)
 	struct hstate *h;
 
 	for_each_hstate(h) {
-		hugetlb_init_one_hstate(h);
+		/* oversize hugepages were init'ed in early boot */
+		if (h->order < MAX_ORDER)
+			hugetlb_hstate_alloc_pages(h);
 	}
 }
 
@@ -1301,6 +1298,8 @@ module_init(hugetlb_init);
 void __init hugetlb_add_hstate(unsigned order)
 {
 	struct hstate *h;
+	unsigned long i;
+
 	if (size_to_hstate(PAGE_SIZE << order)) {
 		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
 		return;
@@ -1310,15 +1309,21 @@ void __init hugetlb_add_hstate(unsigned order)
 	h = &hstates[max_hstate++];
 	h->order = order;
 	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+	h->nr_huge_pages = 0;
+	h->free_huge_pages = 0;
+	for (i = 0; i < MAX_NUMNODES; ++i)
+		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
+	h->hugetlb_next_nid = first_node(node_online_map);
 	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
 					huge_page_size(h)/1024);
-	hugetlb_init_one_hstate(h);
+
 	parsed_hstate = h;
 }
 
 static int __init hugetlb_setup(char *s)
 {
 	unsigned long *mhp;
+	static unsigned long *last_mhp;
 
 	/*
 	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
@@ -1329,9 +1334,25 @@ static int __init hugetlb_setup(char *s)
 	else
 		mhp = &parsed_hstate->max_huge_pages;
 
+	if (mhp == last_mhp) {
+		printk(KERN_WARNING "hugepages= specified twice without "
+			"interleaving hugepagesz=, ignoring\n");
+		return 1;
+	}
+
 	if (sscanf(s, "%lu", mhp) <= 0)
 		*mhp = 0;
 
+	/*
+	 * Global state is always initialized later in hugetlb_init.
+	 * But we need to allocate >= MAX_ORDER hstates here early to still
+	 * use the bootmem allocator.
+	 */
+	if (max_hstate && parsed_hstate->order >= MAX_ORDER)
+		hugetlb_hstate_alloc_pages(parsed_hstate);
+
+	last_mhp = mhp;
+
 	return 1;
 }
 __setup("hugepages=", hugetlb_setup);
-- 
GitLab


From 4abd32dbab201c3ced0b0af12accea77cd9eeffc Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:49 -0700
Subject: [PATCH 173/853] hugetlb: printk cleanup

- Reword sentence to clarify meaning with multiple options
- Add support for using GB prefixes for the page size
- Add extra printk to delayed > MAX_ORDER allocation code

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 243a8684d18..0c74c14dd2f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1006,15 +1006,27 @@ static void __init hugetlb_init_hstates(void)
 	}
 }
 
+static char * __init memfmt(char *buf, unsigned long n)
+{
+	if (n >= (1UL << 30))
+		sprintf(buf, "%lu GB", n >> 30);
+	else if (n >= (1UL << 20))
+		sprintf(buf, "%lu MB", n >> 20);
+	else
+		sprintf(buf, "%lu KB", n >> 10);
+	return buf;
+}
+
 static void __init report_hugepages(void)
 {
 	struct hstate *h;
 
 	for_each_hstate(h) {
-		printk(KERN_INFO "Total HugeTLB memory allocated, "
-				"%ld %dMB pages\n",
-				h->free_huge_pages,
-				1 << (h->order + PAGE_SHIFT - 20));
+		char buf[32];
+		printk(KERN_INFO "HugeTLB registered %s page size, "
+				 "pre-allocated %ld pages\n",
+			memfmt(buf, huge_page_size(h)),
+			h->free_huge_pages);
 	}
 }
 
-- 
GitLab


From ceb868796181dc95ea01a110e123afd391639873 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:50 -0700
Subject: [PATCH 174/853] hugetlb: introduce pud_huge

Straightforward extensions for huge pages located in the PUD instead of
PMDs.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/hugetlbpage.c    |  6 ++++++
 arch/powerpc/mm/hugetlbpage.c |  5 +++++
 arch/s390/mm/hugetlbpage.c    |  5 +++++
 arch/sh/mm/hugetlbpage.c      |  5 +++++
 arch/sparc64/mm/hugetlbpage.c |  5 +++++
 arch/x86/mm/hugetlbpage.c     | 25 ++++++++++++++++++++++++-
 include/linux/hugetlb.h       |  5 +++++
 mm/hugetlb.c                  |  9 +++++++++
 mm/memory.c                   | 15 +++++++++++----
 9 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 6170f097d25..c45fc7f5a97 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -107,6 +107,12 @@ int pmd_huge(pmd_t pmd)
 {
 	return 0;
 }
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
 {
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index c94dc71af98..63db7adce71 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -369,6 +369,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 9162dc84f77..f28c43d2f61 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -120,6 +120,11 @@ int pmd_huge(pmd_t pmd)
 	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmdp, int write)
 {
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 2f9dbe0ef4a..9304117039c 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -79,6 +79,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmd, int write)
 {
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 1307b23f6a7..f27d10369e0 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -295,6 +295,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmd, int write)
 {
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 52476fde899..a4789e87a31 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -189,6 +189,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
@@ -209,6 +214,11 @@ int pmd_huge(pmd_t pmd)
 	return !!(pmd_val(pmd) & _PAGE_PSE);
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
@@ -217,9 +227,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 
 	page = pte_page(*(pte_t *)pmd);
 	if (page)
-		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
 	return page;
 }
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pud);
+	if (page)
+		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
 #endif
 
 /* x86_64 also uses this file */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 58c0de32e7f..b2c17f62cac 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -50,7 +50,10 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 				pmd_t *pmd, int write);
+struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
+				pud_t *pud, int write);
 int pmd_huge(pmd_t pmd);
+int pud_huge(pud_t pmd);
 void hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
@@ -78,8 +81,10 @@ static inline unsigned long hugetlb_total_pages(void)
 #define hugetlb_report_meminfo(buf)		0
 #define hugetlb_report_node_meminfo(n, buf)	0
 #define follow_huge_pmd(mm, addr, pmd, write)	NULL
+#define follow_huge_pud(mm, addr, pud, write)	NULL
 #define prepare_hugepage_range(file, addr, len)	(-EINVAL)
 #define pmd_huge(x)	0
+#define pud_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0c74c14dd2f..107c1ce223c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1996,6 +1996,15 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return ret;
 }
 
+/* Can be overriden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+	       pud_t *pud, int write)
+{
+	BUG();
+	return NULL;
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i,
diff --git a/mm/memory.c b/mm/memory.c
index 02fc6b1047b..262e3eb6601 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -998,19 +998,24 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		goto no_page_table;
 
 	pud = pud_offset(pgd, address);
-	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+	if (pud_none(*pud))
+		goto no_page_table;
+	if (pud_huge(*pud)) {
+		BUG_ON(flags & FOLL_GET);
+		page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
+		goto out;
+	}
+	if (unlikely(pud_bad(*pud)))
 		goto no_page_table;
-	
+
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		goto no_page_table;
-
 	if (pmd_huge(*pmd)) {
 		BUG_ON(flags & FOLL_GET);
 		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
 		goto out;
 	}
-
 	if (unlikely(pmd_bad(*pmd)))
 		goto no_page_table;
 
@@ -1567,6 +1572,8 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 	unsigned long next;
 	int err;
 
+	BUG_ON(pud_huge(*pud));
+
 	pmd = pmd_alloc(mm, pud, addr);
 	if (!pmd)
 		return -ENOMEM;
-- 
GitLab


From 39c11e6c05b7fedbf7ed4df3908b25f622d56204 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:50 -0700
Subject: [PATCH 175/853] x86: support GB hugepages on 64-bit

Acked-by: Adam Litke <agl@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/hugetlbpage.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index a4789e87a31..b7a65a07af0 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -134,9 +134,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
 	if (pud) {
-		if (pud_none(*pud))
-			huge_pmd_share(mm, addr, pud);
-		pte = (pte_t *) pmd_alloc(mm, pud, addr);
+		if (sz == PUD_SIZE) {
+			pte = (pte_t *)pud;
+		} else {
+			BUG_ON(sz != PMD_SIZE);
+			if (pud_none(*pud))
+				huge_pmd_share(mm, addr, pud);
+			pte = (pte_t *) pmd_alloc(mm, pud, addr);
+		}
 	}
 	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 
@@ -152,8 +157,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	pgd = pgd_offset(mm, addr);
 	if (pgd_present(*pgd)) {
 		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud))
+		if (pud_present(*pud)) {
+			if (pud_large(*pud))
+				return (pte_t *)pud;
 			pmd = pmd_offset(pud, addr);
+		}
 	}
 	return (pte_t *) pmd;
 }
@@ -216,7 +224,7 @@ int pmd_huge(pmd_t pmd)
 
 int pud_huge(pud_t pud)
 {
-	return 0;
+	return !!(pud_val(pud) & _PAGE_PSE);
 }
 
 struct page *
@@ -252,6 +260,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 		unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
 {
+	struct hstate *h = hstate_file(file);
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
@@ -264,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 	}
 
 full_search:
-	addr = ALIGN(start_addr, HPAGE_SIZE);
+	addr = ALIGN(start_addr, huge_page_size(h));
 
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 		/* At this point:  (!vma || addr < vma->vm_end). */
@@ -286,7 +295,7 @@ full_search:
 		}
 		if (addr + mm->cached_hole_size < vma->vm_start)
 		        mm->cached_hole_size = vma->vm_start - addr;
-		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+		addr = ALIGN(vma->vm_end, huge_page_size(h));
 	}
 }
 
@@ -294,6 +303,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 		unsigned long addr0, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
 {
+	struct hstate *h = hstate_file(file);
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev_vma;
 	unsigned long base = mm->mmap_base, addr = addr0;
@@ -314,7 +324,7 @@ try_again:
 		goto fail;
 
 	/* either no address requested or cant fit in requested address hole */
-	addr = (mm->free_area_cache - len) & HPAGE_MASK;
+	addr = (mm->free_area_cache - len) & huge_page_mask(h);
 	do {
 		/*
 		 * Lookup failure means no vma is above this address,
@@ -345,7 +355,7 @@ try_again:
 		        largest_hole = vma->vm_start - addr;
 
 		/* try just below the current vma->vm_start */
-		addr = (vma->vm_start - len) & HPAGE_MASK;
+		addr = (vma->vm_start - len) & huge_page_mask(h);
 	} while (len <= vma->vm_start);
 
 fail:
@@ -383,10 +393,11 @@ unsigned long
 hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
+	struct hstate *h = hstate_file(file);
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 
-	if (len & ~HPAGE_MASK)
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
 	if (len > TASK_SIZE)
 		return -ENOMEM;
@@ -398,7 +409,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	}
 
 	if (addr) {
-		addr = ALIGN(addr, HPAGE_SIZE);
+		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
 		    (!vma || addr + len <= vma->vm_start))
-- 
GitLab


From b4718e628dbf68a2dee23b5709e2aa3190409c56 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:51 -0700
Subject: [PATCH 176/853] x86: add hugepagesz option on 64-bit

Add a hugepagesz=... option to x86-64, similar to IA64, PPC etc.

This finally allows GB pages to be selected for hugetlbfs on x86, now that
all the infrastructure is in place.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt | 11 +++++++++--
 arch/x86/mm/hugetlbpage.c           | 17 +++++++++++++++++
 include/asm-x86/page.h              |  2 ++
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5e20ccb5a73..d55fd88fd0a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -774,8 +774,15 @@ and is between 256 and 4096 characters. It is defined in the file
 	hisax=		[HW,ISDN]
 			See Documentation/isdn/README.HiSax.
 
-	hugepages=	[HW,X86-32,IA-64] Maximal number of HugeTLB pages.
-	hugepagesz=	[HW,IA-64,PPC] The size of the HugeTLB pages.
+	hugepages=	[HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
+	hugepagesz=	[HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
+			On x86 this option can be specified multiple times
+			interleaved with hugepages= to reserve huge pages
+			of different sizes. Valid pages sizes on x86-64
+			are 2M (when the CPU supports "pse") and 1G (when the
+			CPU supports the "pdpe1gb" cpuinfo flag)
+			Note that 1GB pages can only be allocated at boot time
+			using hugepages= and not freed afterwards.
 
 	i8042.direct	[HW] Put keyboard port into non-translated mode
 	i8042.dumbkbd	[HW] Pretend that controller can only read data from
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index b7a65a07af0..8f307d914c2 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -425,3 +425,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
 
+#ifdef CONFIG_X86_64
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long ps = memparse(opt, &opt);
+	if (ps == PMD_SIZE) {
+		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+	} else if (ps == PUD_SIZE && cpu_has_gbpages) {
+		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	} else {
+		printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
+			ps >> 20);
+		return 0;
+	}
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+#endif
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 6c846228948..6e02098b160 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -32,6 +32,8 @@
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 
+#define HUGE_MAX_HSTATE 2
+
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
-- 
GitLab


From e11bfbfcb08ef4223b863799897c19cdf7c5bc00 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 23 Jul 2008 21:27:52 -0700
Subject: [PATCH 177/853] hugetlb: override default huge page size

Allow configurations in which the default huge page size differs from the
traditional HPAGE_SIZE.  The default huge page size is the one represented
in the legacy /proc ABIs, used for SHM, and defaulted to when mounting
hugetlbfs filesystems.

This is implemented with a new kernel option default_hugepagesz=, which
defaults to HPAGE_SIZE if not specified.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  7 +++++++
 mm/hugetlb.c                        | 23 +++++++++++++++++------
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d55fd88fd0a..30278e9e521 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -783,6 +783,13 @@ and is between 256 and 4096 characters. It is defined in the file
 			CPU supports the "pdpe1gb" cpuinfo flag)
 			Note that 1GB pages can only be allocated at boot time
 			using hugepages= and not freed afterwards.
+	default_hugepagesz=
+			[same as hugepagesz=] The size of the default
+			HugeTLB page size. This is the size represented by
+			the legacy /proc/ hugepages APIs, used for SHM, and
+			default size when mounting hugetlbfs filesystems.
+			Defaults to the default architecture's huge page size
+			if not specified.
 
 	i8042.direct	[HW] Put keyboard port into non-translated mode
 	i8042.dumbkbd	[HW] Pretend that controller can only read data from
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 107c1ce223c..2a2f6e86940 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -34,6 +34,7 @@ struct hstate hstates[HUGE_MAX_HSTATE];
 /* for command line parsing */
 static struct hstate * __initdata parsed_hstate;
 static unsigned long __initdata default_hstate_max_huge_pages;
+static unsigned long __initdata default_hstate_size;
 
 #define for_each_hstate(h) \
 	for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++)
@@ -1288,11 +1289,14 @@ static int __init hugetlb_init(void)
 {
 	BUILD_BUG_ON(HPAGE_SHIFT == 0);
 
-	if (!size_to_hstate(HPAGE_SIZE)) {
-		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
-		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
+	if (!size_to_hstate(default_hstate_size)) {
+		default_hstate_size = HPAGE_SIZE;
+		if (!size_to_hstate(default_hstate_size))
+			hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
 	}
-	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
+	default_hstate_idx = size_to_hstate(default_hstate_size) - hstates;
+	if (default_hstate_max_huge_pages)
+		default_hstate.max_huge_pages = default_hstate_max_huge_pages;
 
 	hugetlb_init_hstates();
 
@@ -1332,7 +1336,7 @@ void __init hugetlb_add_hstate(unsigned order)
 	parsed_hstate = h;
 }
 
-static int __init hugetlb_setup(char *s)
+static int __init hugetlb_nrpages_setup(char *s)
 {
 	unsigned long *mhp;
 	static unsigned long *last_mhp;
@@ -1367,7 +1371,14 @@ static int __init hugetlb_setup(char *s)
 
 	return 1;
 }
-__setup("hugepages=", hugetlb_setup);
+__setup("hugepages=", hugetlb_nrpages_setup);
+
+static int __init hugetlb_default_setup(char *s)
+{
+	default_hstate_size = memparse(s, &s);
+	return 1;
+}
+__setup("default_hugepagesz=", hugetlb_default_setup);
 
 static unsigned int cpuset_mems_nr(unsigned int *array)
 {
-- 
GitLab


From 53ba51d21d6e048424ab8aadfebdb1f25ae07b60 Mon Sep 17 00:00:00 2001
From: Jon Tollefson <kniht@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 21:27:52 -0700
Subject: [PATCH 178/853] hugetlb: allow arch overridden hugepage allocation

Allow alloc_bootmem_huge_page() to be overridden by architectures that
can't always use bootmem.  This requires huge_boot_pages to be available
for use by this function.

This is required for powerpc 16G pages, which have to be reserved prior to
boot-time.  The locations of these pages are indicated in the device tree.

Acked-by: Adam Litke <agl@us.ibm.com>
Signed-off-by: Jon Tollefson <kniht@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 10 ++++++++++
 mm/hugetlb.c            | 11 +++--------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b2c17f62cac..9a71d4cc88c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -39,6 +39,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
+extern struct list_head huge_boot_pages;
 
 /* arch callbacks */
 
@@ -188,6 +189,14 @@ struct hstate {
 	char name[HSTATE_NAME_LEN];
 };
 
+struct huge_bootmem_page {
+	struct list_head list;
+	struct hstate *hstate;
+};
+
+/* arch callback */
+int __init alloc_bootmem_huge_page(struct hstate *h);
+
 void __init hugetlb_add_hstate(unsigned order);
 struct hstate *size_to_hstate(unsigned long size);
 
@@ -256,6 +265,7 @@ static inline struct hstate *page_hstate(struct page *page)
 
 #else
 struct hstate {};
+#define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
 #define hstate_vma(v) NULL
 #define hstate_inode(i) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2a2f6e86940..3e1506b808a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,6 +31,8 @@ static int max_hstate;
 unsigned int default_hstate_idx;
 struct hstate hstates[HUGE_MAX_HSTATE];
 
+__initdata LIST_HEAD(huge_boot_pages);
+
 /* for command line parsing */
 static struct hstate * __initdata parsed_hstate;
 static unsigned long __initdata default_hstate_max_huge_pages;
@@ -925,14 +927,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 }
 
-static __initdata LIST_HEAD(huge_boot_pages);
-
-struct huge_bootmem_page {
-	struct list_head list;
-	struct hstate *hstate;
-};
-
-static int __init alloc_bootmem_huge_page(struct hstate *h)
+__attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h)
 {
 	struct huge_bootmem_page *m;
 	int nr_nodes = nodes_weight(node_online_map);
-- 
GitLab


From ec4b2c0c8312d1118c2acd00c89988ecf955d5cc Mon Sep 17 00:00:00 2001
From: Jon Tollefson <kniht@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 21:27:53 -0700
Subject: [PATCH 179/853] powerpc: function to allocate gigantic hugepages

The 16G page locations have been saved during early boot in an array.  The
alloc_bootmem_huge_page() function adds a page from here to the
huge_boot_pages list.

Acked-by: Adam Litke <agl@us.ibm.com>
Signed-off-by: Jon Tollefson <kniht@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/mm/hugetlbpage.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 63db7adce71..5df82186fc9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -29,6 +29,12 @@
 
 #define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
+#define MAX_NUMBER_GPAGES	1024
+
+/* Tracks the 16G pages after the device tree is scanned and before the
+ * huge_boot_pages list is ready.  */
+static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
+static unsigned nr_gpages;
 
 unsigned int hugepte_shift;
 #define PTRS_PER_HUGEPTE	(1 << hugepte_shift)
@@ -104,6 +110,21 @@ pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr)
 }
 #endif
 
+/* Moves the gigantic page addresses from the temporary list to the
+ * huge_boot_pages list.  */
+int alloc_bootmem_huge_page(struct hstate *h)
+{
+	struct huge_bootmem_page *m;
+	if (nr_gpages == 0)
+		return 0;
+	m = phys_to_virt(gpage_freearray[--nr_gpages]);
+	gpage_freearray[nr_gpages] = 0;
+	list_add(&m->list, &huge_boot_pages);
+	m->hstate = h;
+	return 1;
+}
+
+
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-- 
GitLab


From 658013e93eb70494f7300bc90457b09a807232a4 Mon Sep 17 00:00:00 2001
From: Jon Tollefson <kniht@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 21:27:54 -0700
Subject: [PATCH 180/853] powerpc: scan device tree for gigantic pages

The 16G huge pages have to be reserved in the HMC prior to boot.  The
locations of the pages are placed in the device tree.  This patch adds code
to scan the device tree during very early boot and save these page
locations until hugetlbfs is ready for them.

Acked-by: Adam Litke <agl@us.ibm.com>
Signed-off-by: Jon Tollefson <kniht@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/mm/hash_utils_64.c  | 44 +++++++++++++++++++++++++++++++-
 arch/powerpc/mm/hugetlbpage.c    | 16 ++++++++++++
 include/asm-powerpc/mmu-hash64.h |  2 ++
 3 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8d3b58ebd38..ae4c717243a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -68,6 +68,7 @@
 
 #define KB (1024)
 #define MB (1024*KB)
+#define GB (1024L*MB)
 
 /*
  * Note:  pte   --> Linux PTE
@@ -329,6 +330,44 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 	return 0;
 }
 
+/* Scan for 16G memory blocks that have been set aside for huge pages
+ * and reserve those blocks for 16G huge pages.
+ */
+static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
+					const char *uname, int depth,
+					void *data) {
+	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	unsigned long *addr_prop;
+	u32 *page_count_prop;
+	unsigned int expected_pages;
+	long unsigned int phys_addr;
+	long unsigned int block_size;
+
+	/* We are scanning "memory" nodes only */
+	if (type == NULL || strcmp(type, "memory") != 0)
+		return 0;
+
+	/* This property is the log base 2 of the number of virtual pages that
+	 * will represent this memory block. */
+	page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
+	if (page_count_prop == NULL)
+		return 0;
+	expected_pages = (1 << page_count_prop[0]);
+	addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
+	if (addr_prop == NULL)
+		return 0;
+	phys_addr = addr_prop[0];
+	block_size = addr_prop[1];
+	if (block_size != (16 * GB))
+		return 0;
+	printk(KERN_INFO "Huge page(16GB) memory: "
+			"addr = 0x%lX size = 0x%lX pages = %d\n",
+			phys_addr, block_size, expected_pages);
+	lmb_reserve(phys_addr, block_size * expected_pages);
+	add_gpage(phys_addr, block_size, expected_pages);
+	return 0;
+}
+
 static void __init htab_init_page_sizes(void)
 {
 	int rc;
@@ -418,7 +457,10 @@ static void __init htab_init_page_sizes(void)
 	       );
 
 #ifdef CONFIG_HUGETLB_PAGE
-	/* Init large page size. Currently, we pick 16M or 1M depending
+	/* Reserve 16G huge page memory sections for huge pages */
+	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+
+/* Init large page size. Currently, we pick 16M or 1M depending
 	 * on what is available
 	 */
 	if (mmu_psize_defs[MMU_PAGE_16M].shift)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 5df82186fc9..e2a650a9e53 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -110,6 +110,22 @@ pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr)
 }
 #endif
 
+/* Build list of addresses of gigantic pages.  This function is used in early
+ * boot before the buddy or bootmem allocator is setup.
+ */
+void add_gpage(unsigned long addr, unsigned long page_size,
+	unsigned long number_of_pages)
+{
+	if (!addr)
+		return;
+	while (number_of_pages > 0) {
+		gpage_freearray[nr_gpages] = addr;
+		nr_gpages++;
+		number_of_pages--;
+		addr += page_size;
+	}
+}
+
 /* Moves the gigantic page addresses from the temporary list to the
  * huge_boot_pages list.  */
 int alloc_bootmem_huge_page(struct hstate *h)
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index d1dc16afb11..b61181aa774 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -281,6 +281,8 @@ extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     unsigned long pstart, unsigned long mode,
 			     int psize, int ssize);
 extern void set_huge_psize(int psize);
+extern void add_gpage(unsigned long addr, unsigned long page_size,
+			  unsigned long number_of_pages);
 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
 
 extern void htab_initialize(void);
-- 
GitLab


From 91224346aa8c1cdaa660300a98e0b074a3a95030 Mon Sep 17 00:00:00 2001
From: Jon Tollefson <kniht@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 21:27:55 -0700
Subject: [PATCH 181/853] powerpc: define support for 16G hugepages

The huge page size is defined for 16G pages.  If a hugepagesz of 16G is
specified at boot-time then it becomes the huge page size instead of the
default 16M.

The change in pgtable-64k.h is to the macro pte_iterate_hashed_subpages to
make the increment to va (the 1 being shifted) be a long so that it is not
shifted to 0.  Otherwise it would create an infinite loop when the shift
value is for a 16G page (when base page size is 64K).

Signed-off-by: Jon Tollefson <kniht@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/mm/hugetlbpage.c     | 62 ++++++++++++++++++++++---------
 include/asm-powerpc/pgtable-64k.h |  2 +-
 2 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e2a650a9e53..19b1a9cec6d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,8 +24,9 @@
 #include <asm/cputable.h>
 #include <asm/spu.h>
 
-#define HPAGE_SHIFT_64K	16
-#define HPAGE_SHIFT_16M	24
+#define PAGE_SHIFT_64K	16
+#define PAGE_SHIFT_16M	24
+#define PAGE_SHIFT_16G	34
 
 #define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
@@ -95,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 static inline
 pmd_t *hpmd_offset(pud_t *pud, unsigned long addr)
 {
-	if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+	if (HPAGE_SHIFT == PAGE_SHIFT_64K)
 		return pmd_offset(pud, addr);
 	else
 		return (pmd_t *) pud;
@@ -103,7 +104,7 @@ pmd_t *hpmd_offset(pud_t *pud, unsigned long addr)
 static inline
 pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr)
 {
-	if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+	if (HPAGE_SHIFT == PAGE_SHIFT_64K)
 		return pmd_alloc(mm, pud, addr);
 	else
 		return (pmd_t *) pud;
@@ -260,7 +261,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 			continue;
 		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
 #else
-		if (HPAGE_SHIFT == HPAGE_SHIFT_64K) {
+		if (HPAGE_SHIFT == PAGE_SHIFT_64K) {
 			if (pud_none_or_clear_bad(pud))
 				continue;
 			hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
@@ -592,20 +593,40 @@ void set_huge_psize(int psize)
 {
 	/* Check that it is a page size supported by the hardware and
 	 * that it fits within pagetable limits. */
-	if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT &&
+	if (mmu_psize_defs[psize].shift &&
+		mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
 		(mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
-			mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) {
+		 mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
+		 mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
+		/* Return if huge page size is the same as the
+		 * base page size. */
+		if (mmu_psize_defs[psize].shift == PAGE_SHIFT)
+			return;
+
 		HPAGE_SHIFT = mmu_psize_defs[psize].shift;
 		mmu_huge_psize = psize;
-#ifdef CONFIG_PPC_64K_PAGES
-		hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
-#else
-		if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
-			hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
-		else
-			hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT);
-#endif
 
+		switch (HPAGE_SHIFT) {
+		case PAGE_SHIFT_64K:
+		    /* We only allow 64k hpages with 4k base page,
+		     * which was checked above, and always put them
+		     * at the PMD */
+		    hugepte_shift = PMD_SHIFT;
+		    break;
+		case PAGE_SHIFT_16M:
+		    /* 16M pages can be at two different levels
+		     * of pagestables based on base page size */
+		    if (PAGE_SHIFT == PAGE_SHIFT_64K)
+			    hugepte_shift = PMD_SHIFT;
+		    else /* 4k base page */
+			    hugepte_shift = PUD_SHIFT;
+		    break;
+		case PAGE_SHIFT_16G:
+		    /* 16G pages are always at PGD level */
+		    hugepte_shift = PGDIR_SHIFT;
+		    break;
+		}
+		hugepte_shift -= HPAGE_SHIFT;
 	} else
 		HPAGE_SHIFT = 0;
 }
@@ -621,17 +642,22 @@ static int __init hugepage_setup_sz(char *str)
 	shift = __ffs(size);
 	switch (shift) {
 #ifndef CONFIG_PPC_64K_PAGES
-	case HPAGE_SHIFT_64K:
+	case PAGE_SHIFT_64K:
 		mmu_psize = MMU_PAGE_64K;
 		break;
 #endif
-	case HPAGE_SHIFT_16M:
+	case PAGE_SHIFT_16M:
 		mmu_psize = MMU_PAGE_16M;
 		break;
+	case PAGE_SHIFT_16G:
+		mmu_psize = MMU_PAGE_16G;
+		break;
 	}
 
-	if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift)
+	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) {
 		set_huge_psize(mmu_psize);
+		hugetlb_add_hstate(shift - PAGE_SHIFT);
+	}
 	else
 		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
 
diff --git a/include/asm-powerpc/pgtable-64k.h b/include/asm-powerpc/pgtable-64k.h
index c5007712473..7e54adb3559 100644
--- a/include/asm-powerpc/pgtable-64k.h
+++ b/include/asm-powerpc/pgtable-64k.h
@@ -138,7 +138,7 @@ static inline struct subpage_prot_table *pgd_subpage_prot(pgd_t *pgd)
                 unsigned __split = (psize == MMU_PAGE_4K ||                 \
 				    psize == MMU_PAGE_64K_AP);              \
                 shift = mmu_psize_defs[psize].shift;                        \
-	        for (index = 0; va < __end; index++, va += (1 << shift)) {  \
+		for (index = 0; va < __end; index++, va += (1L << shift)) { \
 		        if (!__split || __rpte_sub_valid(rpte, index)) do { \
 
 #define pte_iterate_hashed_end() } while(0); } } while(0)
-- 
GitLab


From f4a67cceee4a6f5ed38011a698c9e34747270ae5 Mon Sep 17 00:00:00 2001
From: Jon Tollefson <kniht@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 21:27:55 -0700
Subject: [PATCH 182/853] fs: check for statfs overflow

Adds a check for an overflow in the filesystem size so that if someone is
checking with statfs() on a 16G blocksize hugetlbfs in a 32bit binary,
it will report back EOVERFLOW instead of a size of 0.

Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Jon Tollefson <kniht@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat.c | 8 ++++----
 fs/open.c   | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index ed43e17a5dc..b4660428176 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,8 +197,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
 {
 	
 	if (sizeof ubuf->f_blocks == 4) {
-		if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) &
-		    0xffffffff00000000ULL)
+		if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
+		     kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
 			return -EOVERFLOW;
 		/* f_files and f_ffree may be -1; it's okay
 		 * to stuff that into 32 bits */
@@ -271,8 +271,8 @@ out:
 static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
 {
 	if (sizeof ubuf->f_blocks == 4) {
-		if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) &
-		    0xffffffff00000000ULL)
+		if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
+		     kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
 			return -EOVERFLOW;
 		/* f_files and f_ffree may be -1; it's okay
 		 * to stuff that into 32 bits */
diff --git a/fs/open.c b/fs/open.c
index a99ad09c319..bb98d2fe809 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -64,7 +64,8 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
 		memcpy(buf, &st, sizeof(st));
 	else {
 		if (sizeof buf->f_blocks == 4) {
-			if ((st.f_blocks | st.f_bfree | st.f_bavail) &
+			if ((st.f_blocks | st.f_bfree | st.f_bavail |
+			     st.f_bsize | st.f_frsize) &
 			    0xffffffff00000000ULL)
 				return -EOVERFLOW;
 			/*
-- 
GitLab


From 0d9ea75443dc7e37843e656b8ebc947a6d16d618 Mon Sep 17 00:00:00 2001
From: Jon Tollefson <kniht@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 21:27:56 -0700
Subject: [PATCH 183/853] powerpc: support multiple hugepage sizes

Instead of using the variable mmu_huge_psize to keep track of the huge
page size we use an array of MMU_PAGE_* values.  For each supported huge
page size we need to know the hugepte_shift value and have a
pgtable_cache.  The hstate or an mmu_huge_psizes index is passed to
functions so that they know which huge page size they should use.

The hugepage sizes 16M and 64K are set up (if available on the hardware) so
that they don't have to be set on the boot cmd line in order to use them.
The number of 16G pages has to be specified at boot-time though (e.g.
hugepagesz=16G hugepages=5).

Signed-off-by: Jon Tollefson <kniht@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  10 +-
 arch/powerpc/mm/hash_utils_64.c     |   9 +-
 arch/powerpc/mm/hugetlbpage.c       | 274 ++++++++++++++++++----------
 arch/powerpc/mm/init_64.c           |   8 +-
 arch/powerpc/mm/tlb_64.c            |   2 +-
 include/asm-powerpc/hugetlb.h       |   5 +-
 include/asm-powerpc/mmu-hash64.h    |   4 +-
 include/asm-powerpc/page_64.h       |   1 +
 include/asm-powerpc/pgalloc-64.h    |   4 +-
 9 files changed, 199 insertions(+), 118 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 30278e9e521..01a2992b575 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -776,11 +776,11 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	hugepages=	[HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
 	hugepagesz=	[HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
-			On x86 this option can be specified multiple times
-			interleaved with hugepages= to reserve huge pages
-			of different sizes. Valid pages sizes on x86-64
-			are 2M (when the CPU supports "pse") and 1G (when the
-			CPU supports the "pdpe1gb" cpuinfo flag)
+			On x86-64 and powerpc, this option can be specified
+			multiple times interleaved with hugepages= to reserve
+			huge pages of different sizes. Valid pages sizes on
+			x86-64 are 2M (when the CPU supports "pse") and 1G
+			(when the CPU supports the "pdpe1gb" cpuinfo flag)
 			Note that 1GB pages can only be allocated at boot time
 			using hugepages= and not freed afterwards.
 	default_hugepagesz=
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ae4c717243a..5ce5a4dcd00 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -103,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M;
 int mmu_highuser_ssize = MMU_SEGSIZE_256M;
 u16 mmu_slb_size = 64;
 #ifdef CONFIG_HUGETLB_PAGE
-int mmu_huge_psize = MMU_PAGE_16M;
 unsigned int HPAGE_SHIFT;
 #endif
 #ifdef CONFIG_PPC_64K_PAGES
@@ -460,15 +459,15 @@ static void __init htab_init_page_sizes(void)
 	/* Reserve 16G huge page memory sections for huge pages */
 	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
 
-/* Init large page size. Currently, we pick 16M or 1M depending
+/* Set default large page size. Currently, we pick 16M or 1M depending
 	 * on what is available
 	 */
 	if (mmu_psize_defs[MMU_PAGE_16M].shift)
-		set_huge_psize(MMU_PAGE_16M);
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
 	/* With 4k/4level pagetables, we can't (for now) cope with a
 	 * huge page size < PMD_SIZE */
 	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
-		set_huge_psize(MMU_PAGE_1M);
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
 #endif /* CONFIG_HUGETLB_PAGE */
 }
 
@@ -889,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 
 #ifdef CONFIG_HUGETLB_PAGE
 	/* Handle hugepage regions */
-	if (HPAGE_SHIFT && psize == mmu_huge_psize) {
+	if (HPAGE_SHIFT && mmu_huge_psizes[psize]) {
 		DBG_LOW(" -> huge page !\n");
 		return hash_huge_page(mm, access, ea, vsid, local, trap);
 	}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 19b1a9cec6d..fb42c4dd321 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -37,15 +37,30 @@
 static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
 static unsigned nr_gpages;
 
-unsigned int hugepte_shift;
-#define PTRS_PER_HUGEPTE	(1 << hugepte_shift)
-#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << hugepte_shift)
+/* Array of valid huge page sizes - non-zero value(hugepte_shift) is
+ * stored for the huge page sizes that are valid.
+ */
+unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
+
+#define hugepte_shift			mmu_huge_psizes
+#define PTRS_PER_HUGEPTE(psize)		(1 << hugepte_shift[psize])
+#define HUGEPTE_TABLE_SIZE(psize)	(sizeof(pte_t) << hugepte_shift[psize])
+
+#define HUGEPD_SHIFT(psize)		(mmu_psize_to_shift(psize) \
+						+ hugepte_shift[psize])
+#define HUGEPD_SIZE(psize)		(1UL << HUGEPD_SHIFT(psize))
+#define HUGEPD_MASK(psize)		(~(HUGEPD_SIZE(psize)-1))
 
-#define HUGEPD_SHIFT		(HPAGE_SHIFT + hugepte_shift)
-#define HUGEPD_SIZE		(1UL << HUGEPD_SHIFT)
-#define HUGEPD_MASK		(~(HUGEPD_SIZE-1))
+/* Subtract one from array size because we don't need a cache for 4K since
+ * is not a huge page size */
+#define huge_pgtable_cache(psize)	(pgtable_cache[HUGEPTE_CACHE_NUM \
+							+ psize-1])
+#define HUGEPTE_CACHE_NAME(psize)	(huge_pgtable_cache_name[psize])
 
-#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])
+static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
+	"unused_4K", "hugepte_cache_64K", "unused_64K_AP",
+	"hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G"
+};
 
 /* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
  * will choke on pointers to hugepte tables, which is handy for
@@ -56,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t;
 
 #define hugepd_none(hpd)	((hpd).pd == 0)
 
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+	switch (shift) {
+#ifndef CONFIG_PPC_64K_PAGES
+	case PAGE_SHIFT_64K:
+	    return MMU_PAGE_64K;
+#endif
+	case PAGE_SHIFT_16M:
+	    return MMU_PAGE_16M;
+	case PAGE_SHIFT_16G:
+	    return MMU_PAGE_16G;
+	}
+	return -1;
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+	if (mmu_psize_defs[mmu_psize].shift)
+		return mmu_psize_defs[mmu_psize].shift;
+	BUG();
+}
+
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
 	BUG_ON(!(hpd.pd & HUGEPD_OK));
 	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
 }
 
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+				    struct hstate *hstate)
 {
-	unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
+	unsigned int shift = huge_page_shift(hstate);
+	int psize = shift_to_mmu_psize(shift);
+	unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
 	pte_t *dir = hugepd_page(*hpdp);
 
 	return dir + idx;
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-			   unsigned long address)
+			   unsigned long address, unsigned int psize)
 {
-	pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
+	pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize),
 				      GFP_KERNEL|__GFP_REPEAT);
 
 	if (! new)
@@ -81,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 
 	spin_lock(&mm->page_table_lock);
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(huge_pgtable_cache, new);
+		kmem_cache_free(huge_pgtable_cache(psize), new);
 	else
 		hpdp->pd = (unsigned long)new | HUGEPD_OK;
 	spin_unlock(&mm->page_table_lock);
@@ -90,21 +130,22 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 
 /* Base page size affects how we walk hugetlb page tables */
 #ifdef CONFIG_PPC_64K_PAGES
-#define hpmd_offset(pud, addr)		pmd_offset(pud, addr)
-#define hpmd_alloc(mm, pud, addr)	pmd_alloc(mm, pud, addr)
+#define hpmd_offset(pud, addr, h)	pmd_offset(pud, addr)
+#define hpmd_alloc(mm, pud, addr, h)	pmd_alloc(mm, pud, addr)
 #else
 static inline
-pmd_t *hpmd_offset(pud_t *pud, unsigned long addr)
+pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
 {
-	if (HPAGE_SHIFT == PAGE_SHIFT_64K)
+	if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
 		return pmd_offset(pud, addr);
 	else
 		return (pmd_t *) pud;
 }
 static inline
-pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr)
+pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
+		  struct hstate *hstate)
 {
-	if (HPAGE_SHIFT == PAGE_SHIFT_64K)
+	if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
 		return pmd_alloc(mm, pud, addr);
 	else
 		return (pmd_t *) pud;
@@ -128,8 +169,9 @@ void add_gpage(unsigned long addr, unsigned long page_size,
 }
 
 /* Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.  */
-int alloc_bootmem_huge_page(struct hstate *h)
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
 {
 	struct huge_bootmem_page *m;
 	if (nr_gpages == 0)
@@ -137,7 +179,7 @@ int alloc_bootmem_huge_page(struct hstate *h)
 	m = phys_to_virt(gpage_freearray[--nr_gpages]);
 	gpage_freearray[nr_gpages] = 0;
 	list_add(&m->list, &huge_boot_pages);
-	m->hstate = h;
+	m->hstate = hstate;
 	return 1;
 }
 
@@ -149,17 +191,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	pud_t *pu;
 	pmd_t *pm;
 
-	BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
+	unsigned int psize;
+	unsigned int shift;
+	unsigned long sz;
+	struct hstate *hstate;
+	psize = get_slice_psize(mm, addr);
+	shift = mmu_psize_to_shift(psize);
+	sz = ((1UL) << shift);
+	hstate = size_to_hstate(sz);
 
-	addr &= HPAGE_MASK;
+	addr &= hstate->mask;
 
 	pg = pgd_offset(mm, addr);
 	if (!pgd_none(*pg)) {
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
-			pm = hpmd_offset(pu, addr);
+			pm = hpmd_offset(pu, addr, hstate);
 			if (!pmd_none(*pm))
-				return hugepte_offset((hugepd_t *)pm, addr);
+				return hugepte_offset((hugepd_t *)pm, addr,
+						      hstate);
 		}
 	}
 
@@ -173,16 +223,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	pud_t *pu;
 	pmd_t *pm;
 	hugepd_t *hpdp = NULL;
+	struct hstate *hstate;
+	unsigned int psize;
+	hstate = size_to_hstate(sz);
 
-	BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
+	psize = get_slice_psize(mm, addr);
+	BUG_ON(!mmu_huge_psizes[psize]);
 
-	addr &= HPAGE_MASK;
+	addr &= hstate->mask;
 
 	pg = pgd_offset(mm, addr);
 	pu = pud_alloc(mm, pg, addr);
 
 	if (pu) {
-		pm = hpmd_alloc(mm, pu, addr);
+		pm = hpmd_alloc(mm, pu, addr, hstate);
 		if (pm)
 			hpdp = (hugepd_t *)pm;
 	}
@@ -190,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	if (! hpdp)
 		return NULL;
 
-	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
 		return NULL;
 
-	return hugepte_offset(hpdp, addr);
+	return hugepte_offset(hpdp, addr, hstate);
 }
 
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
@@ -201,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
+static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
+			       unsigned int psize)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
 
 	hpdp->pd = 0;
 	tlb->need_flush = 1;
-	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
+	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
+						 HUGEPTE_CACHE_NUM+psize-1,
 						 PGF_CACHENUM_MASK));
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling)
+				   unsigned long floor, unsigned long ceiling,
+				   unsigned int psize)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -225,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd))
 			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pmd);
+		free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
 	} while (pmd++, addr = next, addr != end);
 
 	start &= PUD_MASK;
@@ -251,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	pud_t *pud;
 	unsigned long next;
 	unsigned long start;
+	unsigned int shift;
+	unsigned int psize = get_slice_psize(tlb->mm, addr);
+	shift = mmu_psize_to_shift(psize);
 
 	start = addr;
 	pud = pud_offset(pgd, addr);
@@ -259,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 #ifdef CONFIG_PPC_64K_PAGES
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling,
+				       psize);
 #else
-		if (HPAGE_SHIFT == PAGE_SHIFT_64K) {
+		if (shift == PAGE_SHIFT_64K) {
 			if (pud_none_or_clear_bad(pud))
 				continue;
-			hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+					       ceiling, psize);
 		} else {
 			if (pud_none(*pud))
 				continue;
-			free_hugepte_range(tlb, (hugepd_t *)pud);
+			free_hugepte_range(tlb, (hugepd_t *)pud, psize);
 		}
 #endif
 	} while (pud++, addr = next, addr != end);
@@ -336,27 +398,29 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	 * now has no other vmas using it, so can be freed, we don't
 	 * bother to round floor or end up - the tests don't need that.
 	 */
+	unsigned int psize = get_slice_psize(tlb->mm, addr);
 
-	addr &= HUGEPD_MASK;
+	addr &= HUGEPD_MASK(psize);
 	if (addr < floor) {
-		addr += HUGEPD_SIZE;
+		addr += HUGEPD_SIZE(psize);
 		if (!addr)
 			return;
 	}
 	if (ceiling) {
-		ceiling &= HUGEPD_MASK;
+		ceiling &= HUGEPD_MASK(psize);
 		if (!ceiling)
 			return;
 	}
 	if (end - 1 > ceiling - 1)
-		end -= HUGEPD_SIZE;
+		end -= HUGEPD_SIZE(psize);
 	if (addr > end - 1)
 		return;
 
 	start = addr;
 	pgd = pgd_offset(tlb->mm, addr);
 	do {
-		BUG_ON(get_slice_psize(tlb->mm, addr) != mmu_huge_psize);
+		psize = get_slice_psize(tlb->mm, addr);
+		BUG_ON(!mmu_huge_psizes[psize]);
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
@@ -373,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		 * necessary anymore if we make hpte_need_flush() get the
 		 * page size from the slices
 		 */
-		pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1);
+		unsigned int psize = get_slice_psize(mm, addr);
+		unsigned int shift = mmu_psize_to_shift(psize);
+		unsigned long sz = ((1UL) << shift);
+		struct hstate *hstate = size_to_hstate(sz);
+		pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
 	}
 	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 }
@@ -390,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;
+	unsigned int mmu_psize = get_slice_psize(mm, address);
 
-	if (get_slice_psize(mm, address) != mmu_huge_psize)
+	/* Verify it is a huge page else bail. */
+	if (!mmu_huge_psizes[mmu_psize])
 		return ERR_PTR(-EINVAL);
 
 	ptep = huge_pte_offset(mm, address);
 	page = pte_page(*ptep);
-	if (page)
-		page += (address % HPAGE_SIZE) / PAGE_SIZE;
+	if (page) {
+		unsigned int shift = mmu_psize_to_shift(mmu_psize);
+		unsigned long sz = ((1UL) << shift);
+		page += (address % sz) / PAGE_SIZE;
+	}
 
 	return page;
 }
@@ -425,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
-	return slice_get_unmapped_area(addr, len, flags,
-				       mmu_huge_psize, 1, 0);
+	struct hstate *hstate = hstate_file(file);
+	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
+	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
 }
 
 /*
  * Called by asm hashtable.S for doing lazy icache flush
  */
 static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
-						  pte_t pte, int trap)
+					pte_t pte, int trap, unsigned long sz)
 {
 	struct page *page;
 	int i;
@@ -446,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
 	/* page is dirty */
 	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
 		if (trap == 0x400) {
-			for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
+			for (i = 0; i < (sz / PAGE_SIZE); i++)
 				__flush_dcache_icache(page_address(page+i));
 			set_bit(PG_arch_1, &page->flags);
 		} else {
@@ -462,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 {
 	pte_t *ptep;
 	unsigned long old_pte, new_pte;
-	unsigned long va, rflags, pa;
+	unsigned long va, rflags, pa, sz;
 	long slot;
 	int err = 1;
 	int ssize = user_segment_size(ea);
+	unsigned int mmu_psize;
+	int shift;
+	mmu_psize = get_slice_psize(mm, ea);
 
+	if (!mmu_huge_psizes[mmu_psize])
+		goto out;
 	ptep = huge_pte_offset(mm, ea);
 
 	/* Search the Linux page table for a match with va */
@@ -509,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	rflags = 0x2 | (!(new_pte & _PAGE_RW));
  	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
 	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+	shift = mmu_psize_to_shift(mmu_psize);
+	sz = ((1UL) << shift);
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
 		/* No CPU has hugepages but lacks no execute, so we
 		 * don't need to worry about that case */
 		rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
-						       trap);
+						       trap, sz);
 
 	/* Check if pte already has an hpte (case 2) */
 	if (unlikely(old_pte & _PAGE_HASHPTE)) {
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
-		hash = hpt_hash(va, HPAGE_SHIFT, ssize);
+		hash = hpt_hash(va, shift, ssize);
 		if (old_pte & _PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += (old_pte & _PAGE_F_GIX) >> 12;
 
-		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize,
+		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
 					 ssize, local) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
 	if (likely(!(old_pte & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize);
+		unsigned long hash = hpt_hash(va, shift, ssize);
 		unsigned long hpte_group;
 
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
@@ -553,7 +634,7 @@ repeat:
 
 		/* Insert into the hash table, primary slot */
 		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
-					  mmu_huge_psize, ssize);
+					  mmu_psize, ssize);
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
@@ -561,7 +642,7 @@ repeat:
 				      HPTES_PER_GROUP) & ~0x7UL; 
 			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
 						  HPTE_V_SECONDARY,
-						  mmu_huge_psize, ssize);
+						  mmu_psize, ssize);
 			if (slot == -1) {
 				if (mftb() & 0x1)
 					hpte_group = ((hash & htab_hash_mask) *
@@ -598,66 +679,50 @@ void set_huge_psize(int psize)
 		(mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
 		 mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
 		 mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
-		/* Return if huge page size is the same as the
-		 * base page size. */
-		if (mmu_psize_defs[psize].shift == PAGE_SHIFT)
+		/* Return if huge page size has already been setup or is the
+		 * same as the base page size. */
+		if (mmu_huge_psizes[psize] ||
+		   mmu_psize_defs[psize].shift == PAGE_SHIFT)
 			return;
+		hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
 
-		HPAGE_SHIFT = mmu_psize_defs[psize].shift;
-		mmu_huge_psize = psize;
-
-		switch (HPAGE_SHIFT) {
+		switch (mmu_psize_defs[psize].shift) {
 		case PAGE_SHIFT_64K:
 		    /* We only allow 64k hpages with 4k base page,
 		     * which was checked above, and always put them
 		     * at the PMD */
-		    hugepte_shift = PMD_SHIFT;
+		    hugepte_shift[psize] = PMD_SHIFT;
 		    break;
 		case PAGE_SHIFT_16M:
 		    /* 16M pages can be at two different levels
 		     * of pagestables based on base page size */
 		    if (PAGE_SHIFT == PAGE_SHIFT_64K)
-			    hugepte_shift = PMD_SHIFT;
+			    hugepte_shift[psize] = PMD_SHIFT;
 		    else /* 4k base page */
-			    hugepte_shift = PUD_SHIFT;
+			    hugepte_shift[psize] = PUD_SHIFT;
 		    break;
 		case PAGE_SHIFT_16G:
 		    /* 16G pages are always at PGD level */
-		    hugepte_shift = PGDIR_SHIFT;
+		    hugepte_shift[psize] = PGDIR_SHIFT;
 		    break;
 		}
-		hugepte_shift -= HPAGE_SHIFT;
+		hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
 	} else
-		HPAGE_SHIFT = 0;
+		hugepte_shift[psize] = 0;
 }
 
 static int __init hugepage_setup_sz(char *str)
 {
 	unsigned long long size;
-	int mmu_psize = -1;
+	int mmu_psize;
 	int shift;
 
 	size = memparse(str, &str);
 
 	shift = __ffs(size);
-	switch (shift) {
-#ifndef CONFIG_PPC_64K_PAGES
-	case PAGE_SHIFT_64K:
-		mmu_psize = MMU_PAGE_64K;
-		break;
-#endif
-	case PAGE_SHIFT_16M:
-		mmu_psize = MMU_PAGE_16M;
-		break;
-	case PAGE_SHIFT_16G:
-		mmu_psize = MMU_PAGE_16G;
-		break;
-	}
-
-	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) {
+	mmu_psize = shift_to_mmu_psize(shift);
+	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
 		set_huge_psize(mmu_psize);
-		hugetlb_add_hstate(shift - PAGE_SHIFT);
-	}
 	else
 		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
 
@@ -672,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr)
 
 static int __init hugetlbpage_init(void)
 {
+	unsigned int psize;
+
 	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
 		return -ENODEV;
-
-	huge_pgtable_cache = kmem_cache_create("hugepte_cache",
-					       HUGEPTE_TABLE_SIZE,
-					       HUGEPTE_TABLE_SIZE,
-					       0,
-					       zero_ctor);
-	if (! huge_pgtable_cache)
-		panic("hugetlbpage_init(): could not create hugepte cache\n");
+	/* Add supported huge page sizes.  Need to change HUGE_MAX_HSTATE
+	 * and adjust PTE_NONCACHE_NUM if the number of supported huge page
+	 * sizes changes.
+	 */
+	set_huge_psize(MMU_PAGE_16M);
+	set_huge_psize(MMU_PAGE_64K);
+	set_huge_psize(MMU_PAGE_16G);
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		if (mmu_huge_psizes[psize]) {
+			huge_pgtable_cache(psize) = kmem_cache_create(
+						HUGEPTE_CACHE_NAME(psize),
+						HUGEPTE_TABLE_SIZE(psize),
+						HUGEPTE_TABLE_SIZE(psize),
+						0,
+						zero_ctor);
+			if (!huge_pgtable_cache(psize))
+				panic("hugetlbpage_init(): could not create %s"\
+				      "\n", HUGEPTE_CACHE_NAME(psize));
+		}
+	}
 
 	return 0;
 }
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6ef63caca68..a41bc5aa204 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -153,10 +153,10 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
 };
 
 #ifdef CONFIG_HUGETLB_PAGE
-/* Hugepages need one extra cache, initialized in hugetlbpage.c.  We
- * can't put into the tables above, because HPAGE_SHIFT is not compile
- * time constant. */
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1];
+/* Hugepages need an extra cache per hugepagesize, initialized in
+ * hugetlbpage.c.  We can't put into the tables above, because HPAGE_SHIFT
+ * is not compile time constant. */
+struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];
 #else
 struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
 #endif
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index a01b5c608ff..409fcc7b63c 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -147,7 +147,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	 */
 	if (huge) {
 #ifdef CONFIG_HUGETLB_PAGE
-		psize = mmu_huge_psize;
+		psize = get_slice_psize(mm, addr);;
 #else
 		BUG();
 		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index ca37c4af27b..26f0d0ab27a 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -24,9 +24,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 static inline int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index b61181aa774..19c7a940349 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -194,9 +194,9 @@ extern int mmu_ci_restrictions;
 
 #ifdef CONFIG_HUGETLB_PAGE
 /*
- * The page size index of the huge pages for use by hugetlbfs
+ * The page size indexes of the huge pages for use by hugetlbfs
  */
-extern int mmu_huge_psize;
+extern unsigned int mmu_huge_psizes[MMU_PAGE_COUNT];
 
 #endif /* CONFIG_HUGETLB_PAGE */
 
diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h
index 02fd80710e9..043bfdfe4f7 100644
--- a/include/asm-powerpc/page_64.h
+++ b/include/asm-powerpc/page_64.h
@@ -90,6 +90,7 @@ extern unsigned int HPAGE_SHIFT;
 #define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+#define HUGE_MAX_HSTATE		3
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h
index 68980990f62..812a1d8f35c 100644
--- a/include/asm-powerpc/pgalloc-64.h
+++ b/include/asm-powerpc/pgalloc-64.h
@@ -22,7 +22,7 @@ extern struct kmem_cache *pgtable_cache[];
 #define PUD_CACHE_NUM		1
 #define PMD_CACHE_NUM		1
 #define HUGEPTE_CACHE_NUM	2
-#define PTE_NONCACHE_NUM	3  /* from GFP rather than kmem_cache */
+#define PTE_NONCACHE_NUM	7  /* from GFP rather than kmem_cache */
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
@@ -119,7 +119,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 	__free_page(ptepage);
 }
 
-#define PGF_CACHENUM_MASK	0x3
+#define PGF_CACHENUM_MASK	0x7
 
 typedef struct pgtable_free {
 	unsigned long val;
-- 
GitLab


From 7f09ca51e925ba62e9ebfd4979f093e97e38adeb Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:27:58 -0700
Subject: [PATCH 184/853] hugetlb: fix a hugepage reservation check for
 MAP_SHARED

When removing a huge page from the hugepage pool for a fault the system checks
to see if the mapping requires additional pages to be reserved, and if it does
whether there are any unreserved pages remaining.  If not, the allocation
fails without even attempting to get a page.  In order to determine whether to
apply this check we call vma_has_private_reserves() which tells us if this vma
is MAP_PRIVATE and is the owner.  This incorrectly triggers the remaining
reservation test for MAP_SHARED mappings which prevents allocation of the
final page in the pool even though it is reserved for this mapping.

In reality we only want to check this for MAP_PRIVATE mappings where the
process is not the original mapper.  Replace vma_has_private_reserves() with
vma_has_reserves() which indicates whether further reserves are required, and
update the caller.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3e1506b808a..8c20aed62b9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -342,13 +342,13 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 }
 
 /* Returns true if the VMA has associated reserve pages */
-static int vma_has_private_reserves(struct vm_area_struct *vma)
+static int vma_has_reserves(struct vm_area_struct *vma)
 {
 	if (vma->vm_flags & VM_SHARED)
-		return 0;
-	if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER))
-		return 0;
-	return 1;
+		return 1;
+	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+		return 1;
+	return 0;
 }
 
 static void clear_huge_page(struct page *page,
@@ -420,7 +420,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 	 * have no page reserves. This check ensures that reservations are
 	 * not "stolen". The child may still get SIGKILLed
 	 */
-	if (!vma_has_private_reserves(vma) &&
+	if (!vma_has_reserves(vma) &&
 			h->free_huge_pages - h->resv_huge_pages == 0)
 		return NULL;
 
-- 
GitLab


From 7251ff78b94c2a68d267623d09b32672b20662c1 Mon Sep 17 00:00:00 2001
From: Adam Litke <agl@us.ibm.com>
Date: Wed, 23 Jul 2008 21:27:59 -0700
Subject: [PATCH 185/853] hugetlb: quota is not freed for unused reserved
 private huge pages

With shared reservations (and now also with private reservations), we reserve
huge pages at mmap time.  We also account for the mapping against fs quota to
prevent a reservation from being preempted by quota exhaustion.

When testing with the libhugetlbfs test suite, I found a problem with quota
accounting.  FS quota for allocated pages is handled correctly but we are not
releasing quota for private pages that were reserved but never allocated.  Do
this in hugetlb_vm_op_close() at the same time as unused page reservations are
released.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Johannes Weiner <hannes@saeurebad.de>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Acked-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8c20aed62b9..41341c41419 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1552,8 +1552,10 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 
 		kref_put(&reservations->refs, resv_map_release);
 
-		if (reserve)
+		if (reserve) {
 			hugetlb_acct_memory(h, -reserve);
+			hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
+		}
 	}
 }
 
-- 
GitLab


From 223e8dc9249c9e15f6c8b638d73fcad78ccb0a88 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:00 -0700
Subject: [PATCH 186/853] bootmem: reorder code to match new bootmem structure

This only reorders functions so that further patches will be easier to
read.  No code changed.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  86 +++++-----
 mm/bootmem.c            | 356 ++++++++++++++++++++--------------------
 2 files changed, 222 insertions(+), 220 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index f352c5f125b..5000fd70b04 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -41,36 +41,62 @@ typedef struct bootmem_data {
 extern bootmem_data_t bootmem_node_data[];
 
 extern unsigned long bootmem_bootmap_pages(unsigned long);
+
+extern unsigned long init_bootmem_node(pg_data_t *pgdat,
+				       unsigned long freepfn,
+				       unsigned long startpfn,
+				       unsigned long endpfn);
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
+
+extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
+extern unsigned long free_all_bootmem(void);
+
+extern void free_bootmem_node(pg_data_t *pgdat,
+			      unsigned long addr,
+			      unsigned long size);
 extern void free_bootmem(unsigned long addr, unsigned long size);
-extern void *__alloc_bootmem(unsigned long size,
+
+/*
+ * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
+ * the architecture-specific code should honor this).
+ *
+ * If flags is 0, then the return value is always 0 (success). If
+ * flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the
+ * memory already was reserved.
+ */
+#define BOOTMEM_DEFAULT		0
+#define BOOTMEM_EXCLUSIVE	(1<<0)
+
+extern int reserve_bootmem_node(pg_data_t *pgdat,
+				 unsigned long physaddr,
+				 unsigned long size,
+				 int flags);
+#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
+extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
+#endif
+
+extern void *__alloc_bootmem_nopanic(unsigned long size,
 			     unsigned long align,
 			     unsigned long goal);
-extern void *__alloc_bootmem_nopanic(unsigned long size,
+extern void *__alloc_bootmem(unsigned long size,
 				     unsigned long align,
 				     unsigned long goal);
 extern void *__alloc_bootmem_low(unsigned long size,
 				 unsigned long align,
 				 unsigned long goal);
+extern void *__alloc_bootmem_node(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
+extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
 extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 				      unsigned long size,
 				      unsigned long align,
 				      unsigned long goal);
-
-/*
- * flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
- * the architecture-specific code should honor this)
- */
-#define BOOTMEM_DEFAULT		0
-#define BOOTMEM_EXCLUSIVE	(1<<0)
-
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-/*
- * If flags is 0, then the return value is always 0 (success). If
- * flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the
- * memory already was reserved.
- */
-extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
@@ -83,38 +109,16 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
 
 extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
 				   int flags);
-extern unsigned long free_all_bootmem(void);
-extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
-extern void *__alloc_bootmem_node(pg_data_t *pgdat,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal);
-extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal);
-extern unsigned long init_bootmem_node(pg_data_t *pgdat,
-				       unsigned long freepfn,
-				       unsigned long startpfn,
-				       unsigned long endpfn);
-extern int reserve_bootmem_node(pg_data_t *pgdat,
-				 unsigned long physaddr,
-				 unsigned long size,
-				 int flags);
-extern void free_bootmem_node(pg_data_t *pgdat,
-			      unsigned long addr,
-			      unsigned long size);
-extern void *alloc_bootmem_section(unsigned long size,
-				   unsigned long section_nr);
 
-#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
+
+extern void *alloc_bootmem_section(unsigned long size,
+				   unsigned long section_nr);
 
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9ac972535ff..24eacf52c50 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -38,6 +38,19 @@ unsigned long saved_max_pfn;
 
 bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
 
+/*
+ * Given an initialised bdata, it returns the size of the boot bitmap
+ */
+static unsigned long __init get_mapsize(bootmem_data_t *bdata)
+{
+	unsigned long mapsize;
+	unsigned long start = PFN_DOWN(bdata->node_boot_start);
+	unsigned long end = bdata->node_low_pfn;
+
+	mapsize = ((end - start) + 7) / 8;
+	return ALIGN(mapsize, sizeof(long));
+}
+
 /* return the number of _pages_ that will be allocated for the boot bitmap */
 unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
@@ -71,19 +84,6 @@ static void __init link_bootmem(bootmem_data_t *bdata)
 	list_add_tail(&bdata->list, &bdata_list);
 }
 
-/*
- * Given an initialised bdata, it returns the size of the boot bitmap
- */
-static unsigned long __init get_mapsize(bootmem_data_t *bdata)
-{
-	unsigned long mapsize;
-	unsigned long start = PFN_DOWN(bdata->node_boot_start);
-	unsigned long end = bdata->node_low_pfn;
-
-	mapsize = ((end - start) + 7) / 8;
-	return ALIGN(mapsize, sizeof(long));
-}
-
 /*
  * Called once to set up the allocator itself.
  */
@@ -108,6 +108,146 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
 	return mapsize;
 }
 
+unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
+				unsigned long startpfn, unsigned long endpfn)
+{
+	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
+}
+
+unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
+{
+	max_low_pfn = pages;
+	min_low_pfn = start;
+	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
+}
+
+static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
+{
+	struct page *page;
+	unsigned long pfn;
+	unsigned long i, count;
+	unsigned long idx;
+	unsigned long *map;
+	int gofast = 0;
+
+	BUG_ON(!bdata->node_bootmem_map);
+
+	count = 0;
+	/* first extant page of the node */
+	pfn = PFN_DOWN(bdata->node_boot_start);
+	idx = bdata->node_low_pfn - pfn;
+	map = bdata->node_bootmem_map;
+	/*
+	 * Check if we are aligned to BITS_PER_LONG pages.  If so, we might
+	 * be able to free page orders of that size at once.
+	 */
+	if (!(pfn & (BITS_PER_LONG-1)))
+		gofast = 1;
+
+	for (i = 0; i < idx; ) {
+		unsigned long v = ~map[i / BITS_PER_LONG];
+
+		if (gofast && v == ~0UL) {
+			int order;
+
+			page = pfn_to_page(pfn);
+			count += BITS_PER_LONG;
+			order = ffs(BITS_PER_LONG) - 1;
+			__free_pages_bootmem(page, order);
+			i += BITS_PER_LONG;
+			page += BITS_PER_LONG;
+		} else if (v) {
+			unsigned long m;
+
+			page = pfn_to_page(pfn);
+			for (m = 1; m && i < idx; m<<=1, page++, i++) {
+				if (v & m) {
+					count++;
+					__free_pages_bootmem(page, 0);
+				}
+			}
+		} else {
+			i += BITS_PER_LONG;
+		}
+		pfn += BITS_PER_LONG;
+	}
+
+	/*
+	 * Now free the allocator bitmap itself, it's not
+	 * needed anymore:
+	 */
+	page = virt_to_page(bdata->node_bootmem_map);
+	idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
+	for (i = 0; i < idx; i++, page++)
+		__free_pages_bootmem(page, 0);
+	count += i;
+	bdata->node_bootmem_map = NULL;
+
+	return count;
+}
+
+unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
+{
+	register_page_bootmem_info_node(pgdat);
+	return free_all_bootmem_core(pgdat->bdata);
+}
+
+unsigned long __init free_all_bootmem(void)
+{
+	return free_all_bootmem_core(NODE_DATA(0)->bdata);
+}
+
+static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
+				     unsigned long size)
+{
+	unsigned long sidx, eidx;
+	unsigned long i;
+
+	BUG_ON(!size);
+
+	/* out range */
+	if (addr + size < bdata->node_boot_start ||
+		PFN_DOWN(addr) > bdata->node_low_pfn)
+		return;
+	/*
+	 * round down end of usable mem, partially free pages are
+	 * considered reserved.
+	 */
+
+	if (addr >= bdata->node_boot_start && addr < bdata->last_success)
+		bdata->last_success = addr;
+
+	/*
+	 * Round up to index to the range.
+	 */
+	if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start))
+		sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
+	else
+		sidx = 0;
+
+	eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
+	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+	for (i = sidx; i < eidx; i++) {
+		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
+			BUG();
+	}
+}
+
+void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+			      unsigned long size)
+{
+	free_bootmem_core(pgdat->bdata, physaddr, size);
+}
+
+void __init free_bootmem(unsigned long addr, unsigned long size)
+{
+	bootmem_data_t *bdata;
+	list_for_each_entry(bdata, &bdata_list, list)
+		free_bootmem_core(bdata, addr, size);
+}
+
 /*
  * Marks a particular physical memory range as unallocatable. Usable RAM
  * might be used for boot-time allocations - or it might get added
@@ -183,43 +323,36 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata,
 	}
 }
 
-static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
-				     unsigned long size)
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+				 unsigned long size, int flags)
 {
-	unsigned long sidx, eidx;
-	unsigned long i;
-
-	BUG_ON(!size);
-
-	/* out range */
-	if (addr + size < bdata->node_boot_start ||
-		PFN_DOWN(addr) > bdata->node_low_pfn)
-		return;
-	/*
-	 * round down end of usable mem, partially free pages are
-	 * considered reserved.
-	 */
-
-	if (addr >= bdata->node_boot_start && addr < bdata->last_success)
-		bdata->last_success = addr;
+	int ret;
 
-	/*
-	 * Round up to index to the range.
-	 */
-	if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start))
-		sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
-	else
-		sidx = 0;
+	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+	if (ret < 0)
+		return -ENOMEM;
+	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+	return 0;
+}
 
-	eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
-	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
-		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
+int __init reserve_bootmem(unsigned long addr, unsigned long size,
+			    int flags)
+{
+	bootmem_data_t *bdata;
+	int ret;
 
-	for (i = sidx; i < eidx; i++) {
-		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
-			BUG();
+	list_for_each_entry(bdata, &bdata_list, list) {
+		ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+		if (ret < 0)
+			return ret;
 	}
+	list_for_each_entry(bdata, &bdata_list, list)
+		reserve_bootmem_core(bdata, addr, size, flags);
+
+	return 0;
 }
+#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
 /*
  * We 'merge' subsequent allocations to save space. We might 'lose'
@@ -371,140 +504,6 @@ found:
 	return ret;
 }
 
-static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
-{
-	struct page *page;
-	unsigned long pfn;
-	unsigned long i, count;
-	unsigned long idx;
-	unsigned long *map; 
-	int gofast = 0;
-
-	BUG_ON(!bdata->node_bootmem_map);
-
-	count = 0;
-	/* first extant page of the node */
-	pfn = PFN_DOWN(bdata->node_boot_start);
-	idx = bdata->node_low_pfn - pfn;
-	map = bdata->node_bootmem_map;
-	/*
-	 * Check if we are aligned to BITS_PER_LONG pages.  If so, we might
-	 * be able to free page orders of that size at once.
-	 */
-	if (!(pfn & (BITS_PER_LONG-1)))
-		gofast = 1;
-
-	for (i = 0; i < idx; ) {
-		unsigned long v = ~map[i / BITS_PER_LONG];
-
-		if (gofast && v == ~0UL) {
-			int order;
-
-			page = pfn_to_page(pfn);
-			count += BITS_PER_LONG;
-			order = ffs(BITS_PER_LONG) - 1;
-			__free_pages_bootmem(page, order);
-			i += BITS_PER_LONG;
-			page += BITS_PER_LONG;
-		} else if (v) {
-			unsigned long m;
-
-			page = pfn_to_page(pfn);
-			for (m = 1; m && i < idx; m<<=1, page++, i++) {
-				if (v & m) {
-					count++;
-					__free_pages_bootmem(page, 0);
-				}
-			}
-		} else {
-			i += BITS_PER_LONG;
-		}
-		pfn += BITS_PER_LONG;
-	}
-
-	/*
-	 * Now free the allocator bitmap itself, it's not
-	 * needed anymore:
-	 */
-	page = virt_to_page(bdata->node_bootmem_map);
-	idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
-	for (i = 0; i < idx; i++, page++)
-		__free_pages_bootmem(page, 0);
-	count += i;
-	bdata->node_bootmem_map = NULL;
-
-	return count;
-}
-
-unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
-				unsigned long startpfn, unsigned long endpfn)
-{
-	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
-}
-
-int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-				 unsigned long size, int flags)
-{
-	int ret;
-
-	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
-	if (ret < 0)
-		return -ENOMEM;
-	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
-
-	return 0;
-}
-
-void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-			      unsigned long size)
-{
-	free_bootmem_core(pgdat->bdata, physaddr, size);
-}
-
-unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
-{
-	register_page_bootmem_info_node(pgdat);
-	return free_all_bootmem_core(pgdat->bdata);
-}
-
-unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
-{
-	max_low_pfn = pages;
-	min_low_pfn = start;
-	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
-}
-
-#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-int __init reserve_bootmem(unsigned long addr, unsigned long size,
-			    int flags)
-{
-	bootmem_data_t *bdata;
-	int ret;
-
-	list_for_each_entry(bdata, &bdata_list, list) {
-		ret = can_reserve_bootmem_core(bdata, addr, size, flags);
-		if (ret < 0)
-			return ret;
-	}
-	list_for_each_entry(bdata, &bdata_list, list)
-		reserve_bootmem_core(bdata, addr, size, flags);
-
-	return 0;
-}
-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-
-void __init free_bootmem(unsigned long addr, unsigned long size)
-{
-	bootmem_data_t *bdata;
-	list_for_each_entry(bdata, &bdata_list, list)
-		free_bootmem_core(bdata, addr, size);
-}
-
-unsigned long __init free_all_bootmem(void)
-{
-	return free_all_bootmem_core(NODE_DATA(0)->bdata);
-}
-
 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 				      unsigned long goal)
 {
@@ -534,7 +533,6 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return NULL;
 }
 
-
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
-- 
GitLab


From 57cfc29efac6670355ee0e107c8dbae8237d406b Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:00 -0700
Subject: [PATCH 187/853] bootmem: clean up bootmem.c file header

Change the description, move a misplaced comment about the allocator
itself and add me to the list of copyright holders.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 24eacf52c50..286e12c536a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -1,12 +1,12 @@
 /*
- *  linux/mm/bootmem.c
+ *  bootmem - A boot-time physical memory allocator and configurator
  *
  *  Copyright (C) 1999 Ingo Molnar
- *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
+ *                1999 Kanoj Sarcar, SGI
+ *                2008 Johannes Weiner
  *
- *  simple boot-time physical memory area allocator and
- *  free memory collector. It's used to deal with reserved
- *  system memory and memory holes as well.
+ * Access to this subsystem has to be serialized externally (which is true
+ * for the boot process anyway).
  */
 #include <linux/init.h>
 #include <linux/pfn.h>
@@ -19,10 +19,6 @@
 
 #include "internal.h"
 
-/*
- * Access to this subsystem has to be serialized externally. (this is
- * true for the boot process anyway)
- */
 unsigned long max_low_pfn;
 unsigned long min_low_pfn;
 unsigned long max_pfn;
-- 
GitLab


From a66fd7daec1f40c1f0eac466f0da9206b615fe2a Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:01 -0700
Subject: [PATCH 188/853] bootmem: add documentation to API functions

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 149 insertions(+), 1 deletion(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 286e12c536a..105ad4cff2e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -47,7 +47,10 @@ static unsigned long __init get_mapsize(bootmem_data_t *bdata)
 	return ALIGN(mapsize, sizeof(long));
 }
 
-/* return the number of _pages_ that will be allocated for the boot bitmap */
+/**
+ * bootmem_bootmap_pages - calculate bitmap size in pages
+ * @pages: number of pages the bitmap has to represent
+ */
 unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
 	unsigned long mapsize;
@@ -104,12 +107,28 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
 	return mapsize;
 }
 
+/**
+ * init_bootmem_node - register a node as boot memory
+ * @pgdat: node to register
+ * @freepfn: pfn where the bitmap for this node is to be placed
+ * @startpfn: first pfn on the node
+ * @endpfn: first pfn after the node
+ *
+ * Returns the number of bytes needed to hold the bitmap for this node.
+ */
 unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 				unsigned long startpfn, unsigned long endpfn)
 {
 	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
 }
 
+/**
+ * init_bootmem - register boot memory
+ * @start: pfn where the bitmap is to be placed
+ * @pages: number of available physical pages
+ *
+ * Returns the number of bytes needed to hold the bitmap.
+ */
 unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 {
 	max_low_pfn = pages;
@@ -182,12 +201,23 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	return count;
 }
 
+/**
+ * free_all_bootmem_node - release a node's free pages to the buddy allocator
+ * @pgdat: node to be released
+ *
+ * Returns the number of pages actually released.
+ */
 unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 {
 	register_page_bootmem_info_node(pgdat);
 	return free_all_bootmem_core(pgdat->bdata);
 }
 
+/**
+ * free_all_bootmem - release free pages to the buddy allocator
+ *
+ * Returns the number of pages actually released.
+ */
 unsigned long __init free_all_bootmem(void)
 {
 	return free_all_bootmem_core(NODE_DATA(0)->bdata);
@@ -231,12 +261,32 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
 	}
 }
 
+/**
+ * free_bootmem_node - mark a page range as usable
+ * @pgdat: node the range resides on
+ * @physaddr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * Partial pages will be considered reserved and left as they are.
+ *
+ * Only physical pages that actually reside on @pgdat are marked.
+ */
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 			      unsigned long size)
 {
 	free_bootmem_core(pgdat->bdata, physaddr, size);
 }
 
+/**
+ * free_bootmem - mark a page range as usable
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * Partial pages will be considered reserved and left as they are.
+ *
+ * All physical pages within the range are marked, no matter what
+ * node they reside on.
+ */
 void __init free_bootmem(unsigned long addr, unsigned long size)
 {
 	bootmem_data_t *bdata;
@@ -319,6 +369,17 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata,
 	}
 }
 
+/**
+ * reserve_bootmem_node - mark a page range as reserved
+ * @pgdat: node the range resides on
+ * @physaddr: starting address of the range
+ * @size: size of the range in bytes
+ * @flags: reservation flags (see linux/bootmem.h)
+ *
+ * Partial pages will be reserved.
+ *
+ * Only physical pages that actually reside on @pgdat are marked.
+ */
 int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 				 unsigned long size, int flags)
 {
@@ -332,6 +393,17 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 }
 
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
+/**
+ * reserve_bootmem - mark a page range as reserved
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ * @flags: reservation flags (see linux/bootmem.h)
+ *
+ * Partial pages will be reserved.
+ *
+ * All physical pages within the range are marked, no matter what
+ * node they reside on.
+ */
 int __init reserve_bootmem(unsigned long addr, unsigned long size,
 			    int flags)
 {
@@ -500,6 +572,19 @@ found:
 	return ret;
 }
 
+/**
+ * __alloc_bootmem_nopanic - allocate boot memory without panicking
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * Returns NULL on failure.
+ */
 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 				      unsigned long goal)
 {
@@ -514,6 +599,19 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 	return NULL;
 }
 
+/**
+ * __alloc_bootmem - allocate boot memory
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * The function panics if the request can not be satisfied.
+ */
 void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 			      unsigned long goal)
 {
@@ -529,6 +627,21 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return NULL;
 }
 
+/**
+ * __alloc_bootmem_node - allocate boot memory from a specific node
+ * @pgdat: node to allocate from
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may fall back to any node in the system if the specified node
+ * can not hold the requested memory.
+ *
+ * The function panics if the request can not be satisfied.
+ */
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
@@ -542,6 +655,13 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 }
 
 #ifdef CONFIG_SPARSEMEM
+/**
+ * alloc_bootmem_section - allocate boot memory from a specific section
+ * @size: size of the request in bytes
+ * @section_nr: sparse map section to allocate from
+ *
+ * Returns NULL on failure.
+ */
 void * __init alloc_bootmem_section(unsigned long size,
 				    unsigned long section_nr)
 {
@@ -588,6 +708,19 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
 #define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
 #endif
 
+/**
+ * __alloc_bootmem_low - allocate low boot memory
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * The function panics if the request can not be satisfied.
+ */
 void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 				  unsigned long goal)
 {
@@ -609,6 +742,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 	return NULL;
 }
 
+/**
+ * __alloc_bootmem_low_node - allocate low boot memory from a specific node
+ * @pgdat: node to allocate from
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may fall back to any node in the system if the specified node
+ * can not hold the requested memory.
+ *
+ * The function panics if the request can not be satisfied.
+ */
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
-- 
GitLab


From 2e5237daf0cc3c8d87762f53f704dc54fa91dcf6 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:02 -0700
Subject: [PATCH 189/853] bootmem: add debugging framework

Introduce the bootmem_debug kernel parameter that enables very verbose
diagnostics regarding all range operations of bootmem as well as the
initialization and release of nodes.

[akpm@linux-foundation.org: fix printk warnings]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 51 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 105ad4cff2e..4e085ee1d98 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -34,6 +34,22 @@ unsigned long saved_max_pfn;
 
 bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
 
+static int bootmem_debug;
+
+static int __init bootmem_debug_setup(char *buf)
+{
+	bootmem_debug = 1;
+	return 0;
+}
+early_param("bootmem_debug", bootmem_debug_setup);
+
+#define bdebug(fmt, args...) ({				\
+	if (unlikely(bootmem_debug))			\
+		printk(KERN_INFO			\
+			"bootmem::%s " fmt,		\
+			__FUNCTION__, ## args);		\
+})
+
 /*
  * Given an initialised bdata, it returns the size of the boot bitmap
  */
@@ -104,6 +120,9 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
 	mapsize = get_mapsize(bdata);
 	memset(bdata->node_bootmem_map, 0xff, mapsize);
 
+	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
+		bdata - bootmem_node_data, start, mapstart, end, mapsize);
+
 	return mapsize;
 }
 
@@ -198,6 +217,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	count += i;
 	bdata->node_bootmem_map = NULL;
 
+	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
+
 	return count;
 }
 
@@ -255,6 +276,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
 	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
 		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
 
+	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
+		sidx + PFN_DOWN(bdata->node_boot_start),
+		eidx + PFN_DOWN(bdata->node_boot_start));
+
 	for (i = sidx; i < eidx; i++) {
 		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
 			BUG();
@@ -360,13 +385,16 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata,
 	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
 		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
 
-	for (i = sidx; i < eidx; i++) {
-		if (test_and_set_bit(i, bdata->node_bootmem_map)) {
-#ifdef CONFIG_DEBUG_BOOTMEM
-			printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
-#endif
-		}
-	}
+	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
+		bdata - bootmem_node_data,
+		sidx + PFN_DOWN(bdata->node_boot_start),
+		eidx + PFN_DOWN(bdata->node_boot_start),
+		flags);
+
+	for (i = sidx; i < eidx; i++)
+		if (test_and_set_bit(i, bdata->node_bootmem_map))
+			bdebug("hm, page %lx reserved twice.\n",
+				PFN_DOWN(bdata->node_boot_start) + i);
 }
 
 /**
@@ -455,6 +483,10 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 	if (!bdata->node_bootmem_map)
 		return NULL;
 
+	bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
+		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
+		align, goal, limit);
+
 	/* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */
 	node_boot_start = bdata->node_boot_start;
 	node_bootmem_map = bdata->node_bootmem_map;
@@ -562,6 +594,11 @@ found:
 		ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
 	}
 
+	bdebug("nid=%td start=%lx end=%lx\n",
+		bdata - bootmem_node_data,
+		start + PFN_DOWN(bdata->node_boot_start),
+		start + areasize + PFN_DOWN(bdata->node_boot_start));
+
 	/*
 	 * Reserve the area now:
 	 */
-- 
GitLab


From df049a5f41a3b2eee2131221959e3b558ba7c705 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:02 -0700
Subject: [PATCH 190/853] bootmem: revisit bitmap size calculations

Reincarnate get_mapsize as bootmap_bytes and implement
bootmem_bootmap_pages on top of it.

Adjust users of these helpers and make free_all_bootmem_core use
bootmem_bootmap_pages instead of open-coding it.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 4e085ee1d98..484849bfc8c 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -50,17 +50,11 @@ early_param("bootmem_debug", bootmem_debug_setup);
 			__FUNCTION__, ## args);		\
 })
 
-/*
- * Given an initialised bdata, it returns the size of the boot bitmap
- */
-static unsigned long __init get_mapsize(bootmem_data_t *bdata)
+static unsigned long __init bootmap_bytes(unsigned long pages)
 {
-	unsigned long mapsize;
-	unsigned long start = PFN_DOWN(bdata->node_boot_start);
-	unsigned long end = bdata->node_low_pfn;
+	unsigned long bytes = (pages + 7) / 8;
 
-	mapsize = ((end - start) + 7) / 8;
-	return ALIGN(mapsize, sizeof(long));
+	return ALIGN(bytes, sizeof(long));
 }
 
 /**
@@ -69,13 +63,9 @@ static unsigned long __init get_mapsize(bootmem_data_t *bdata)
  */
 unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
-	unsigned long mapsize;
-
-	mapsize = (pages+7)/8;
-	mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;
-	mapsize >>= PAGE_SHIFT;
+	unsigned long bytes = bootmap_bytes(pages);
 
-	return mapsize;
+	return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
 }
 
 /*
@@ -117,7 +107,7 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
 	 * Initially all pages are reserved - setup_arch() has to
 	 * register free RAM areas explicitly.
 	 */
-	mapsize = get_mapsize(bdata);
+	mapsize = bootmap_bytes(end - start);
 	memset(bdata->node_bootmem_map, 0xff, mapsize);
 
 	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
@@ -160,7 +150,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	struct page *page;
 	unsigned long pfn;
 	unsigned long i, count;
-	unsigned long idx;
+	unsigned long idx, pages;
 	unsigned long *map;
 	int gofast = 0;
 
@@ -211,7 +201,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	 * needed anymore:
 	 */
 	page = virt_to_page(bdata->node_bootmem_map);
-	idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
+	pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+	idx = bootmem_bootmap_pages(pages);
 	for (i = 0; i < idx; i++, page++)
 		__free_pages_bootmem(page, 0);
 	count += i;
-- 
GitLab


From 636cc40cb79f511d9caa27ef098a83e4fa4971fb Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:03 -0700
Subject: [PATCH 191/853] bootmem: revisit bootmem descriptor list handling

link_bootmem handles an insertion of a new descriptor into the sorted list
in more or less three explicit branches: empty list, insert in between and
append.  These cases can be expressed implicitly.

Also mark the sorted list as initdata as it can be thrown away after boot
as well.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 484849bfc8c..9da7d409781 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -23,7 +23,6 @@ unsigned long max_low_pfn;
 unsigned long min_low_pfn;
 unsigned long max_pfn;
 
-static LIST_HEAD(bdata_list);
 #ifdef CONFIG_CRASH_DUMP
 /*
  * If we have booted due to a crash, max_pfn will be a very low value. We need
@@ -34,6 +33,8 @@ unsigned long saved_max_pfn;
 
 bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
 
+static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
+
 static int bootmem_debug;
 
 static int __init bootmem_debug_setup(char *buf)
@@ -73,20 +74,16 @@ unsigned long __init bootmem_bootmap_pages(unsigned long pages)
  */
 static void __init link_bootmem(bootmem_data_t *bdata)
 {
-	bootmem_data_t *ent;
+	struct list_head *iter;
 
-	if (list_empty(&bdata_list)) {
-		list_add(&bdata->list, &bdata_list);
-		return;
-	}
-	/* insert in order */
-	list_for_each_entry(ent, &bdata_list, list) {
-		if (bdata->node_boot_start < ent->node_boot_start) {
-			list_add_tail(&bdata->list, &ent->list);
-			return;
-		}
+	list_for_each(iter, &bdata_list) {
+		bootmem_data_t *ent;
+
+		ent = list_entry(iter, bootmem_data_t, list);
+		if (bdata->node_boot_start < ent->node_boot_start)
+			break;
 	}
-	list_add_tail(&bdata->list, &bdata_list);
+	list_add_tail(&bdata->list, iter);
 }
 
 /*
-- 
GitLab


From 41546c17418fba08ece978bad72a33072715b8f3 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:03 -0700
Subject: [PATCH 192/853] bootmem: clean up free_all_bootmem_core

Rewrite the code in a more concise way using fewer variables.

[akpm@linux-foundation.org: fix printk warnings]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 83 ++++++++++++++++++++++++----------------------------
 1 file changed, 38 insertions(+), 45 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9da7d409781..300d126ec53 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -144,66 +144,59 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
+	int aligned;
 	struct page *page;
-	unsigned long pfn;
-	unsigned long i, count;
-	unsigned long idx, pages;
-	unsigned long *map;
-	int gofast = 0;
-
-	BUG_ON(!bdata->node_bootmem_map);
-
-	count = 0;
-	/* first extant page of the node */
-	pfn = PFN_DOWN(bdata->node_boot_start);
-	idx = bdata->node_low_pfn - pfn;
-	map = bdata->node_bootmem_map;
+	unsigned long start, end, pages, count = 0;
+
+	if (!bdata->node_bootmem_map)
+		return 0;
+
+	start = PFN_DOWN(bdata->node_boot_start);
+	end = bdata->node_low_pfn;
+
 	/*
-	 * Check if we are aligned to BITS_PER_LONG pages.  If so, we might
-	 * be able to free page orders of that size at once.
+	 * If the start is aligned to the machines wordsize, we might
+	 * be able to free pages in bulks of that order.
 	 */
-	if (!(pfn & (BITS_PER_LONG-1)))
-		gofast = 1;
+	aligned = !(start & (BITS_PER_LONG - 1));
+
+	bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
+		bdata - bootmem_node_data, start, end, aligned);
+
+	while (start < end) {
+		unsigned long *map, idx, vec;
 
-	for (i = 0; i < idx; ) {
-		unsigned long v = ~map[i / BITS_PER_LONG];
+		map = bdata->node_bootmem_map;
+		idx = start - PFN_DOWN(bdata->node_boot_start);
+		vec = ~map[idx / BITS_PER_LONG];
 
-		if (gofast && v == ~0UL) {
-			int order;
+		if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
+			int order = ilog2(BITS_PER_LONG);
 
-			page = pfn_to_page(pfn);
+			__free_pages_bootmem(pfn_to_page(start), order);
 			count += BITS_PER_LONG;
-			order = ffs(BITS_PER_LONG) - 1;
-			__free_pages_bootmem(page, order);
-			i += BITS_PER_LONG;
-			page += BITS_PER_LONG;
-		} else if (v) {
-			unsigned long m;
-
-			page = pfn_to_page(pfn);
-			for (m = 1; m && i < idx; m<<=1, page++, i++) {
-				if (v & m) {
-					count++;
+		} else {
+			unsigned long off = 0;
+
+			while (vec && off < BITS_PER_LONG) {
+				if (vec & 1) {
+					page = pfn_to_page(start + off);
 					__free_pages_bootmem(page, 0);
+					count++;
 				}
+				vec >>= 1;
+				off++;
 			}
-		} else {
-			i += BITS_PER_LONG;
 		}
-		pfn += BITS_PER_LONG;
+		start += BITS_PER_LONG;
 	}
 
-	/*
-	 * Now free the allocator bitmap itself, it's not
-	 * needed anymore:
-	 */
 	page = virt_to_page(bdata->node_bootmem_map);
 	pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
-	idx = bootmem_bootmap_pages(pages);
-	for (i = 0; i < idx; i++, page++)
-		__free_pages_bootmem(page, 0);
-	count += i;
-	bdata->node_bootmem_map = NULL;
+	pages = bootmem_bootmap_pages(pages);
+	count += pages;
+	while (pages--)
+		__free_pages_bootmem(page++, 0);
 
 	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
 
-- 
GitLab


From 5f2809e69c7128f86316048221cf45146f69a4a0 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:05 -0700
Subject: [PATCH 193/853] bootmem: clean up alloc_bootmem_core

alloc_bootmem_core has become quite nasty to read over time.  This is a
clean rewrite that keeps the semantics.

bdata->last_pos has been dropped.

bdata->last_success has been renamed to hint_idx and it is now an index
relative to the node's range.  Since further block searching might start
at this index, it is now set to the end of a succeeded allocation rather
than its beginning.

bdata->last_offset has been renamed to last_end_off to be more clear that
it represents the ending address of the last allocation relative to the
node.

[y-goto@jp.fujitsu.com: fix new alloc_bootmem_core()]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |   6 +-
 mm/bootmem.c            | 212 ++++++++++++++--------------------------
 2 files changed, 78 insertions(+), 140 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 5000fd70b04..90921d10ffa 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -31,10 +31,8 @@ typedef struct bootmem_data {
 	unsigned long node_boot_start;
 	unsigned long node_low_pfn;
 	void *node_bootmem_map;
-	unsigned long last_offset;
-	unsigned long last_pos;
-	unsigned long last_success;	/* Previous allocation point.  To speed
-					 * up searching */
+	unsigned long last_end_off;
+	unsigned long hint_idx;
 	struct list_head list;
 } bootmem_data_t;
 
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 300d126ec53..94ea612decc 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -242,8 +242,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
 	 * considered reserved.
 	 */
 
-	if (addr >= bdata->node_boot_start && addr < bdata->last_success)
-		bdata->last_success = addr;
+	if (addr >= bdata->node_boot_start &&
+			PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx)
+		bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start);
 
 	/*
 	 * Round up to index to the range.
@@ -431,36 +432,16 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
-/*
- * We 'merge' subsequent allocations to save space. We might 'lose'
- * some fraction of a page if allocations cannot be satisfied due to
- * size constraints on boxes where there is physical RAM space
- * fragmentation - in these cases (mostly large memory boxes) this
- * is not a problem.
- *
- * On low memory boxes we get it right in 100% of the cases.
- *
- * alignment has to be a power of 2 value.
- *
- * NOTE:  This function is _not_ reentrant.
- */
-static void * __init
-alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
-		unsigned long align, unsigned long goal, unsigned long limit)
+static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
+				unsigned long size, unsigned long align,
+				unsigned long goal, unsigned long limit)
 {
-	unsigned long areasize, preferred;
-	unsigned long i, start = 0, incr, eidx, end_pfn;
-	void *ret;
-	unsigned long node_boot_start;
-	void *node_bootmem_map;
-
-	if (!size) {
-		printk("alloc_bootmem_core(): zero-sized request\n");
-		BUG();
-	}
-	BUG_ON(align & (align-1));
+	unsigned long min, max, start, sidx, midx, step;
+
+	BUG_ON(!size);
+	BUG_ON(align & (align - 1));
+	BUG_ON(limit && goal + size > limit);
 
-	/* on nodes without memory - bootmem_map is NULL */
 	if (!bdata->node_bootmem_map)
 		return NULL;
 
@@ -468,126 +449,85 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
 		align, goal, limit);
 
-	/* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */
-	node_boot_start = bdata->node_boot_start;
-	node_bootmem_map = bdata->node_bootmem_map;
-	if (align) {
-		node_boot_start = ALIGN(bdata->node_boot_start, align);
-		if (node_boot_start > bdata->node_boot_start)
-			node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
-			    PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
-	}
+	min = PFN_DOWN(bdata->node_boot_start);
+	max = bdata->node_low_pfn;
 
-	if (limit && node_boot_start >= limit)
+	goal >>= PAGE_SHIFT;
+	limit >>= PAGE_SHIFT;
+
+	if (limit && max > limit)
+		max = limit;
+	if (max <= min)
 		return NULL;
 
-	end_pfn = bdata->node_low_pfn;
-	limit = PFN_DOWN(limit);
-	if (limit && end_pfn > limit)
-		end_pfn = limit;
+	step = max(align >> PAGE_SHIFT, 1UL);
 
-	eidx = end_pfn - PFN_DOWN(node_boot_start);
+	if (goal && min < goal && goal < max)
+		start = ALIGN(goal, step);
+	else
+		start = ALIGN(min, step);
 
-	/*
-	 * We try to allocate bootmem pages above 'goal'
-	 * first, then we try to allocate lower pages.
-	 */
-	preferred = 0;
-	if (goal && PFN_DOWN(goal) < end_pfn) {
-		if (goal > node_boot_start)
-			preferred = goal - node_boot_start;
-
-		if (bdata->last_success > node_boot_start &&
-			bdata->last_success - node_boot_start >= preferred)
-			if (!limit || (limit && limit > bdata->last_success))
-				preferred = bdata->last_success - node_boot_start;
-	}
+	sidx = start - PFN_DOWN(bdata->node_boot_start);
+	midx = max - PFN_DOWN(bdata->node_boot_start);
 
-	preferred = PFN_DOWN(ALIGN(preferred, align));
-	areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
-	incr = align >> PAGE_SHIFT ? : 1;
+	if (bdata->hint_idx > sidx) {
+		/* Make sure we retry on failure */
+		goal = 1;
+		sidx = ALIGN(bdata->hint_idx, step);
+	}
 
-restart_scan:
-	for (i = preferred; i < eidx;) {
-		unsigned long j;
+	while (1) {
+		int merge;
+		void *region;
+		unsigned long eidx, i, start_off, end_off;
+find_block:
+		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
+		sidx = ALIGN(sidx, step);
+		eidx = sidx + PFN_UP(size);
 
-		i = find_next_zero_bit(node_bootmem_map, eidx, i);
-		i = ALIGN(i, incr);
-		if (i >= eidx)
+		if (sidx >= midx || eidx > midx)
 			break;
-		if (test_bit(i, node_bootmem_map)) {
-			i += incr;
-			continue;
-		}
-		for (j = i + 1; j < i + areasize; ++j) {
-			if (j >= eidx)
-				goto fail_block;
-			if (test_bit(j, node_bootmem_map))
-				goto fail_block;
-		}
-		start = i;
-		goto found;
-	fail_block:
-		i = ALIGN(j, incr);
-		if (i == j)
-			i += incr;
-	}
-
-	if (preferred > 0) {
-		preferred = 0;
-		goto restart_scan;
-	}
-	return NULL;
 
-found:
-	bdata->last_success = PFN_PHYS(start) + node_boot_start;
-	BUG_ON(start >= eidx);
+		for (i = sidx; i < eidx; i++)
+			if (test_bit(i, bdata->node_bootmem_map)) {
+				sidx = ALIGN(i, step);
+				if (sidx == i)
+					sidx += step;
+				goto find_block;
+			}
 
-	/*
-	 * Is the next page of the previous allocation-end the start
-	 * of this allocation's buffer? If yes then we can 'merge'
-	 * the previous partial page with this allocation.
-	 */
-	if (align < PAGE_SIZE &&
-	    bdata->last_offset && bdata->last_pos+1 == start) {
-		unsigned long offset, remaining_size;
-		offset = ALIGN(bdata->last_offset, align);
-		BUG_ON(offset > PAGE_SIZE);
-		remaining_size = PAGE_SIZE - offset;
-		if (size < remaining_size) {
-			areasize = 0;
-			/* last_pos unchanged */
-			bdata->last_offset = offset + size;
-			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-					   offset + node_boot_start);
-		} else {
-			remaining_size = size - remaining_size;
-			areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
-			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-					   offset + node_boot_start);
-			bdata->last_pos = start + areasize - 1;
-			bdata->last_offset = remaining_size;
-		}
-		bdata->last_offset &= ~PAGE_MASK;
-	} else {
-		bdata->last_pos = start + areasize - 1;
-		bdata->last_offset = size & ~PAGE_MASK;
-		ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
+		if (bdata->last_end_off &&
+				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
+			start_off = ALIGN(bdata->last_end_off, align);
+		else
+			start_off = PFN_PHYS(sidx);
+
+		merge = PFN_DOWN(start_off) < sidx;
+		end_off = start_off + size;
+
+		bdata->last_end_off = end_off;
+		bdata->hint_idx = PFN_UP(end_off);
+
+		/*
+		 * Reserve the area now:
+		 */
+		for (i = PFN_DOWN(start_off) + merge;
+				i < PFN_UP(end_off); i++)
+			if (test_and_set_bit(i, bdata->node_bootmem_map))
+				BUG();
+
+		region = phys_to_virt(bdata->node_boot_start + start_off);
+		memset(region, 0, size);
+		return region;
 	}
 
-	bdebug("nid=%td start=%lx end=%lx\n",
-		bdata - bootmem_node_data,
-		start + PFN_DOWN(bdata->node_boot_start),
-		start + areasize + PFN_DOWN(bdata->node_boot_start));
+	if (goal) {
+		goal = 0;
+		sidx = 0;
+		goto find_block;
+	}
 
-	/*
-	 * Reserve the area now:
-	 */
-	for (i = start; i < start + areasize; i++)
-		if (unlikely(test_and_set_bit(i, node_bootmem_map)))
-			BUG();
-	memset(ret, 0, size);
-	return ret;
+	return NULL;
 }
 
 /**
-- 
GitLab


From d747fa4bcebcf3696607b86a6b0dafa644be0676 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:05 -0700
Subject: [PATCH 194/853] bootmem: free/reserve helpers

Factor out the common operation of marking a range on the bitmap.

[akpm@linux-foundation.org: fix various warnings]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 65 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 94ea612decc..9d03ff65135 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -225,6 +225,44 @@ unsigned long __init free_all_bootmem(void)
 	return free_all_bootmem_core(NODE_DATA(0)->bdata);
 }
 
+static void __init __free(bootmem_data_t *bdata,
+			unsigned long sidx, unsigned long eidx)
+{
+	unsigned long idx;
+
+	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
+		sidx + PFN_DOWN(bdata->node_boot_start),
+		eidx + PFN_DOWN(bdata->node_boot_start));
+
+	for (idx = sidx; idx < eidx; idx++)
+		if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
+			BUG();
+}
+
+static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
+			unsigned long eidx, int flags)
+{
+	unsigned long idx;
+	int exclusive = flags & BOOTMEM_EXCLUSIVE;
+
+	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
+		bdata - bootmem_node_data,
+		sidx + PFN_DOWN(bdata->node_boot_start),
+		eidx + PFN_DOWN(bdata->node_boot_start),
+		flags);
+
+	for (idx = sidx; idx < eidx; idx++)
+		if (test_and_set_bit(idx, bdata->node_bootmem_map)) {
+			if (exclusive) {
+				__free(bdata, sidx, idx);
+				return -EBUSY;
+			}
+			bdebug("silent double reserve of PFN %lx\n",
+				idx + PFN_DOWN(bdata->node_boot_start));
+		}
+	return 0;
+}
+
 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
 				     unsigned long size)
 {
@@ -258,14 +296,7 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
 	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
 		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
 
-	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
-		sidx + PFN_DOWN(bdata->node_boot_start),
-		eidx + PFN_DOWN(bdata->node_boot_start));
-
-	for (i = sidx; i < eidx; i++) {
-		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
-			BUG();
-	}
+	__free(bdata, sidx, eidx);
 }
 
 /**
@@ -367,16 +398,7 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata,
 	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
 		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
 
-	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
-		bdata - bootmem_node_data,
-		sidx + PFN_DOWN(bdata->node_boot_start),
-		eidx + PFN_DOWN(bdata->node_boot_start),
-		flags);
-
-	for (i = sidx; i < eidx; i++)
-		if (test_and_set_bit(i, bdata->node_bootmem_map))
-			bdebug("hm, page %lx reserved twice.\n",
-				PFN_DOWN(bdata->node_boot_start) + i);
+	return __reserve(bdata, sidx, eidx, flags);
 }
 
 /**
@@ -511,10 +533,9 @@ find_block:
 		/*
 		 * Reserve the area now:
 		 */
-		for (i = PFN_DOWN(start_off) + merge;
-				i < PFN_UP(end_off); i++)
-			if (test_and_set_bit(i, bdata->node_bootmem_map))
-				BUG();
+		if (__reserve(bdata, PFN_DOWN(start_off) + merge,
+				PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
+			BUG();
 
 		region = phys_to_virt(bdata->node_boot_start + start_off);
 		memset(region, 0, size);
-- 
GitLab


From e2bf3cae515090fefe28329e71230dfe7ab873b1 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:06 -0700
Subject: [PATCH 195/853] bootmem: factor out the marking of a PFN range

Introduce new helpers that mark a range that resides completely on a node
or node-agnostic ranges that might also span node boundaries.

The free/reserve API functions will then directly use these helpers.

Note that the free/reserve semantics become more strict: while the prior
code took basically arbitrary range arguments and marked the PFNs that
happen to fall into that range, the new code requires node-specific ranges
to be completely on the node.  The node-agnostic requests might span node
boundaries as long as the nodes are contiguous.

Passing ranges that do not satisfy these criteria is a bug.

[akpm@linux-foundation.org: fix printk warnings]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 188 +++++++++++++++++++--------------------------------
 1 file changed, 69 insertions(+), 119 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9d03ff65135..e5415a5414a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -234,6 +234,9 @@ static void __init __free(bootmem_data_t *bdata,
 		sidx + PFN_DOWN(bdata->node_boot_start),
 		eidx + PFN_DOWN(bdata->node_boot_start));
 
+	if (bdata->hint_idx > sidx)
+		bdata->hint_idx = sidx;
+
 	for (idx = sidx; idx < eidx; idx++)
 		if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
 			BUG();
@@ -263,40 +266,57 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
 	return 0;
 }
 
-static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
-				     unsigned long size)
+static int __init mark_bootmem_node(bootmem_data_t *bdata,
+				unsigned long start, unsigned long end,
+				int reserve, int flags)
 {
 	unsigned long sidx, eidx;
-	unsigned long i;
 
-	BUG_ON(!size);
+	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
+		bdata - bootmem_node_data, start, end, reserve, flags);
 
-	/* out range */
-	if (addr + size < bdata->node_boot_start ||
-		PFN_DOWN(addr) > bdata->node_low_pfn)
-		return;
-	/*
-	 * round down end of usable mem, partially free pages are
-	 * considered reserved.
-	 */
+	BUG_ON(start < PFN_DOWN(bdata->node_boot_start));
+	BUG_ON(end > bdata->node_low_pfn);
 
-	if (addr >= bdata->node_boot_start &&
-			PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx)
-		bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start);
+	sidx = start - PFN_DOWN(bdata->node_boot_start);
+	eidx = end - PFN_DOWN(bdata->node_boot_start);
 
-	/*
-	 * Round up to index to the range.
-	 */
-	if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start))
-		sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
+	if (reserve)
+		return __reserve(bdata, sidx, eidx, flags);
 	else
-		sidx = 0;
+		__free(bdata, sidx, eidx);
+	return 0;
+}
+
+static int __init mark_bootmem(unsigned long start, unsigned long end,
+				int reserve, int flags)
+{
+	unsigned long pos;
+	bootmem_data_t *bdata;
+
+	pos = start;
+	list_for_each_entry(bdata, &bdata_list, list) {
+		int err;
+		unsigned long max;
+
+		if (pos < PFN_DOWN(bdata->node_boot_start)) {
+			BUG_ON(pos != start);
+			continue;
+		}
+
+		max = min(bdata->node_low_pfn, end);
 
-	eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
-	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
-		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+		err = mark_bootmem_node(bdata, pos, max, reserve, flags);
+		if (reserve && err) {
+			mark_bootmem(start, pos, 0, 0);
+			return err;
+		}
 
-	__free(bdata, sidx, eidx);
+		if (max == end)
+			return 0;
+		pos = bdata->node_low_pfn;
+	}
+	BUG();
 }
 
 /**
@@ -307,12 +327,17 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
  *
  * Partial pages will be considered reserved and left as they are.
  *
- * Only physical pages that actually reside on @pgdat are marked.
+ * The range must reside completely on the specified node.
  */
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 			      unsigned long size)
 {
-	free_bootmem_core(pgdat->bdata, physaddr, size);
+	unsigned long start, end;
+
+	start = PFN_UP(physaddr);
+	end = PFN_DOWN(physaddr + size);
+
+	mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
 }
 
 /**
@@ -322,83 +347,16 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
  *
  * Partial pages will be considered reserved and left as they are.
  *
- * All physical pages within the range are marked, no matter what
- * node they reside on.
+ * The range must be contiguous but may span node boundaries.
  */
 void __init free_bootmem(unsigned long addr, unsigned long size)
 {
-	bootmem_data_t *bdata;
-	list_for_each_entry(bdata, &bdata_list, list)
-		free_bootmem_core(bdata, addr, size);
-}
-
-/*
- * Marks a particular physical memory range as unallocatable. Usable RAM
- * might be used for boot-time allocations - or it might get added
- * to the free page pool later on.
- */
-static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
-			unsigned long addr, unsigned long size, int flags)
-{
-	unsigned long sidx, eidx;
-	unsigned long i;
-
-	BUG_ON(!size);
-
-	/* out of range, don't hold other */
-	if (addr + size < bdata->node_boot_start ||
-		PFN_DOWN(addr) > bdata->node_low_pfn)
-		return 0;
-
-	/*
-	 * Round up to index to the range.
-	 */
-	if (addr > bdata->node_boot_start)
-		sidx= PFN_DOWN(addr - bdata->node_boot_start);
-	else
-		sidx = 0;
-
-	eidx = PFN_UP(addr + size - bdata->node_boot_start);
-	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
-		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
-
-	for (i = sidx; i < eidx; i++) {
-		if (test_bit(i, bdata->node_bootmem_map)) {
-			if (flags & BOOTMEM_EXCLUSIVE)
-				return -EBUSY;
-		}
-	}
-
-	return 0;
-
-}
-
-static void __init reserve_bootmem_core(bootmem_data_t *bdata,
-			unsigned long addr, unsigned long size, int flags)
-{
-	unsigned long sidx, eidx;
-	unsigned long i;
-
-	BUG_ON(!size);
-
-	/* out of range */
-	if (addr + size < bdata->node_boot_start ||
-		PFN_DOWN(addr) > bdata->node_low_pfn)
-		return;
-
-	/*
-	 * Round up to index to the range.
-	 */
-	if (addr > bdata->node_boot_start)
-		sidx= PFN_DOWN(addr - bdata->node_boot_start);
-	else
-		sidx = 0;
+	unsigned long start, end;
 
-	eidx = PFN_UP(addr + size - bdata->node_boot_start);
-	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
-		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+	start = PFN_UP(addr);
+	end = PFN_DOWN(addr + size);
 
-	return __reserve(bdata, sidx, eidx, flags);
+	mark_bootmem(start, end, 0, 0);
 }
 
 /**
@@ -410,18 +368,17 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata,
  *
  * Partial pages will be reserved.
  *
- * Only physical pages that actually reside on @pgdat are marked.
+ * The range must reside completely on the specified node.
  */
 int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 				 unsigned long size, int flags)
 {
-	int ret;
+	unsigned long start, end;
 
-	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
-	if (ret < 0)
-		return -ENOMEM;
-	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
-	return 0;
+	start = PFN_DOWN(physaddr);
+	end = PFN_UP(physaddr + size);
+
+	return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
 }
 
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
@@ -433,24 +390,17 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
  *
  * Partial pages will be reserved.
  *
- * All physical pages within the range are marked, no matter what
- * node they reside on.
+ * The range must be contiguous but may span node boundaries.
  */
 int __init reserve_bootmem(unsigned long addr, unsigned long size,
 			    int flags)
 {
-	bootmem_data_t *bdata;
-	int ret;
+	unsigned long start, end;
 
-	list_for_each_entry(bdata, &bdata_list, list) {
-		ret = can_reserve_bootmem_core(bdata, addr, size, flags);
-		if (ret < 0)
-			return ret;
-	}
-	list_for_each_entry(bdata, &bdata_list, list)
-		reserve_bootmem_core(bdata, addr, size, flags);
+	start = PFN_DOWN(addr);
+	end = PFN_UP(addr + size);
 
-	return 0;
+	return mark_bootmem(start, end, 1, flags);
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
@@ -663,7 +613,7 @@ void * __init alloc_bootmem_section(unsigned long size,
 	if (start_nr != section_nr || end_nr != section_nr) {
 		printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
 		       section_nr);
-		free_bootmem_core(pgdat->bdata, __pa(ptr), size);
+		free_bootmem_node(pgdat, __pa(ptr), size);
 		ptr = NULL;
 	}
 
-- 
GitLab


From 0f3caba211babef6e3fbde1ba76ddc79321bc92f Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:07 -0700
Subject: [PATCH 196/853] bootmem: respect goal more likely

The old node-agnostic code tried allocating on all nodes starting from the
one with the lowest range.  alloc_bootmem_core retried without the goal if
it could not satisfy it and so the goal was only respected at all when it
happened to be on the first (lowest page numbers) node (or theoretically
if allocations failed on all nodes before the one holding the goal).

Introduce a non-panicking helper that starts allocating from the node
holding the goal and falls back only after all these tries failed, thus
moving the goal fallback code out of alloc_bootmem_core.

Make all other allocation functions benefit from this new helper.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 92 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 54 insertions(+), 38 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index e5415a5414a..89646f77b42 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -408,6 +408,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
 {
+	unsigned long fallback = 0;
 	unsigned long min, max, start, sidx, midx, step;
 
 	BUG_ON(!size);
@@ -443,8 +444,11 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 	midx = max - PFN_DOWN(bdata->node_boot_start);
 
 	if (bdata->hint_idx > sidx) {
-		/* Make sure we retry on failure */
-		goal = 1;
+		/*
+		 * Handle the valid case of sidx being zero and still
+		 * catch the fallback below.
+		 */
+		fallback = sidx + 1;
 		sidx = ALIGN(bdata->hint_idx, step);
 	}
 
@@ -492,10 +496,39 @@ find_block:
 		return region;
 	}
 
+	if (fallback) {
+		sidx = ALIGN(fallback - 1, step);
+		fallback = 0;
+		goto find_block;
+	}
+
+	return NULL;
+}
+
+static void * __init ___alloc_bootmem_nopanic(unsigned long size,
+					unsigned long align,
+					unsigned long goal,
+					unsigned long limit)
+{
+	bootmem_data_t *bdata;
+
+restart:
+	list_for_each_entry(bdata, &bdata_list, list) {
+		void *region;
+
+		if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
+			continue;
+		if (limit && bdata->node_boot_start >= limit)
+			break;
+
+		region = alloc_bootmem_core(bdata, size, align, goal, limit);
+		if (region)
+			return region;
+	}
+
 	if (goal) {
 		goal = 0;
-		sidx = 0;
-		goto find_block;
+		goto restart;
 	}
 
 	return NULL;
@@ -515,16 +548,23 @@ find_block:
  * Returns NULL on failure.
  */
 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
-				      unsigned long goal)
+					unsigned long goal)
 {
-	bootmem_data_t *bdata;
-	void *ptr;
+	return ___alloc_bootmem_nopanic(size, align, goal, 0);
+}
 
-	list_for_each_entry(bdata, &bdata_list, list) {
-		ptr = alloc_bootmem_core(bdata, size, align, goal, 0);
-		if (ptr)
-			return ptr;
-	}
+static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
+					unsigned long goal, unsigned long limit)
+{
+	void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
+
+	if (mem)
+		return mem;
+	/*
+	 * Whoops, we cannot satisfy the allocation request.
+	 */
+	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
+	panic("Out of memory");
 	return NULL;
 }
 
@@ -544,16 +584,7 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 			      unsigned long goal)
 {
-	void *mem = __alloc_bootmem_nopanic(size,align,goal);
-
-	if (mem)
-		return mem;
-	/*
-	 * Whoops, we cannot satisfy the allocation request.
-	 */
-	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
-	panic("Out of memory");
-	return NULL;
+	return ___alloc_bootmem(size, align, goal, 0);
 }
 
 /**
@@ -653,22 +684,7 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
 void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 				  unsigned long goal)
 {
-	bootmem_data_t *bdata;
-	void *ptr;
-
-	list_for_each_entry(bdata, &bdata_list, list) {
-		ptr = alloc_bootmem_core(bdata, size, align, goal,
-					ARCH_LOW_ADDRESS_LIMIT);
-		if (ptr)
-			return ptr;
-	}
-
-	/*
-	 * Whoops, we cannot satisfy the allocation request.
-	 */
-	printk(KERN_ALERT "low bootmem alloc of %lu bytes failed!\n", size);
-	panic("Out of low memory");
-	return NULL;
+	return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
 }
 
 /**
-- 
GitLab


From 4cc278b721d5bf3569dfc5f1100253042e097bc3 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:08 -0700
Subject: [PATCH 197/853] bootmem: Make __alloc_bootmem_low_node fall back to
 other nodes

__alloc_bootmem_node already does this, make the interface consistent.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 89646f77b42..459da4710b8 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -587,6 +587,19 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, 0);
 }
 
+static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
+				unsigned long size, unsigned long align,
+				unsigned long goal, unsigned long limit)
+{
+	void *ptr;
+
+	ptr = alloc_bootmem_core(bdata, size, align, goal, limit);
+	if (ptr)
+		return ptr;
+
+	return ___alloc_bootmem(size, align, goal, limit);
+}
+
 /**
  * __alloc_bootmem_node - allocate boot memory from a specific node
  * @pgdat: node to allocate from
@@ -605,13 +618,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
-	void *ptr;
-
-	ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
-	if (ptr)
-		return ptr;
-
-	return __alloc_bootmem(size, align, goal);
+	return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
 }
 
 #ifdef CONFIG_SPARSEMEM
@@ -705,6 +712,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
-	return alloc_bootmem_core(pgdat->bdata, size, align, goal,
-				ARCH_LOW_ADDRESS_LIMIT);
+	return ___alloc_bootmem_node(pgdat->bdata, size, align,
+				goal, ARCH_LOW_ADDRESS_LIMIT);
 }
-- 
GitLab


From 75a56cfe9fdb064d1db1cfbc564315fddb756fb1 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:09 -0700
Subject: [PATCH 198/853] bootmem: revisit alloc_bootmem_section

Since alloc_bootmem_core does no goal-fallback anymore and just returns
NULL if the allocation fails, we might now use it in alloc_bootmem_section
without all the fixup code for a misplaced allocation.

Also, the limit can be the first PFN of the next section as the semantics
is that the limit is _above_ the allocated region, not within.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 459da4710b8..282b786c2b1 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -632,30 +632,15 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 void * __init alloc_bootmem_section(unsigned long size,
 				    unsigned long section_nr)
 {
-	void *ptr;
-	unsigned long limit, goal, start_nr, end_nr, pfn;
-	struct pglist_data *pgdat;
+	bootmem_data_t *bdata;
+	unsigned long pfn, goal, limit;
 
 	pfn = section_nr_to_pfn(section_nr);
-	goal = PFN_PHYS(pfn);
-	limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
-	pgdat = NODE_DATA(early_pfn_to_nid(pfn));
-	ptr = alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
-				limit);
-
-	if (!ptr)
-		return NULL;
-
-	start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr)));
-	end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size));
-	if (start_nr != section_nr || end_nr != section_nr) {
-		printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
-		       section_nr);
-		free_bootmem_node(pgdat, __pa(ptr), size);
-		ptr = NULL;
-	}
+	goal = pfn << PAGE_SHIFT;
+	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
+	bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
 
-	return ptr;
+	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
 }
 #endif
 
-- 
GitLab


From 3560e249abda6bee41a07a7bf0383a6e193e2839 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:09 -0700
Subject: [PATCH 199/853] bootmem: replace node_boot_start in struct
 bootmem_data

Almost all users of this field need a PFN instead of a physical address,
so replace node_boot_start with node_min_pfn.

[Lee.Schermerhorn@hp.com: fix spurious BUG_ON() in mark_bootmem()]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/mm/numa.c     |  2 +-
 arch/arm/plat-omap/fb.c  |  4 +---
 arch/avr32/mm/init.c     |  3 +--
 arch/ia64/mm/discontig.c | 19 ++++++++++---------
 arch/m32r/mm/discontig.c |  3 +--
 arch/m32r/mm/init.c      |  4 +---
 arch/mn10300/mm/init.c   |  6 +++---
 arch/sh/mm/init.c        |  2 +-
 include/linux/bootmem.h  |  2 +-
 mm/bootmem.c             | 40 +++++++++++++++++++++-------------------
 10 files changed, 41 insertions(+), 44 deletions(-)

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index def0c74a78a..d8c4ceaf00b 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -304,7 +304,7 @@ void __init paging_init(void)
 
 	for_each_online_node(nid) {
 		bootmem_data_t *bdata = &bootmem_node_data[nid];
-		unsigned long start_pfn = bdata->node_boot_start >> PAGE_SHIFT;
+		unsigned long start_pfn = bdata->node_min_pfn;
 		unsigned long end_pfn = bdata->node_low_pfn;
 
 		if (dma_local_pfn >= end_pfn - start_pfn)
diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index 7854f19b77c..96d6f061973 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -182,7 +182,7 @@ void __init omapfb_reserve_sdram(void)
 		return;
 
 	bdata = NODE_DATA(0)->bdata;
-	sdram_start = bdata->node_boot_start;
+	sdram_start = bdata->node_min_pfn << PAGE_SHIFT;
 	sdram_size = (bdata->node_low_pfn << PAGE_SHIFT) - sdram_start;
 	reserved = 0;
 	for (i = 0; ; i++) {
@@ -340,5 +340,3 @@ unsigned long omapfb_reserve_sram(unsigned long sram_pstart,
 
 
 #endif
-
-
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index 786de88a82a..3c85fdaa948 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -119,8 +119,7 @@ void __init paging_init(void)
 		unsigned long zones_size[MAX_NR_ZONES];
 		unsigned long low, start_pfn;
 
-		start_pfn = pgdat->bdata->node_boot_start;
-		start_pfn >>= PAGE_SHIFT;
+		start_pfn = pgdat->bdata->node_min_pfn;
 		low = pgdat->bdata->node_low_pfn;
 
 		memset(zones_size, 0, sizeof(zones_size));
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 2fcf8464331..d83125e1ed2 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -74,17 +74,17 @@ pg_data_t *pgdat_list[MAX_NUMNODES];
 static int __init build_node_maps(unsigned long start, unsigned long len,
 				  int node)
 {
-	unsigned long cstart, epfn, end = start + len;
+	unsigned long spfn, epfn, end = start + len;
 	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
-	cstart = GRANULEROUNDDOWN(start);
+	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
 
 	if (!bdp->node_low_pfn) {
-		bdp->node_boot_start = cstart;
+		bdp->node_min_pfn = spfn;
 		bdp->node_low_pfn = epfn;
 	} else {
-		bdp->node_boot_start = min(cstart, bdp->node_boot_start);
+		bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
 		bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
 	}
 
@@ -221,20 +221,21 @@ static void __init fill_pernode(int node, unsigned long pernode,
 static int __init find_pernode_space(unsigned long start, unsigned long len,
 				     int node)
 {
-	unsigned long epfn;
+	unsigned long spfn, epfn;
 	unsigned long pernodesize = 0, pernode, pages, mapsize;
 	struct bootmem_data *bdp = &bootmem_node_data[node];
 
+	spfn = start >> PAGE_SHIFT;
 	epfn = (start + len) >> PAGE_SHIFT;
 
-	pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
+	pages = bdp->node_low_pfn - bdp->node_min_pfn;
 	mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
 
 	/*
 	 * Make sure this memory falls within this node's usable memory
 	 * since we may have thrown some away in build_maps().
 	 */
-	if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn)
+	if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
 		return 0;
 
 	/* Don't setup this node's local space twice... */
@@ -296,7 +297,7 @@ static void __init reserve_pernode_space(void)
 		bdp = pdp->bdata;
 
 		/* First the bootmem_map itself */
-		pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
+		pages = bdp->node_low_pfn - bdp->node_min_pfn;
 		size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
 		base = __pa(bdp->node_bootmem_map);
 		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
@@ -466,7 +467,7 @@ void __init find_memory(void)
 
 		init_bootmem_node(pgdat_list[node],
 				  map>>PAGE_SHIFT,
-				  bdp->node_boot_start>>PAGE_SHIFT,
+				  bdp->node_min_pfn,
 				  bdp->node_low_pfn);
 	}
 
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index cc23934bc41..cbc3c4c5456 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -123,8 +123,7 @@ unsigned long __init setup_memory(void)
 	return max_low_pfn;
 }
 
-#define START_PFN(nid)	\
-	(NODE_DATA(nid)->bdata->node_boot_start >> PAGE_SHIFT)
+#define START_PFN(nid)		(NODE_DATA(nid)->bdata->node_min_pfn)
 #define MAX_LOW_PFN(nid)	(NODE_DATA(nid)->bdata->node_low_pfn)
 
 unsigned long __init zone_sizes_init(void)
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 28799af15e9..2554eb59cfe 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -93,8 +93,7 @@ void free_initrd_mem(unsigned long, unsigned long);
 #endif
 
 /* It'd be good if these lines were in the standard header file. */
-#define START_PFN(nid)	\
-	(NODE_DATA(nid)->bdata->node_boot_start >> PAGE_SHIFT)
+#define START_PFN(nid)		(NODE_DATA(nid)->bdata->node_min_pfn)
 #define MAX_LOW_PFN(nid)	(NODE_DATA(nid)->bdata->node_low_pfn)
 
 #ifndef CONFIG_DISCONTIGMEM
@@ -252,4 +251,3 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 	printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
 }
 #endif
-
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index 8c5d88c7b90..8cee387a24f 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -67,8 +67,8 @@ void __init paging_init(void)
 
 	/* declare the sizes of the RAM zones (only use the normal zone) */
 	zones_size[ZONE_NORMAL] =
-		(contig_page_data.bdata->node_low_pfn) -
-		(contig_page_data.bdata->node_boot_start >> PAGE_SHIFT);
+		contig_page_data.bdata->node_low_pfn -
+		contig_page_data.bdata->node_min_pfn;
 
 	/* pass the memory from the bootmem allocator to the main allocator */
 	free_area_init(zones_size);
@@ -87,7 +87,7 @@ void __init mem_init(void)
 	if (!mem_map)
 		BUG();
 
-#define START_PFN	(contig_page_data.bdata->node_boot_start >> PAGE_SHIFT)
+#define START_PFN	(contig_page_data.bdata->node_min_pfn)
 #define MAX_LOW_PFN	(contig_page_data.bdata->node_low_pfn)
 
 	max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index d7df26bd1e5..d652d375eb1 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -191,7 +191,7 @@ void __init paging_init(void)
 		pg_data_t *pgdat = NODE_DATA(nid);
 		unsigned long low, start_pfn;
 
-		start_pfn = pgdat->bdata->node_boot_start >> PAGE_SHIFT;
+		start_pfn = pgdat->bdata->node_min_pfn;
 		low = pgdat->bdata->node_low_pfn;
 
 		if (max_zone_pfns[ZONE_NORMAL] < low)
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 90921d10ffa..4ddf2922fc8 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -28,7 +28,7 @@ extern unsigned long saved_max_pfn;
  * memory pages (including holes) on the node.
  */
 typedef struct bootmem_data {
-	unsigned long node_boot_start;
+	unsigned long node_min_pfn;
 	unsigned long node_low_pfn;
 	void *node_bootmem_map;
 	unsigned long last_end_off;
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 282b786c2b1..4af15d0340a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -80,7 +80,7 @@ static void __init link_bootmem(bootmem_data_t *bdata)
 		bootmem_data_t *ent;
 
 		ent = list_entry(iter, bootmem_data_t, list);
-		if (bdata->node_boot_start < ent->node_boot_start)
+		if (bdata->node_min_pfn < ent->node_min_pfn)
 			break;
 	}
 	list_add_tail(&bdata->list, iter);
@@ -96,7 +96,7 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
 
 	mminit_validate_memmodel_limits(&start, &end);
 	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
-	bdata->node_boot_start = PFN_PHYS(start);
+	bdata->node_min_pfn = start;
 	bdata->node_low_pfn = end;
 	link_bootmem(bdata);
 
@@ -151,7 +151,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	if (!bdata->node_bootmem_map)
 		return 0;
 
-	start = PFN_DOWN(bdata->node_boot_start);
+	start = bdata->node_min_pfn;
 	end = bdata->node_low_pfn;
 
 	/*
@@ -167,7 +167,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 		unsigned long *map, idx, vec;
 
 		map = bdata->node_bootmem_map;
-		idx = start - PFN_DOWN(bdata->node_boot_start);
+		idx = start - bdata->node_min_pfn;
 		vec = ~map[idx / BITS_PER_LONG];
 
 		if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
@@ -192,7 +192,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	}
 
 	page = virt_to_page(bdata->node_bootmem_map);
-	pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+	pages = bdata->node_low_pfn - bdata->node_min_pfn;
 	pages = bootmem_bootmap_pages(pages);
 	count += pages;
 	while (pages--)
@@ -231,8 +231,8 @@ static void __init __free(bootmem_data_t *bdata,
 	unsigned long idx;
 
 	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
-		sidx + PFN_DOWN(bdata->node_boot_start),
-		eidx + PFN_DOWN(bdata->node_boot_start));
+		sidx + bdata->node_min_pfn,
+		eidx + bdata->node_min_pfn);
 
 	if (bdata->hint_idx > sidx)
 		bdata->hint_idx = sidx;
@@ -250,8 +250,8 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
 
 	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
 		bdata - bootmem_node_data,
-		sidx + PFN_DOWN(bdata->node_boot_start),
-		eidx + PFN_DOWN(bdata->node_boot_start),
+		sidx + bdata->node_min_pfn,
+		eidx + bdata->node_min_pfn,
 		flags);
 
 	for (idx = sidx; idx < eidx; idx++)
@@ -261,7 +261,7 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
 				return -EBUSY;
 			}
 			bdebug("silent double reserve of PFN %lx\n",
-				idx + PFN_DOWN(bdata->node_boot_start));
+				idx + bdata->node_min_pfn);
 		}
 	return 0;
 }
@@ -275,11 +275,11 @@ static int __init mark_bootmem_node(bootmem_data_t *bdata,
 	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
 		bdata - bootmem_node_data, start, end, reserve, flags);
 
-	BUG_ON(start < PFN_DOWN(bdata->node_boot_start));
+	BUG_ON(start < bdata->node_min_pfn);
 	BUG_ON(end > bdata->node_low_pfn);
 
-	sidx = start - PFN_DOWN(bdata->node_boot_start);
-	eidx = end - PFN_DOWN(bdata->node_boot_start);
+	sidx = start - bdata->node_min_pfn;
+	eidx = end - bdata->node_min_pfn;
 
 	if (reserve)
 		return __reserve(bdata, sidx, eidx, flags);
@@ -299,7 +299,8 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
 		int err;
 		unsigned long max;
 
-		if (pos < PFN_DOWN(bdata->node_boot_start)) {
+		if (pos < bdata->node_min_pfn ||
+		    pos >= bdata->node_low_pfn) {
 			BUG_ON(pos != start);
 			continue;
 		}
@@ -422,7 +423,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
 		align, goal, limit);
 
-	min = PFN_DOWN(bdata->node_boot_start);
+	min = bdata->node_min_pfn;
 	max = bdata->node_low_pfn;
 
 	goal >>= PAGE_SHIFT;
@@ -440,8 +441,8 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 	else
 		start = ALIGN(min, step);
 
-	sidx = start - PFN_DOWN(bdata->node_boot_start);
-	midx = max - PFN_DOWN(bdata->node_boot_start);
+	sidx = start - bdata->node_min_pfn;;
+	midx = max - bdata->node_min_pfn;
 
 	if (bdata->hint_idx > sidx) {
 		/*
@@ -491,7 +492,8 @@ find_block:
 				PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
 			BUG();
 
-		region = phys_to_virt(bdata->node_boot_start + start_off);
+		region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
+				start_off);
 		memset(region, 0, size);
 		return region;
 	}
@@ -518,7 +520,7 @@ restart:
 
 		if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
 			continue;
-		if (limit && bdata->node_boot_start >= limit)
+		if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
 			break;
 
 		region = alloc_bootmem_core(bdata, size, align, goal, limit);
-- 
GitLab


From 2be0ffe2b29bd31d3debd0877797892ff2d91f4c Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Wed, 23 Jul 2008 21:28:11 -0700
Subject: [PATCH 200/853] mm: add alloc_pages_exact() and free_pages_exact()

alloc_pages_exact() is similar to alloc_pages(), except that it allocates
the minimum number of pages to fulfill the request.  This is useful if you
want to allocate a very large buffer that is slightly larger than an even
power-of-two number of pages.  In that case, alloc_pages() will waste a
lot of memory.

I have a video driver that wants to allocate a 5MB buffer.  alloc_pages()
will waste 3MB of physically-contiguous memory.

Signed-off-by: Timur Tabi <timur@freescale.com>
Cc: Andi Kleen <andi@firstfloor.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  3 +++
 mm/page_alloc.c     | 53 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f640ed24142..e8003afeffb 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -228,6 +228,9 @@ extern struct page *alloc_page_vma(gfp_t gfp_mask,
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
+void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
+void free_pages_exact(void *virt, size_t size);
+
 #define __get_free_page(gfp_mask) \
 		__get_free_pages((gfp_mask),0)
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eaa86671ebb..8d528d57b40 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1697,6 +1697,59 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+/**
+ * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
+ * @size: the number of bytes to allocate
+ * @gfp_mask: GFP flags for the allocation
+ *
+ * This function is similar to alloc_pages(), except that it allocates the
+ * minimum number of pages to satisfy the request.  alloc_pages() can only
+ * allocate memory in power-of-two pages.
+ *
+ * This function is also limited by MAX_ORDER.
+ *
+ * Memory allocated by this function must be released by free_pages_exact().
+ */
+void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
+{
+	unsigned int order = get_order(size);
+	unsigned long addr;
+
+	addr = __get_free_pages(gfp_mask, order);
+	if (addr) {
+		unsigned long alloc_end = addr + (PAGE_SIZE << order);
+		unsigned long used = addr + PAGE_ALIGN(size);
+
+		split_page(virt_to_page(addr), order);
+		while (used < alloc_end) {
+			free_page(used);
+			used += PAGE_SIZE;
+		}
+	}
+
+	return (void *)addr;
+}
+EXPORT_SYMBOL(alloc_pages_exact);
+
+/**
+ * free_pages_exact - release memory allocated via alloc_pages_exact()
+ * @virt: the value returned by alloc_pages_exact.
+ * @size: size of allocation, same value as passed to alloc_pages_exact().
+ *
+ * Release the memory allocated by a previous call to alloc_pages_exact.
+ */
+void free_pages_exact(void *virt, size_t size)
+{
+	unsigned long addr = (unsigned long)virt;
+	unsigned long end = addr + PAGE_ALIGN(size);
+
+	while (addr < end) {
+		free_page(addr);
+		addr += PAGE_SIZE;
+	}
+}
+EXPORT_SYMBOL(free_pages_exact);
+
 static unsigned int nr_free_zone_pages(int offset)
 {
 	struct zoneref *z;
-- 
GitLab


From b69a7288ea7bf171328f313f0edae629f50e3bdb Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:12 -0700
Subject: [PATCH 201/853] mm/page_alloc.c: cleanups

This patch contains the following cleanups:
- make the following needlessly global variables static:
  - required_kernelcore
  - zone_movable_pfn[]
- make the following needlessly global functions static:
  - move_freepages()
  - move_freepages_block()
  - setup_pageset()
  - find_usable_zone_for_movable()
  - adjust_zone_range_for_zone_movable()
  - __absent_pages_in_range()
  - find_min_pfn_for_node()
  - find_zone_movable_pfns_for_nodes()

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8d528d57b40..cd4c41432ef 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -153,9 +153,9 @@ static unsigned long __meminitdata dma_reserve;
   static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
   static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
 #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
-  unsigned long __initdata required_kernelcore;
+  static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
-  unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+  static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
 
   /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
   int movable_zone;
@@ -674,9 +674,9 @@ static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
  * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
-int move_freepages(struct zone *zone,
-			struct page *start_page, struct page *end_page,
-			int migratetype)
+static int move_freepages(struct zone *zone,
+			  struct page *start_page, struct page *end_page,
+			  int migratetype)
 {
 	struct page *page;
 	unsigned long order;
@@ -715,7 +715,8 @@ int move_freepages(struct zone *zone,
 	return pages_moved;
 }
 
-int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
+static int move_freepages_block(struct zone *zone, struct page *page,
+				int migratetype)
 {
 	unsigned long start_pfn, end_pfn;
 	struct page *start_page, *end_page;
@@ -2652,7 +2653,7 @@ static int zone_batchsize(struct zone *zone)
 	return batch;
 }
 
-inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
 
@@ -3099,7 +3100,7 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
  * assumption is made that zones within a node are ordered in monotonic
  * increasing memory addresses so that the "highest" populated zone is used
  */
-void __init find_usable_zone_for_movable(void)
+static void __init find_usable_zone_for_movable(void)
 {
 	int zone_index;
 	for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
@@ -3125,7 +3126,7 @@ void __init find_usable_zone_for_movable(void)
  * highest usable zone for ZONE_MOVABLE. This preserves the assumption that
  * zones within a node are in order of monotonic increases memory addresses
  */
-void __meminit adjust_zone_range_for_zone_movable(int nid,
+static void __meminit adjust_zone_range_for_zone_movable(int nid,
 					unsigned long zone_type,
 					unsigned long node_start_pfn,
 					unsigned long node_end_pfn,
@@ -3186,7 +3187,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-unsigned long __meminit __absent_pages_in_range(int nid,
+static unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
@@ -3723,7 +3724,7 @@ static void __init sort_node_map(void)
 }
 
 /* Find the lowest pfn for a node */
-unsigned long __init find_min_pfn_for_node(int nid)
+static unsigned long __init find_min_pfn_for_node(int nid)
 {
 	int i;
 	unsigned long min_pfn = ULONG_MAX;
@@ -3795,7 +3796,7 @@ static unsigned long __init early_calculate_totalpages(void)
  * memory. When they don't, some nodes will have more kernelcore than
  * others
  */
-void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
+static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
 {
 	int i, nid;
 	unsigned long usable_startpfn;
-- 
GitLab


From d92bc318547507a944a22e7ef936793dc0fe167f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:12 -0700
Subject: [PATCH 202/853] mm: make register_page_bootmem_info_section() static

Make the needlessly global register_page_bootmem_info_section() static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6e26adc08f1..ec85c37dcfb 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -86,7 +86,7 @@ void put_page_bootmem(struct page *page)
 
 }
 
-void register_page_bootmem_info_section(unsigned long start_pfn)
+static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
 	unsigned long *usemap, mapsize, section_nr, i;
 	struct mem_section *ms;
-- 
GitLab


From 27ac792ca0b0a1e7e65f20342260650516c95864 Mon Sep 17 00:00:00 2001
From: Andrea Righi <righi.andrea@gmail.com>
Date: Wed, 23 Jul 2008 21:28:13 -0700
Subject: [PATCH 203/853] PAGE_ALIGN(): correctly handle 64-bit values on
 32-bit architectures

On 32-bit architectures PAGE_ALIGN() truncates 64-bit values to the 32-bit
boundary. For example:

	u64 val = PAGE_ALIGN(size);

always returns a value < 4GB even if size is greater than 4GB.

The problem resides in PAGE_MASK definition (from include/asm-x86/page.h for
example):

#define PAGE_SHIFT      12
#define PAGE_SIZE       (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE-1))
...
#define PAGE_ALIGN(addr)       (((addr)+PAGE_SIZE-1)&PAGE_MASK)

The "~" is performed on a 32-bit value, so everything in "and" with
PAGE_MASK greater than 4GB will be truncated to the 32-bit boundary.
Using the ALIGN() macro seems to be the right way, because it uses
typeof(addr) for the mask.

Also move the PAGE_ALIGN() definitions out of include/asm-*/page.h and into
include/linux/mm.h.

See also lkml discussion: http://lkml.org/lkml/2008/6/11/237

[akpm@linux-foundation.org: fix drivers/media/video/uvc/uvc_queue.c]
[akpm@linux-foundation.org: fix v850]
[akpm@linux-foundation.org: fix powerpc]
[akpm@linux-foundation.org: fix arm]
[akpm@linux-foundation.org: fix mips]
[akpm@linux-foundation.org: fix drivers/media/video/pvrusb2/pvrusb2-dvb.c]
[akpm@linux-foundation.org: fix drivers/mtd/maps/uclinux.c]
[akpm@linux-foundation.org: fix powerpc]
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/module.c                     | 1 +
 arch/arm/plat-omap/fb.c                      | 1 +
 arch/avr32/mm/ioremap.c                      | 1 +
 arch/h8300/kernel/setup.c                    | 1 +
 arch/m68k/amiga/chipram.c                    | 1 +
 arch/m68knommu/kernel/setup.c                | 1 +
 arch/mips/kernel/module.c                    | 1 +
 arch/mips/sgi-ip27/ip27-klnuma.c             | 1 +
 arch/powerpc/kernel/suspend.c                | 1 +
 arch/powerpc/lib/code-patching.c             | 1 +
 arch/sparc64/kernel/iommu_common.h           | 2 +-
 arch/x86/kernel/module_64.c                  | 1 +
 arch/xtensa/kernel/setup.c                   | 1 +
 drivers/char/random.c                        | 1 +
 drivers/ieee1394/iso.c                       | 1 +
 drivers/media/video/pvrusb2/pvrusb2-dvb.c    | 1 +
 drivers/media/video/pvrusb2/pvrusb2-ioread.c | 1 +
 drivers/media/video/uvc/uvc_queue.c          | 1 +
 drivers/media/video/videobuf-core.c          | 1 +
 drivers/mtd/maps/uclinux.c                   | 1 +
 drivers/net/mlx4/eq.c                        | 1 +
 drivers/pcmcia/electra_cf.c                  | 1 +
 drivers/scsi/sun_esp.c                       | 1 +
 drivers/video/acornfb.c                      | 1 +
 drivers/video/imxfb.c                        | 1 +
 drivers/video/omap/dispc.c                   | 1 +
 drivers/video/omap/omapfb_main.c             | 1 +
 drivers/video/pxafb.c                        | 1 +
 drivers/video/sa1100fb.c                     | 1 +
 include/asm-alpha/page.h                     | 3 ---
 include/asm-arm/page-nommu.h                 | 4 +---
 include/asm-arm/page.h                       | 3 ---
 include/asm-avr32/page.h                     | 3 ---
 include/asm-blackfin/page.h                  | 3 ---
 include/asm-cris/page.h                      | 3 ---
 include/asm-frv/page.h                       | 3 ---
 include/asm-h8300/page.h                     | 3 ---
 include/asm-ia64/page.h                      | 1 -
 include/asm-m32r/page.h                      | 3 ---
 include/asm-m68k/dvma.h                      | 2 +-
 include/asm-m68k/page.h                      | 3 ---
 include/asm-m68knommu/page.h                 | 3 ---
 include/asm-mips/page.h                      | 3 ---
 include/asm-mips/processor.h                 | 2 +-
 include/asm-mn10300/page.h                   | 3 ---
 include/asm-parisc/page.h                    | 4 ----
 include/asm-powerpc/page.h                   | 3 ---
 include/asm-s390/page.h                      | 3 ---
 include/asm-sh/page.h                        | 3 ---
 include/asm-sparc/page_32.h                  | 3 ---
 include/asm-sparc/page_64.h                  | 3 ---
 include/asm-um/page.h                        | 3 ---
 include/asm-v850/page.h                      | 4 ----
 include/asm-x86/page.h                       | 3 ---
 include/asm-xtensa/page.h                    | 2 --
 include/linux/mm.h                           | 3 +++
 sound/core/info.c                            | 1 +
 57 files changed, 36 insertions(+), 74 deletions(-)

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 79b7e5cf541..a68259a0ccc 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/moduleloader.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/elf.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index 96d6f061973..5d107520e6b 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -23,6 +23,7 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/bootmem.h>
diff --git a/arch/avr32/mm/ioremap.c b/arch/avr32/mm/ioremap.c
index 3437c82434a..f03b79f0e0a 100644
--- a/arch/avr32/mm/ioremap.c
+++ b/arch/avr32/mm/ioremap.c
@@ -6,6 +6,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/io.h>
 
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index b1f25c20a5d..7fda657110e 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/fb.h>
 #include <linux/console.h>
diff --git a/arch/m68k/amiga/chipram.c b/arch/m68k/amiga/chipram.c
index cbe36538af4..61df1d33c05 100644
--- a/arch/m68k/amiga/chipram.c
+++ b/arch/m68k/amiga/chipram.c
@@ -9,6 +9,7 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/slab.h>
diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index 03f4fe6a2fc..5985f198902 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/fb.h>
 #include <linux/module.h>
+#include <linux/mm.h>
 #include <linux/console.h>
 #include <linux/errno.h>
 #include <linux/string.h>
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index e7ed0ac4853..1f60e27523d 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -22,6 +22,7 @@
 
 #include <linux/moduleloader.h>
 #include <linux/elf.h>
+#include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index 48932ce1d73..d9c79d8be81 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -4,6 +4,7 @@
  * Copyright 2000 - 2001 Kanoj Sarcar (kanoj@sgi.com)
  */
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/kernel.h>
 #include <linux/nodemask.h>
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
index 8cee5710754..6fc6328dc62 100644
--- a/arch/powerpc/kernel/suspend.c
+++ b/arch/powerpc/kernel/suspend.c
@@ -7,6 +7,7 @@
  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
  */
 
+#include <linux/mm.h>
 #include <asm/page.h>
 
 /* References to section boundaries */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 0559fe086eb..7c975d43e3f 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
 
diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h
index f3575a614fa..53b19c8231a 100644
--- a/arch/sparc64/kernel/iommu_common.h
+++ b/arch/sparc64/kernel/iommu_common.h
@@ -23,7 +23,7 @@
 #define IO_PAGE_SHIFT			13
 #define IO_PAGE_SIZE			(1UL << IO_PAGE_SHIFT)
 #define IO_PAGE_MASK			(~(IO_PAGE_SIZE-1))
-#define IO_PAGE_ALIGN(addr)		(((addr)+IO_PAGE_SIZE-1)&IO_PAGE_MASK)
+#define IO_PAGE_ALIGN(addr)		ALIGN(addr, IO_PAGE_SIZE)
 
 #define IO_TSB_ENTRIES			(128*1024)
 #define IO_TSB_SIZE			(IO_TSB_ENTRIES * 8)
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 0e867676b5a..6ba87830d4b 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -22,6 +22,7 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/bug.h>
 
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 5e6d75c9f92..a00359e8f7a 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -16,6 +16,7 @@
 
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <linux/proc_fs.h>
 #include <linux/screen_info.h>
 #include <linux/bootmem.h>
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0cf98bd4f2d..e0d0e371909 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -236,6 +236,7 @@
 #include <linux/fs.h>
 #include <linux/genhd.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
 #include <linux/cryptohash.h>
diff --git a/drivers/ieee1394/iso.c b/drivers/ieee1394/iso.c
index 07ca35c98f9..1cf6487b65b 100644
--- a/drivers/ieee1394/iso.c
+++ b/drivers/ieee1394/iso.c
@@ -11,6 +11,7 @@
 
 #include <linux/pci.h>
 #include <linux/sched.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 
 #include "hosts.h"
diff --git a/drivers/media/video/pvrusb2/pvrusb2-dvb.c b/drivers/media/video/pvrusb2/pvrusb2-dvb.c
index 6ec4bf81fc7..77b3c338506 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-dvb.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-dvb.c
@@ -20,6 +20,7 @@
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/mm.h>
 #include "dvbdev.h"
 #include "pvrusb2-debug.h"
 #include "pvrusb2-hdw-internal.h"
diff --git a/drivers/media/video/pvrusb2/pvrusb2-ioread.c b/drivers/media/video/pvrusb2/pvrusb2-ioread.c
index 05a1376405e..b4824782d85 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-ioread.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-ioread.c
@@ -22,6 +22,7 @@
 #include "pvrusb2-debug.h"
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
diff --git a/drivers/media/video/uvc/uvc_queue.c b/drivers/media/video/uvc/uvc_queue.c
index 7388d0cee3d..5646a6a3293 100644
--- a/drivers/media/video/uvc/uvc_queue.c
+++ b/drivers/media/video/uvc/uvc_queue.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/version.h>
+#include <linux/mm.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/usb.h>
diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c
index 0a88c44ace0..b7b05842cf2 100644
--- a/drivers/media/video/videobuf-core.c
+++ b/drivers/media/video/videobuf-core.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 
diff --git a/drivers/mtd/maps/uclinux.c b/drivers/mtd/maps/uclinux.c
index c42f4b83f68..3fcf92130aa 100644
--- a/drivers/mtd/maps/uclinux.c
+++ b/drivers/mtd/maps/uclinux.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/major.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index e141a1513f0..ea3a09aaa84 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -33,6 +33,7 @@
 
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/dma-mapping.h>
 
 #include <linux/mlx4/cmd.h>
diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c
index c21f9a9c3e3..a34284b1482 100644
--- a/drivers/pcmcia/electra_cf.c
+++ b/drivers/pcmcia/electra_cf.c
@@ -28,6 +28,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/of_platform.h>
 
diff --git a/drivers/scsi/sun_esp.c b/drivers/scsi/sun_esp.c
index 2c87db98cdf..f9cf7015136 100644
--- a/drivers/scsi/sun_esp.c
+++ b/drivers/scsi/sun_esp.c
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 
 #include <asm/irq.h>
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c
index eedb8285e32..017233d0c48 100644
--- a/drivers/video/acornfb.c
+++ b/drivers/video/acornfb.c
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/platform_device.h>
diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c
index 94e4d3ac1a0..0c5a475c1ca 100644
--- a/drivers/video/imxfb.c
+++ b/drivers/video/imxfb.c
@@ -24,6 +24,7 @@
 #include <linux/string.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/fb.h>
 #include <linux/delay.h>
 #include <linux/init.h>
diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index ab32ceb0617..ab77c51fe9d 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -20,6 +20,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/dma-mapping.h>
+#include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/clk.h>
 #include <linux/io.h>
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index 14d0f7a1114..f85af5c4fa6 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -25,6 +25,7 @@
  * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 #include <linux/platform_device.h>
+#include <linux/mm.h>
 #include <linux/uaccess.h>
 
 #include <asm/mach-types.h>
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index bb251436950..5e8a140399f 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -30,6 +30,7 @@
 #include <linux/string.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/fb.h>
 #include <linux/delay.h>
 #include <linux/init.h>
diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c
index ab2b2110478..4a9f7e12180 100644
--- a/drivers/video/sa1100fb.c
+++ b/drivers/video/sa1100fb.c
@@ -167,6 +167,7 @@
 #include <linux/string.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/fb.h>
 #include <linux/delay.h>
 #include <linux/init.h>
diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h
index 22ff9762d17..0995f9d1341 100644
--- a/include/asm-alpha/page.h
+++ b/include/asm-alpha/page.h
@@ -80,9 +80,6 @@ typedef struct page *pgtable_t;
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #define __pa(x)			((unsigned long) (x) - PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long) (x) + PAGE_OFFSET))
 #ifndef CONFIG_DISCONTIGMEM
diff --git a/include/asm-arm/page-nommu.h b/include/asm-arm/page-nommu.h
index a1bcad06048..ea1cde84f50 100644
--- a/include/asm-arm/page-nommu.h
+++ b/include/asm-arm/page-nommu.h
@@ -7,6 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
 #ifndef _ASMARM_PAGE_NOMMU_H
 #define _ASMARM_PAGE_NOMMU_H
 
@@ -42,9 +43,6 @@ typedef unsigned long pgprot_t;
 #define __pmd(x)        (x)
 #define __pgprot(x)     (x)
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-arm/page.h b/include/asm-arm/page.h
index 8e05bdb5f12..7c5fc5582e5 100644
--- a/include/asm-arm/page.h
+++ b/include/asm-arm/page.h
@@ -15,9 +15,6 @@
 #define PAGE_SIZE		(1UL << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #ifndef __ASSEMBLY__
 
 #ifndef CONFIG_MMU
diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h
index cbbc5ca9728..f805d1cb11b 100644
--- a/include/asm-avr32/page.h
+++ b/include/asm-avr32/page.h
@@ -57,9 +57,6 @@ static inline int get_order(unsigned long size)
 
 #endif /* !__ASSEMBLY__ */
 
-/* Align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 /*
  * The hardware maps the virtual addresses 0x80000000 -> 0x9fffffff
  * permanently to the physical addresses 0x00000000 -> 0x1fffffff when
diff --git a/include/asm-blackfin/page.h b/include/asm-blackfin/page.h
index c7db0220fbd..344f6a8c1f2 100644
--- a/include/asm-blackfin/page.h
+++ b/include/asm-blackfin/page.h
@@ -51,9 +51,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-cris/page.h b/include/asm-cris/page.h
index c45bb1ef397..d19272ba6b6 100644
--- a/include/asm-cris/page.h
+++ b/include/asm-cris/page.h
@@ -60,9 +60,6 @@ typedef struct page *pgtable_t;
 
 #define page_to_phys(page)     __pa((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #ifndef __ASSEMBLY__
 
 #endif /* __ASSEMBLY__ */
diff --git a/include/asm-frv/page.h b/include/asm-frv/page.h
index c2c1e89e747..bd9c220094c 100644
--- a/include/asm-frv/page.h
+++ b/include/asm-frv/page.h
@@ -40,9 +40,6 @@ typedef struct page *pgtable_t;
 #define __pgprot(x)	((pgprot_t) { (x) } )
 #define PTE_MASK	PAGE_MASK
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 #define devmem_is_allowed(pfn)	1
 
 #define __pa(vaddr)		virt_to_phys((void *) (unsigned long) (vaddr))
diff --git a/include/asm-h8300/page.h b/include/asm-h8300/page.h
index d6a3eaf3b27..0b6acf0b03a 100644
--- a/include/asm-h8300/page.h
+++ b/include/asm-h8300/page.h
@@ -43,9 +43,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h
index 36f39321b76..5f271bc712e 100644
--- a/include/asm-ia64/page.h
+++ b/include/asm-ia64/page.h
@@ -40,7 +40,6 @@
 
 #define PAGE_SIZE		(__IA64_UL_CONST(1) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE - 1))
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
 
 #define PERCPU_PAGE_SHIFT	16	/* log2() of max. size of per-CPU area */
 #define PERCPU_PAGE_SIZE	(__IA64_UL_CONST(1) << PERCPU_PAGE_SHIFT)
diff --git a/include/asm-m32r/page.h b/include/asm-m32r/page.h
index 8a677f3fca6..c9333089fe1 100644
--- a/include/asm-m32r/page.h
+++ b/include/asm-m32r/page.h
@@ -41,9 +41,6 @@ typedef struct page *pgtable_t;
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 /*
  * This handles the memory map.. We could make this a config
  * option, but too many people screw it up, and too few need
diff --git a/include/asm-m68k/dvma.h b/include/asm-m68k/dvma.h
index 4fff408d015..890bbf7e775 100644
--- a/include/asm-m68k/dvma.h
+++ b/include/asm-m68k/dvma.h
@@ -13,7 +13,7 @@
 #define DVMA_PAGE_SHIFT	13
 #define DVMA_PAGE_SIZE	(1UL << DVMA_PAGE_SHIFT)
 #define DVMA_PAGE_MASK	(~(DVMA_PAGE_SIZE-1))
-#define DVMA_PAGE_ALIGN(addr)	(((addr)+DVMA_PAGE_SIZE-1)&DVMA_PAGE_MASK)
+#define DVMA_PAGE_ALIGN(addr)	ALIGN(addr, DVMA_PAGE_SIZE)
 
 extern void dvma_init(void);
 extern int dvma_map_iommu(unsigned long kaddr, unsigned long baddr,
diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h
index 880c2cbff8a..a34b8bad784 100644
--- a/include/asm-m68k/page.h
+++ b/include/asm-m68k/page.h
@@ -103,9 +103,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #endif /* !__ASSEMBLY__ */
 
 #include <asm/page_offset.h>
diff --git a/include/asm-m68knommu/page.h b/include/asm-m68knommu/page.h
index 1e82ebb7d64..3a1ede4544c 100644
--- a/include/asm-m68knommu/page.h
+++ b/include/asm-m68knommu/page.h
@@ -43,9 +43,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h
index 494f00ba954..fe7a88ea066 100644
--- a/include/asm-mips/page.h
+++ b/include/asm-mips/page.h
@@ -137,9 +137,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 /*
  * __pa()/__va() should be used only during mem init.
  */
diff --git a/include/asm-mips/processor.h b/include/asm-mips/processor.h
index 58cbac5a64e..a1e4453469f 100644
--- a/include/asm-mips/processor.h
+++ b/include/asm-mips/processor.h
@@ -45,7 +45,7 @@ extern unsigned int vced_count, vcei_count;
  * This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
+#define TASK_UNMAPPED_BASE	(((TASK_SIZE / 3) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
 #endif
 
 #ifdef CONFIG_64BIT
diff --git a/include/asm-mn10300/page.h b/include/asm-mn10300/page.h
index 124971b9fb9..8288e124165 100644
--- a/include/asm-mn10300/page.h
+++ b/include/asm-mn10300/page.h
@@ -61,9 +61,6 @@ typedef struct page *pgtable_t;
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 /*
  * This handles the memory map.. We could make this a config
  * option, but too many people screw it up, and too few need
diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h
index 27d50b85954..c3941f09a87 100644
--- a/include/asm-parisc/page.h
+++ b/include/asm-parisc/page.h
@@ -119,10 +119,6 @@ extern int npmem_ranges;
 #define PMD_ENTRY_SIZE	(1UL << BITS_PER_PMD_ENTRY)
 #define PTE_ENTRY_SIZE	(1UL << BITS_PER_PTE_ENTRY)
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
-
 #define LINUX_GATEWAY_SPACE     0
 
 /* This governs the relationship between virtual and physical addresses.
diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h
index cffdf0eb0df..e088545cb3f 100644
--- a/include/asm-powerpc/page.h
+++ b/include/asm-powerpc/page.h
@@ -119,9 +119,6 @@ extern phys_addr_t kernstart_addr;
 /* align addr on a size boundary - adjust address up if needed */
 #define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	_ALIGN(addr, PAGE_SIZE)
-
 /*
  * Don't compare things with KERNELBASE or PAGE_OFFSET to test for
  * "kernelness", use is_kernel_addr() - it should do what you want.
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index 12fd9c4f0f1..991ba939408 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -138,9 +138,6 @@ void arch_alloc_page(struct page *page, int order);
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)        (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #define __PAGE_OFFSET           0x0UL
 #define PAGE_OFFSET             0x0UL
 #define __pa(x)                 (unsigned long)(x)
diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h
index 304c30b5d94..5dc01d2fcc4 100644
--- a/include/asm-sh/page.h
+++ b/include/asm-sh/page.h
@@ -22,9 +22,6 @@
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 #define PTE_MASK	PAGE_MASK
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
 #define HPAGE_SHIFT	16
 #elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
diff --git a/include/asm-sparc/page_32.h b/include/asm-sparc/page_32.h
index 14de518cc38..cf5fb70ca1c 100644
--- a/include/asm-sparc/page_32.h
+++ b/include/asm-sparc/page_32.h
@@ -134,9 +134,6 @@ BTFIXUPDEF_SETHI(sparc_unmapped_base)
 
 #endif /* !(__ASSEMBLY__) */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)  (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #define PAGE_OFFSET	0xf0000000
 #ifndef __ASSEMBLY__
 extern unsigned long phys_base;
diff --git a/include/asm-sparc/page_64.h b/include/asm-sparc/page_64.h
index a8a2bba032c..b579b910ef5 100644
--- a/include/asm-sparc/page_64.h
+++ b/include/asm-sparc/page_64.h
@@ -106,9 +106,6 @@ typedef struct page *pgtable_t;
 
 #endif /* !(__ASSEMBLY__) */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 /* We used to stick this into a hard-coded global register (%g4)
  * but that does not make sense anymore.
  */
diff --git a/include/asm-um/page.h b/include/asm-um/page.h
index 916e1a61999..335c57383c0 100644
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -92,9 +92,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x) ((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long uml_physmem;
 
 #define PAGE_OFFSET (uml_physmem)
diff --git a/include/asm-v850/page.h b/include/asm-v850/page.h
index 74a539a9bd5..f9de35d873f 100644
--- a/include/asm-v850/page.h
+++ b/include/asm-v850/page.h
@@ -94,10 +94,6 @@ typedef unsigned long pgprot_t;
 #endif /* !__ASSEMBLY__ */
 
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
-
 /* No current v850 processor has virtual memory.  */
 #define __virt_to_phys(addr)	(addr)
 #define __phys_to_virt(addr)	(addr)
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 6e02098b160..49982110e4d 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -34,9 +34,6 @@
 
 #define HUGE_MAX_HSTATE 2
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 #endif
diff --git a/include/asm-xtensa/page.h b/include/asm-xtensa/page.h
index 80a6ae0dd25..11f7dc2dbec 100644
--- a/include/asm-xtensa/page.h
+++ b/include/asm-xtensa/page.h
@@ -26,13 +26,11 @@
 
 /*
  * PAGE_SHIFT determines the page size
- * PAGE_ALIGN(x) aligns the pointer to the (next) page boundary
  */
 
 #define PAGE_SHIFT		12
 #define PAGE_SIZE		(__XTENSA_UL_CONST(1) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE - 1) & PAGE_MASK)
 
 #define PAGE_OFFSET		XCHAL_KSEG_CACHED_VADDR
 #define MAX_MEM_PFN		XCHAL_KSEG_SIZE
diff --git a/include/linux/mm.h b/include/linux/mm.h
index df322fb4df3..d87a5a5fe87 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -41,6 +41,9 @@ extern unsigned long mmap_min_addr;
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+
 /*
  * Linux kernel virtual memory manager primitives.
  * The idea being to have a "virtual" mm in the same way
diff --git a/sound/core/info.c b/sound/core/info.c
index cb5ead3e202..c67773ad929 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -21,6 +21,7 @@
 
 #include <linux/init.h>
 #include <linux/time.h>
+#include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <sound/core.h>
-- 
GitLab


From f84f9504bddeec33a72d64ebe95143d3aaeb3f9b Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Wed, 23 Jul 2008 21:28:14 -0700
Subject: [PATCH 204/853] mm: remove initialization of static per-cpu variables

This was required by some old, no-longer-used gcc on sparc.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/swap.c b/mm/swap.c
index 45c9f25a8a3..dd89234ee51 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,9 +34,9 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
-static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -493,7 +493,7 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
  */
 #define ACCT_THRESHOLD	max(16, NR_CPUS * 2)
 
-static DEFINE_PER_CPU(long, committed_space) = 0;
+static DEFINE_PER_CPU(long, committed_space);
 
 void vm_acct_memory(long pages)
 {
-- 
GitLab


From 48c906823f3927b981db9f0b03c2e2499977ee93 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Wed, 23 Jul 2008 21:28:15 -0700
Subject: [PATCH 205/853] memory hotplug: allocate usemap on the section with
 pgdat

With this patch, usemaps are allocated on the section which has the pgdat.

Because a usemap is very small, the usemaps of many other sections are
allocated on a single page.  If a section holds such a usemap page, it
can't be removed until those other sections have been removed.  This
dependency is not desirable for memory removal.

Pgdat has a similar property.  When a section has the pgdat area, it must
be the last section to be removed on the node.  So, if section A has the
pgdat and section B has the usemap for section A, neither section can be
removed because they depend on each other.

To solve this issue, this patch collects usemaps on the same section as
the pgdat as much as possible.  If the other sections don't have any
dependency, this section will finally be able to be removed.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: David Miller <davem@davemloft.net>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hiroyuki KAMEZAWA <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Tony Breeds <tony@bakeyournoodle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/sparse.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

diff --git a/mm/sparse.c b/mm/sparse.c
index 7a3650923d9..8ffc0899000 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -269,16 +269,92 @@ static unsigned long *__kmalloc_section_usemap(void)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static unsigned long * __init
+sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
+{
+	unsigned long section_nr;
+
+	/*
+	 * A page may contain usemaps for other sections preventing the
+	 * page being freed and making a section unremovable while
+	 * other sections referencing the usemap retmain active. Similarly,
+	 * a pgdat can prevent a section being removed. If section A
+	 * contains a pgdat and section B contains the usemap, both
+	 * sections become inter-dependent. This allocates usemaps
+	 * from the same section as the pgdat where possible to avoid
+	 * this problem.
+	 */
+	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	return alloc_bootmem_section(usemap_size(), section_nr);
+}
+
+static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+{
+	unsigned long usemap_snr, pgdat_snr;
+	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
+	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	int usemap_nid;
+
+	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	if (usemap_snr == pgdat_snr)
+		return;
+
+	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
+		/* skip redundant message */
+		return;
+
+	old_usemap_snr = usemap_snr;
+	old_pgdat_snr = pgdat_snr;
+
+	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
+	if (usemap_nid != nid) {
+		printk(KERN_INFO
+		       "node %d must be removed before remove section %ld\n",
+		       nid, usemap_snr);
+		return;
+	}
+	/*
+	 * There is a circular dependency.
+	 * Some platforms allow un-removable section because they will just
+	 * gather other removable sections for dynamic partitioning.
+	 * Just notify un-removable section's number here.
+	 */
+	printk(KERN_INFO "Section %ld and %ld (node %d)", usemap_snr,
+	       pgdat_snr, nid);
+	printk(KERN_CONT
+	       " have a circular dependency on usemap and pgdat allocations\n");
+}
+#else
+static unsigned long * __init
+sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
+{
+	return NULL;
+}
+
+static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+{
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
 static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
 {
 	unsigned long *usemap;
 	struct mem_section *ms = __nr_to_section(pnum);
 	int nid = sparse_early_nid(ms);
 
-	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid));
 	if (usemap)
 		return usemap;
 
+	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	if (usemap) {
+		check_usemap_section_nr(nid, usemap);
+		return usemap;
+	}
+
 	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
 	nid = 0;
 
-- 
GitLab


From af370fb8cb3031f20438f246798d5f0d98089f29 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Wed, 23 Jul 2008 21:28:17 -0700
Subject: [PATCH 206/853] memory hotplug: small fixes to bootmem freeing for
 memory hotremove

- Change some naming
  * Magic -> types
  * MIX_INFO -> MIX_SECTION_INFO
  * Change definition of bootmem type from direct hex value

- __free_pages_bootmem() becomes __meminit.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  8 ++++----
 mm/memory_hotplug.c            | 12 ++++++------
 mm/page_alloc.c                |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index ea9f5ad9ec8..3628e5088f6 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,12 +13,12 @@ struct mem_section;
 #ifdef CONFIG_MEMORY_HOTPLUG
 
 /*
- * Magic number for free bootmem.
+ * Types for free bootmem.
  * The normal smallest mapcount is -1. Here is smaller value than it.
  */
-#define SECTION_INFO		0xfffffffe
-#define MIX_INFO		0xfffffffd
-#define NODE_INFO		0xfffffffc
+#define SECTION_INFO		(-1 - 1)
+#define MIX_SECTION_INFO	(-1 - 2)
+#define NODE_INFO		(-1 - 3)
 
 /*
  * pgdat resizing functions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ec85c37dcfb..0fb05b258f0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -62,9 +62,9 @@ static void release_memory_resource(struct resource *res)
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info,  struct page *page, int magic)
+static void get_page_bootmem(unsigned long info,  struct page *page, int type)
 {
-	atomic_set(&page->_mapcount, magic);
+	atomic_set(&page->_mapcount, type);
 	SetPagePrivate(page);
 	set_page_private(page, info);
 	atomic_inc(&page->_count);
@@ -72,10 +72,10 @@ static void get_page_bootmem(unsigned long info,  struct page *page, int magic)
 
 void put_page_bootmem(struct page *page)
 {
-	int magic;
+	int type;
 
-	magic = atomic_read(&page->_mapcount);
-	BUG_ON(magic >= -1);
+	type = atomic_read(&page->_mapcount);
+	BUG_ON(type >= -1);
 
 	if (atomic_dec_return(&page->_count) == 1) {
 		ClearPagePrivate(page);
@@ -119,7 +119,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
 	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
 
 	for (i = 0; i < mapsize; i++, page++)
-		get_page_bootmem(section_nr, page, MIX_INFO);
+		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cd4c41432ef..6da667274df 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -533,7 +533,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 /*
  * permit the bootmem allocator to evade page validation on high-order frees
  */
-void __free_pages_bootmem(struct page *page, unsigned int order)
+void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
 	if (order == 0) {
 		__ClearPageReserved(page);
-- 
GitLab


From 2f7f24eca31c4fc2fdb134b2ef743ccd67cfb9a9 Mon Sep 17 00:00:00 2001
From: Kent Liu <kent.liu@linux.intel.com>
Date: Wed, 23 Jul 2008 21:28:18 -0700
Subject: [PATCH 207/853] memory-hotplug: don't calculate vm_total_pages twice
 when rebuilding zonelists in online_pages()

If zonelist is required to be rebuilt in online_pages(), there is no need
to recalculate vm_total_pages in that function, as it has been updated in
the call to build_all_zonelists().

Signed-off-by: Kent Liu <kent.liu@linux.intel.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0fb05b258f0..93aba78dc8b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -429,7 +429,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 
 	if (need_zonelists_rebuild)
 		build_all_zonelists();
-	vm_total_pages = nr_free_pagecache_pages();
+	else
+		vm_total_pages = nr_free_pagecache_pages();
+
 	writeback_set_ratelimit();
 
 	if (onlined_pages)
-- 
GitLab


From 5c755e9fd813810680abd56ec09a5f90143e815b Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Wed, 23 Jul 2008 21:28:19 -0700
Subject: [PATCH 208/853] memory-hotplug: add sysfs removable attribute for
 hotplug memory remove

Memory may be hot-removed on a per-memory-block basis, particularly on
POWER where the SPARSEMEM section size often matches the memory-block
size.  A user-level agent must be able to identify which sections of
memory are likely to be removable before attempting the potentially
expensive operation.  This patch adds a file called "removable" to the
memory directory in sysfs to help such an agent.  In this patch, a memory
block is considered removable if:

o It contains only MOVABLE pageblocks
o It contains only pageblocks with free pages regardless of pageblock type

On the other hand, a memory block starting with a PageReserved() page will
never be considered removable.  Without this patch, the user-agent is
forced to choose a memory block to remove randomly.

Sample output of the sysfs files:

./memory/memory0/removable: 0
./memory/memory1/removable: 0
./memory/memory2/removable: 0
./memory/memory3/removable: 0
./memory/memory4/removable: 0
./memory/memory5/removable: 0
./memory/memory6/removable: 0
./memory/memory7/removable: 1
./memory/memory8/removable: 0
./memory/memory9/removable: 0
./memory/memory10/removable: 0
./memory/memory11/removable: 0
./memory/memory12/removable: 0
./memory/memory13/removable: 0
./memory/memory14/removable: 0
./memory/memory15/removable: 0
./memory/memory16/removable: 0
./memory/memory17/removable: 1
./memory/memory18/removable: 1
./memory/memory19/removable: 1
./memory/memory20/removable: 1
./memory/memory21/removable: 1
./memory/memory22/removable: 1

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../ABI/testing/sysfs-devices-memory          | 24 ++++++++
 drivers/base/memory.c                         | 19 ++++++
 include/linux/memory_hotplug.h                | 12 ++++
 mm/memory_hotplug.c                           | 60 +++++++++++++++++++
 4 files changed, 115 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-devices-memory

diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
new file mode 100644
index 00000000000..7a16fe1e227
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -0,0 +1,24 @@
+What:		/sys/devices/system/memory
+Date:		June 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The /sys/devices/system/memory contains a snapshot of the
+		internal state of the kernel memory blocks. Files could be
+		added or removed dynamically to represent hot-add/remove
+		operations.
+
+Users:		hotplug memory add/remove tools
+		https://w3.opensource.ibm.com/projects/powerpc-utils/
+
+What:		/sys/devices/system/memory/memoryX/removable
+Date:		June 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/removable
+		indicates whether this memory block is removable or not.
+		This is useful for a user-level agent to determine
+		identify removable sections of the memory before attempting
+		potentially expensive hot-remove memory operation
+
+Users:		hotplug memory remove tools
+		https://w3.opensource.ibm.com/projects/powerpc-utils/
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 4d4e0e7b6e9..855ed1a9f97 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -100,6 +100,21 @@ static ssize_t show_mem_phys_index(struct sys_device *dev,
 	return sprintf(buf, "%08lx\n", mem->phys_index);
 }
 
+/*
+ * Show whether the section of memory is likely to be hot-removable
+ */
+static ssize_t show_mem_removable(struct sys_device *dev, char *buf)
+{
+	unsigned long start_pfn;
+	int ret;
+	struct memory_block *mem =
+		container_of(dev, struct memory_block, sysdev);
+
+	start_pfn = section_nr_to_pfn(mem->phys_index);
+	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
+	return sprintf(buf, "%d\n", ret);
+}
+
 /*
  * online, offline, going offline, etc.
  */
@@ -262,6 +277,7 @@ static ssize_t show_phys_device(struct sys_device *dev,
 static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
 static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
 static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
+static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
 
 #define mem_create_simple_file(mem, attr_name)	\
 	sysdev_create_file(&mem->sysdev, &attr_##attr_name)
@@ -350,6 +366,8 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
 		ret = mem_create_simple_file(mem, state);
 	if (!ret)
 		ret = mem_create_simple_file(mem, phys_device);
+	if (!ret)
+		ret = mem_create_simple_file(mem, removable);
 
 	return ret;
 }
@@ -394,6 +412,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
 	mem_remove_simple_file(mem, phys_index);
 	mem_remove_simple_file(mem, state);
 	mem_remove_simple_file(mem, phys_device);
+	mem_remove_simple_file(mem, removable);
 	unregister_memory(mem, section);
 
 	return 0;
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 3628e5088f6..763ba81fc0f 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -199,6 +199,18 @@ extern int walk_memory_resource(unsigned long start_pfn,
 			unsigned long nr_pages, void *arg,
 			int (*func)(unsigned long, unsigned long, void *));
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
+
+#else
+static inline int is_mem_section_removable(unsigned long pfn,
+					unsigned long nr_pages)
+{
+	return 0;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int remove_memory(u64 start, u64 size);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 93aba78dc8b..89fee2dcb03 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -522,6 +522,66 @@ error:
 EXPORT_SYMBOL_GPL(add_memory);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
+ * set and the size of the free page is given by page_order(). Using this,
+ * the function determines if the pageblock contains only free pages.
+ * Due to buddy contraints, a free page at least the size of a pageblock will
+ * be located at the start of the pageblock
+ */
+static inline int pageblock_free(struct page *page)
+{
+	return PageBuddy(page) && page_order(page) >= pageblock_order;
+}
+
+/* Return the start of the next active pageblock after a given page */
+static struct page *next_active_pageblock(struct page *page)
+{
+	int pageblocks_stride;
+
+	/* Ensure the starting page is pageblock-aligned */
+	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+
+	/* Move forward by at least 1 * pageblock_nr_pages */
+	pageblocks_stride = 1;
+
+	/* If the entire pageblock is free, move to the end of free page */
+	if (pageblock_free(page))
+		pageblocks_stride += page_order(page) - pageblock_order;
+
+	return page + (pageblocks_stride * pageblock_nr_pages);
+}
+
+/* Checks if this range of memory is likely to be hot-removable. */
+int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
+{
+	int type;
+	struct page *page = pfn_to_page(start_pfn);
+	struct page *end_page = page + nr_pages;
+
+	/* Check the starting page of each pageblock within the range */
+	for (; page < end_page; page = next_active_pageblock(page)) {
+		type = get_pageblock_migratetype(page);
+
+		/*
+		 * A pageblock containing MOVABLE or free pages is considered
+		 * removable
+		 */
+		if (type != MIGRATE_MOVABLE && !pageblock_free(page))
+			return 0;
+
+		/*
+		 * A pageblock starting with a PageReserved page is not
+		 * considered removable.
+		 */
+		if (PageReserved(page))
+			return 0;
+	}
+
+	/* All pageblocks in the memory block are likely to be hot-removable */
+	return 1;
+}
+
 /*
  * Confirm all pages in a range [start, end) is belongs to the same zone.
  */
-- 
GitLab


From 9ca908f47bc784c90e17a553ce33e756c73feac4 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Wed, 23 Jul 2008 21:28:20 -0700
Subject: [PATCH 209/853] kcalloc: remove runtime division

While in all cases in the kernel we know the size of the elements to be
created, we don't always know the count of elements.  By commuting the size
and count in the overflow check, the compiler can replace the runtime division
of size_t with a compare to a (unique) constant in these cases.

Signed-off-by: Milton Miller <miltonm@bga.com>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9aa90a6f20e..41103910f8a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -180,7 +180,7 @@ size_t ksize(const void *);
  */
 static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
 {
-	if (n != 0 && size > ULONG_MAX / n)
+	if (size != 0 && n > ULONG_MAX / size)
 		return NULL;
 	return __kmalloc(n * size, flags | __GFP_ZERO);
 }
-- 
GitLab


From 83d1674a946141c3c59d430e96c224f7937e6158 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Wed, 23 Jul 2008 21:28:22 -0700
Subject: [PATCH 210/853] mm: make CONFIG_MIGRATION available w/o CONFIG_NUMA

We'd like to support CONFIG_MEMORY_HOTREMOVE on s390, which depends on
CONFIG_MIGRATION.  So far, CONFIG_MIGRATION is only available with NUMA
support.

This patch makes CONFIG_MIGRATION selectable for architectures that define
ARCH_ENABLE_MEMORY_HOTREMOVE.  When MIGRATION is enabled w/o NUMA, the
kernel won't compile because migrate_vmas() does not know about
vm_ops->migrate() and vma_migratable() does not know about policy_zone.
To fix this, those two functions can be restricted to '#ifdef CONFIG_NUMA'
because they are not being used w/o NUMA.  vma_migratable() is moved over
from migrate.h to mempolicy.h.

[kosaki.motohiro@jp.fujitsu.com: build fix]
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: KOSAKI Motorhiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 19 +++++++++++++++++++
 include/linux/migrate.h   | 21 ---------------------
 mm/Kconfig                |  2 +-
 mm/migrate.c              |  2 +-
 4 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3a39570b81b..085c903fe0f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -59,6 +59,7 @@ enum {
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/nodemask.h>
+#include <linux/pagemap.h>
 
 struct mm_struct;
 
@@ -220,6 +221,24 @@ extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
 extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 			int no_context);
 #endif
+
+/* Check if a vma is migratable */
+static inline int vma_migratable(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
+		return 0;
+	/*
+	 * Migration allocates pages in the highest zone. If we cannot
+	 * do so then migration (at least from node to node) is not
+	 * possible.
+	 */
+	if (vma->vm_file &&
+		gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
+								< policy_zone)
+			return 0;
+	return 1;
+}
+
 #else
 
 struct mempolicy {};
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e10a90a93b5..03aea612d28 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -3,28 +3,10 @@
 
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
-#include <linux/pagemap.h>
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
-/* Check if a vma is migratable */
-static inline int vma_migratable(struct vm_area_struct *vma)
-{
-	if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
-		return 0;
-	/*
-	 * Migration allocates pages in the highest zone. If we cannot
-	 * do so then migration (at least from node to node) is not
-	 * possible.
-	 */
-	if (vma->vm_file &&
-		gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
-								< policy_zone)
-			return 0;
-	return 1;
-}
-
 extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
@@ -39,9 +21,6 @@ extern int migrate_vmas(struct mm_struct *mm,
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
 #else
-static inline int vma_migratable(struct vm_area_struct *vma)
-					{ return 0; }
-
 static inline int isolate_lru_page(struct page *p, struct list_head *list)
 					{ return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
diff --git a/mm/Kconfig b/mm/Kconfig
index c4de85285bb..aa799007a11 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -174,7 +174,7 @@ config SPLIT_PTLOCK_CPUS
 config MIGRATION
 	bool "Page migration"
 	def_bool y
-	depends on NUMA
+	depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE
 	help
 	  Allows the migration of the physical location of pages of processes
 	  while the virtual addresses are not changed. This is useful for
diff --git a/mm/migrate.c b/mm/migrate.c
index e7d13a708da..376cceba82f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1071,7 +1071,6 @@ out2:
 	mmput(mm);
 	return err;
 }
-#endif
 
 /*
  * Call migration functions in the vma_ops that may prepare
@@ -1093,3 +1092,4 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
  	}
  	return err;
 }
+#endif
-- 
GitLab


From 78ecba081224a2db5876b6b81cfed0b78f58adc7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 23 Jul 2008 21:28:23 -0700
Subject: [PATCH 211/853] mm: fix ever-decreasing swap priority

Vegard Nossum has noticed the ever-decreasing negative priority in a
swapon/swapoff loop, which eventually would misprioritize when int wraps
positive.  Not worth spending much code on, but probably better fixed.

It's easy to handle the swapping on and off of just one area, but there's
not much point if a pair or more still misbehave.  To handle the general
case, swapoff should compact negative priorities, keeping them always from
-1 to -MAX_SWAPFILES.  That's a change, but should cause no regression,
since these negative (unspecified) priorities are disjoint from the
positive specified priorities 0 to 32767.

One small functional difference, which seems appropriate: when swapoff
fails to free all swap from a negative priority area, that area is now
reinserted at lowest priority, rather than at its original priority.

In moving down swapon's setting of priority, I notice that an area is
visible to /proc/swaps when it has swap_map set, yet that was being set
before all the visible fields were properly filled in: corrected.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swapfile.c | 49 +++++++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index bd1bb592030..2f33edb8bee 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -37,6 +37,7 @@ DEFINE_SPINLOCK(swap_lock);
 unsigned int nr_swapfiles;
 long total_swap_pages;
 static int swap_overflow;
+static int least_priority;
 
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
@@ -1260,6 +1261,11 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
 		/* just pick something that's safe... */
 		swap_list.next = swap_list.head;
 	}
+	if (p->prio < 0) {
+		for (i = p->next; i >= 0; i = swap_info[i].next)
+			swap_info[i].prio = p->prio--;
+		least_priority++;
+	}
 	nr_swap_pages -= p->pages;
 	total_swap_pages -= p->pages;
 	p->flags &= ~SWP_WRITEOK;
@@ -1272,9 +1278,14 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
 	if (err) {
 		/* re-insert swap space back into swap_list */
 		spin_lock(&swap_lock);
-		for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
+		if (p->prio < 0)
+			p->prio = --least_priority;
+		prev = -1;
+		for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
 			if (p->prio >= swap_info[i].prio)
 				break;
+			prev = i;
+		}
 		p->next = i;
 		if (prev < 0)
 			swap_list.head = swap_list.next = p - swap_info;
@@ -1447,7 +1458,6 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 	unsigned int type;
 	int i, prev;
 	int error;
-	static int least_priority;
 	union swap_header *swap_header = NULL;
 	int swap_header_version;
 	unsigned int nr_good_pages = 0;
@@ -1455,7 +1465,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 	sector_t span;
 	unsigned long maxpages = 1;
 	int swapfilesize;
-	unsigned short *swap_map;
+	unsigned short *swap_map = NULL;
 	struct page *page = NULL;
 	struct inode *inode = NULL;
 	int did_down = 0;
@@ -1474,22 +1484,10 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 	}
 	if (type >= nr_swapfiles)
 		nr_swapfiles = type+1;
+	memset(p, 0, sizeof(*p));
 	INIT_LIST_HEAD(&p->extent_list);
 	p->flags = SWP_USED;
-	p->swap_file = NULL;
-	p->old_block_size = 0;
-	p->swap_map = NULL;
-	p->lowest_bit = 0;
-	p->highest_bit = 0;
-	p->cluster_nr = 0;
-	p->inuse_pages = 0;
 	p->next = -1;
-	if (swap_flags & SWAP_FLAG_PREFER) {
-		p->prio =
-		  (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
-	} else {
-		p->prio = --least_priority;
-	}
 	spin_unlock(&swap_lock);
 	name = getname(specialfile);
 	error = PTR_ERR(name);
@@ -1632,19 +1630,20 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 			goto bad_swap;
 
 		/* OK, set up the swap map and apply the bad block list */
-		if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
+		swap_map = vmalloc(maxpages * sizeof(short));
+		if (!swap_map) {
 			error = -ENOMEM;
 			goto bad_swap;
 		}
 
 		error = 0;
-		memset(p->swap_map, 0, maxpages * sizeof(short));
+		memset(swap_map, 0, maxpages * sizeof(short));
 		for (i = 0; i < swap_header->info.nr_badpages; i++) {
 			int page_nr = swap_header->info.badpages[i];
 			if (page_nr <= 0 || page_nr >= swap_header->info.last_page)
 				error = -EINVAL;
 			else
-				p->swap_map[page_nr] = SWAP_MAP_BAD;
+				swap_map[page_nr] = SWAP_MAP_BAD;
 		}
 		nr_good_pages = swap_header->info.last_page -
 				swap_header->info.nr_badpages -
@@ -1654,7 +1653,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 	}
 
 	if (nr_good_pages) {
-		p->swap_map[0] = SWAP_MAP_BAD;
+		swap_map[0] = SWAP_MAP_BAD;
 		p->max = maxpages;
 		p->pages = nr_good_pages;
 		nr_extents = setup_swap_extents(p, &span);
@@ -1672,6 +1671,12 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
+	if (swap_flags & SWAP_FLAG_PREFER)
+		p->prio =
+		  (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
+	else
+		p->prio = --least_priority;
+	p->swap_map = swap_map;
 	p->flags = SWP_ACTIVE;
 	nr_swap_pages += nr_good_pages;
 	total_swap_pages += nr_good_pages;
@@ -1707,12 +1712,8 @@ bad_swap:
 	destroy_swap_extents(p);
 bad_swap_2:
 	spin_lock(&swap_lock);
-	swap_map = p->swap_map;
 	p->swap_file = NULL;
-	p->swap_map = NULL;
 	p->flags = 0;
-	if (!(swap_flags & SWAP_FLAG_PREFER))
-		++least_priority;
 	spin_unlock(&swap_lock);
 	vfree(swap_map);
 	if (swap_file)
-- 
GitLab


From 5459c164f0591ee75ed0203bb8f3817f25948e2f Mon Sep 17 00:00:00 2001
From: "Andrew G. Morgan" <morgan@kernel.org>
Date: Wed, 23 Jul 2008 21:28:24 -0700
Subject: [PATCH 212/853] security: protect legacy applications from executing
 with insufficient privilege

When cap_bset suppresses some of the forced (fP) capabilities of a file,
it is generally only safe to execute the program if it understands how to
recognize it doesn't have enough privilege to work correctly.  For legacy
applications (fE!=0), which have no non-destructive way to determine that
they are missing privilege, we fail to execute (EPERM) any executable that
requires fP capabilities, but would otherwise get pP' < fP.  This is a
fail-safe permission check.

For some discussion of why it is problematic for (legacy) privileged
applications to run with less than the set of capabilities requested for
them, see:

 http://userweb.kernel.org/~morgan/sendmail-capabilities-war-story.html

With this iteration of this support, we do not include setuid-0 based
privilege protection from the bounding set.  That is, the admin can still
(ab)use the bounding set to suppress the privileges of a setuid-0 program.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: cleanup]
Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/binfmts.h |   2 +-
 security/commoncap.c    | 108 ++++++++++++++++++++++------------------
 2 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index ee0ed48e834..826f6235080 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -38,7 +38,7 @@ struct linux_binprm{
 		     misc_bang:1;
 	struct file * file;
 	int e_uid, e_gid;
-	kernel_cap_t cap_inheritable, cap_permitted;
+	kernel_cap_t cap_post_exec_permitted;
 	bool cap_effective;
 	void *security;
 	int argc, envc;
diff --git a/security/commoncap.c b/security/commoncap.c
index 0b6537a3672..4afbece37a0 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -162,8 +162,7 @@ void cap_capset_set (struct task_struct *target, kernel_cap_t *effective,
 
 static inline void bprm_clear_caps(struct linux_binprm *bprm)
 {
-	cap_clear(bprm->cap_inheritable);
-	cap_clear(bprm->cap_permitted);
+	cap_clear(bprm->cap_post_exec_permitted);
 	bprm->cap_effective = false;
 }
 
@@ -198,6 +197,7 @@ static inline int cap_from_disk(struct vfs_cap_data *caps,
 {
 	__u32 magic_etc;
 	unsigned tocopy, i;
+	int ret;
 
 	if (size < sizeof(magic_etc))
 		return -EINVAL;
@@ -225,19 +225,40 @@ static inline int cap_from_disk(struct vfs_cap_data *caps,
 		bprm->cap_effective = false;
 	}
 
-	for (i = 0; i < tocopy; ++i) {
-		bprm->cap_permitted.cap[i] =
-			le32_to_cpu(caps->data[i].permitted);
-		bprm->cap_inheritable.cap[i] =
-			le32_to_cpu(caps->data[i].inheritable);
-	}
-	while (i < VFS_CAP_U32) {
-		bprm->cap_permitted.cap[i] = 0;
-		bprm->cap_inheritable.cap[i] = 0;
-		i++;
+	ret = 0;
+
+	CAP_FOR_EACH_U32(i) {
+		__u32 value_cpu;
+
+		if (i >= tocopy) {
+			/*
+			 * Legacy capability sets have no upper bits
+			 */
+			bprm->cap_post_exec_permitted.cap[i] = 0;
+			continue;
+		}
+		/*
+		 * pP' = (X & fP) | (pI & fI)
+		 */
+		value_cpu = le32_to_cpu(caps->data[i].permitted);
+		bprm->cap_post_exec_permitted.cap[i] =
+			(current->cap_bset.cap[i] & value_cpu) |
+			(current->cap_inheritable.cap[i] &
+				le32_to_cpu(caps->data[i].inheritable));
+		if (value_cpu & ~bprm->cap_post_exec_permitted.cap[i]) {
+			/*
+			 * insufficient to execute correctly
+			 */
+			ret = -EPERM;
+		}
 	}
 
-	return 0;
+	/*
+	 * For legacy apps, with no internal support for recognizing they
+	 * do not have enough capabilities, we return an error if they are
+	 * missing some "forced" (aka file-permitted) capabilities.
+	 */
+	return bprm->cap_effective ? ret : 0;
 }
 
 /* Locate any VFS capabilities: */
@@ -269,9 +290,9 @@ static int get_file_caps(struct linux_binprm *bprm)
 		goto out;
 
 	rc = cap_from_disk(&vcaps, bprm, rc);
-	if (rc)
+	if (rc == -EINVAL)
 		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
-			__func__, rc, bprm->filename);
+		       __func__, rc, bprm->filename);
 
 out:
 	dput(dentry);
@@ -304,25 +325,24 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 	int ret;
 
 	ret = get_file_caps(bprm);
-	if (ret)
-		printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n",
-			__func__, ret, bprm->filename);
-
-	/*  To support inheritance of root-permissions and suid-root
-	 *  executables under compatibility mode, we raise all three
-	 *  capability sets for the file.
-	 *
-	 *  If only the real uid is 0, we only raise the inheritable
-	 *  and permitted sets of the executable file.
-	 */
 
-	if (!issecure (SECURE_NOROOT)) {
+	if (!issecure(SECURE_NOROOT)) {
+		/*
+		 * To support inheritance of root-permissions and suid-root
+		 * executables under compatibility mode, we override the
+		 * capability sets for the file.
+		 *
+		 * If only the real uid is 0, we do not set the effective
+		 * bit.
+		 */
 		if (bprm->e_uid == 0 || current->uid == 0) {
-			cap_set_full (bprm->cap_inheritable);
-			cap_set_full (bprm->cap_permitted);
+			/* pP' = (cap_bset & ~0) | (pI & ~0) */
+			bprm->cap_post_exec_permitted = cap_combine(
+				current->cap_bset, current->cap_inheritable
+				);
+			bprm->cap_effective = (bprm->e_uid == 0);
+			ret = 0;
 		}
-		if (bprm->e_uid == 0)
-			bprm->cap_effective = true;
 	}
 
 	return ret;
@@ -330,17 +350,9 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 
 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 {
-	/* Derived from fs/exec.c:compute_creds. */
-	kernel_cap_t new_permitted, working;
-
-	new_permitted = cap_intersect(bprm->cap_permitted,
-				 current->cap_bset);
-	working = cap_intersect(bprm->cap_inheritable,
-				 current->cap_inheritable);
-	new_permitted = cap_combine(new_permitted, working);
-
 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
-	    !cap_issubset (new_permitted, current->cap_permitted)) {
+	    !cap_issubset(bprm->cap_post_exec_permitted,
+			  current->cap_permitted)) {
 		set_dumpable(current->mm, suid_dumpable);
 		current->pdeath_signal = 0;
 
@@ -350,9 +362,9 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 				bprm->e_gid = current->gid;
 			}
 			if (cap_limit_ptraced_target()) {
-				new_permitted =
-					cap_intersect(new_permitted,
-						      current->cap_permitted);
+				bprm->cap_post_exec_permitted = cap_intersect(
+					bprm->cap_post_exec_permitted,
+					current->cap_permitted);
 			}
 		}
 	}
@@ -364,9 +376,9 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	 * in the init_task struct. Thus we skip the usual
 	 * capability rules */
 	if (!is_global_init(current)) {
-		current->cap_permitted = new_permitted;
+		current->cap_permitted = bprm->cap_post_exec_permitted;
 		if (bprm->cap_effective)
-			current->cap_effective = new_permitted;
+			current->cap_effective = bprm->cap_post_exec_permitted;
 		else
 			cap_clear(current->cap_effective);
 	}
@@ -381,9 +393,7 @@ int cap_bprm_secureexec (struct linux_binprm *bprm)
 	if (current->uid != 0) {
 		if (bprm->cap_effective)
 			return 1;
-		if (!cap_isclear(bprm->cap_permitted))
-			return 1;
-		if (!cap_isclear(bprm->cap_inheritable))
+		if (!cap_isclear(bprm->cap_post_exec_permitted))
 			return 1;
 	}
 
-- 
GitLab


From ab763c7112ce0e2559c73f921617c81dc7287ca6 Mon Sep 17 00:00:00 2001
From: "Andrew G. Morgan" <morgan@kernel.org>
Date: Wed, 23 Jul 2008 21:28:25 -0700
Subject: [PATCH 213/853] security: filesystem capabilities refactor kernel
 code

To date, we've tried hard to confine filesystem support for capabilities
to the security modules.  This has left a lot of the code in
kernel/capability.c in a state where it looks like it supports something
that filesystem support for capabilities actually suppresses when the LSM
security/commoncap.c code runs.  What is left is a lot of code that uses
sub-optimal locking in the main kernel.

With this change we refactor the main kernel code and make it explicit
which locks are needed and that the only remaining kernel races in this
area are associated with non-filesystem capability code.

Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/capability.c | 338 +++++++++++++++++++++++++++++---------------
 1 file changed, 221 insertions(+), 117 deletions(-)

diff --git a/kernel/capability.c b/kernel/capability.c
index 901e0fdc3ff..0101e847603 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -115,11 +115,208 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
 	return 0;
 }
 
+#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
+
+/*
+ * Without filesystem capability support, we nominally support one process
+ * setting the capabilities of another
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+				     kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+	struct task_struct *target;
+	int ret;
+
+	spin_lock(&task_capability_lock);
+	read_lock(&tasklist_lock);
+
+	if (pid && pid != task_pid_vnr(current)) {
+		target = find_task_by_vpid(pid);
+		if (!target) {
+			ret = -ESRCH;
+			goto out;
+		}
+	} else
+		target = current;
+
+	ret = security_capget(target, pEp, pIp, pPp);
+
+out:
+	read_unlock(&tasklist_lock);
+	spin_unlock(&task_capability_lock);
+
+	return ret;
+}
+
+/*
+ * cap_set_pg - set capabilities for all processes in a given process
+ * group.  Takes task_capability_lock and tasklist_lock internally.
+ */
+static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
+			     kernel_cap_t *inheritable,
+			     kernel_cap_t *permitted)
+{
+	struct task_struct *g, *target;
+	int ret = -EPERM;
+	int found = 0;
+	struct pid *pgrp;
+
+	spin_lock(&task_capability_lock);
+	read_lock(&tasklist_lock);
+
+	pgrp = find_vpid(pgrp_nr);
+	do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
+		target = g;
+		while_each_thread(g, target) {
+			if (!security_capset_check(target, effective,
+						   inheritable, permitted)) {
+				security_capset_set(target, effective,
+						    inheritable, permitted);
+				ret = 0;
+			}
+			found = 1;
+		}
+	} while_each_pid_task(pgrp, PIDTYPE_PGID, g);
+
+	read_unlock(&tasklist_lock);
+	spin_unlock(&task_capability_lock);
+
+	if (!found)
+		ret = 0;
+	return ret;
+}
+
 /*
- * For sys_getproccap() and sys_setproccap(), any of the three
- * capability set pointers may be NULL -- indicating that that set is
- * uninteresting and/or not to be changed.
+ * cap_set_all - set capabilities for all processes other than init
+ * and self.  Takes task_capability_lock and tasklist_lock internally.
  */
+static inline int cap_set_all(kernel_cap_t *effective,
+			      kernel_cap_t *inheritable,
+			      kernel_cap_t *permitted)
+{
+	struct task_struct *g, *target;
+	int ret = -EPERM;
+	int found = 0;
+
+	spin_lock(&task_capability_lock);
+	read_lock(&tasklist_lock);
+
+	do_each_thread(g, target) {
+		if (target == current
+		    || is_container_init(target->group_leader))
+			continue;
+		found = 1;
+		if (security_capset_check(target, effective, inheritable,
+					  permitted))
+			continue;
+		ret = 0;
+		security_capset_set(target, effective, inheritable, permitted);
+	} while_each_thread(g, target);
+
+	read_unlock(&tasklist_lock);
+	spin_unlock(&task_capability_lock);
+
+	if (!found)
+		ret = 0;
+
+	return ret;
+}
+
+/*
+ * Given the target pid does not refer to the current process we
+ * need more elaborate support... (This support is not present when
+ * filesystem capabilities are configured.)
+ */
+static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
+					    kernel_cap_t *inheritable,
+					    kernel_cap_t *permitted)
+{
+	struct task_struct *target;
+	int ret;
+
+	if (!capable(CAP_SETPCAP))
+		return -EPERM;
+
+	if (pid == -1)	          /* all procs other than current and init */
+		return cap_set_all(effective, inheritable, permitted);
+
+	else if (pid < 0)                    /* all procs in process group */
+		return cap_set_pg(-pid, effective, inheritable, permitted);
+
+	/* target != current */
+	spin_lock(&task_capability_lock);
+	read_lock(&tasklist_lock);
+
+	target = find_task_by_vpid(pid);
+	if (!target)
+		ret = -ESRCH;
+	else {
+		ret = security_capset_check(target, effective, inheritable,
+					    permitted);
+
+		/* having verified that the proposed changes are legal,
+		   we now put them into effect. */
+		if (!ret)
+			security_capset_set(target, effective, inheritable,
+					    permitted);
+	}
+
+	read_unlock(&tasklist_lock);
+	spin_unlock(&task_capability_lock);
+
+	return ret;
+}
+
+#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
+
+/*
+ * If we have configured with filesystem capability support, then the
+ * only thing that can change the capabilities of the current process
+ * is the current process. As such, we can't be in this code at the
+ * same time as we are in the process of setting capabilities in this
+ * process. The net result is that we can limit our use of locks to
+ * when we are reading the caps of another process.
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+				     kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+	int ret;
+
+	if (pid && (pid != task_pid_vnr(current))) {
+		struct task_struct *target;
+
+		spin_lock(&task_capability_lock);
+		read_lock(&tasklist_lock);
+
+		target = find_task_by_vpid(pid);
+		if (!target)
+			ret = -ESRCH;
+		else
+			ret = security_capget(target, pEp, pIp, pPp);
+
+		read_unlock(&tasklist_lock);
+		spin_unlock(&task_capability_lock);
+	} else
+		ret = security_capget(current, pEp, pIp, pPp);
+
+	return ret;
+}
+
+/*
+ * With filesystem capability support configured, the kernel does not
+ * permit the changing of capabilities in one process by another
+ * process. (CAP_SETPCAP has much less broad semantics when configured
+ * this way.)
+ */
+static inline int do_sys_capset_other_tasks(pid_t pid,
+					    kernel_cap_t *effective,
+					    kernel_cap_t *inheritable,
+					    kernel_cap_t *permitted)
+{
+	return -EPERM;
+}
+
+#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
 
 /*
  * Atomically modify the effective capabilities returning the original
@@ -155,7 +352,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
 {
 	int ret = 0;
 	pid_t pid;
-	struct task_struct *target;
 	unsigned tocopy;
 	kernel_cap_t pE, pI, pP;
 
@@ -169,23 +365,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
 	if (pid < 0)
 		return -EINVAL;
 
-	spin_lock(&task_capability_lock);
-	read_lock(&tasklist_lock);
-
-	if (pid && pid != task_pid_vnr(current)) {
-		target = find_task_by_vpid(pid);
-		if (!target) {
-			ret = -ESRCH;
-			goto out;
-		}
-	} else
-		target = current;
-
-	ret = security_capget(target, &pE, &pI, &pP);
-
-out:
-	read_unlock(&tasklist_lock);
-	spin_unlock(&task_capability_lock);
+	ret = cap_get_target_pid(pid, &pE, &pI, &pP);
 
 	if (!ret) {
 		struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
@@ -216,7 +396,6 @@ out:
 		 * before modification is attempted and the application
 		 * fails.
 		 */
-
 		if (copy_to_user(dataptr, kdata, tocopy
 				 * sizeof(struct __user_cap_data_struct))) {
 			return -EFAULT;
@@ -226,70 +405,8 @@ out:
 	return ret;
 }
 
-/*
- * cap_set_pg - set capabilities for all processes in a given process
- * group.  We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
-			      kernel_cap_t *inheritable,
-			      kernel_cap_t *permitted)
-{
-	struct task_struct *g, *target;
-	int ret = -EPERM;
-	int found = 0;
-	struct pid *pgrp;
-
-	pgrp = find_vpid(pgrp_nr);
-	do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
-		target = g;
-		while_each_thread(g, target) {
-			if (!security_capset_check(target, effective,
-							inheritable,
-							permitted)) {
-				security_capset_set(target, effective,
-							inheritable,
-							permitted);
-				ret = 0;
-			}
-			found = 1;
-		}
-	} while_each_pid_task(pgrp, PIDTYPE_PGID, g);
-
-	if (!found)
-		ret = 0;
-	return ret;
-}
-
-/*
- * cap_set_all - set capabilities for all processes other than init
- * and self.  We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_all(kernel_cap_t *effective,
-			       kernel_cap_t *inheritable,
-			       kernel_cap_t *permitted)
-{
-     struct task_struct *g, *target;
-     int ret = -EPERM;
-     int found = 0;
-
-     do_each_thread(g, target) {
-             if (target == current || is_container_init(target->group_leader))
-                     continue;
-             found = 1;
-	     if (security_capset_check(target, effective, inheritable,
-						permitted))
-		     continue;
-	     ret = 0;
-	     security_capset_set(target, effective, inheritable, permitted);
-     } while_each_thread(g, target);
-
-     if (!found)
-	     ret = 0;
-     return ret;
-}
-
 /**
- * sys_capset - set capabilities for a process or a group of processes
+ * sys_capset - set capabilities for a process or (*) a group of processes
  * @header: pointer to struct that contains capability version and
  *	target pid data
  * @data: pointer to struct that contains the effective, permitted,
@@ -313,7 +430,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 	unsigned i, tocopy;
 	kernel_cap_t inheritable, permitted, effective;
-	struct task_struct *target;
 	int ret;
 	pid_t pid;
 
@@ -324,9 +440,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	if (get_user(pid, &header->pid))
 		return -EFAULT;
 
-	if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
-		return -EPERM;
-
 	if (copy_from_user(&kdata, data, tocopy
 			   * sizeof(struct __user_cap_data_struct))) {
 		return -EFAULT;
@@ -344,40 +457,31 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 		i++;
 	}
 
-	spin_lock(&task_capability_lock);
-	read_lock(&tasklist_lock);
-
-	if (pid > 0 && pid != task_pid_vnr(current)) {
-		target = find_task_by_vpid(pid);
-		if (!target) {
-			ret = -ESRCH;
-			goto out;
-		}
-	} else
-		target = current;
-
-	ret = 0;
-
-	/* having verified that the proposed changes are legal,
-	   we now put them into effect. */
-	if (pid < 0) {
-		if (pid == -1)	/* all procs other than current and init */
-			ret = cap_set_all(&effective, &inheritable, &permitted);
+	if (pid && (pid != task_pid_vnr(current)))
+		ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
+						&permitted);
+	else {
+		/*
+		 * This lock is required even when filesystem
+		 * capability support is configured - it protects the
+		 * sys_capget() call from returning incorrect data in
+		 * the case that the targeted process is not the
+		 * current one.
+		 */
+		spin_lock(&task_capability_lock);
 
-		else		/* all procs in process group */
-			ret = cap_set_pg(-pid, &effective, &inheritable,
-					 &permitted);
-	} else {
-		ret = security_capset_check(target, &effective, &inheritable,
+		ret = security_capset_check(current, &effective, &inheritable,
 					    &permitted);
+		/*
+		 * Having verified that the proposed changes are
+		 * legal, we now put them into effect.
+		 */
 		if (!ret)
-			security_capset_set(target, &effective, &inheritable,
+			security_capset_set(current, &effective, &inheritable,
 					    &permitted);
+		spin_unlock(&task_capability_lock);
 	}
 
-out:
-	read_unlock(&tasklist_lock);
-	spin_unlock(&task_capability_lock);
 
 	return ret;
 }
-- 
GitLab


From 84aaa7ab4c40b66d6dd9aa393901551ad50ec640 Mon Sep 17 00:00:00 2001
From: "Andrew G. Morgan" <morgan@kernel.org>
Date: Wed, 23 Jul 2008 21:28:25 -0700
Subject: [PATCH 214/853] security: filesystem capabilities no longer
 experimental

Filesystem capabilities have come of age.  Remove the experimental tag for
configuring filesystem capabilities.

Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/security/Kconfig b/security/Kconfig
index 62ed4717d33..559293922a4 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -74,8 +74,7 @@ config SECURITY_NETWORK_XFRM
 	  If you are unsure how to answer this question, answer N.
 
 config SECURITY_FILE_CAPABILITIES
-	bool "File POSIX Capabilities (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "File POSIX Capabilities"
 	default n
 	help
 	  This enables filesystem capabilities, allowing you to give
-- 
GitLab


From 9b3e43a747c74029b0acf6acf4666601f132f471 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 23 Jul 2008 21:28:26 -0700
Subject: [PATCH 215/853] security: remove unused forwards

Why would linux/security.h need forward declarations for nfsctl_arg and
swap_info_struct?  It's hard to imagine: remove them.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/security.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 31c8851ec5d..f0e9adb22ac 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -102,9 +102,7 @@ extern unsigned long mmap_min_addr;
 #define LSM_SETID_FS	8
 
 /* forward declares to avoid warnings */
-struct nfsctl_arg;
 struct sched_param;
-struct swap_info_struct;
 struct request_sock;
 
 /* bprm_apply_creds unsafe reasons */
-- 
GitLab


From 5002779d37b261271da9883e06c14b097d4781c4 Mon Sep 17 00:00:00 2001
From: Tilman Schmidt <tilman@imap.cc>
Date: Wed, 23 Jul 2008 21:28:27 -0700
Subject: [PATCH 216/853] gigaset: use dev_ macros for messages

The info() / warn() / err() macros from usb.h for generating kernel
messages are considered inferior to dev_info() / dev_warn() / dev_err()
from device.h.  Replace them where possible.  Also correct the severity
level and improve the text of one message.

Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/isdn/gigaset/asyncdata.c   |  3 +-
 drivers/isdn/gigaset/common.c      |  2 +-
 drivers/isdn/gigaset/gigaset.h     |  3 --
 drivers/isdn/gigaset/i4l.c         | 56 ++++++++++++++++++------------
 drivers/isdn/gigaset/interface.c   | 25 ++++++-------
 drivers/isdn/gigaset/usb-gigaset.c |  7 ++--
 6 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/drivers/isdn/gigaset/asyncdata.c b/drivers/isdn/gigaset/asyncdata.c
index 091deb9d1c4..c2bd97d2927 100644
--- a/drivers/isdn/gigaset/asyncdata.c
+++ b/drivers/isdn/gigaset/asyncdata.c
@@ -575,7 +575,8 @@ int gigaset_m10x_send_skb(struct bc_state *bcs, struct sk_buff *skb)
 	else
 		skb = iraw_encode(skb, HW_HDR_LEN, 0);
 	if (!skb) {
-		err("unable to allocate memory for encoding!\n");
+		dev_err(bcs->cs->dev,
+			"unable to allocate memory for encoding!\n");
 		return -ENOMEM;
 	}
 
diff --git a/drivers/isdn/gigaset/common.c b/drivers/isdn/gigaset/common.c
index 827c32c1679..9d3ce7718e5 100644
--- a/drivers/isdn/gigaset/common.c
+++ b/drivers/isdn/gigaset/common.c
@@ -287,7 +287,7 @@ struct event_t *gigaset_add_event(struct cardstate *cs,
 	tail = cs->ev_tail;
 	next = (tail + 1) % MAX_EVENTS;
 	if (unlikely(next == cs->ev_head))
-		err("event queue full");
+		dev_err(cs->dev, "event queue full\n");
 	else {
 		event = cs->events + tail;
 		event->type = type;
diff --git a/drivers/isdn/gigaset/gigaset.h b/drivers/isdn/gigaset/gigaset.h
index f365993161f..00375295499 100644
--- a/drivers/isdn/gigaset/gigaset.h
+++ b/drivers/isdn/gigaset/gigaset.h
@@ -106,7 +106,6 @@ enum debuglevel {
 #undef err
 #undef info
 #undef warn
-#undef notice
 
 #define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \
 	format "\n" , ## arg)
@@ -114,8 +113,6 @@ enum debuglevel {
 	format "\n" , ## arg)
 #define warn(format, arg...) printk(KERN_WARNING KBUILD_MODNAME ": " \
 	format "\n" , ## arg)
-#define notice(format, arg...) printk(KERN_NOTICE KBUILD_MODNAME ": " \
-	format "\n" , ## arg)
 
 #ifdef CONFIG_GIGASET_DEBUG
 
diff --git a/drivers/isdn/gigaset/i4l.c b/drivers/isdn/gigaset/i4l.c
index 9e089f06a94..3c127a8cbaf 100644
--- a/drivers/isdn/gigaset/i4l.c
+++ b/drivers/isdn/gigaset/i4l.c
@@ -46,7 +46,8 @@ static int writebuf_from_LL(int driverID, int channel, int ack,
 		return -ENODEV;
 	}
 	if (channel < 0 || channel >= cs->channels) {
-		err("%s: invalid channel ID (%d)", __func__, channel);
+		dev_err(cs->dev, "%s: invalid channel ID (%d)\n",
+			__func__, channel);
 		return -ENODEV;
 	}
 	bcs = &cs->bcs[channel];
@@ -58,11 +59,13 @@ static int writebuf_from_LL(int driverID, int channel, int ack,
 
 	if (!len) {
 		if (ack)
-			notice("%s: not ACKing empty packet", __func__);
+			dev_notice(cs->dev, "%s: not ACKing empty packet\n",
+				   __func__);
 		return 0;
 	}
 	if (len > MAX_BUF_SIZE) {
-		err("%s: packet too large (%d bytes)", __func__, len);
+		dev_err(cs->dev, "%s: packet too large (%d bytes)\n",
+			__func__, len);
 		return -EINVAL;
 	}
 
@@ -116,8 +119,7 @@ static int command_from_LL(isdn_ctrl *cntrl)
 	gigaset_debugdrivers();
 
 	if (!cs) {
-		warn("LL tried to access unknown device with nr. %d",
-		     cntrl->driver);
+		err("%s: invalid driver ID (%d)", __func__, cntrl->driver);
 		return -ENODEV;
 	}
 
@@ -126,7 +128,7 @@ static int command_from_LL(isdn_ctrl *cntrl)
 		gig_dbg(DEBUG_ANY, "ISDN_CMD_IOCTL (driver: %d, arg: %ld)",
 			cntrl->driver, cntrl->arg);
 
-		warn("ISDN_CMD_IOCTL is not supported.");
+		dev_warn(cs->dev, "ISDN_CMD_IOCTL not supported\n");
 		return -EINVAL;
 
 	case ISDN_CMD_DIAL:
@@ -138,22 +140,23 @@ static int command_from_LL(isdn_ctrl *cntrl)
 			cntrl->parm.setup.si1, cntrl->parm.setup.si2);
 
 		if (cntrl->arg >= cs->channels) {
-			err("ISDN_CMD_DIAL: invalid channel (%d)",
-			    (int) cntrl->arg);
+			dev_err(cs->dev,
+				"ISDN_CMD_DIAL: invalid channel (%d)\n",
+				(int) cntrl->arg);
 			return -EINVAL;
 		}
 
 		bcs = cs->bcs + cntrl->arg;
 
 		if (!gigaset_get_channel(bcs)) {
-			err("ISDN_CMD_DIAL: channel not free");
+			dev_err(cs->dev, "ISDN_CMD_DIAL: channel not free\n");
 			return -EBUSY;
 		}
 
 		sp = kmalloc(sizeof *sp, GFP_ATOMIC);
 		if (!sp) {
 			gigaset_free_channel(bcs);
-			err("ISDN_CMD_DIAL: out of memory");
+			dev_err(cs->dev, "ISDN_CMD_DIAL: out of memory\n");
 			return -ENOMEM;
 		}
 		*sp = cntrl->parm.setup;
@@ -173,8 +176,9 @@ static int command_from_LL(isdn_ctrl *cntrl)
 		gig_dbg(DEBUG_ANY, "ISDN_CMD_ACCEPTD");
 
 		if (cntrl->arg >= cs->channels) {
-			err("ISDN_CMD_ACCEPTD: invalid channel (%d)",
-			    (int) cntrl->arg);
+			dev_err(cs->dev,
+				"ISDN_CMD_ACCEPTD: invalid channel (%d)\n",
+				(int) cntrl->arg);
 			return -EINVAL;
 		}
 
@@ -196,8 +200,9 @@ static int command_from_LL(isdn_ctrl *cntrl)
 			(int) cntrl->arg);
 
 		if (cntrl->arg >= cs->channels) {
-			err("ISDN_CMD_HANGUP: invalid channel (%u)",
-			    (unsigned) cntrl->arg);
+			dev_err(cs->dev,
+				"ISDN_CMD_HANGUP: invalid channel (%d)\n",
+				(int) cntrl->arg);
 			return -EINVAL;
 		}
 
@@ -224,8 +229,9 @@ static int command_from_LL(isdn_ctrl *cntrl)
 			cntrl->arg & 0xff, (cntrl->arg >> 8));
 
 		if ((cntrl->arg & 0xff) >= cs->channels) {
-			err("ISDN_CMD_SETL2: invalid channel (%u)",
-			    (unsigned) cntrl->arg & 0xff);
+			dev_err(cs->dev,
+				"ISDN_CMD_SETL2: invalid channel (%d)\n",
+				(int) cntrl->arg & 0xff);
 			return -EINVAL;
 		}
 
@@ -244,14 +250,16 @@ static int command_from_LL(isdn_ctrl *cntrl)
 			cntrl->arg & 0xff, (cntrl->arg >> 8));
 
 		if ((cntrl->arg & 0xff) >= cs->channels) {
-			err("ISDN_CMD_SETL3: invalid channel (%u)",
-			    (unsigned) cntrl->arg & 0xff);
+			dev_err(cs->dev,
+				"ISDN_CMD_SETL3: invalid channel (%d)\n",
+				(int) cntrl->arg & 0xff);
 			return -EINVAL;
 		}
 
 		if (cntrl->arg >> 8 != ISDN_PROTO_L3_TRANS) {
-			err("ISDN_CMD_SETL3: invalid protocol %lu",
-			    cntrl->arg >> 8);
+			dev_err(cs->dev,
+				"ISDN_CMD_SETL3: invalid protocol %lu\n",
+				cntrl->arg >> 8);
 			return -EINVAL;
 		}
 
@@ -262,8 +270,9 @@ static int command_from_LL(isdn_ctrl *cntrl)
 	case ISDN_CMD_ALERT:
 		gig_dbg(DEBUG_ANY, "ISDN_CMD_ALERT"); //FIXME
 		if (cntrl->arg >= cs->channels) {
-			err("ISDN_CMD_ALERT: invalid channel (%d)",
-			    (int) cntrl->arg);
+			dev_err(cs->dev,
+				"ISDN_CMD_ALERT: invalid channel (%d)\n",
+				(int) cntrl->arg);
 			return -EINVAL;
 		}
 		//bcs = cs->bcs + cntrl->arg;
@@ -295,7 +304,8 @@ static int command_from_LL(isdn_ctrl *cntrl)
 		gig_dbg(DEBUG_ANY, "ISDN_CMD_GETSIL");
 		break;
 	default:
-		err("unknown command %d from LL", cntrl->command);
+		dev_err(cs->dev, "unknown command %d from LL\n",
+			cntrl->command);
 		return -EINVAL;
 	}
 
diff --git a/drivers/isdn/gigaset/interface.c b/drivers/isdn/gigaset/interface.c
index af195b07c19..521951a898e 100644
--- a/drivers/isdn/gigaset/interface.c
+++ b/drivers/isdn/gigaset/interface.c
@@ -197,7 +197,7 @@ static void if_close(struct tty_struct *tty, struct file *filp)
 	mutex_lock(&cs->mutex);
 
 	if (!cs->open_count)
-		warn("%s: device not opened", __func__);
+		dev_warn(cs->dev, "%s: device not opened\n", __func__);
 	else {
 		if (!--cs->open_count) {
 			spin_lock_irqsave(&cs->lock, flags);
@@ -232,7 +232,7 @@ static int if_ioctl(struct tty_struct *tty, struct file *file,
 		return -ERESTARTSYS; // FIXME -EINTR?
 
 	if (!cs->open_count)
-		warn("%s: device not opened", __func__);
+		dev_warn(cs->dev, "%s: device not opened\n", __func__);
 	else {
 		retval = 0;
 		switch (cmd) {
@@ -364,9 +364,9 @@ static int if_write(struct tty_struct *tty, const unsigned char *buf, int count)
 		return -ERESTARTSYS; // FIXME -EINTR?
 
 	if (!cs->open_count)
-		warn("%s: device not opened", __func__);
+		dev_warn(cs->dev, "%s: device not opened\n", __func__);
 	else if (cs->mstate != MS_LOCKED) {
-		warn("can't write to unlocked device");
+		dev_warn(cs->dev, "can't write to unlocked device\n");
 		retval = -EBUSY;
 	} else if (!cs->connected) {
 		gig_dbg(DEBUG_ANY, "can't write to unplugged device");
@@ -398,9 +398,9 @@ static int if_write_room(struct tty_struct *tty)
 		return -ERESTARTSYS; // FIXME -EINTR?
 
 	if (!cs->open_count)
-		warn("%s: device not opened", __func__);
+		dev_warn(cs->dev, "%s: device not opened\n", __func__);
 	else if (cs->mstate != MS_LOCKED) {
-		warn("can't write to unlocked device");
+		dev_warn(cs->dev, "can't write to unlocked device\n");
 		retval = -EBUSY;
 	} else if (!cs->connected) {
 		gig_dbg(DEBUG_ANY, "can't write to unplugged device");
@@ -430,9 +430,9 @@ static int if_chars_in_buffer(struct tty_struct *tty)
 		return -ERESTARTSYS; // FIXME -EINTR?
 
 	if (!cs->open_count)
-		warn("%s: device not opened", __func__);
+		dev_warn(cs->dev, "%s: device not opened\n", __func__);
 	else if (cs->mstate != MS_LOCKED) {
-		warn("can't write to unlocked device");
+		dev_warn(cs->dev, "can't write to unlocked device\n");
 		retval = -EBUSY;
 	} else if (!cs->connected) {
 		gig_dbg(DEBUG_ANY, "can't write to unplugged device");
@@ -460,7 +460,7 @@ static void if_throttle(struct tty_struct *tty)
 	mutex_lock(&cs->mutex);
 
 	if (!cs->open_count)
-		warn("%s: device not opened", __func__);
+		dev_warn(cs->dev, "%s: device not opened\n", __func__);
 	else {
 		//FIXME
 	}
@@ -483,7 +483,7 @@ static void if_unthrottle(struct tty_struct *tty)
 	mutex_lock(&cs->mutex);
 
 	if (!cs->open_count)
-		warn("%s: device not opened", __func__);
+		dev_warn(cs->dev, "%s: device not opened\n", __func__);
 	else {
 		//FIXME
 	}
@@ -510,7 +510,7 @@ static void if_set_termios(struct tty_struct *tty, struct ktermios *old)
 	mutex_lock(&cs->mutex);
 
 	if (!cs->open_count) {
-		warn("%s: device not opened", __func__);
+		dev_warn(cs->dev, "%s: device not opened\n", __func__);
 		goto out;
 	}
 
@@ -623,7 +623,8 @@ void gigaset_if_init(struct cardstate *cs)
 	if (!IS_ERR(cs->tty_dev))
 		dev_set_drvdata(cs->tty_dev, cs);
 	else {
-		warn("could not register device to the tty subsystem");
+		dev_warn(cs->dev,
+			 "could not register device to the tty subsystem\n");
 		cs->tty_dev = NULL;
 	}
 	mutex_unlock(&cs->mutex);
diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c
index 77d20ab0cd4..4661830a49d 100644
--- a/drivers/isdn/gigaset/usb-gigaset.c
+++ b/drivers/isdn/gigaset/usb-gigaset.c
@@ -498,8 +498,9 @@ static int send_cb(struct cardstate *cs, struct cmdbuf_t *cb)
 
 			if (status) {
 				ucs->busy = 0;
-				err("could not submit urb (error %d)\n",
-				    -status);
+				dev_err(cs->dev,
+					"could not submit urb (error %d)\n",
+					-status);
 				cb->len = 0; /* skip urb => remove cb+wakeup
 						in next loop cycle */
 			}
@@ -670,7 +671,7 @@ static int write_modem(struct cardstate *cs)
 	spin_unlock_irqrestore(&cs->lock, flags);
 
 	if (ret) {
-		err("could not submit urb (error %d)\n", -ret);
+		dev_err(cs->dev, "could not submit urb (error %d)\n", -ret);
 		ucs->busy = 0;
 	}
 
-- 
GitLab


From 5f09c4c797d00bef5700e1ca085b4efcedaf34b8 Mon Sep 17 00:00:00 2001
From: Tilman Schmidt <tilman@imap.cc>
Date: Wed, 23 Jul 2008 21:28:27 -0700
Subject: [PATCH 217/853] gigaset: gigaset_isowbuf_getbytes() may return signed
 unnoticed

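ifd->offset is unsigned, so a negative error code returned by
gigaset_isowbuf_getbytes() went unnoticed when it was assigned there
directly.  Check the return value in rc first and only store it in
ifd->offset once it is known to be non-negative.  Revised version of
patch originally submitted by Roel Kluin <12o3l@tiscali.nl>.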

Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/isdn/gigaset/bas-gigaset.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c
index 5255b5e20e1..3f11910c7cc 100644
--- a/drivers/isdn/gigaset/bas-gigaset.c
+++ b/drivers/isdn/gigaset/bas-gigaset.c
@@ -1050,10 +1050,9 @@ static int submit_iso_write_urb(struct isow_urbctx_t *ucx)
 		}
 
 		/* retrieve block of data to send */
-		ifd->offset = gigaset_isowbuf_getbytes(ubc->isooutbuf,
-						       ifd->length);
-		if (ifd->offset < 0) {
-			if (ifd->offset == -EBUSY) {
+		rc = gigaset_isowbuf_getbytes(ubc->isooutbuf, ifd->length);
+		if (rc < 0) {
+			if (rc == -EBUSY) {
 				gig_dbg(DEBUG_ISO,
 					"%s: buffer busy at frame %d",
 					__func__, nframe);
@@ -1062,11 +1061,12 @@ static int submit_iso_write_urb(struct isow_urbctx_t *ucx)
 			} else {
 				dev_err(ucx->bcs->cs->dev,
 					"%s: buffer error %d at frame %d\n",
-					__func__, ifd->offset, nframe);
-				return ifd->offset;
+					__func__, rc, nframe);
+				return rc;
 			}
 			break;
 		}
+		ifd->offset = rc;
 		ucx->limit = ubc->isooutbuf->nextread;
 		ifd->status = 0;
 		ifd->actual_length = 0;
-- 
GitLab


From e53f12cc6c43b69f54937f15c5706d83f67c2fdd Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:28 -0700
Subject: [PATCH 218/853] remove include/asm-h8300/keyboard.h

This patch removes the unused include/asm-h8300/keyboard.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-h8300/keyboard.h | 24 ------------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 include/asm-h8300/keyboard.h

diff --git a/include/asm-h8300/keyboard.h b/include/asm-h8300/keyboard.h
deleted file mode 100644
index 90efbd65539..00000000000
--- a/include/asm-h8300/keyboard.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- *  linux/include/asm-h8300/keyboard.h
- *  Created 04 Dec 2001 by Khaled Hassounah <khassounah@mediumware.net>
- *  This file contains the Dragonball architecture specific keyboard definitions
- */
-
-#ifndef _H8300_KEYBOARD_H
-#define _H8300_KEYBOARD_H
-
-
-/* dummy i.e. no real keyboard */
-#define kbd_setkeycode(x...)	(-ENOSYS)
-#define kbd_getkeycode(x...)	(-ENOSYS)
-#define kbd_translate(x...)	(0)
-#define kbd_unexpected_up(x...)	(1)
-#define kbd_leds(x...)		do {;} while (0)
-#define kbd_init_hw(x...)	do {;} while (0)
-#define kbd_enable_irq(x...)	do {;} while (0)
-#define kbd_disable_irq(x...)	do {;} while (0)
-
-#endif  /* _H8300_KEYBOARD_H */
-
-
-
-- 
GitLab


From fb9ba4e95921f71d874beee2d0964fc2322b47a2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:29 -0700
Subject: [PATCH 219/853] alpha: remove the unused ALPHA_CORE_AGP option

The real option is named AGP_ALPHA_CORE.

Reviewed-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/Kconfig | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index dbe8c280fea..1bec55d63ef 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -333,11 +333,6 @@ config PCI_SYSCALL
 config IOMMU_HELPER
 	def_bool PCI
 
-config ALPHA_CORE_AGP
-	bool
-	depends on ALPHA_GENERIC || ALPHA_TITAN || ALPHA_MARVEL
-	default y
-
 config ALPHA_NONAME
 	bool
 	depends on ALPHA_BOOK1 || ALPHA_NONAME_CH
-- 
GitLab


From 0d63081d418c73cc187c893069e0f24c4c6eecd3 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Wed, 23 Jul 2008 21:28:32 -0700
Subject: [PATCH 220/853] swsusp: provide users with a hint about the
 no_console_suspend option

Tell the user about the no_console_suspend option, so that we don't have to
tell each bug reporter personally.

[akpm@linux-foundation.org: clarify the text a little]
Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/printk.c b/kernel/printk.c
index 07ad9e7f7a6..3f7a2a94583 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -933,7 +933,7 @@ void suspend_console(void)
 {
 	if (!console_suspend_enabled)
 		return;
-	printk("Suspending console(s)\n");
+	printk("Suspending console(s) (use no_console_suspend to debug)\n");
 	acquire_console_sem();
 	console_suspended = 1;
 }
-- 
GitLab


From 77437fd4e61f87cc94d9314baa5cbf50e3ccdf54 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 23 Jul 2008 21:28:33 -0700
Subject: [PATCH 221/853] pm: boot time suspend selftest

Boot-time test for system suspend states (STR or standby).  The generic
RTC framework triggers wakeup alarms, which are used to exit those states.

  - Measures some aspects of suspend time ... this uses "jiffies" until
    someone converts it to use a timebase that works properly even while
    timer IRQs are disabled.

  - Triggered by a command line parameter.  By default nothing even
    vaguely troublesome will happen, but "test_suspend=mem" will give
    you a brief STR test during system boot.  (Or you may need to use
    "test_suspend=standby" instead, if your hardware needs that.)

This isn't without problems.  It fires early enough during boot that for
example both PCMCIA and MMC stacks have misbehaved.  The workaround in
those cases was to boot without such media cards inserted.

[matthltc@us.ibm.com: fix compile failure in boot time suspend selftest]
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Pavel Machek <pavel@suse.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |   9 +-
 kernel/power/Kconfig                |  11 ++
 kernel/power/main.c                 | 194 +++++++++++++++++++++++++++-
 3 files changed, 212 insertions(+), 2 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 01a2992b575..4d705713cab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -87,7 +87,8 @@ parameter is applicable:
 	SH	SuperH architecture is enabled.
 	SMP	The kernel is an SMP kernel.
 	SPARC	Sparc architecture is enabled.
-	SWSUSP	Software suspend is enabled.
+	SWSUSP	Software suspend (hibernation) is enabled.
+	SUSPEND	System suspend states are enabled.
 	TS	Appropriate touchscreen support is enabled.
 	USB	USB support is enabled.
 	USBHID	USB Human Interface Device support is enabled.
@@ -2123,6 +2124,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	tdfx=		[HW,DRM]
 
+	test_suspend=	[SUSPEND]
+			Specify "mem" (for Suspend-to-RAM) or "standby" (for
+			standby suspend) as the system sleep state to briefly
+			enter during system startup.  The system is woken from
+			this state using a wakeup-capable RTC alarm.
+
 	thash_entries=	[KNL,NET]
 			Set number of hash buckets for TCP connection
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 59dfdf1e1d2..dcd165f92a8 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -94,6 +94,17 @@ config SUSPEND
 	  powered and thus its contents are preserved, such as the
 	  suspend-to-RAM state (e.g. the ACPI S3 state).
 
+config PM_TEST_SUSPEND
+	bool "Test suspend/resume and wakealarm during bootup"
+	depends on SUSPEND && PM_DEBUG && RTC_LIB=y
+	---help---
+	This option will let you suspend your machine during bootup, and
+	make it wake up a few seconds later using an RTC wakeup alarm.
+	Enable this with a kernel parameter like "test_suspend=mem".
+
+	You probably want to have your system's RTC driver statically
+	linked, ensuring that it's available when this test runs.
+
 config SUSPEND_FREEZER
 	bool "Enable freezer for suspend to RAM/standby" \
 		if ARCH_WANTS_FREEZER_CONTROL || BROKEN
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 3398f4651aa..95bff23ecda 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -132,6 +132,61 @@ static inline int suspend_test(int level) { return 0; }
 
 #ifdef CONFIG_SUSPEND
 
+#ifdef CONFIG_PM_TEST_SUSPEND
+
+/*
+ * We test the system suspend code by setting an RTC wakealarm a short
+ * time in the future, then suspending.  Suspending the devices won't
+ * normally take long ... some systems only need a few milliseconds.
+ *
+ * The time it takes is system-specific though, so when we test this
+ * during system bootup we allow a LOT of time.
+ */
+#define TEST_SUSPEND_SECONDS	5
+
+static unsigned long suspend_test_start_time;
+
+static void suspend_test_start(void)
+{
+	/* FIXME Use better timebase than "jiffies", ideally a clocksource.
+	 * What we want is a hardware counter that will work correctly even
+	 * during the irqs-are-off stages of the suspend/resume cycle...
+	 */
+	suspend_test_start_time = jiffies;
+}
+
+static void suspend_test_finish(const char *label)
+{
+	long nj = jiffies - suspend_test_start_time;
+	unsigned msec;
+
+	msec = jiffies_to_msecs(abs(nj));
+	pr_info("PM: %s took %d.%03d seconds\n", label,
+			msec / 1000, msec % 1000);
+
+	/* Warning on suspend means the RTC alarm period needs to be
+	 * larger -- the system was sooo slooowwww to suspend that the
+	 * alarm (should have) fired before the system went to sleep!
+	 *
+	 * Warning on either suspend or resume also means the system
+	 * has some performance issues.  The stack dump of a WARN_ON
+	 * is more likely to get the right attention than a printk...
+	 */
+	WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000));
+}
+
+#else
+
+static void suspend_test_start(void)
+{
+}
+
+static void suspend_test_finish(const char *label)
+{
+}
+
+#endif
+
 /* This is just an arbitrary number */
 #define FREE_PAGE_NUMBER (100)
 
@@ -266,12 +321,13 @@ int suspend_devices_and_enter(suspend_state_t state)
 			goto Close;
 	}
 	suspend_console();
+	suspend_test_start();
 	error = device_suspend(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to suspend\n");
 		goto Recover_platform;
 	}
-
+	suspend_test_finish("suspend devices");
 	if (suspend_test(TEST_DEVICES))
 		goto Recover_platform;
 
@@ -293,7 +349,9 @@ int suspend_devices_and_enter(suspend_state_t state)
 	if (suspend_ops->finish)
 		suspend_ops->finish();
  Resume_devices:
+	suspend_test_start();
 	device_resume(PMSG_RESUME);
+	suspend_test_finish("resume devices");
 	resume_console();
  Close:
 	if (suspend_ops->end)
@@ -521,3 +579,158 @@ static int __init pm_init(void)
 }
 
 core_initcall(pm_init);
+
+
+#ifdef CONFIG_PM_TEST_SUSPEND
+
+#include <linux/rtc.h>
+
+/*
+ * To test system suspend, we need a hands-off mechanism to resume the
+ * system.  RTC wake alarms are a common self-contained mechanism.
+ */
+
+/*
+ * Set a wake alarm TEST_SUSPEND_SECONDS ahead on @rtc, then enter @state.
+ * A "mem" request is retried as "standby" if pm_suspend() returns -ENODEV.
+ */
+static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
+{
+	static char err_readtime[] __initdata =
+		KERN_ERR "PM: can't read %s time, err %d\n";
+	static char err_wakealarm[] __initdata =
+		KERN_ERR "PM: can't set %s wakealarm, err %d\n";
+	static char err_suspend[] __initdata =
+		KERN_ERR "PM: suspend test failed, error %d\n";
+	static char info_test[] __initdata =
+		KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
+
+	unsigned long		now;
+	struct rtc_wkalrm	alm;
+	int			status;
+
+	/* this may fail if the RTC hasn't been initialized */
+	status = rtc_read_time(rtc, &alm.time);
+	if (status < 0) {
+		printk(err_readtime, rtc->dev.bus_id, status);
+		return;
+	}
+	rtc_tm_to_time(&alm.time, &now);
+
+	memset(&alm, 0, sizeof alm);
+	rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
+	alm.enabled = true;
+
+	status = rtc_set_alarm(rtc, &alm);
+	if (status < 0) {
+		printk(err_wakealarm, rtc->dev.bus_id, status);
+		return;
+	}
+
+	if (state == PM_SUSPEND_MEM) {
+		printk(info_test, pm_states[state]);
+		status = pm_suspend(state);
+		if (status == -ENODEV)
+			state = PM_SUSPEND_STANDBY;
+	}
+	if (state == PM_SUSPEND_STANDBY) {
+		printk(info_test, pm_states[state]);
+		status = pm_suspend(state);
+	}
+	if (status < 0)
+		printk(err_suspend, status);
+}
+
+/*
+ * class_find_device() match callback: accept an RTC that implements
+ * set_alarm and whose parent device may wake the system, and report its
+ * name through @name_ptr (a char **).
+ */
+static int __init has_wakealarm(struct device *dev, void *name_ptr)
+{
+	struct rtc_device *candidate = to_rtc_device(dev);
+
+	if (!candidate->ops->set_alarm)
+		return 0;
+	if (!device_may_wakeup(candidate->dev.parent))
+		return 0;
+
+	*(char **)name_ptr = dev->bus_id;
+	return 1;
+}
+
+/*
+ * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
+ * at startup time.  They're normally disabled, for faster boot and because
+ * we can't know which states really work on this particular system.
+ */
+static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
+
+static char warn_bad_state[] __initdata =
+	KERN_WARNING "PM: can't test '%s' suspend state\n";
+
+/*
+ * Parse "test_suspend=<state>" and record the matching pm_states[] index
+ * in test_state.  Returns 1 (option handled, per the __setup() convention)
+ * even for an unknown state name, which is only warned about.
+ */
+static int __init setup_test_suspend(char *value)
+{
+	unsigned i;
+
+	/* "=mem" ==> "mem" */
+	value++;
+	for (i = 0; i < PM_SUSPEND_MAX; i++) {
+		if (!pm_states[i])
+			continue;
+		if (strcmp(pm_states[i], value) != 0)
+			continue;
+		test_state = (__force suspend_state_t) i;
+		return 1;
+	}
+	printk(warn_bad_state, value);
+	return 1;
+}
+__setup("test_suspend", setup_test_suspend);
+
+/*
+ * Late initcall: if a testable state was requested on the command line,
+ * find a wakealarm-capable RTC and run one test cycle.  Always returns 0.
+ */
+static int __init test_suspend(void)
+{
+	static char		warn_no_rtc[] __initdata =
+		KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
+
+	char			*pony = NULL;
+	struct rtc_device	*rtc = NULL;
+	struct device		*dev;
+
+	/* PM is initialized by now; is that state testable? */
+	if (test_state == PM_SUSPEND_ON)
+		goto done;
+	if (!valid_state(test_state)) {
+		printk(warn_bad_state, pm_states[test_state]);
+		goto done;
+	}
+
+	/* RTCs have initialized by now too ... can we use one? */
+	dev = class_find_device(rtc_class, NULL, &pony, has_wakealarm);
+	if (pony)
+		rtc = rtc_class_open(pony);
+	/* pony points into *dev: drop the lookup reference only after use */
+	put_device(dev);
+	if (!rtc) {
+		printk(warn_no_rtc);
+		goto done;
+	}
+
+	/* go for it */
+	test_wakealarm(rtc, test_state);
+	rtc_class_close(rtc);
+done:
+	return 0;
+}
+late_initcall(test_suspend);
+
+#endif /* CONFIG_PM_TEST_SUSPEND */
-- 
GitLab


From d75f65fd247fe85d90a3880d143b1bb22fe13a48 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:34 -0700
Subject: [PATCH 222/853] remove include/linux/pm_legacy.h

Remove the obsolete and no longer used include/linux/pm_legacy.h

Reviewed-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Pavel Machek <pavel@suse.cz>
Acked-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/kernel/pm.c            |  1 -
 arch/mips/au1000/common/power.c |  1 -
 arch/x86/kernel/apm_32.c        |  1 -
 include/linux/pm_legacy.h       | 35 ---------------------------------
 4 files changed, 38 deletions(-)
 delete mode 100644 include/linux/pm_legacy.h

diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index 73f3aeefd20..d1113c5031f 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -14,7 +14,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/pm_legacy.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/sysctl.h>
diff --git a/arch/mips/au1000/common/power.c b/arch/mips/au1000/common/power.c
index 2166b9e1e80..bd854a6d1d8 100644
--- a/arch/mips/au1000/common/power.c
+++ b/arch/mips/au1000/common/power.c
@@ -31,7 +31,6 @@
 
 #include <linux/init.h>
 #include <linux/pm.h>
-#include <linux/pm_legacy.h>
 #include <linux/sysctl.h>
 #include <linux/jiffies.h>
 
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index bf9b441331e..9ee24e6bc4b 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -219,7 +219,6 @@
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
-#include <linux/pm_legacy.h>
 #include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
diff --git a/include/linux/pm_legacy.h b/include/linux/pm_legacy.h
deleted file mode 100644
index 446f4f42b95..00000000000
--- a/include/linux/pm_legacy.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef __LINUX_PM_LEGACY_H__
-#define __LINUX_PM_LEGACY_H__
-
-
-#ifdef CONFIG_PM_LEGACY
-
-/*
- * Register a device with power management
- */
-struct pm_dev __deprecated *
-pm_register(pm_dev_t type, unsigned long id, pm_callback callback);
-
-/*
- * Send a request to all devices
- */
-int __deprecated pm_send_all(pm_request_t rqst, void *data);
-
-#else /* CONFIG_PM_LEGACY */
-
-static inline struct pm_dev *pm_register(pm_dev_t type,
-					 unsigned long id,
-					 pm_callback callback)
-{
-	return NULL;
-}
-
-static inline int pm_send_all(pm_request_t rqst, void *data)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PM_LEGACY */
-
-#endif /* __LINUX_PM_LEGACY_H__ */
-
-- 
GitLab


From 558481f038e587b22d02167af58914c814ce9de5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Jul 2008 21:28:35 -0700
Subject: [PATCH 223/853] pm: remove definition of struct pm_dev

Remove the definition of 'struct pm_dev', which is not used any more,
along with some related stuff from include/linux/pm.h .

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pm.h | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 4ad9de94449..5bf1ce89cfb 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -68,30 +68,6 @@ enum
  */
 #define PM_PCI_ID(dev) ((dev)->bus->number << 16 | (dev)->devfn)
 
-/*
- * Request handler callback
- */
-struct pm_dev;
-
-typedef int (*pm_callback)(struct pm_dev *dev, pm_request_t rqst, void *data);
-
-/*
- * Dynamic device information
- */
-struct pm_dev
-{
-	pm_dev_t	 type;
-	unsigned long	 id;
-	pm_callback	 callback;
-	void		*data;
-
-	unsigned long	 flags;
-	unsigned long	 state;
-	unsigned long	 prev_state;
-
-	struct list_head entry;
-};
-
 /* Functions above this comment are list-based old-style power
  * management. Please avoid using them.  */
 
-- 
GitLab


From e7ecb331e11d1f7aa66aeef9170fc20781c9bb55 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Jul 2008 21:28:35 -0700
Subject: [PATCH 224/853] pm: remove remaining obsolete definitions from pm.h

Remove the remaining obsolete definitions from include/linux/pm.h and move
the definitions of PM_SUSPEND and PM_RESUME to the header of h3600 which
is the only user of them.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-arm/arch-sa1100/h3600.h |  5 ++++
 include/linux/pm.h                  | 46 -----------------------------
 2 files changed, 5 insertions(+), 46 deletions(-)

diff --git a/include/asm-arm/arch-sa1100/h3600.h b/include/asm-arm/arch-sa1100/h3600.h
index 1b635597157..3ca0ecf095e 100644
--- a/include/asm-arm/arch-sa1100/h3600.h
+++ b/include/asm-arm/arch-sa1100/h3600.h
@@ -23,6 +23,11 @@
 #ifndef _INCLUDE_H3600_H_
 #define _INCLUDE_H3600_H_
 
+typedef int __bitwise pm_request_t;
+
+#define PM_SUSPEND	((__force pm_request_t) 1)	/* enter D1-D3 */
+#define PM_RESUME	((__force pm_request_t) 2)	/* enter D0 */
+
 /* generalized support for H3xxx series Compaq Pocket PC's */
 #define machine_is_h3xxx() (machine_is_h3100() || machine_is_h3600() || machine_is_h3800())
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 5bf1ce89cfb..390dd95a375 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -25,52 +25,6 @@
 #include <asm/atomic.h>
 #include <asm/errno.h>
 
-/*
- * Power management requests... these are passed to pm_send_all() and friends.
- *
- * these functions are old and deprecated, see below.
- */
-typedef int __bitwise pm_request_t;
-
-#define PM_SUSPEND	((__force pm_request_t) 1)	/* enter D1-D3 */
-#define PM_RESUME	((__force pm_request_t) 2)	/* enter D0 */
-
-
-/*
- * Device types... these are passed to pm_register
- */
-typedef int __bitwise pm_dev_t;
-
-#define PM_UNKNOWN_DEV	((__force pm_dev_t) 0)	/* generic */
-#define PM_SYS_DEV	((__force pm_dev_t) 1)	/* system device (fan, KB controller, ...) */
-#define PM_PCI_DEV	((__force pm_dev_t) 2)	/* PCI device */
-#define PM_USB_DEV	((__force pm_dev_t) 3)	/* USB device */
-#define PM_SCSI_DEV	((__force pm_dev_t) 4)	/* SCSI device */
-#define PM_ISA_DEV	((__force pm_dev_t) 5)	/* ISA device */
-#define	PM_MTD_DEV	((__force pm_dev_t) 6)	/* Memory Technology Device */
-
-/*
- * System device hardware ID (PnP) values
- */
-enum
-{
-	PM_SYS_UNKNOWN = 0x00000000, /* generic */
-	PM_SYS_KBC =	 0x41d00303, /* keyboard controller */
-	PM_SYS_COM =	 0x41d00500, /* serial port */
-	PM_SYS_IRDA =	 0x41d00510, /* IRDA controller */
-	PM_SYS_FDC =	 0x41d00700, /* floppy controller */
-	PM_SYS_VGA =	 0x41d00900, /* VGA controller */
-	PM_SYS_PCMCIA =	 0x41d00e00, /* PCMCIA controller */
-};
-
-/*
- * Device identifier
- */
-#define PM_PCI_ID(dev) ((dev)->bus->number << 16 | (dev)->devfn)
-
-/* Functions above this comment are list-based old-style power
- * management. Please avoid using them.  */
-
 /*
  * Callbacks for platform drivers to implement.
  */
-- 
GitLab


From 40b4ac33b4d1bdd5cbeb2241be2399c550fa3696 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Jul 2008 21:28:36 -0700
Subject: [PATCH 225/853] pm: remove obsolete piece of PM documentation

Remove some obsolete PM documentation.

The majority of contents of Documentation/power/pm.txt are
outdated.  Remove the outdated parts of this file and move the rest
to Documentation/power/apm-acpi.txt .  Update the index in
Documentation/power/ as appropriate.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/power/00-INDEX     |   4 +-
 Documentation/power/apm-acpi.txt |  32 ++++
 Documentation/power/pm.txt       | 257 -------------------------------
 3 files changed, 34 insertions(+), 259 deletions(-)
 create mode 100644 Documentation/power/apm-acpi.txt
 delete mode 100644 Documentation/power/pm.txt

diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX
index a55d7f1c836..fb742c213c9 100644
--- a/Documentation/power/00-INDEX
+++ b/Documentation/power/00-INDEX
@@ -1,5 +1,7 @@
 00-INDEX
 	- This file
+apm-acpi.txt
+	- basic info about the APM and ACPI support.
 basic-pm-debugging.txt
 	- Debugging suspend and resume
 devices.txt
@@ -14,8 +16,6 @@ notifiers.txt
 	- Registering suspend notifiers in device drivers
 pci.txt
 	- How the PCI Subsystem Does Power Management
-pm.txt
-	- info on Linux power management support.
 pm_qos_interface.txt
 	- info on Linux PM Quality of Service interface
 power_supply_class.txt
diff --git a/Documentation/power/apm-acpi.txt b/Documentation/power/apm-acpi.txt
new file mode 100644
index 00000000000..1bd799dc17e
--- /dev/null
+++ b/Documentation/power/apm-acpi.txt
@@ -0,0 +1,32 @@
+APM or ACPI?
+------------
+If you have a relatively recent x86 mobile, desktop, or server system,
+odds are it supports either Advanced Power Management (APM) or
+Advanced Configuration and Power Interface (ACPI).  ACPI is the newer
+of the two technologies and puts power management in the hands of the
+operating system, allowing for more intelligent power management than
+is possible with BIOS controlled APM.
+
+The best way to determine which, if either, your system supports is to
+build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is
+enabled by default).  If a working ACPI implementation is found, the
+ACPI driver will override and disable APM, otherwise the APM driver
+will be used.
+
+No, sorry, you cannot have both ACPI and APM enabled and running at
+once.  Some people with broken ACPI or broken APM implementations
+would like to use both to get a full set of working features, but you
+simply cannot mix and match the two.  Only one power management
+interface can be in control of the machine at once.  Think about it..
+
+User-space Daemons
+------------------
+Both APM and ACPI rely on user-space daemons, apmd and acpid
+respectively, to be completely functional.  Obtain both of these
+daemons from your Linux distribution or from the Internet (see below)
+and be sure that they are started sometime in the system boot process.
+Go ahead and start both.  If ACPI or APM is not available on your
+system the associated daemon will exit gracefully.
+
+  apmd:   http://worldvisions.ca/~apenwarr/apmd/
+  acpid:  http://acpid.sf.net/
diff --git a/Documentation/power/pm.txt b/Documentation/power/pm.txt
deleted file mode 100644
index be841507e43..00000000000
--- a/Documentation/power/pm.txt
+++ /dev/null
@@ -1,257 +0,0 @@
-               Linux Power Management Support
-
-This document briefly describes how to use power management with your
-Linux system and how to add power management support to Linux drivers.
-
-APM or ACPI?
-------------
-If you have a relatively recent x86 mobile, desktop, or server system,
-odds are it supports either Advanced Power Management (APM) or
-Advanced Configuration and Power Interface (ACPI).  ACPI is the newer
-of the two technologies and puts power management in the hands of the
-operating system, allowing for more intelligent power management than
-is possible with BIOS controlled APM.
-
-The best way to determine which, if either, your system supports is to
-build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is
-enabled by default).  If a working ACPI implementation is found, the
-ACPI driver will override and disable APM, otherwise the APM driver
-will be used.
-
-No, sorry, you cannot have both ACPI and APM enabled and running at
-once.  Some people with broken ACPI or broken APM implementations
-would like to use both to get a full set of working features, but you
-simply cannot mix and match the two.  Only one power management
-interface can be in control of the machine at once.  Think about it..
-
-User-space Daemons
-------------------
-Both APM and ACPI rely on user-space daemons, apmd and acpid
-respectively, to be completely functional.  Obtain both of these
-daemons from your Linux distribution or from the Internet (see below)
-and be sure that they are started sometime in the system boot process.
-Go ahead and start both.  If ACPI or APM is not available on your
-system the associated daemon will exit gracefully.
-
-  apmd:   http://worldvisions.ca/~apenwarr/apmd/
-  acpid:  http://acpid.sf.net/
-
-Driver Interface -- OBSOLETE, DO NOT USE!
-----------------*************************
-
-Note: pm_register(), pm_access(), pm_dev_idle() and friends are
-obsolete. Please do not use them. Instead you should properly hook
-your driver into the driver model, and use its suspend()/resume()
-callbacks to do this kind of stuff.
-
-If you are writing a new driver or maintaining an old driver, it
-should include power management support.  Without power management
-support, a single driver may prevent a system with power management
-capabilities from ever being able to suspend (safely).
-
-Overview:
-1) Register each instance of a device with "pm_register"
-2) Call "pm_access" before accessing the hardware.
-   (this will ensure that the hardware is awake and ready)
-3) Your "pm_callback" is called before going into a
-   suspend state (ACPI D1-D3) or after resuming (ACPI D0)
-   from a suspend.
-4) Call "pm_dev_idle" when the device is not being used
-   (optional but will improve device idle detection)
-5) When unloaded, unregister the device with "pm_unregister"
-
-/*
- * Description: Register a device with the power-management subsystem
- *
- * Parameters:
- *   type - device type (PCI device, system device, ...)
- *   id - instance number or unique identifier
- *   cback - request handler callback (suspend, resume, ...)
- *
- * Returns: Registered PM device or NULL on error
- *
- * Examples:
- *   dev = pm_register(PM_SYS_DEV, PM_SYS_VGA, vga_callback);
- *
- *   struct pci_dev *pci_dev = pci_find_dev(...);
- *   dev = pm_register(PM_PCI_DEV, PM_PCI_ID(pci_dev), callback);
- */
-struct pm_dev *pm_register(pm_dev_t type, unsigned long id, pm_callback cback);
-
-/*
- * Description: Unregister a device with the power management subsystem
- *
- * Parameters:
- *   dev - PM device previously returned from pm_register
- */
-void pm_unregister(struct pm_dev *dev);
-
-/*
- * Description: Unregister all devices with a matching callback function
- *
- * Parameters:
- *   cback - previously registered request callback
- *
- * Notes: Provided for easier porting from old APM interface
- */
-void pm_unregister_all(pm_callback cback);
-
-/*
- * Power management request callback
- *
- * Parameters:
- *   dev - PM device previously returned from pm_register
- *   rqst - request type
- *   data - data, if any, associated with the request
- *
- * Returns: 0 if the request is successful
- *          EINVAL if the request is not supported
- *          EBUSY if the device is now busy and cannot handle the request
- *          ENOMEM if the device was unable to handle the request due to memory
- *
- * Details: The device request callback will be called before the
- *          device/system enters a suspend state (ACPI D1-D3) or
- *          or after the device/system resumes from suspend (ACPI D0).
- *          For PM_SUSPEND, the ACPI D-state being entered is passed
- *          as the "data" argument to the callback.  The device
- *          driver should save (PM_SUSPEND) or restore (PM_RESUME)
- *          device context when the request callback is called.
- *
- *          Once a driver returns 0 (success) from a suspend
- *          request, it should not process any further requests or
- *          access the device hardware until a call to "pm_access" is made.
- */
-typedef int (*pm_callback)(struct pm_dev *dev, pm_request_t rqst, void *data);
-
-Driver Details
---------------
-This is just a quick Q&A as a stopgap until a real driver writers'
-power management guide is available.
-
-Q: When is a device suspended?
-
-Devices can be suspended based on direct user request (eg. laptop lid
-closes), system power policy (eg.  sleep after 30 minutes of console
-inactivity), or device power policy (eg. power down device after 5
-minutes of inactivity)
-
-Q: Must a driver honor a suspend request?
-
-No, a driver can return -EBUSY from a suspend request and this
-will stop the system from suspending.  When a suspend request
-fails, all suspended devices are resumed and the system continues
-to run.  Suspend can be retried at a later time.
-
-Q: Can the driver block suspend/resume requests?
-
-Yes, a driver can delay its return from a suspend or resume
-request until the device is ready to handle requests.  It
-is advantageous to return as quickly as possible from a
-request as suspend/resume are done serially.
-
-Q: What context is a suspend/resume initiated from?
-
-A suspend or resume is initiated from a kernel thread context.
-It is safe to block, allocate memory, initiate requests
-or anything else you can do within the kernel.
-
-Q: Will requests continue to arrive after a suspend?
-
-Possibly.  It is the driver's responsibility to queue(*),
-fail, or drop any requests that arrive after returning
-success to a suspend request.  It is important that the
-driver not access its device until after it receives
-a resume request as the device's bus may no longer
-be active.
-
-(*) If a driver queues requests for processing after
-    resume be aware that the device, network, etc.
-    might be in a different state than at suspend time.
-    It's probably better to drop requests unless
-    the driver is a storage device.
-
-Q: Do I have to manage bus-specific power management registers
-
-No.  It is the responsibility of the bus driver to manage
-PCI, USB, etc. power management registers.  The bus driver
-or the power management subsystem will also enable any
-wake-on functionality that the device has.
-
-Q: So, really, what do I need to do to support suspend/resume?
-
-You need to save any device context that would
-be lost if the device was powered off and then restore
-it at resume time.  When ACPI is active, there are
-three levels of device suspend states; D1, D2, and D3.
-(The suspend state is passed as the "data" argument
-to the device callback.)  With D3, the device is powered
-off and loses all context, D1 and D2 are shallower power
-states and require less device context to be saved.  To
-play it safe, just save everything at suspend and restore
-everything at resume.
-
-Q: Where do I store device context for suspend?
-
-Anywhere in memory, kmalloc a buffer or store it
-in the device descriptor.  You are guaranteed that the
-contents of memory will be restored and accessible
-before resume, even when the system suspends to disk.
-
-Q: What do I need to do for ACPI vs. APM vs. etc?
-
-Drivers need not be aware of the specific power management
-technology that is active.  They just need to be aware
-of when the overlying power management system requests
-that they suspend or resume.
-
-Q: What about device dependencies?
-
-When a driver registers a device, the power management
-subsystem uses the information provided to build a
-tree of device dependencies (eg. USB device X is on
-USB controller Y which is on PCI bus Z)  When power
-management wants to suspend a device, it first sends
-a suspend request to its driver, then the bus driver,
-and so on up to the system bus.  Device resumes
-proceed in the opposite direction.
-
-Q: Who do I contact for additional information about
-   enabling power management for my specific driver/device?
-
-ACPI Development mailing list: linux-acpi@vger.kernel.org
-
-System Interface -- OBSOLETE, DO NOT USE!
-----------------*************************
-If you are providing new power management support to Linux (ie.
-adding support for something like APM or ACPI), you should
-communicate with drivers through the existing generic power
-management interface.
-
-/*
- * Send a request to all devices
- *
- * Parameters:
- *   rqst - request type
- *   data - data, if any, associated with the request
- *
- * Returns: 0 if the request is successful
- *          See "pm_callback" return for errors
- *
- * Details: Walk list of registered devices and call pm_send
- *          for each until complete or an error is encountered.
- *          If an error is encountered for a suspend request,
- *          return all devices to the state they were in before
- *          the suspend request.
- */
-int pm_send_all(pm_request_t rqst, void *data);
-
-/*
- * Find a matching device
- *
- * Parameters:
- *   type - device type (PCI device, system device, or 0 to match all devices)
- *   from - previous match or NULL to start from the beginning
- *
- * Returns: Matching device or NULL if none found
- */
-struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from);
-- 
GitLab


From 8c363265d57d755e62053e9f69a1f2164e83f7ea Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Jul 2008 21:28:37 -0700
Subject: [PATCH 226/853] pm: drop unnecessary includes from pm.h

Drop unnecessary includes from include/linux/pm.h .

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 390dd95a375..ed98d967f9f 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -22,8 +22,6 @@
 #define _LINUX_PM_H
 
 #include <linux/list.h>
-#include <asm/atomic.h>
-#include <asm/errno.h>
 
 /*
  * Callbacks for platform drivers to implement.
-- 
GitLab


From 8111d1b552349921aae1acf73e4e8cea98e80970 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 23 Jul 2008 21:28:37 -0700
Subject: [PATCH 227/853] pm: add new PM_EVENT codes for runtime power
 transitions

This patch (as1112) adds some new PM_EVENT_* codes for use by kernel
subsystems.  They describe runtime power-state transitions of the sort already
implemented by the USB subsystem.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pm.h | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/include/linux/pm.h b/include/linux/pm.h
index ed98d967f9f..4dcce54b6d7 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -245,6 +245,21 @@ struct pm_ext_ops {
  * RECOVER	Creation of a hibernation image or restoration of the main
  *		memory contents from a hibernation image has failed, call
  *		->thaw() and ->complete() for all devices.
+ *
+ * The following PM_EVENT_ messages are defined for internal use by
+ * kernel subsystems.  They are never issued by the PM core.
+ *
+ * USER_SUSPEND		Manual selective suspend was issued by userspace.
+ *
+ * USER_RESUME		Manual selective resume was issued by userspace.
+ *
+ * REMOTE_WAKEUP	Remote-wakeup request was received from the device.
+ *
+ * AUTO_SUSPEND		Automatic (device idle) runtime suspend was
+ *			initiated by the subsystem.
+ *
+ * AUTO_RESUME		Automatic (device needed) runtime resume was
+ *			requested by a driver.
  */
 
 #define PM_EVENT_ON		0x0000
@@ -256,9 +271,18 @@ struct pm_ext_ops {
 #define PM_EVENT_THAW		0x0020
 #define PM_EVENT_RESTORE	0x0040
 #define PM_EVENT_RECOVER	0x0080
+#define PM_EVENT_USER		0x0100
+#define PM_EVENT_REMOTE		0x0200
+#define PM_EVENT_AUTO		0x0400
 
-#define PM_EVENT_SLEEP	(PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE)
+#define PM_EVENT_SLEEP		(PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE)
+#define PM_EVENT_USER_SUSPEND	(PM_EVENT_USER | PM_EVENT_SUSPEND)
+#define PM_EVENT_USER_RESUME	(PM_EVENT_USER | PM_EVENT_RESUME)
+#define PM_EVENT_REMOTE_WAKEUP	(PM_EVENT_REMOTE | PM_EVENT_RESUME)
+#define PM_EVENT_AUTO_SUSPEND	(PM_EVENT_AUTO | PM_EVENT_SUSPEND)
+#define PM_EVENT_AUTO_RESUME	(PM_EVENT_AUTO | PM_EVENT_RESUME)
 
+#define PMSG_ON		((struct pm_message){ .event = PM_EVENT_ON, })
 #define PMSG_FREEZE	((struct pm_message){ .event = PM_EVENT_FREEZE, })
 #define PMSG_QUIESCE	((struct pm_message){ .event = PM_EVENT_QUIESCE, })
 #define PMSG_SUSPEND	((struct pm_message){ .event = PM_EVENT_SUSPEND, })
@@ -267,7 +291,16 @@ struct pm_ext_ops {
 #define PMSG_THAW	((struct pm_message){ .event = PM_EVENT_THAW, })
 #define PMSG_RESTORE	((struct pm_message){ .event = PM_EVENT_RESTORE, })
 #define PMSG_RECOVER	((struct pm_message){ .event = PM_EVENT_RECOVER, })
-#define PMSG_ON		((struct pm_message){ .event = PM_EVENT_ON, })
+#define PMSG_USER_SUSPEND	((struct pm_message) \
+					{ .event = PM_EVENT_USER_SUSPEND, })
+#define PMSG_USER_RESUME	((struct pm_message) \
+					{ .event = PM_EVENT_USER_RESUME, })
+#define PMSG_REMOTE_RESUME	((struct pm_message) \
+					{ .event = PM_EVENT_REMOTE_WAKEUP, })
+#define PMSG_AUTO_SUSPEND	((struct pm_message) \
+					{ .event = PM_EVENT_AUTO_SUSPEND, })
+#define PMSG_AUTO_RESUME	((struct pm_message) \
+					{ .event = PM_EVENT_AUTO_RESUME, })
 
 /**
  * Device power management states
-- 
GitLab


From 0d83304c7e7bd3b05be90281b3a47841bc8f057a Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Jul 2008 21:28:38 -0700
Subject: [PATCH 228/853] pm: hibernation: simplify memory bitmap

This patch simplifies the memory bitmap manipulations.

- remove the member size in struct bm_block

It is not necessary for struct bm_block to have the number of bit chunks that
can be calculated by using end_pfn and start_pfn.

- use find_next_bit() for memory_bm_next_pfn

No need to invent the bitmap library only for the memory bitmap.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/snapshot.c | 88 ++++++++++-------------------------------
 1 file changed, 21 insertions(+), 67 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5f91a07c4ea..5d2ab836e99 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -205,8 +205,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
  *	objects.  The main list's elements are of type struct zone_bitmap
  *	and each of them corresonds to one zone.  For each zone bitmap
  *	object there is a list of objects of type struct bm_block that
- *	represent each blocks of bit chunks in which information is
- *	stored.
+ *	represent each block of the bitmap in which information is stored.
  *
  *	struct memory_bitmap contains a pointer to the main list of zone
  *	bitmap objects, a struct bm_position used for browsing the bitmap,
@@ -224,26 +223,27 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
  *	pfns that correspond to the start and end of the represented zone.
  *
  *	struct bm_block contains a pointer to the memory page in which
- *	information is stored (in the form of a block of bit chunks
- *	of type unsigned long each).  It also contains the pfns that
- *	correspond to the start and end of the represented memory area and
- *	the number of bit chunks in the block.
+ *	information is stored (in the form of a block of the bitmap).
+ *	It also contains the pfns that correspond to the start and end of
+ *	the represented memory area.
  */
 
 #define BM_END_OF_MAP	(~0UL)
 
-#define BM_CHUNKS_PER_BLOCK	(PAGE_SIZE / sizeof(long))
-#define BM_BITS_PER_CHUNK	(sizeof(long) << 3)
 #define BM_BITS_PER_BLOCK	(PAGE_SIZE << 3)
 
 struct bm_block {
 	struct bm_block *next;		/* next element of the list */
 	unsigned long start_pfn;	/* pfn represented by the first bit */
 	unsigned long end_pfn;	/* pfn represented by the last bit plus 1 */
-	unsigned int size;	/* number of bit chunks */
-	unsigned long *data;	/* chunks of bits representing pages */
+	unsigned long *data;	/* bitmap representing pages */
 };
 
+static inline unsigned long bm_block_bits(struct bm_block *bb)
+{
+	return bb->end_pfn - bb->start_pfn;
+}
+
 struct zone_bitmap {
 	struct zone_bitmap *next;	/* next element of the list */
 	unsigned long start_pfn;	/* minimal pfn in this zone */
@@ -257,7 +257,6 @@ struct zone_bitmap {
 struct bm_position {
 	struct zone_bitmap *zone_bm;
 	struct bm_block *block;
-	int chunk;
 	int bit;
 };
 
@@ -272,12 +271,6 @@ struct memory_bitmap {
 
 /* Functions that operate on memory bitmaps */
 
-static inline void memory_bm_reset_chunk(struct memory_bitmap *bm)
-{
-	bm->cur.chunk = 0;
-	bm->cur.bit = -1;
-}
-
 static void memory_bm_position_reset(struct memory_bitmap *bm)
 {
 	struct zone_bitmap *zone_bm;
@@ -285,7 +278,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	zone_bm = bm->zone_bm_list;
 	bm->cur.zone_bm = zone_bm;
 	bm->cur.block = zone_bm->bm_blocks;
-	memory_bm_reset_chunk(bm);
+	bm->cur.bit = 0;
 }
 
 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
@@ -394,12 +387,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
 			bb->start_pfn = pfn;
 			if (nr >= BM_BITS_PER_BLOCK) {
 				pfn += BM_BITS_PER_BLOCK;
-				bb->size = BM_CHUNKS_PER_BLOCK;
 				nr -= BM_BITS_PER_BLOCK;
 			} else {
 				/* This is executed only once in the loop */
 				pfn += nr;
-				bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK);
 			}
 			bb->end_pfn = pfn;
 			bb = bb->next;
@@ -478,8 +469,8 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 	}
 	zone_bm->cur_block = bb;
 	pfn -= bb->start_pfn;
-	*bit_nr = pfn % BM_BITS_PER_CHUNK;
-	*addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+	*bit_nr = pfn;
+	*addr = bb->data;
 	return 0;
 }
 
@@ -528,36 +519,6 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 	return test_bit(bit, addr);
 }
 
-/* Two auxiliary functions for memory_bm_next_pfn */
-
-/* Find the first set bit in the given chunk, if there is one */
-
-static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p)
-{
-	bit++;
-	while (bit < BM_BITS_PER_CHUNK) {
-		if (test_bit(bit, chunk_p))
-			return bit;
-
-		bit++;
-	}
-	return -1;
-}
-
-/* Find a chunk containing some bits set in given block of bits */
-
-static inline int next_chunk_in_block(int n, struct bm_block *bb)
-{
-	n++;
-	while (n < bb->size) {
-		if (bb->data[n])
-			return n;
-
-		n++;
-	}
-	return -1;
-}
-
 /**
  *	memory_bm_next_pfn - find the pfn that corresponds to the next set bit
  *	in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
@@ -571,40 +532,33 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 {
 	struct zone_bitmap *zone_bm;
 	struct bm_block *bb;
-	int chunk;
 	int bit;
 
 	do {
 		bb = bm->cur.block;
 		do {
-			chunk = bm->cur.chunk;
 			bit = bm->cur.bit;
-			do {
-				bit = next_bit_in_chunk(bit, bb->data + chunk);
-				if (bit >= 0)
-					goto Return_pfn;
-
-				chunk = next_chunk_in_block(chunk, bb);
-				bit = -1;
-			} while (chunk >= 0);
+			bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
+			if (bit < bm_block_bits(bb))
+				goto Return_pfn;
+
 			bb = bb->next;
 			bm->cur.block = bb;
-			memory_bm_reset_chunk(bm);
+			bm->cur.bit = 0;
 		} while (bb);
 		zone_bm = bm->cur.zone_bm->next;
 		if (zone_bm) {
 			bm->cur.zone_bm = zone_bm;
 			bm->cur.block = zone_bm->bm_blocks;
-			memory_bm_reset_chunk(bm);
+			bm->cur.bit = 0;
 		}
 	} while (zone_bm);
 	memory_bm_position_reset(bm);
 	return BM_END_OF_MAP;
 
  Return_pfn:
-	bm->cur.chunk = chunk;
-	bm->cur.bit = bit;
-	return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
+	bm->cur.bit = bit + 1;
+	return bb->start_pfn + bit;
 }
 
 /**
-- 
GitLab


From c1a220e7acf8ad2c03504891f4a70cd9c32c904b Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 23 Jul 2008 21:28:39 -0700
Subject: [PATCH 229/853] pm: introduce new interfaces schedule_work_on() and
 queue_work_on()

This interface allows adding a job on a specific cpu.

Although a work struct on a cpu will be scheduled to another cpu if the cpu
dies, there is a recursion if a work task tries to offline the cpu it's
running on.  We need to schedule the task to a specific cpu in this case.
http://bugzilla.kernel.org/show_bug.cgi?id=10897

[oleg@tv-sign.ru: cleanups]
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Rus <harbour@sfinx.od.ua>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h |  3 +++
 kernel/workqueue.c        | 39 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 542526c6e8e..14d47120682 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -179,6 +179,8 @@ __create_workqueue_key(const char *name, int singlethread,
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
 extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
+extern int queue_work_on(int cpu, struct workqueue_struct *wq,
+			struct work_struct *work);
 extern int queue_delayed_work(struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
 extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
@@ -188,6 +190,7 @@ extern void flush_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
 
 extern int schedule_work(struct work_struct *work);
+extern int schedule_work_on(int cpu, struct work_struct *work);
 extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
 					unsigned long delay);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a6d36346d10..6fd158b2102 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -140,7 +140,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 	wake_up(&cwq->more_work);
 }
 
-/* Preempt must be disabled. */
 static void __queue_work(struct cpu_workqueue_struct *cwq,
 			 struct work_struct *work)
 {
@@ -175,6 +174,31 @@ int queue_work(struct workqueue_struct *wq, struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
+/**
+ * queue_work_on - queue work on specific cpu
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
+ *
+ * We queue the work to a specific CPU, the caller must ensure it
+ * can't go away.
+ */
+int
+queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
+{
+	int ret = 0;
+
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
+		BUG_ON(!list_empty(&work->entry));
+		__queue_work(wq_per_cpu(wq, cpu), work);
+		ret = 1;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_on);
+
 static void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
@@ -553,6 +577,19 @@ int schedule_work(struct work_struct *work)
 }
 EXPORT_SYMBOL(schedule_work);
 
+/**
+ * schedule_work_on - put work task on a specific cpu
+ * @cpu: cpu to put the work task on
+ * @work: job to be done
+ *
+ * This puts a job on a specific cpu
+ */
+int schedule_work_on(int cpu, struct work_struct *work)
+{
+	return queue_work_on(cpu, keventd_wq, work);
+}
+EXPORT_SYMBOL(schedule_work_on);
+
 /**
  * schedule_delayed_work - put work task in global workqueue after delay
  * @dwork: job to be done
-- 
GitLab


From 2f15fc4bdf91eb399da3f47a09c55831d9f22826 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 23 Jul 2008 21:28:40 -0700
Subject: [PATCH 230/853] pm: schedule sysrq poweroff on boot cpu

schedule sysrq poweroff on boot cpu.

sysrq poweroff needs to disable nonboot cpus, and we need to run this on boot
cpu to avoid any recursion.  http://bugzilla.kernel.org/show_bug.cgi?id=10897

[kosaki.motohiro@jp.fujitsu.com: build fix]
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Rus <harbour@sfinx.od.ua>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/poweroff.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 678ec736076..72016f05147 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -10,6 +10,7 @@
 #include <linux/pm.h>
 #include <linux/workqueue.h>
 #include <linux/reboot.h>
+#include <linux/cpumask.h>
 
 /*
  * When the user hits Sys-Rq o to power down the machine this is the
@@ -25,7 +26,8 @@ static DECLARE_WORK(poweroff_work, do_poweroff);
 
 static void handle_poweroff(int key, struct tty_struct *tty)
 {
-	schedule_work(&poweroff_work);
+	/* run sysrq poweroff on boot cpu */
+	schedule_work_on(first_cpu(cpu_online_map), &poweroff_work);
 }
 
 static struct sysrq_key_op	sysrq_poweroff_op = {
-- 
GitLab


From bdfe6b7c681669148dae4db27eb24ee5408ba371 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Wed, 23 Jul 2008 21:28:41 -0700
Subject: [PATCH 231/853] pm: acpi hibernation: utilize hardware signature

ACPI defines a hardware signature.  The BIOS calculates the signature from the
hardware configuration, and if the hardware changes while hibernated, the
signature will change.  In that case, S4 resume should fail.

Still, there may be systems on which this mechanism does not work correctly,
so it is better to provide a workaround for them.  For this reason, add a new
switch to the acpi_sleep= command line argument allowing one to disable
hardware signature checking.

[shaohua.li@intel.com: build fix]
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Len Brown <lenb@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: <Valdis.Kletnieks@vt.edu>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  4 +++-
 arch/x86/kernel/acpi/sleep.c        |  4 ++++
 drivers/acpi/sleep/main.c           | 22 ++++++++++++++++++++++
 include/linux/acpi.h                |  1 +
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4d705713cab..497a98dafda 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -148,10 +148,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			default: 0
 
 	acpi_sleep=	[HW,ACPI] Sleep options
-			Format: { s3_bios, s3_mode, s3_beep, old_ordering }
+			Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering }
 			See Documentation/power/video.txt for s3_bios and s3_mode.
 			s3_beep is for debugging; it makes the PC's speaker beep
 			as soon as the kernel's real-mode entry point is called.
+			s4_nohwsig prevents ACPI hardware signature from being
+			used during resume from hibernation.
 			old_ordering causes the ACPI 1.0 ordering of the _PTS
 			control method, wrt putting devices into low power
 			states, to be enforced (the ACPI 2.0 ordering of _PTS is
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index a3ddad18aaa..fa2161d5003 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -150,6 +150,10 @@ static int __init acpi_sleep_setup(char *str)
 			acpi_realmode_flags |= 2;
 		if (strncmp(str, "s3_beep", 7) == 0)
 			acpi_realmode_flags |= 4;
+#ifdef CONFIG_HIBERNATION
+		if (strncmp(str, "s4_nohwsig", 10) == 0)
+			acpi_no_s4_hw_signature();
+#endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
 		str = strchr(str, ',');
diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c
index 0489a7d1d42..313507accf1 100644
--- a/drivers/acpi/sleep/main.c
+++ b/drivers/acpi/sleep/main.c
@@ -283,6 +283,15 @@ static struct platform_suspend_ops acpi_suspend_ops_old = {
 #endif /* CONFIG_SUSPEND */
 
 #ifdef CONFIG_HIBERNATION
+static unsigned long s4_hardware_signature;
+static struct acpi_table_facs *facs;
+static bool nosigcheck;
+
+void __init acpi_no_s4_hw_signature(void)
+{
+	nosigcheck = true;
+}
+
 static int acpi_hibernation_begin(void)
 {
 	acpi_target_sleep_state = ACPI_STATE_S4;
@@ -316,6 +325,12 @@ static void acpi_hibernation_leave(void)
 	acpi_enable();
 	/* Reprogram control registers and execute _BFS */
 	acpi_leave_sleep_state_prep(ACPI_STATE_S4);
+	/* Check the hardware signature */
+	if (facs && s4_hardware_signature != facs->hardware_signature) {
+		printk(KERN_EMERG "ACPI: Hardware changed while hibernated, "
+			"cannot resume!\n");
+		panic("ACPI S4 hardware signature mismatch");
+	}
 }
 
 static void acpi_pm_enable_gpes(void)
@@ -544,6 +559,13 @@ int __init acpi_sleep_init(void)
 			&acpi_hibernation_ops_old : &acpi_hibernation_ops);
 		sleep_states[ACPI_STATE_S4] = 1;
 		printk(" S4");
+		if (!nosigcheck) {
+			acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS,
+				(struct acpi_table_header **)&facs);
+			if (facs)
+				s4_hardware_signature =
+					facs->hardware_signature;
+		}
 	}
 #endif
 	status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a1717763937..702f79dad16 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -236,6 +236,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n,
 		      const char *name);
 
 #ifdef CONFIG_PM_SLEEP
+void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
 #endif /* CONFIG_PM_SLEEP */
 #else	/* CONFIG_ACPI */
-- 
GitLab


From e41fb7c58e3ca18ec5c9c9bb7bb68e8e653c9e8e Mon Sep 17 00:00:00 2001
From: Carlos Corbacho <carlos@strangeworlds.co.uk>
Date: Wed, 23 Jul 2008 21:28:43 -0700
Subject: [PATCH 232/853] pm: acpi pm: add DMI quirk list for ACPI 1.0 suspend
 ordering

There are a few BIOSes that we know of already that need to use the ACPI 1.0
suspend order.  This appears to be only a small minority of mostly nVidia
based systems.

Based on observation of Windows behaviour, it's clear that Windows is also
maintaining its own list of broken hardware that needs this workaround.

Signed-off-by: Carlos Corbacho <carlos@strangeworlds.co.uk>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Len Brown <lenb@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/acpi/sleep/main.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c
index 313507accf1..d13194a031b 100644
--- a/drivers/acpi/sleep/main.c
+++ b/drivers/acpi/sleep/main.c
@@ -280,6 +280,24 @@ static struct platform_suspend_ops acpi_suspend_ops_old = {
 	.end = acpi_pm_end,
 	.recover = acpi_pm_finish,
 };
+
+static int __init init_old_suspend_ordering(const struct dmi_system_id *d)
+{
+	old_suspend_ordering = true;
+	return 0;
+}
+
+static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
+	{
+	.callback = init_old_suspend_ordering,
+	.ident = "Abit KN9 (nForce4 variant)",
+	.matches = {
+		DMI_MATCH(DMI_BOARD_VENDOR, "http://www.abit.com.tw/"),
+		DMI_MATCH(DMI_BOARD_NAME, "KN9 Series(NF-CK804)"),
+		},
+	},
+	{},
+};
 #endif /* CONFIG_SUSPEND */
 
 #ifdef CONFIG_HIBERNATION
@@ -531,6 +549,8 @@ int __init acpi_sleep_init(void)
 	u8 type_a, type_b;
 #ifdef CONFIG_SUSPEND
 	int i = 0;
+
+	dmi_check_system(acpisleep_dmi_table);
 #endif
 
 	if (acpi_disabled)
-- 
GitLab


From f0af566da6e9a4a2f5a83c5a70f3d0a772050e21 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 23 Jul 2008 21:28:44 -0700
Subject: [PATCH 233/853] pm: fix try_to_freeze_tasks()'s use of do_div()

Fix try_to_freeze_tasks()'s use of do_div() on an s64 by making
elapsed_csecs64 a u64 instead and dividing that.

Possibly this should be guarded lest the interval calculation turn up
negative, but the possible negativity of the result of the division is
cast away anyway.

This was introduced by patch 438e2ce68dfd4af4cfcec2f873564fb921db4bb5.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 5fb87652f21..278946aecaf 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -149,7 +149,7 @@ static int try_to_freeze_tasks(bool sig_only)
 	unsigned long end_time;
 	unsigned int todo;
 	struct timeval start, end;
-	s64 elapsed_csecs64;
+	u64 elapsed_csecs64;
 	unsigned int elapsed_csecs;
 
 	do_gettimeofday(&start);
-- 
GitLab


From 912019572180f287e85b5534fbb1c1e3ca6df6c9 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:45 -0700
Subject: [PATCH 234/853] mn10300: move sg_dma_{address,len}() to
 asm/scatterlist.h

mn10300 was the only architecture where sg_dma_{address,len}() were not
in asm/scatterlist.h, and it's not a big surprise that this caused a
compile error somewhere:

/home/bunk/linux/kernel-2.6/git/linux-2.6/drivers/media/video/videobuf-dma-sg.c: In function `videobuf_dma_map':
/home/bunk/linux/kernel-2.6/git/linux-2.6/drivers/media/video/videobuf-dma-sg.c:238: error: implicit declaration of function 'sg_dma_address'

Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-mn10300/pci.h         | 9 ---------
 include/asm-mn10300/scatterlist.h | 9 +++++++++
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/asm-mn10300/pci.h b/include/asm-mn10300/pci.h
index 205192c52bb..cd9cc5c89ce 100644
--- a/include/asm-mn10300/pci.h
+++ b/include/asm-mn10300/pci.h
@@ -74,15 +74,6 @@ struct pci_dev;
 /* This is always fine. */
 #define pci_dac_dma_supported(pci_dev, mask)	(0)
 
-/*
- * These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg)	((sg)->dma_address)
-#define sg_dma_len(sg)		((sg)->length)
-
 /* Return the index of the PCI controller for device. */
 static inline int pci_controller_num(struct pci_dev *dev)
 {
diff --git a/include/asm-mn10300/scatterlist.h b/include/asm-mn10300/scatterlist.h
index e29d91dbcf2..67535901b9f 100644
--- a/include/asm-mn10300/scatterlist.h
+++ b/include/asm-mn10300/scatterlist.h
@@ -43,4 +43,13 @@ struct scatterlist {
 
 #define ISA_DMA_THRESHOLD (0x00ffffff)
 
+/*
+ * These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+#define sg_dma_len(sg)		((sg)->length)
+
 #endif /* _ASM_SCATTERLIST_H */
-- 
GitLab


From d50004b0867a59f8a81116f000edb352595343d9 Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@intellilink.co.jp>
Date: Wed, 23 Jul 2008 21:28:45 -0700
Subject: [PATCH 235/853] cris: remove unused global_flush_tlb

global_flush_tlb is declared but never used.

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Cc: Mikael Starvik <starvik@axis.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-cris/cacheflush.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/asm-cris/cacheflush.h b/include/asm-cris/cacheflush.h
index 01af2de27c5..cf60e3f69f8 100644
--- a/include/asm-cris/cacheflush.h
+++ b/include/asm-cris/cacheflush.h
@@ -26,7 +26,6 @@
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
 	memcpy(dst, src, len)
 
-void global_flush_tlb(void); 
 int change_page_attr(struct page *page, int numpages, pgprot_t prot);
 
 #endif /* _CRIS_CACHEFLUSH_H */
-- 
GitLab


From ed62f77bb631bc4a2d8acb0521b720cb55e58183 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Jul 2008 21:28:46 -0700
Subject: [PATCH 236/853] cris: use simple_read_from_buffer()

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/cris/kernel/profile.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/cris/kernel/profile.c b/arch/cris/kernel/profile.c
index 44f7b4f7947..9aa571169bc 100644
--- a/arch/cris/kernel/profile.c
+++ b/arch/cris/kernel/profile.c
@@ -35,19 +35,16 @@ read_cris_profile(struct file *file, char __user *buf,
 		  size_t count, loff_t *ppos)
 {
 	unsigned long p = *ppos;
+	ssize_t ret;
 
-	if (p > SAMPLE_BUFFER_SIZE)
-		return 0;
+	ret = simple_read_from_buffer(buf, count, ppos, sample_buffer,
+						SAMPLE_BUFFER_SIZE);
+	if (ret < 0)
+		return ret;
 
-	if (p + count > SAMPLE_BUFFER_SIZE)
-		count = SAMPLE_BUFFER_SIZE - p;
-	if (copy_to_user(buf, sample_buffer + p,count))
-		return -EFAULT;
+	memset(sample_buffer + p, 0, ret);
 
-	memset(sample_buffer + p, 0, count);
-	*ppos += count;
-
-	return count;
+	return ret;
 }
 
 static ssize_t
-- 
GitLab


From 4c182ae7810f3fe444e666f3f78c209a7c116fdf Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 23 Jul 2008 21:28:47 -0700
Subject: [PATCH 237/853] arch/um/kernel/irq.c: clean up some functions

Make activate_fd() and free_irq_by_irq_and_dev() static.  Remove
init_aio_irq() since it has no users.

Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: WANG Cong <wangcong@zeuux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/include/irq_kern.h |  2 --
 arch/um/include/irq_user.h |  2 --
 arch/um/kernel/irq.c       | 35 ++---------------------------------
 3 files changed, 2 insertions(+), 37 deletions(-)

diff --git a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h
index 4f775597fd5..fba3895274f 100644
--- a/arch/um/include/irq_kern.h
+++ b/arch/um/include/irq_kern.h
@@ -13,8 +13,6 @@ extern int um_request_irq(unsigned int irq, int fd, int type,
 			  irq_handler_t handler,
 			  unsigned long irqflags,  const char * devname,
 			  void *dev_id);
-extern int init_aio_irq(int irq, char *name,
-			irq_handler_t handler);
 
 #endif
 
diff --git a/arch/um/include/irq_user.h b/arch/um/include/irq_user.h
index e60b31873de..c6c784df267 100644
--- a/arch/um/include/irq_user.h
+++ b/arch/um/include/irq_user.h
@@ -21,8 +21,6 @@ struct irq_fd {
 enum { IRQ_READ, IRQ_WRITE };
 
 extern void sigio_handler(int sig, struct uml_pt_regs *regs);
-extern int activate_fd(int irq, int fd, int type, void *dev_id);
-extern void free_irq_by_irq_and_dev(unsigned int irq, void *dev_id);
 extern void free_irq_by_fd(int fd);
 extern void reactivate_fd(int fd, int irqnum);
 extern void deactivate_fd(int fd, int irqnum);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 91587f8db34..3d7aad09b17 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -102,7 +102,7 @@ void sigio_handler(int sig, struct uml_pt_regs *regs)
 
 static DEFINE_SPINLOCK(irq_lock);
 
-int activate_fd(int irq, int fd, int type, void *dev_id)
+static int activate_fd(int irq, int fd, int type, void *dev_id)
 {
 	struct pollfd *tmp_pfd;
 	struct irq_fd *new_fd, *irq_fd;
@@ -216,7 +216,7 @@ static int same_irq_and_dev(struct irq_fd *irq, void *d)
 	return ((irq->irq == data->irq) && (irq->id == data->dev));
 }
 
-void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
 	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
 							  .dev  = dev });
@@ -403,37 +403,6 @@ void __init init_IRQ(void)
 	}
 }
 
-int init_aio_irq(int irq, char *name, irq_handler_t handler)
-{
-	int fds[2], err;
-
-	err = os_pipe(fds, 1, 1);
-	if (err) {
-		printk(KERN_ERR "init_aio_irq - os_pipe failed, err = %d\n",
-		       -err);
-		goto out;
-	}
-
-	err = um_request_irq(irq, fds[0], IRQ_READ, handler,
-			     IRQF_DISABLED | IRQF_SAMPLE_RANDOM, name,
-			     (void *) (long) fds[0]);
-	if (err) {
-		printk(KERN_ERR "init_aio_irq - : um_request_irq failed, "
-		       "err = %d\n",
-		       err);
-		goto out_close;
-	}
-
-	err = fds[1];
-	goto out;
-
- out_close:
-	os_close_file(fds[0]);
-	os_close_file(fds[1]);
- out:
-	return err;
-}
-
 /*
  * IRQ stack entry and exit:
  *
-- 
GitLab


From 4a5675820436e4ad738dd442c1cc8a165101509b Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 23 Jul 2008 21:28:49 -0700
Subject: [PATCH 238/853] arch/um/kernel/mem.c: remove arch_validate()

- Remove arch_validate(), because no one uses it.

- Remove useless macro HAVE_ARCH_VALIDATE.

- Make the variable 'empty_bad_page' static.

Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: WANG Cong <wangcong@zeuux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/kernel/ksyms.c |  1 -
 arch/um/kernel/mem.c   | 33 +--------------------------------
 include/asm-um/page.h  |  3 ---
 3 files changed, 1 insertion(+), 36 deletions(-)

diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index ccc02a616c2..836fc9b9470 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -18,7 +18,6 @@ EXPORT_SYMBOL(get_signals);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(sys_waitpid);
 EXPORT_SYMBOL(flush_tlb_range);
-EXPORT_SYMBOL(arch_validate);
 
 EXPORT_SYMBOL(high_physmem);
 EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index b0ee64622ff..e2274ef3155 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -21,7 +21,7 @@
 /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
 unsigned long *empty_zero_page = NULL;
 /* allocated in paging_init and unchanged thereafter */
-unsigned long *empty_bad_page = NULL;
+static unsigned long *empty_bad_page = NULL;
 
 /*
  * Initialized during boot, and readonly for initializing page tables
@@ -240,37 +240,6 @@ void __init paging_init(void)
 #endif
 }
 
-struct page *arch_validate(struct page *page, gfp_t mask, int order)
-{
-	unsigned long addr, zero = 0;
-	int i;
-
- again:
-	if (page == NULL)
-		return page;
-	if (PageHighMem(page))
-		return page;
-
-	addr = (unsigned long) page_address(page);
-	for (i = 0; i < (1 << order); i++) {
-		current->thread.fault_addr = (void *) addr;
-		if (__do_copy_to_user((void __user *) addr, &zero,
-				     sizeof(zero),
-				     &current->thread.fault_addr,
-				     &current->thread.fault_catcher)) {
-			if (!(mask & __GFP_WAIT))
-				return NULL;
-			else break;
-		}
-		addr += PAGE_SIZE;
-	}
-
-	if (i == (1 << order))
-		return page;
-	page = alloc_pages(mask, order);
-	goto again;
-}
-
 /*
  * This can't do anything because nothing in the kernel image can be freed
  * since it's not in kernel physical memory.
diff --git a/include/asm-um/page.h b/include/asm-um/page.h
index 335c57383c0..a6df1f13d73 100644
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -115,9 +115,6 @@ extern unsigned long uml_physmem;
 #define pfn_valid(pfn) ((pfn) < max_mapnr)
 #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
 
-extern struct page *arch_validate(struct page *page, gfp_t mask, int order);
-#define HAVE_ARCH_VALIDATE
-
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
-- 
GitLab


From 99764fa4ceeecba8b9e0a8a5565b418a2e94f83b Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 23 Jul 2008 21:28:49 -0700
Subject: [PATCH 239/853] UML: make several more things static

- Make some variables and functions static, since they don't need to be
  global.

- Remove an unused function - arch/um/kernel/time.c::sched_clock().

- Clean the style a bit as complained by checkpatch.pl.

Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: WANG Cong <wangcong@zeuux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/include/skas/skas.h     | 1 -
 arch/um/include/um_uaccess.h    | 1 -
 arch/um/kernel/physmem.c        | 2 +-
 arch/um/kernel/ptrace.c         | 2 +-
 arch/um/kernel/time.c           | 8 --------
 arch/um/kernel/uaccess.c        | 2 +-
 arch/um/os-Linux/sigio.c        | 2 +-
 arch/um/os-Linux/signal.c       | 2 +-
 arch/um/os-Linux/skas/process.c | 2 +-
 arch/um/os-Linux/umid.c         | 2 +-
 arch/um/sys-i386/bugs.c         | 2 +-
 arch/um/sys-i386/checksum.S     | 5 ++---
 arch/um/sys-i386/ldt.c          | 4 ++--
 include/asm-um/ptrace-generic.h | 3 ---
 14 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/arch/um/include/skas/skas.h b/arch/um/include/skas/skas.h
index b073f8a86bd..64d2c744330 100644
--- a/arch/um/include/skas/skas.h
+++ b/arch/um/include/skas/skas.h
@@ -16,7 +16,6 @@ extern int user_thread(unsigned long stack, int flags);
 extern void new_thread_handler(void);
 extern void handle_syscall(struct uml_pt_regs *regs);
 extern int new_mm(unsigned long stack);
-extern void get_skas_faultinfo(int pid, struct faultinfo * fi);
 extern long execute_syscall_skas(void *r);
 extern unsigned long current_stub_stack(void);
 
diff --git a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h
index 2b6fc8e0f07..45c04999d67 100644
--- a/arch/um/include/um_uaccess.h
+++ b/arch/um/include/um_uaccess.h
@@ -34,7 +34,6 @@ extern int copy_to_user(void __user *to, const void *from, int n);
 
 extern int __do_copy_to_user(void *to, const void *from, int n,
 			     void **fault_addr, jmp_buf **fault_catcher);
-extern void __do_copy(void *to, const void *from, int n);
 
 /*
  * strncpy_from_user: - Copy a NUL terminated string from userspace.
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 9757085a022..a1a9090254c 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -185,7 +185,7 @@ unsigned long find_iomem(char *driver, unsigned long *len_out)
 	return 0;
 }
 
-int setup_iomem(void)
+static int setup_iomem(void)
 {
 	struct iomem_region *region = iomem_regions;
 	unsigned long iomem_start = high_physmem + PAGE_SIZE;
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 47b57b497d5..15e8b7c4de1 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -225,7 +225,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	return ret;
 }
 
-void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
+static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
 		  int error_code)
 {
 	struct siginfo info;
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index c3e2f369c33..47f04f4a346 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -13,14 +13,6 @@
 #include "kern_util.h"
 #include "os.h"
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies_64 * (NSEC_PER_SEC / HZ);
-}
-
 void timer_handler(int sig, struct uml_pt_regs *regs)
 {
 	unsigned long flags;
diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c
index f0f4b040d7c..dd33f040c52 100644
--- a/arch/um/kernel/uaccess.c
+++ b/arch/um/kernel/uaccess.c
@@ -12,7 +12,7 @@
 #include <linux/string.h>
 #include "os.h"
 
-void __do_copy(void *to, const void *from, int n)
+static void __do_copy(void *to, const void *from, int n)
 {
 	memcpy(to, from, n);
 }
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index eb8f2e4be19..63d299df152 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -530,7 +530,7 @@ static void tty_close(int master, int slave)
 		printk(UM_KERN_CONT "No, enabling workaround\n");
 }
 
-void __init check_sigio(void)
+static void __init check_sigio(void)
 {
 	if ((access("/dev/ptmx", R_OK) < 0) &&
 	    (access("/dev/ptyp0", R_OK) < 0)) {
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 5aade6027e4..6ae180703a6 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -126,7 +126,7 @@ void set_sigstack(void *sig_stack, int size)
 		panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
-void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
+static void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
 
 void handle_signal(int sig, struct sigcontext *sc)
 {
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 172ad8f72e1..d6e0a2234b8 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -96,7 +96,7 @@ bad_wait:
 
 extern unsigned long current_stub_stack(void);
 
-void get_skas_faultinfo(int pid, struct faultinfo * fi)
+static void get_skas_faultinfo(int pid, struct faultinfo *fi)
 {
 	int err;
 
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 106fa864155..a27defb8188 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -245,7 +245,7 @@ int __init set_umid(char *name)
 /* Changed in make_umid, which is called during early boot */
 static int umid_setup = 0;
 
-int __init make_umid(void)
+static int __init make_umid(void)
 {
 	int fd, err;
 	char tmp[256];
diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
index a74442d1376..2c6d0d731c1 100644
--- a/arch/um/sys-i386/bugs.c
+++ b/arch/um/sys-i386/bugs.c
@@ -12,7 +12,7 @@
 #include "sysdep/ptrace.h"
 
 /* Set during early boot */
-int host_has_cmov = 1;
+static int host_has_cmov = 1;
 static jmp_buf cmov_test_return;
 
 static void cmov_sigill_test_handler(int sig)
diff --git a/arch/um/sys-i386/checksum.S b/arch/um/sys-i386/checksum.S
index 62c7e564f22..f058d2f82e1 100644
--- a/arch/um/sys-i386/checksum.S
+++ b/arch/um/sys-i386/checksum.S
@@ -243,13 +243,12 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
 	.previous
 
 .align 4
-.globl csum_partial_copy_generic_i386
-				
+
 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
 
 #define ARGBASE 16		
 #define FP		12
-		
+
 csum_partial_copy_generic_i386:
 	subl  $4,%esp	
 	pushl %edi
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index a34263e6b08..a4846a84a7b 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -14,8 +14,8 @@
 
 extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
 
-long write_ldt_entry(struct mm_id * mm_idp, int func, struct user_desc * desc,
-		     void **addr, int done)
+static long write_ldt_entry(struct mm_id *mm_idp, int func,
+		     struct user_desc *desc, void **addr, int done)
 {
 	long res;
 
diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h
index 6aefcd32fc6..315749705ea 100644
--- a/include/asm-um/ptrace-generic.h
+++ b/include/asm-um/ptrace-generic.h
@@ -47,9 +47,6 @@ extern int set_fpregs(struct user_i387_struct __user *buf,
 
 extern void show_regs(struct pt_regs *regs);
 
-extern void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
-			 int error_code);
-
 extern int arch_copy_tls(struct task_struct *new);
 extern void clear_flushed_tls(struct task_struct *task);
 
-- 
GitLab


From f606ddf42fd4edc558eeb48bfee66d2c591571d2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:50 -0700
Subject: [PATCH 240/853] remove the v850 port

Trying to compile the v850 port brings many compile errors, one of which has
existed since at least kernel 2.6.19.

There also seems to be no one willing to bring this port back into a usable
state.

This patch therefore removes the v850 port.

If anyone ever decides to revive the v850 port the code will still be
available from older kernels, and it wouldn't be impossible for the port to
reenter the kernel if it would become actively maintained again.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS                            |    3 -
 arch/v850/Kconfig                      |  353 --------
 arch/v850/Kconfig.debug                |   10 -
 arch/v850/Makefile                     |   54 --
 arch/v850/README                       |   44 -
 arch/v850/configs/rte-ma1-cb_defconfig |  617 -------------
 arch/v850/configs/rte-me2-cb_defconfig |  462 ----------
 arch/v850/configs/sim_defconfig        |  451 ----------
 arch/v850/kernel/Makefile              |   40 -
 arch/v850/kernel/anna-rom.ld           |   16 -
 arch/v850/kernel/anna.c                |  202 -----
 arch/v850/kernel/anna.ld               |   20 -
 arch/v850/kernel/as85ep1-rom.ld        |   21 -
 arch/v850/kernel/as85ep1.c             |  234 -----
 arch/v850/kernel/as85ep1.ld            |   49 --
 arch/v850/kernel/asm-offsets.c         |   58 --
 arch/v850/kernel/bug.c                 |  142 ---
 arch/v850/kernel/entry.S               | 1121 ------------------------
 arch/v850/kernel/fpga85e2c.c           |  167 ----
 arch/v850/kernel/fpga85e2c.ld          |   62 --
 arch/v850/kernel/gbus_int.c            |  271 ------
 arch/v850/kernel/head.S                |  128 ---
 arch/v850/kernel/highres_timer.c       |  132 ---
 arch/v850/kernel/init_task.c           |   48 -
 arch/v850/kernel/intv.S                |   87 --
 arch/v850/kernel/irq.c                 |  123 ---
 arch/v850/kernel/ma.c                  |   69 --
 arch/v850/kernel/mach.c                |   17 -
 arch/v850/kernel/mach.h                |   56 --
 arch/v850/kernel/me2.c                 |   73 --
 arch/v850/kernel/memcons.c             |  135 ---
 arch/v850/kernel/module.c              |  237 -----
 arch/v850/kernel/process.c             |  217 -----
 arch/v850/kernel/procfs.c              |   67 --
 arch/v850/kernel/ptrace.c              |  235 -----
 arch/v850/kernel/rte_cb.c              |  193 ----
 arch/v850/kernel/rte_cb_leds.c         |  137 ---
 arch/v850/kernel/rte_cb_multi.c        |  121 ---
 arch/v850/kernel/rte_ma1_cb-rom.ld     |   14 -
 arch/v850/kernel/rte_ma1_cb.c          |  107 ---
 arch/v850/kernel/rte_ma1_cb.ld         |   57 --
 arch/v850/kernel/rte_mb_a_pci.c        |  819 -----------------
 arch/v850/kernel/rte_me2_cb.c          |  298 -------
 arch/v850/kernel/rte_me2_cb.ld         |   30 -
 arch/v850/kernel/rte_nb85e_cb-multi.ld |   57 --
 arch/v850/kernel/rte_nb85e_cb.c        |   81 --
 arch/v850/kernel/rte_nb85e_cb.ld       |   22 -
 arch/v850/kernel/setup.c               |  329 -------
 arch/v850/kernel/signal.c              |  523 -----------
 arch/v850/kernel/sim.c                 |  172 ----
 arch/v850/kernel/sim.ld                |   13 -
 arch/v850/kernel/sim85e2.c             |  195 -----
 arch/v850/kernel/sim85e2.ld            |   36 -
 arch/v850/kernel/simcons.c             |  161 ----
 arch/v850/kernel/syscalls.c            |  196 -----
 arch/v850/kernel/teg.c                 |   62 --
 arch/v850/kernel/time.c                |  106 ---
 arch/v850/kernel/v850_ksyms.c          |   51 --
 arch/v850/kernel/v850e2_cache.c        |  127 ---
 arch/v850/kernel/v850e_cache.c         |  174 ----
 arch/v850/kernel/v850e_intc.c          |  104 ---
 arch/v850/kernel/v850e_timer_d.c       |   54 --
 arch/v850/kernel/v850e_utils.c         |   62 --
 arch/v850/kernel/vmlinux.lds.S         |  306 -------
 arch/v850/lib/Makefile                 |    6 -
 arch/v850/lib/ashldi3.c                |   62 --
 arch/v850/lib/ashrdi3.c                |   63 --
 arch/v850/lib/checksum.c               |  155 ----
 arch/v850/lib/lshrdi3.c                |   62 --
 arch/v850/lib/memcpy.c                 |   92 --
 arch/v850/lib/memset.c                 |   68 --
 arch/v850/lib/muldi3.c                 |   61 --
 arch/v850/lib/negdi2.c                 |   25 -
 drivers/serial/Kconfig                 |   16 -
 drivers/watchdog/Kconfig               |    2 -
 drivers/watchdog/Makefile              |    2 -
 include/asm-v850/Kbuild                |    1 -
 include/asm-v850/a.out.h               |   21 -
 include/asm-v850/anna.h                |  137 ---
 include/asm-v850/as85ep1.h             |  152 ----
 include/asm-v850/asm.h                 |   32 -
 include/asm-v850/atomic.h              |  131 ---
 include/asm-v850/auxvec.h              |    4 -
 include/asm-v850/bitops.h              |  161 ----
 include/asm-v850/bug.h                 |   25 -
 include/asm-v850/bugs.h                |   16 -
 include/asm-v850/byteorder.h           |   48 -
 include/asm-v850/cache.h               |   26 -
 include/asm-v850/cacheflush.h          |   70 --
 include/asm-v850/checksum.h            |  112 ---
 include/asm-v850/clinkage.h            |   26 -
 include/asm-v850/cputime.h             |    6 -
 include/asm-v850/current.h             |   47 -
 include/asm-v850/delay.h               |   47 -
 include/asm-v850/device.h              |    7 -
 include/asm-v850/div64.h               |    1 -
 include/asm-v850/dma-mapping.h         |   11 -
 include/asm-v850/dma.h                 |   18 -
 include/asm-v850/elf.h                 |   99 ---
 include/asm-v850/emergency-restart.h   |    6 -
 include/asm-v850/entry.h               |  113 ---
 include/asm-v850/errno.h               |    6 -
 include/asm-v850/fb.h                  |   12 -
 include/asm-v850/fcntl.h               |   11 -
 include/asm-v850/flat.h                |  133 ---
 include/asm-v850/fpga85e2c.h           |   82 --
 include/asm-v850/futex.h               |    6 -
 include/asm-v850/gbus_int.h            |   97 --
 include/asm-v850/hardirq.h             |   28 -
 include/asm-v850/highres_timer.h       |   44 -
 include/asm-v850/hw_irq.h              |    4 -
 include/asm-v850/io.h                  |  142 ---
 include/asm-v850/ioctl.h               |    1 -
 include/asm-v850/ioctls.h              |   84 --
 include/asm-v850/ipcbuf.h              |   29 -
 include/asm-v850/irq.h                 |   55 --
 include/asm-v850/irq_regs.h            |    1 -
 include/asm-v850/kdebug.h              |    1 -
 include/asm-v850/kmap_types.h          |   19 -
 include/asm-v850/kvm.h                 |    6 -
 include/asm-v850/linkage.h             |    8 -
 include/asm-v850/local.h               |    6 -
 include/asm-v850/ma.h                  |  101 ---
 include/asm-v850/ma1.h                 |   50 --
 include/asm-v850/machdep.h             |   60 --
 include/asm-v850/macrology.h           |   17 -
 include/asm-v850/me2.h                 |  182 ----
 include/asm-v850/mman.h                |   15 -
 include/asm-v850/mmu.h                 |   11 -
 include/asm-v850/mmu_context.h         |   13 -
 include/asm-v850/module.h              |   62 --
 include/asm-v850/msgbuf.h              |   31 -
 include/asm-v850/mutex.h               |    9 -
 include/asm-v850/page.h                |  124 ---
 include/asm-v850/param.h               |   33 -
 include/asm-v850/pci.h                 |  119 ---
 include/asm-v850/percpu.h              |   14 -
 include/asm-v850/pgalloc.h             |   22 -
 include/asm-v850/pgtable.h             |   59 --
 include/asm-v850/poll.h                |    9 -
 include/asm-v850/posix_types.h         |   72 --
 include/asm-v850/processor.h           |  120 ---
 include/asm-v850/ptrace.h              |  121 ---
 include/asm-v850/resource.h            |    6 -
 include/asm-v850/rte_cb.h              |   78 --
 include/asm-v850/rte_ma1_cb.h          |  128 ---
 include/asm-v850/rte_mb_a_pci.h        |   56 --
 include/asm-v850/rte_me2_cb.h          |  202 -----
 include/asm-v850/rte_nb85e_cb.h        |  111 ---
 include/asm-v850/scatterlist.h         |   31 -
 include/asm-v850/sections.h            |    6 -
 include/asm-v850/segment.h             |   36 -
 include/asm-v850/semaphore.h           |    1 -
 include/asm-v850/sembuf.h              |   25 -
 include/asm-v850/serial.h              |   56 --
 include/asm-v850/setup.h               |    6 -
 include/asm-v850/shmbuf.h              |   42 -
 include/asm-v850/shmparam.h            |    6 -
 include/asm-v850/sigcontext.h          |   25 -
 include/asm-v850/siginfo.h             |    6 -
 include/asm-v850/signal.h              |  168 ----
 include/asm-v850/sim.h                 |   47 -
 include/asm-v850/sim85e2.h             |   69 --
 include/asm-v850/sim85e2c.h            |   26 -
 include/asm-v850/sim85e2s.h            |   28 -
 include/asm-v850/simsyscall.h          |   99 ---
 include/asm-v850/socket.h              |   57 --
 include/asm-v850/sockios.h             |   13 -
 include/asm-v850/stat.h                |   73 --
 include/asm-v850/statfs.h              |    6 -
 include/asm-v850/string.h              |   25 -
 include/asm-v850/system.h              |  123 ---
 include/asm-v850/teg.h                 |  101 ---
 include/asm-v850/termbits.h            |  200 -----
 include/asm-v850/termios.h             |   90 --
 include/asm-v850/thread_info.h         |  129 ---
 include/asm-v850/timex.h               |   18 -
 include/asm-v850/tlb.h                 |   21 -
 include/asm-v850/tlbflush.h            |   64 --
 include/asm-v850/topology.h            |    6 -
 include/asm-v850/types.h               |   36 -
 include/asm-v850/uaccess.h             |  159 ----
 include/asm-v850/ucontext.h            |   14 -
 include/asm-v850/unaligned.h           |   22 -
 include/asm-v850/unistd.h              |  244 ------
 include/asm-v850/user.h                |   52 --
 include/asm-v850/v850e.h               |   21 -
 include/asm-v850/v850e2.h              |   69 --
 include/asm-v850/v850e2_cache.h        |   75 --
 include/asm-v850/v850e_cache.h         |   48 -
 include/asm-v850/v850e_intc.h          |  133 ---
 include/asm-v850/v850e_timer_c.h       |   48 -
 include/asm-v850/v850e_timer_d.h       |   62 --
 include/asm-v850/v850e_uart.h          |   76 --
 include/asm-v850/v850e_uarta.h         |  278 ------
 include/asm-v850/v850e_uartb.h         |  262 ------
 include/asm-v850/v850e_utils.h         |   35 -
 include/linux/audit.h                  |    1 -
 include/linux/module.h                 |    2 +-
 include/linux/serial_core.h            |    3 -
 include/linux/syscalls.h               |    2 +-
 scripts/genksyms/genksyms.c            |    3 +-
 scripts/mod/file2alias.c               |    2 +-
 scripts/mod/mk_elfconfig.c             |    2 +-
 204 files changed, 5 insertions(+), 18406 deletions(-)
 delete mode 100644 arch/v850/Kconfig
 delete mode 100644 arch/v850/Kconfig.debug
 delete mode 100644 arch/v850/Makefile
 delete mode 100644 arch/v850/README
 delete mode 100644 arch/v850/configs/rte-ma1-cb_defconfig
 delete mode 100644 arch/v850/configs/rte-me2-cb_defconfig
 delete mode 100644 arch/v850/configs/sim_defconfig
 delete mode 100644 arch/v850/kernel/Makefile
 delete mode 100644 arch/v850/kernel/anna-rom.ld
 delete mode 100644 arch/v850/kernel/anna.c
 delete mode 100644 arch/v850/kernel/anna.ld
 delete mode 100644 arch/v850/kernel/as85ep1-rom.ld
 delete mode 100644 arch/v850/kernel/as85ep1.c
 delete mode 100644 arch/v850/kernel/as85ep1.ld
 delete mode 100644 arch/v850/kernel/asm-offsets.c
 delete mode 100644 arch/v850/kernel/bug.c
 delete mode 100644 arch/v850/kernel/entry.S
 delete mode 100644 arch/v850/kernel/fpga85e2c.c
 delete mode 100644 arch/v850/kernel/fpga85e2c.ld
 delete mode 100644 arch/v850/kernel/gbus_int.c
 delete mode 100644 arch/v850/kernel/head.S
 delete mode 100644 arch/v850/kernel/highres_timer.c
 delete mode 100644 arch/v850/kernel/init_task.c
 delete mode 100644 arch/v850/kernel/intv.S
 delete mode 100644 arch/v850/kernel/irq.c
 delete mode 100644 arch/v850/kernel/ma.c
 delete mode 100644 arch/v850/kernel/mach.c
 delete mode 100644 arch/v850/kernel/mach.h
 delete mode 100644 arch/v850/kernel/me2.c
 delete mode 100644 arch/v850/kernel/memcons.c
 delete mode 100644 arch/v850/kernel/module.c
 delete mode 100644 arch/v850/kernel/process.c
 delete mode 100644 arch/v850/kernel/procfs.c
 delete mode 100644 arch/v850/kernel/ptrace.c
 delete mode 100644 arch/v850/kernel/rte_cb.c
 delete mode 100644 arch/v850/kernel/rte_cb_leds.c
 delete mode 100644 arch/v850/kernel/rte_cb_multi.c
 delete mode 100644 arch/v850/kernel/rte_ma1_cb-rom.ld
 delete mode 100644 arch/v850/kernel/rte_ma1_cb.c
 delete mode 100644 arch/v850/kernel/rte_ma1_cb.ld
 delete mode 100644 arch/v850/kernel/rte_mb_a_pci.c
 delete mode 100644 arch/v850/kernel/rte_me2_cb.c
 delete mode 100644 arch/v850/kernel/rte_me2_cb.ld
 delete mode 100644 arch/v850/kernel/rte_nb85e_cb-multi.ld
 delete mode 100644 arch/v850/kernel/rte_nb85e_cb.c
 delete mode 100644 arch/v850/kernel/rte_nb85e_cb.ld
 delete mode 100644 arch/v850/kernel/setup.c
 delete mode 100644 arch/v850/kernel/signal.c
 delete mode 100644 arch/v850/kernel/sim.c
 delete mode 100644 arch/v850/kernel/sim.ld
 delete mode 100644 arch/v850/kernel/sim85e2.c
 delete mode 100644 arch/v850/kernel/sim85e2.ld
 delete mode 100644 arch/v850/kernel/simcons.c
 delete mode 100644 arch/v850/kernel/syscalls.c
 delete mode 100644 arch/v850/kernel/teg.c
 delete mode 100644 arch/v850/kernel/time.c
 delete mode 100644 arch/v850/kernel/v850_ksyms.c
 delete mode 100644 arch/v850/kernel/v850e2_cache.c
 delete mode 100644 arch/v850/kernel/v850e_cache.c
 delete mode 100644 arch/v850/kernel/v850e_intc.c
 delete mode 100644 arch/v850/kernel/v850e_timer_d.c
 delete mode 100644 arch/v850/kernel/v850e_utils.c
 delete mode 100644 arch/v850/kernel/vmlinux.lds.S
 delete mode 100644 arch/v850/lib/Makefile
 delete mode 100644 arch/v850/lib/ashldi3.c
 delete mode 100644 arch/v850/lib/ashrdi3.c
 delete mode 100644 arch/v850/lib/checksum.c
 delete mode 100644 arch/v850/lib/lshrdi3.c
 delete mode 100644 arch/v850/lib/memcpy.c
 delete mode 100644 arch/v850/lib/memset.c
 delete mode 100644 arch/v850/lib/muldi3.c
 delete mode 100644 arch/v850/lib/negdi2.c
 delete mode 100644 include/asm-v850/Kbuild
 delete mode 100644 include/asm-v850/a.out.h
 delete mode 100644 include/asm-v850/anna.h
 delete mode 100644 include/asm-v850/as85ep1.h
 delete mode 100644 include/asm-v850/asm.h
 delete mode 100644 include/asm-v850/atomic.h
 delete mode 100644 include/asm-v850/auxvec.h
 delete mode 100644 include/asm-v850/bitops.h
 delete mode 100644 include/asm-v850/bug.h
 delete mode 100644 include/asm-v850/bugs.h
 delete mode 100644 include/asm-v850/byteorder.h
 delete mode 100644 include/asm-v850/cache.h
 delete mode 100644 include/asm-v850/cacheflush.h
 delete mode 100644 include/asm-v850/checksum.h
 delete mode 100644 include/asm-v850/clinkage.h
 delete mode 100644 include/asm-v850/cputime.h
 delete mode 100644 include/asm-v850/current.h
 delete mode 100644 include/asm-v850/delay.h
 delete mode 100644 include/asm-v850/device.h
 delete mode 100644 include/asm-v850/div64.h
 delete mode 100644 include/asm-v850/dma-mapping.h
 delete mode 100644 include/asm-v850/dma.h
 delete mode 100644 include/asm-v850/elf.h
 delete mode 100644 include/asm-v850/emergency-restart.h
 delete mode 100644 include/asm-v850/entry.h
 delete mode 100644 include/asm-v850/errno.h
 delete mode 100644 include/asm-v850/fb.h
 delete mode 100644 include/asm-v850/fcntl.h
 delete mode 100644 include/asm-v850/flat.h
 delete mode 100644 include/asm-v850/fpga85e2c.h
 delete mode 100644 include/asm-v850/futex.h
 delete mode 100644 include/asm-v850/gbus_int.h
 delete mode 100644 include/asm-v850/hardirq.h
 delete mode 100644 include/asm-v850/highres_timer.h
 delete mode 100644 include/asm-v850/hw_irq.h
 delete mode 100644 include/asm-v850/io.h
 delete mode 100644 include/asm-v850/ioctl.h
 delete mode 100644 include/asm-v850/ioctls.h
 delete mode 100644 include/asm-v850/ipcbuf.h
 delete mode 100644 include/asm-v850/irq.h
 delete mode 100644 include/asm-v850/irq_regs.h
 delete mode 100644 include/asm-v850/kdebug.h
 delete mode 100644 include/asm-v850/kmap_types.h
 delete mode 100644 include/asm-v850/kvm.h
 delete mode 100644 include/asm-v850/linkage.h
 delete mode 100644 include/asm-v850/local.h
 delete mode 100644 include/asm-v850/ma.h
 delete mode 100644 include/asm-v850/ma1.h
 delete mode 100644 include/asm-v850/machdep.h
 delete mode 100644 include/asm-v850/macrology.h
 delete mode 100644 include/asm-v850/me2.h
 delete mode 100644 include/asm-v850/mman.h
 delete mode 100644 include/asm-v850/mmu.h
 delete mode 100644 include/asm-v850/mmu_context.h
 delete mode 100644 include/asm-v850/module.h
 delete mode 100644 include/asm-v850/msgbuf.h
 delete mode 100644 include/asm-v850/mutex.h
 delete mode 100644 include/asm-v850/page.h
 delete mode 100644 include/asm-v850/param.h
 delete mode 100644 include/asm-v850/pci.h
 delete mode 100644 include/asm-v850/percpu.h
 delete mode 100644 include/asm-v850/pgalloc.h
 delete mode 100644 include/asm-v850/pgtable.h
 delete mode 100644 include/asm-v850/poll.h
 delete mode 100644 include/asm-v850/posix_types.h
 delete mode 100644 include/asm-v850/processor.h
 delete mode 100644 include/asm-v850/ptrace.h
 delete mode 100644 include/asm-v850/resource.h
 delete mode 100644 include/asm-v850/rte_cb.h
 delete mode 100644 include/asm-v850/rte_ma1_cb.h
 delete mode 100644 include/asm-v850/rte_mb_a_pci.h
 delete mode 100644 include/asm-v850/rte_me2_cb.h
 delete mode 100644 include/asm-v850/rte_nb85e_cb.h
 delete mode 100644 include/asm-v850/scatterlist.h
 delete mode 100644 include/asm-v850/sections.h
 delete mode 100644 include/asm-v850/segment.h
 delete mode 100644 include/asm-v850/semaphore.h
 delete mode 100644 include/asm-v850/sembuf.h
 delete mode 100644 include/asm-v850/serial.h
 delete mode 100644 include/asm-v850/setup.h
 delete mode 100644 include/asm-v850/shmbuf.h
 delete mode 100644 include/asm-v850/shmparam.h
 delete mode 100644 include/asm-v850/sigcontext.h
 delete mode 100644 include/asm-v850/siginfo.h
 delete mode 100644 include/asm-v850/signal.h
 delete mode 100644 include/asm-v850/sim.h
 delete mode 100644 include/asm-v850/sim85e2.h
 delete mode 100644 include/asm-v850/sim85e2c.h
 delete mode 100644 include/asm-v850/sim85e2s.h
 delete mode 100644 include/asm-v850/simsyscall.h
 delete mode 100644 include/asm-v850/socket.h
 delete mode 100644 include/asm-v850/sockios.h
 delete mode 100644 include/asm-v850/stat.h
 delete mode 100644 include/asm-v850/statfs.h
 delete mode 100644 include/asm-v850/string.h
 delete mode 100644 include/asm-v850/system.h
 delete mode 100644 include/asm-v850/teg.h
 delete mode 100644 include/asm-v850/termbits.h
 delete mode 100644 include/asm-v850/termios.h
 delete mode 100644 include/asm-v850/thread_info.h
 delete mode 100644 include/asm-v850/timex.h
 delete mode 100644 include/asm-v850/tlb.h
 delete mode 100644 include/asm-v850/tlbflush.h
 delete mode 100644 include/asm-v850/topology.h
 delete mode 100644 include/asm-v850/types.h
 delete mode 100644 include/asm-v850/uaccess.h
 delete mode 100644 include/asm-v850/ucontext.h
 delete mode 100644 include/asm-v850/unaligned.h
 delete mode 100644 include/asm-v850/unistd.h
 delete mode 100644 include/asm-v850/user.h
 delete mode 100644 include/asm-v850/v850e.h
 delete mode 100644 include/asm-v850/v850e2.h
 delete mode 100644 include/asm-v850/v850e2_cache.h
 delete mode 100644 include/asm-v850/v850e_cache.h
 delete mode 100644 include/asm-v850/v850e_intc.h
 delete mode 100644 include/asm-v850/v850e_timer_c.h
 delete mode 100644 include/asm-v850/v850e_timer_d.h
 delete mode 100644 include/asm-v850/v850e_uart.h
 delete mode 100644 include/asm-v850/v850e_uarta.h
 delete mode 100644 include/asm-v850/v850e_uartb.h
 delete mode 100644 include/asm-v850/v850e_utils.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ffd78c4e27..7e5c7b0290b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4131,9 +4131,6 @@ W:	http://www.uclinux.org/
 L:	uclinux-dev@uclinux.org  (subscribers-only)
 S:	Maintained
 
-UCLINUX FOR NEC V850
-P:	Miles Bader
-
 UCLINUX FOR RENESAS H8/300
 P:	Yoshinori Sato
 M:	ysato@users.sourceforge.jp
diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig
deleted file mode 100644
index 4379f43505e..00000000000
--- a/arch/v850/Kconfig
+++ /dev/null
@@ -1,353 +0,0 @@
-#############################################################################
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
-#
-#############################################################################
-
-mainmenu "uClinux/v850 (w/o MMU) Kernel Configuration"
-
-config MMU
-       	bool
-	default n
-config ZONE_DMA
-	bool
-	default y
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default n
-config GENERIC_FIND_NEXT_BIT
-	bool
-	default y
-config GENERIC_HWEIGHT
-	bool
-	default y
-config GENERIC_CALIBRATE_DELAY
-	bool
-	default y
-
-config GENERIC_HARDIRQS
-	bool
-	default y
-
-config GENERIC_IRQ_PROBE
-	bool
-	default y
-
-config GENERIC_TIME
-	bool
-	default y
-
-config TIME_LOW_RES
-	bool
-	default y
-
-config ARCH_HAS_ILOG2_U32
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U64
-	bool
-	default n
-
-config ARCH_SUPPORTS_AOUT
-	def_bool y
-
-# Turn off some random 386 crap that can affect device config
-config ISA
-	bool
-	default n
-config ISAPNP
-	bool
-	default n
-config EISA
-	bool
-	default n
-config MCA
-	bool
-	default n
-
-
-#############################################################################
-#### v850-specific config
-
-# Define the architecture
-config V850
-	bool
-	default y
-	select HAVE_IDE
-
-menu "Processor type and features"
-
-   choice
-	  prompt "Platform"
-	  default GDB
-      config V850E_SIM
-      	     bool "GDB"
-      config RTE_CB_MA1
-      	     bool "RTE-V850E/MA1-CB"
-      config RTE_CB_NB85E
-      	     bool "RTE-V850E/NB85E-CB"
-      config RTE_CB_ME2
-      	     bool "RTE-V850E/ME2-CB"
-      config V850E_AS85EP1
-      	     bool "AS85EP1"
-      config V850E2_SIM85E2C
-      	     bool "sim85e2c"
-      config V850E2_SIM85E2S
-      	     bool "sim85e2s"
-      config V850E2_FPGA85E2C
-      	     bool "NA85E2C-FPGA"
-      config V850E2_ANNA
-      	     bool "Anna"
-   endchoice
-
-   #### V850E processor-specific config
-
-   # All CPUs currently supported use the v850e architecture
-   config V850E
-   	  bool
-	  default y
-
-   # The RTE-V850E/MA1-CB is the only type of V850E/MA1 platform we
-   # currently support
-   config V850E_MA1
-   	  bool
-	  depends on RTE_CB_MA1
-	  default y
-   # Similarly for the RTE-V850E/NB85E-CB - V850E/TEG
-   config V850E_TEG
-   	  bool
-	  depends on RTE_CB_NB85E
-	  default y
-   # ... and the RTE-V850E/ME2-CB - V850E/ME2
-   config V850E_ME2
-   	  bool
-	  depends on RTE_CB_ME2
-	  default y
-
-
-   #### sim85e2-specific config
-
-   config V850E2_SIM85E2
-   	  bool
-	  depends on V850E2_SIM85E2C || V850E2_SIM85E2S
-	  default y
-
-
-   #### V850E2 processor-specific config
-
-   # V850E2 processors
-   config V850E2
-   	  bool
-	  depends on V850E2_SIM85E2 || V850E2_FPGA85E2C || V850E2_ANNA
-	  default y
-
-
-   #### RTE-CB platform-specific config
-
-   # Boards in the RTE-x-CB series
-   config RTE_CB
-   	  bool
-	  depends on RTE_CB_MA1 || RTE_CB_NB85E || RTE_CB_ME2
-	  default y
-
-   config RTE_CB_MULTI
-   	  bool
-	  # RTE_CB_NB85E can either have multi ROM support or not, but
-	  # other platforms (currently only RTE_CB_MA1) require it.
-	  prompt "Multi monitor ROM support" if RTE_CB_NB85E
-	  depends on RTE_CB_MA1 || RTE_CB_NB85E
-	  default y
-
-   config RTE_CB_MULTI_DBTRAP
-   	  bool "Pass illegal insn trap / dbtrap to kernel"
-	  depends on RTE_CB_MULTI
-	  default n
-
-   config RTE_CB_MA1_KSRAM
-   	  bool "Kernel in SRAM (limits size of kernel)"
-	  depends on RTE_CB_MA1 && RTE_CB_MULTI
-	  default n
-
-   config RTE_MB_A_PCI
-   	  bool "Mother-A PCI support"
-	  depends on RTE_CB
-	  default y
-
-   # The GBUS is used to talk to the RTE-MOTHER-A board
-   config RTE_GBUS_INT
-   	  bool
-	  depends on RTE_MB_A_PCI
-	  default y
-
-   # The only PCI bus we support is on the RTE-MOTHER-A board
-   config PCI
-   	  bool
-	  default RTE_MB_A_PCI
-
-   #### Some feature-specific configs
-
-   # Everything except for the GDB simulator uses the same interrupt controller
-   config V850E_INTC
-   	  bool
-	  default !V850E_SIM
-
-   # Everything except for the various simulators uses the "Timer D" unit
-   config V850E_TIMER_D
-   	  bool
-	  default !V850E_SIM && !V850E2_SIM85E2
-
-   # Cache control used on some v850e1 processors
-   config V850E_CACHE
-          bool
-	  default V850E_TEG || V850E_ME2
-
-   # Cache control used on v850e2 processors; I think this should
-   # actually apply to more, but currently only the SIM85E2S uses it
-   config V850E2_CACHE
-   	  bool
-	  default V850E2_SIM85E2S
-
-   config NO_CACHE
-   	  bool
-	  default !V850E_CACHE && !V850E2_CACHE
-
-   # HZ depends on the platform
-   config HZ
-	  int
-	  default 24  if V850E_SIM || V850E2_SIM85E2
-	  default 122 if V850E2_FPGA85E2C
-	  default 100
-
-   #### Misc config
-
-   config ROM_KERNEL
-   	  bool "Kernel in ROM"
-	  depends on V850E2_ANNA || V850E_AS85EP1 || RTE_CB_ME2
-
-   # Some platforms pre-zero memory, in which case the kernel doesn't need to
-   config ZERO_BSS
-   	  bool
-	  depends on !V850E2_SIM85E2C
-	  default y
-
-   # The crappy-ass zone allocator requires that the start of allocatable
-   # memory be aligned to the largest possible allocation.
-   config FORCE_MAX_ZONEORDER
-   	  int
-	  default 8 if V850E2_SIM85E2C || V850E2_FPGA85E2C
-
-   config V850E_HIGHRES_TIMER
-   	  bool "High resolution timer support"
-	  depends on V850E_TIMER_D
-   config TIME_BOOTUP
-   	  bool "Time bootup"
-	  depends on V850E_HIGHRES_TIMER
-
-   config RESET_GUARD
-   	  bool "Reset Guard"
-
-source "mm/Kconfig"
-
-endmenu
-
-
-#############################################################################
-
-source init/Kconfig
-
-#############################################################################
-
-menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
-
-#    config PCI
-# 	   bool "PCI support"
-# 	   help
-# 	     Support for PCI bus.
-
-source "drivers/pci/Kconfig"
-
-source "drivers/pcmcia/Kconfig"
-
-source "drivers/pci/hotplug/Kconfig"
-
-endmenu
-
-menu "Executable file formats"
-
-source "fs/Kconfig.binfmt"
-
-endmenu
-
-source "net/Kconfig"
-
-#############################################################################
-
-source "drivers/base/Kconfig"
-
-source drivers/mtd/Kconfig
-
-source drivers/parport/Kconfig
-
-#source drivers/pnp/Kconfig
-
-source drivers/block/Kconfig
-
-#############################################################################
-
-menu "Disk device support"
-
-source "drivers/ide/Kconfig"
-
-source "drivers/scsi/Kconfig"
-
-endmenu
-
-#############################################################################
-
-
-source "drivers/md/Kconfig"
-
-source "drivers/message/fusion/Kconfig"
-
-source "drivers/ieee1394/Kconfig"
-
-source "drivers/message/i2o/Kconfig"
-
-source "drivers/net/Kconfig"
-
-source "drivers/isdn/Kconfig"
-
-#source "drivers/telephony/Kconfig"
-
-#
-# input before char - char/joystick depends on it. As does USB.
-#
-source "drivers/input/Kconfig"
-
-source "drivers/char/Kconfig"
-
-#source drivers/misc/Config.in
-source "drivers/media/Kconfig"
-
-source "fs/Kconfig"
-
-source "drivers/video/Kconfig"
-
-source "sound/Kconfig"
-
-source "drivers/usb/Kconfig"
-
-source "arch/v850/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
-
-#############################################################################
diff --git a/arch/v850/Kconfig.debug b/arch/v850/Kconfig.debug
deleted file mode 100644
index 4acfb9cca1c..00000000000
--- a/arch/v850/Kconfig.debug
+++ /dev/null
@@ -1,10 +0,0 @@
-menu "Kernel hacking"
-
-source "lib/Kconfig.debug"
-
-config NO_KERNEL_MSG
-	bool "Suppress Kernel BUG Messages"
-	help
-	  Do not output any debug BUG messages within the kernel.
-
-endmenu
diff --git a/arch/v850/Makefile b/arch/v850/Makefile
deleted file mode 100644
index 8b629df0029..00000000000
--- a/arch/v850/Makefile
+++ /dev/null
@@ -1,54 +0,0 @@
-#
-# arch/v850/Makefile
-#
-#  Copyright (C) 2001,02,03,05  NEC Corporation
-#  Copyright (C) 2001,02,03,05  Miles Bader <miles@gnu.org>
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" and "archdep" for cleaning up and making dependencies for
-# this architecture
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-
-arch_dir = arch/v850
-
-KBUILD_CFLAGS += -mv850e
-# r16 is a fixed pointer to the current task
-KBUILD_CFLAGS += -ffixed-r16 -mno-prolog-function
-KBUILD_CFLAGS += -fno-builtin
-KBUILD_CFLAGS += -D__linux__ -DUTS_SYSNAME=\"uClinux\"
-
-# By default, build a kernel that runs on the gdb v850 simulator.
-KBUILD_DEFCONFIG := sim_defconfig
-
-# This prevents the linker from consolidating the .gnu.linkonce.this_module
-# section into .text (which the v850 default linker script for -r does for
-# some reason)
-LDFLAGS_MODULE += --unique=.gnu.linkonce.this_module
-
-OBJCOPY_FLAGS_BLOB := -I binary -O elf32-little -B v850e
-
-
-head-y := $(arch_dir)/kernel/head.o $(arch_dir)/kernel/init_task.o
-core-y += $(arch_dir)/kernel/
-libs-y += $(arch_dir)/lib/
-
-
-# Deal with the initial contents of the root device
-ifdef ROOT_FS_IMAGE
-core-y += root_fs_image.o
-
-# Because the kernel build-system erases all explicit .o build rules, we
-# have to use an intermediate target to fool it into building for us.
-# This results in it being built anew each time, but that's alright.
-root_fs_image.o: root_fs_image_force
-
-root_fs_image_force: $(ROOT_FS_IMAGE)
-	$(OBJCOPY) $(OBJCOPY_FLAGS_BLOB) --rename-section .data=.root,alloc,load,readonly,data,contents $< root_fs_image.o
-endif
-
-CLEAN_FILES += root_fs_image.o
diff --git a/arch/v850/README b/arch/v850/README
deleted file mode 100644
index 12f7f7a665e..00000000000
--- a/arch/v850/README
+++ /dev/null
@@ -1,44 +0,0 @@
-This port to the NEC V850E processor supports the following platforms:
-
-   "sim"
-	The gdb v850e simulator (CONFIG_V850E_SIM).
-
-   "rte-ma1-cb"
-	The Midas labs RTE-V850E/MA1-CB and RTE-V850E/NB85E-CB evaluation
-	boards (CONFIG_RTE_CB_MA1 and CONFIG_RTE_CB_NB85E).  This support
-	has only been tested when running with the Multi-debugger monitor
-	ROM (for the Green Hills Multi debugger).  The optional NEC
-	Solution Gear RTE-MOTHER-A motherboard is also supported, which
-	allows PCI boards to be used (CONFIG_RTE_MB_A_PCI).
-
-   "rte-me2-cb"
-	The Midas labs RTE-V850E/ME2-CB evaluation board (CONFIG_RTE_CB_ME2).
-     	This has only been tested using a kernel downloaded via an ICE
-     	connection using the Multi debugger.  Support for the RTE-MOTHER-A is
-     	present, but hasn't been tested (unlike the other Midas labs cpu
-     	boards, the RTE-V850E/ME2-CB includes an ethernet adaptor).
-
-   "as85ep1"
-	The NEC AS85EP1 V850E evaluation chip/board (CONFIG_V850E_AS85EP1).
-
-   "anna"
-	The NEC `Anna' (board/chip) implementation of the V850E2 processor
-	(CONFIG_V850E2_ANNA).
-
-   "sim85e2c", "sim85e2s"
-   	The sim85e2c and sim85e2s simulators, which are verilog simulations
-	of the V850E2 NA85E2C/NA85E2S cpu cores (CONFIG_V850E2_SIM85E2C and
-	CONFIG_V850E2_SIM85E2S).
-
-   "fpga85e2c"
-	A FPGA implementation of the V850E2 NA85E2C cpu core
-	(CONFIG_V850E2_FPGA85E2C).
-
-To get a default kernel configuration for a particular platform, you can
-use a <platform>_defconfig make target (e.g., "make rte-me2-cb_defconfig");
-to see which default configurations are possible, look in the directory
-"arch/v850/configs".
-
-Porting to anything with a V850E/MA1 or MA2 processor should be simple.
-See the file <asm-v850/machdep.h> and the files it includes for an example of
-how to add platform/chip-specific support.
diff --git a/arch/v850/configs/rte-ma1-cb_defconfig b/arch/v850/configs/rte-ma1-cb_defconfig
deleted file mode 100644
index 1a5beda36e2..00000000000
--- a/arch/v850/configs/rte-ma1-cb_defconfig
+++ /dev/null
@@ -1,617 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-uc0
-# Fri Sep  2 13:54:27 2005
-#
-# CONFIG_MMU is not set
-# CONFIG_UID16 is not set
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-# CONFIG_ISA is not set
-# CONFIG_ISAPNP is not set
-# CONFIG_EISA is not set
-# CONFIG_MCA is not set
-CONFIG_V850=y
-
-#
-# Processor type and features
-#
-# CONFIG_V850E_SIM is not set
-CONFIG_RTE_CB_MA1=y
-# CONFIG_RTE_CB_NB85E is not set
-# CONFIG_RTE_CB_ME2 is not set
-# CONFIG_V850E_AS85EP1 is not set
-# CONFIG_V850E2_SIM85E2C is not set
-# CONFIG_V850E2_SIM85E2S is not set
-# CONFIG_V850E2_FPGA85E2C is not set
-# CONFIG_V850E2_ANNA is not set
-CONFIG_V850E=y
-CONFIG_V850E_MA1=y
-CONFIG_RTE_CB=y
-CONFIG_RTE_CB_MULTI=y
-CONFIG_RTE_CB_MULTI_DBTRAP=y
-# CONFIG_RTE_CB_MA1_KSRAM is not set
-CONFIG_RTE_MB_A_PCI=y
-CONFIG_RTE_GBUS_INT=y
-CONFIG_PCI=y
-CONFIG_V850E_INTC=y
-CONFIG_V850E_TIMER_D=y
-# CONFIG_V850E_CACHE is not set
-# CONFIG_V850E2_CACHE is not set
-CONFIG_NO_CACHE=y
-CONFIG_ZERO_BSS=y
-# CONFIG_V850E_HIGHRES_TIMER is not set
-# CONFIG_RESET_GUARD is not set
-CONFIG_LARGE_ALLOCS=y
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-
-#
-# Code maturity level options
-#
-# CONFIG_EXPERIMENTAL is not set
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_SYSCTL is not set
-# CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_KOBJECT_UEVENT=y
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-# CONFIG_KALLSYMS is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-# CONFIG_BASE_FULL is not set
-# CONFIG_FUTEX is not set
-# CONFIG_EPOLL is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_BASE_SMALL=1
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-
-#
-# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
-#
-# CONFIG_PCI_LEGACY_PROC is not set
-# CONFIG_PCI_NAMES is not set
-# CONFIG_PCI_DEBUG is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# PCI Hotplug Support
-#
-
-#
-# Executable file formats
-#
-CONFIG_BINFMT_FLAT=y
-# CONFIG_BINFMT_ZFLAT is not set
-# CONFIG_BINFMT_SHARED_FLAT is not set
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_PACKET is not set
-# CONFIG_UNIX is not set
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-# CONFIG_IP_TCPDIAG is not set
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-# CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-CONFIG_MTD=y
-# CONFIG_MTD_DEBUG is not set
-# CONFIG_MTD_CONCAT is not set
-# CONFIG_MTD_PARTITIONS is not set
-
-#
-# User Modules And Translation Layers
-#
-# CONFIG_MTD_CHAR is not set
-CONFIG_MTD_BLOCK=y
-# CONFIG_FTL is not set
-# CONFIG_NFTL is not set
-# CONFIG_INFTL is not set
-
-#
-# RAM/ROM/Flash chip drivers
-#
-# CONFIG_MTD_CFI is not set
-# CONFIG_MTD_JEDECPROBE is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_RAM is not set
-# CONFIG_MTD_ROM is not set
-# CONFIG_MTD_ABSENT is not set
-
-#
-# Mapping drivers for chip access
-#
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
-# CONFIG_MTD_PLATRAM is not set
-
-#
-# Self-contained MTD device drivers
-#
-# CONFIG_MTD_PMC551 is not set
-CONFIG_MTD_SLRAM=y
-# CONFIG_MTD_PHRAM is not set
-# CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
-
-#
-# Disk-On-Chip Device Drivers
-#
-# CONFIG_MTD_DOC2000 is not set
-# CONFIG_MTD_DOC2001 is not set
-# CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
-# CONFIG_MTD_NAND is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# Disk device support
-#
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Network device support
-#
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_NET_VENDOR_SMC is not set
-
-#
-# Tulip family network device support
-#
-# CONFIG_NET_TULIP is not set
-# CONFIG_HP100 is not set
-# CONFIG_NE2000 is not set
-CONFIG_NET_PCI=y
-# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
-# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_DGRS is not set
-CONFIG_EEPRO100=y
-# CONFIG_E100 is not set
-# CONFIG_FEALNX is not set
-# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
-# CONFIG_8139TOO is not set
-# CONFIG_SIS900 is not set
-# CONFIG_EPIC100 is not set
-# CONFIG_SUNDANCE is not set
-# CONFIG_TLAN is not set
-# CONFIG_VIA_RHINE is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_R8169 is not set
-# CONFIG_SK98LIN is not set
-# CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
-# CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_V850E_UART=y
-CONFIG_V850E_UART_CONSOLE=y
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
-# CONFIG_UNIX98_PTYS is not set
-# CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_DRM is not set
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_JBD is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-CONFIG_ROMFS_FS=y
-# CONFIG_MAGIC_ROM_PTR is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_JFFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
-# CONFIG_NFSD is not set
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
-# CONFIG_USB is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_MAGIC_SYSRQ is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_FS is not set
-# CONFIG_NO_KERNEL_MSG is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
diff --git a/arch/v850/configs/rte-me2-cb_defconfig b/arch/v850/configs/rte-me2-cb_defconfig
deleted file mode 100644
index 15e66647806..00000000000
--- a/arch/v850/configs/rte-me2-cb_defconfig
+++ /dev/null
@@ -1,462 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-uc0
-# Fri Sep  2 13:47:50 2005
-#
-# CONFIG_MMU is not set
-# CONFIG_UID16 is not set
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-# CONFIG_ISA is not set
-# CONFIG_ISAPNP is not set
-# CONFIG_EISA is not set
-# CONFIG_MCA is not set
-CONFIG_V850=y
-
-#
-# Processor type and features
-#
-# CONFIG_V850E_SIM is not set
-# CONFIG_RTE_CB_MA1 is not set
-# CONFIG_RTE_CB_NB85E is not set
-CONFIG_RTE_CB_ME2=y
-# CONFIG_V850E_AS85EP1 is not set
-# CONFIG_V850E2_SIM85E2C is not set
-# CONFIG_V850E2_SIM85E2S is not set
-# CONFIG_V850E2_FPGA85E2C is not set
-# CONFIG_V850E2_ANNA is not set
-CONFIG_V850E=y
-CONFIG_V850E_ME2=y
-CONFIG_RTE_CB=y
-# CONFIG_RTE_MB_A_PCI is not set
-# CONFIG_PCI is not set
-CONFIG_V850E_INTC=y
-CONFIG_V850E_TIMER_D=y
-CONFIG_V850E_CACHE=y
-# CONFIG_V850E2_CACHE is not set
-# CONFIG_NO_CACHE is not set
-# CONFIG_ROM_KERNEL is not set
-CONFIG_ZERO_BSS=y
-# CONFIG_V850E_HIGHRES_TIMER is not set
-# CONFIG_RESET_GUARD is not set
-CONFIG_LARGE_ALLOCS=y
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-
-#
-# Code maturity level options
-#
-# CONFIG_EXPERIMENTAL is not set
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_SYSCTL is not set
-# CONFIG_HOTPLUG is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-# CONFIG_KALLSYMS is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-# CONFIG_BASE_FULL is not set
-# CONFIG_FUTEX is not set
-# CONFIG_EPOLL is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_BASE_SMALL=1
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-
-#
-# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
-#
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# PCI Hotplug Support
-#
-
-#
-# Executable file formats
-#
-CONFIG_BINFMT_FLAT=y
-# CONFIG_BINFMT_ZFLAT is not set
-# CONFIG_BINFMT_SHARED_FLAT is not set
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Networking
-#
-# CONFIG_NET is not set
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-CONFIG_MTD=y
-# CONFIG_MTD_DEBUG is not set
-# CONFIG_MTD_CONCAT is not set
-# CONFIG_MTD_PARTITIONS is not set
-
-#
-# User Modules And Translation Layers
-#
-# CONFIG_MTD_CHAR is not set
-CONFIG_MTD_BLOCK=y
-# CONFIG_FTL is not set
-# CONFIG_NFTL is not set
-# CONFIG_INFTL is not set
-
-#
-# RAM/ROM/Flash chip drivers
-#
-# CONFIG_MTD_CFI is not set
-# CONFIG_MTD_JEDECPROBE is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_RAM is not set
-# CONFIG_MTD_ROM is not set
-# CONFIG_MTD_ABSENT is not set
-
-#
-# Mapping drivers for chip access
-#
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
-# CONFIG_MTD_PLATRAM is not set
-
-#
-# Self-contained MTD device drivers
-#
-CONFIG_MTD_SLRAM=y
-# CONFIG_MTD_PHRAM is not set
-# CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
-
-#
-# Disk-On-Chip Device Drivers
-#
-# CONFIG_MTD_DOC2000 is not set
-# CONFIG_MTD_DOC2001 is not set
-# CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
-# CONFIG_MTD_NAND is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-
-#
-# Disk device support
-#
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_LIBPS2 is not set
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=1
-# CONFIG_SERIAL_8250_EXTENDED is not set
-
-#
-# Non-8250 serial port support
-#
-# CONFIG_V850E_UART is not set
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
-# CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_JBD is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-CONFIG_ROMFS_FS=y
-# CONFIG_MAGIC_ROM_PTR is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_JFFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_MAGIC_SYSRQ is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_FS is not set
-# CONFIG_NO_KERNEL_MSG is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
diff --git a/arch/v850/configs/sim_defconfig b/arch/v850/configs/sim_defconfig
deleted file mode 100644
index f31ba7398ad..00000000000
--- a/arch/v850/configs/sim_defconfig
+++ /dev/null
@@ -1,451 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-uc0
-# Fri Sep  2 13:36:43 2005
-#
-# CONFIG_MMU is not set
-# CONFIG_UID16 is not set
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-# CONFIG_ISA is not set
-# CONFIG_ISAPNP is not set
-# CONFIG_EISA is not set
-# CONFIG_MCA is not set
-CONFIG_V850=y
-
-#
-# Processor type and features
-#
-CONFIG_V850E_SIM=y
-# CONFIG_RTE_CB_MA1 is not set
-# CONFIG_RTE_CB_NB85E is not set
-# CONFIG_RTE_CB_ME2 is not set
-# CONFIG_V850E_AS85EP1 is not set
-# CONFIG_V850E2_SIM85E2C is not set
-# CONFIG_V850E2_SIM85E2S is not set
-# CONFIG_V850E2_FPGA85E2C is not set
-# CONFIG_V850E2_ANNA is not set
-CONFIG_V850E=y
-# CONFIG_PCI is not set
-# CONFIG_V850E_INTC is not set
-# CONFIG_V850E_TIMER_D is not set
-# CONFIG_V850E_CACHE is not set
-# CONFIG_V850E2_CACHE is not set
-CONFIG_NO_CACHE=y
-CONFIG_ZERO_BSS=y
-# CONFIG_RESET_GUARD is not set
-CONFIG_LARGE_ALLOCS=y
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-
-#
-# Code maturity level options
-#
-# CONFIG_EXPERIMENTAL is not set
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_SYSCTL is not set
-# CONFIG_HOTPLUG is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-# CONFIG_KALLSYMS is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-# CONFIG_BASE_FULL is not set
-# CONFIG_FUTEX is not set
-# CONFIG_EPOLL is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_BASE_SMALL=1
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-
-#
-# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
-#
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# PCI Hotplug Support
-#
-
-#
-# Executable file formats
-#
-CONFIG_BINFMT_FLAT=y
-# CONFIG_BINFMT_ZFLAT is not set
-# CONFIG_BINFMT_SHARED_FLAT is not set
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Networking
-#
-# CONFIG_NET is not set
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-CONFIG_MTD=y
-# CONFIG_MTD_DEBUG is not set
-# CONFIG_MTD_CONCAT is not set
-# CONFIG_MTD_PARTITIONS is not set
-
-#
-# User Modules And Translation Layers
-#
-# CONFIG_MTD_CHAR is not set
-CONFIG_MTD_BLOCK=y
-# CONFIG_FTL is not set
-# CONFIG_NFTL is not set
-# CONFIG_INFTL is not set
-
-#
-# RAM/ROM/Flash chip drivers
-#
-# CONFIG_MTD_CFI is not set
-# CONFIG_MTD_JEDECPROBE is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_RAM is not set
-# CONFIG_MTD_ROM is not set
-# CONFIG_MTD_ABSENT is not set
-
-#
-# Mapping drivers for chip access
-#
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
-# CONFIG_MTD_PLATRAM is not set
-
-#
-# Self-contained MTD device drivers
-#
-CONFIG_MTD_SLRAM=y
-# CONFIG_MTD_PHRAM is not set
-# CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
-
-#
-# Disk-On-Chip Device Drivers
-#
-# CONFIG_MTD_DOC2000 is not set
-# CONFIG_MTD_DOC2001 is not set
-# CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
-# CONFIG_MTD_NAND is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-
-#
-# Disk device support
-#
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_LIBPS2 is not set
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-# CONFIG_UNIX98_PTYS is not set
-# CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_JBD is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-CONFIG_ROMFS_FS=y
-# CONFIG_MAGIC_ROM_PTR is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_JFFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_MAGIC_SYSRQ is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_FS is not set
-# CONFIG_NO_KERNEL_MSG is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
diff --git a/arch/v850/kernel/Makefile b/arch/v850/kernel/Makefile
deleted file mode 100644
index da5889c5357..00000000000
--- a/arch/v850/kernel/Makefile
+++ /dev/null
@@ -1,40 +0,0 @@
-#
-# arch/v850/kernel/Makefile
-#
-#  Copyright (C) 2001,02,03  NEC Electronics Corporation
-#  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-
-extra-y := head.o init_task.o vmlinux.lds
-
-obj-y += intv.o entry.o process.o syscalls.o time.o setup.o \
-	 signal.o irq.o mach.o ptrace.o bug.o
-obj-$(CONFIG_MODULES)		+= module.o v850_ksyms.o
-# chip-specific code
-obj-$(CONFIG_V850E_MA1)		+= ma.o
-obj-$(CONFIG_V850E_ME2)		+= me2.o
-obj-$(CONFIG_V850E_TEG)		+= teg.o
-obj-$(CONFIG_V850E_AS85EP1)	+= as85ep1.o
-obj-$(CONFIG_V850E2_ANNA)	+= anna.o
-# platform-specific code
-obj-$(CONFIG_V850E_SIM)		+= sim.o simcons.o
-obj-$(CONFIG_V850E2_SIM85E2)	+= sim85e2.o memcons.o
-obj-$(CONFIG_V850E2_FPGA85E2C)	+= fpga85e2c.o memcons.o
-obj-$(CONFIG_RTE_CB)		+= rte_cb.o rte_cb_leds.o
-obj-$(CONFIG_RTE_CB_MA1)	+= rte_ma1_cb.o
-obj-$(CONFIG_RTE_CB_ME2)	+= rte_me2_cb.o
-obj-$(CONFIG_RTE_CB_NB85E)	+= rte_nb85e_cb.o
-obj-$(CONFIG_RTE_CB_MULTI)	+= rte_cb_multi.o
-obj-$(CONFIG_RTE_MB_A_PCI)	+= rte_mb_a_pci.o
-obj-$(CONFIG_RTE_GBUS_INT)	+= gbus_int.o
-# feature-specific code
-obj-$(CONFIG_V850E_INTC)	+= v850e_intc.o
-obj-$(CONFIG_V850E_TIMER_D)	+= v850e_timer_d.o v850e_utils.o
-obj-$(CONFIG_V850E_CACHE)	+= v850e_cache.o
-obj-$(CONFIG_V850E2_CACHE)	+= v850e2_cache.o
-obj-$(CONFIG_V850E_HIGHRES_TIMER) += highres_timer.o
-obj-$(CONFIG_PROC_FS)		+= procfs.o
diff --git a/arch/v850/kernel/anna-rom.ld b/arch/v850/kernel/anna-rom.ld
deleted file mode 100644
index 7c54e7e3f1b..00000000000
--- a/arch/v850/kernel/anna-rom.ld
+++ /dev/null
@@ -1,16 +0,0 @@
-/* Linker script for the Midas labs Anna V850E2 evaluation board
-   (CONFIG_V850E2_ANNA), with kernel in ROM (CONFIG_ROM_KERNEL).  */
-
-MEMORY {
-	/* 8MB of flash ROM.  */
-	ROM   : ORIGIN = 0,          LENGTH = 0x00800000
-
-	/* 1MB of static RAM.  This memory is mirrored 64 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-	/* 64MB of DRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	ROMK_SECTIONS(ROM, SRAM)
-}
diff --git a/arch/v850/kernel/anna.c b/arch/v850/kernel/anna.c
deleted file mode 100644
index 5978a25170f..00000000000
--- a/arch/v850/kernel/anna.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * arch/v850/kernel/anna.c -- Anna V850E2 evaluation chip/board
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/major.h>
-#include <linux/irq.h>
-
-#include <asm/machdep.h>
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/v850e_timer_d.h>
-#include <asm/v850e_uart.h>
-
-#include "mach.h"
-
-
-/* SRAM and SDRAM are vaguely contiguous (with a big hole in between; see
-   mach_reserve_bootmem for details); use both as one big area.  */
-#define RAM_START 	SRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-
-/* The bits of this port are connected to an 8-LED bar-graph.  */
-#define LEDS_PORT	0
-
-
-static void anna_led_tick (void);
-
-
-void __init mach_early_init (void)
-{
-	ANNA_ILBEN    = 0;
-
-	V850E2_CSC(0) = 0x402F;
-	V850E2_CSC(1) = 0x4000;
-	V850E2_BPC    = 0;
-	V850E2_BSC    = 0xAAAA;
-	V850E2_BEC    = 0;
-
-#if 0
-	V850E2_BHC    = 0xFFFF;	/* icache all memory, dcache all */
-#else
-	V850E2_BHC    = 0;	/* cache no memory */
-#endif
-	V850E2_BCT(0) = 0xB088;
-	V850E2_BCT(1) = 0x0008;
-	V850E2_DWC(0) = 0x0027;
-	V850E2_DWC(1) = 0;
-	V850E2_BCC    = 0x0006;
-	V850E2_ASC    = 0;
-	V850E2_LBS    = 0x0089;
-	V850E2_SCR(3) = 0x21A9;
-	V850E2_RFS(3) = 0x8121;
-
-	v850e_intc_disable_irqs ();
-}
-
-void __init mach_setup (char **cmdline)
-{
-	ANNA_PORT_PM (LEDS_PORT) = 0;	/* Make all LED pins output pins.  */
-	mach_tick = anna_led_tick;
-}
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-void __init mach_reserve_bootmem ()
-{
-	/* The space between SRAM and SDRAM is filled with duplicate
-	   images of SRAM.  Prevent the kernel from using them.  */
-	reserve_bootmem (SRAM_ADDR + SRAM_SIZE,
-			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE),
-			 BOOTMEM_DEFAULT);
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, 		NUM_MACH_IRQS,	1, 7 },
-	{ "PIN", IRQ_INTP(0),   IRQ_INTP_NUM,   1, 4 },
-	{ "CCC", IRQ_INTCCC(0),	IRQ_INTCCC_NUM, 1, 5 },
-	{ "CMD", IRQ_INTCMD(0), IRQ_INTCMD_NUM,	1, 5 },
-	{ "DMA", IRQ_INTDMA(0), IRQ_INTDMA_NUM,	1, 2 },
-	{ "DMXER", IRQ_INTDMXER,1,		1, 2 },
-	{ "SRE", IRQ_INTSRE(0), IRQ_INTSRE_NUM,	3, 3 },
-	{ "SR",	 IRQ_INTSR(0),	IRQ_INTSR_NUM, 	3, 4 },
-	{ "ST",  IRQ_INTST(0), 	IRQ_INTST_NUM, 	3, 5 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-void __init mach_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-void machine_restart (char *__unused)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	asm ("jmp r0"); /* Jump to the reset vector.  */
-}
-
-void machine_halt (void)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	local_irq_disable ();	/* Ignore all interrupts.  */
-	ANNA_PORT_IO(LEDS_PORT) = 0xAA;	/* Note that we halted.  */
-	for (;;)
-		asm ("halt; nop; nop; nop; nop; nop");
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
-/* Called before configuring an on-chip UART.  */
-void anna_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	/* The Anna connects some general-purpose I/O pins on the CPU to
-	   the RTS/CTS lines of UART 1's serial connection.  I/O pins P07
-	   and P37 are RTS and CTS respectively.  */
-	if (chan == 1) {
-		ANNA_PORT_PM(0) &= ~0x80; /* P07 in output mode */
-		ANNA_PORT_PM(3) |=  0x80; /* P37 in input mode */
-	}
-}
-
-/* Minimum and maximum bounds for the moving upper LED boundary in the
-   clock tick display.  We can't use the last bit because it's used for
-   UART0's CTS output.  */
-#define MIN_MAX_POS 0
-#define MAX_MAX_POS 6
-
-/* There are MAX_MAX_POS^2 - MIN_MAX_POS^2 cycles in the animation, so if
-   we pick 6 and 0 as above, we get 49 cycles, which is when divided into
-   the standard 100 value for HZ, gives us an almost 1s total time.  */
-#define TICKS_PER_FRAME \
-	(HZ / (MAX_MAX_POS * MAX_MAX_POS - MIN_MAX_POS * MIN_MAX_POS))
-
-static void anna_led_tick ()
-{
-	static unsigned counter = 0;
-	
-	if (++counter == TICKS_PER_FRAME) {
-		static int pos = 0, max_pos = MAX_MAX_POS, dir = 1;
-
-		if (dir > 0 && pos == max_pos) {
-			dir = -1;
-			if (max_pos == MIN_MAX_POS)
-				max_pos = MAX_MAX_POS;
-			else
-				max_pos--;
-		} else {
-			if (dir < 0 && pos == 0)
-				dir = 1;
-
-			if (pos + dir <= max_pos) {
-				/* Each bit of port 0 has a LED. */
-				clear_bit (pos, &ANNA_PORT_IO(LEDS_PORT));
-				pos += dir;
-				set_bit (pos, &ANNA_PORT_IO(LEDS_PORT));
-			}
-		}
-
-		counter = 0;
-	}
-}
diff --git a/arch/v850/kernel/anna.ld b/arch/v850/kernel/anna.ld
deleted file mode 100644
index df7f80f2833..00000000000
--- a/arch/v850/kernel/anna.ld
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Linker script for the Midas labs Anna V850E2 evaluation board
-   (CONFIG_V850E2_ANNA).  */
-
-MEMORY {
-	/* 256KB of internal memory (followed by one mirror).  */
-	iMEM0 : ORIGIN = 0,	     LENGTH = 0x00040000
-	/* 256KB of internal memory (followed by one mirror).  */
-	iMEM1 : ORIGIN = 0x00040000, LENGTH = 0x00040000
-
-	/* 1MB of static RAM.  This memory is mirrored 64 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-	/* 64MB of DRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	.intv : { INTV_CONTENTS } > iMEM0
-	.sram : { RAMK_KRAM_CONTENTS } > SRAM
-	.root : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/as85ep1-rom.ld b/arch/v850/kernel/as85ep1-rom.ld
deleted file mode 100644
index fe2a9a3ab52..00000000000
--- a/arch/v850/kernel/as85ep1-rom.ld
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Linker script for the NEC AS85EP1 V850E evaluation board
-   (CONFIG_V850E_AS85EP1), with kernel in ROM (CONFIG_ROM_KERNEL).  */
-
-MEMORY {
-	/* 4MB of flash ROM.  */
-	ROM   : ORIGIN = 0,          LENGTH = 0x00400000
-
-	/* 1MB of static RAM.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-
-	/* About 58MB of DRAM.  This can actually be at one of two
-	   positions, determined by jumper JP3; we have to use the first
-	   position because the second is partially out of processor
-	   instruction addressing range (though in the second position
-	   there's actually 64MB available).  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	ROMK_SECTIONS(ROM, SRAM)
-}
diff --git a/arch/v850/kernel/as85ep1.c b/arch/v850/kernel/as85ep1.c
deleted file mode 100644
index b525ecf3aea..00000000000
--- a/arch/v850/kernel/as85ep1.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * arch/v850/kernel/as85ep1.c -- AS85EP1 V850E evaluation chip/board
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/major.h>
-#include <linux/irq.h>
-
-#include <asm/machdep.h>
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/v850e_timer_d.h>
-#include <asm/v850e_uart.h>
-
-#include "mach.h"
-
-
-/* SRAM and SDRAM are vaguely contiguous (with a big hole in between; see
-   mach_reserve_bootmem for details); use both as one big area.  */
-#define RAM_START 	SRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-
-/* The bits of this port are connected to an 8-LED bar-graph.  */
-#define LEDS_PORT	4
-
-
-static void as85ep1_led_tick (void);
-
-extern char _intv_copy_src_start, _intv_copy_src_end;
-extern char _intv_copy_dst_start;
-
-
-void __init mach_early_init (void)
-{
-#ifndef CONFIG_ROM_KERNEL
-	const u32 *src;
-	register u32 *dst asm ("ep");
-#endif
-
-	AS85EP1_CSC(0) = 0x0403;
-	AS85EP1_BCT(0) = 0xB8B8;
-	AS85EP1_DWC(0) = 0x0104;
-	AS85EP1_BCC    = 0x0012;
-	AS85EP1_ASC    = 0;
-	AS85EP1_LBS    = 0x00A9;
-
-	AS85EP1_PORT_PMC(6)  = 0xFF; /* valid A0,A1,A20-A25 */
-	AS85EP1_PORT_PMC(7)  = 0x0E; /* valid CS1-CS3       */
-	AS85EP1_PORT_PMC(9)  = 0xFF; /* valid D16-D23       */
-	AS85EP1_PORT_PMC(10) = 0xFF; /* valid D24-D31       */
-
-	AS85EP1_RFS(1) = 0x800c;
-	AS85EP1_RFS(3) = 0x800c;
-	AS85EP1_SCR(1) = 0x20A9;
-	AS85EP1_SCR(3) = 0x20A9;
-
-#ifndef CONFIG_ROM_KERNEL
-	/* The early chip we have is buggy, and writing the interrupt
-	   vectors into low RAM may screw up, so for non-ROM kernels, we
-	   only rely on the reset vector being downloaded, and copy the
-	   rest of the interrupt vectors into place here.  The specific bug
-	   is that writing address N, where (N & 0x10) == 0x10, will _also_
-	   write to address (N - 0x10).  We avoid this (effectively) by
-	   writing in 16-byte chunks backwards from the end.  */
-
-	AS85EP1_IRAMM = 0x3;	/* "write-mode" for the internal instruction memory */
-
-	src = (u32 *)(((u32)&_intv_copy_src_end - 1) & ~0xF);
-	dst = (u32 *)&_intv_copy_dst_start
-		+ (src - (u32 *)&_intv_copy_src_start);
-	do {
-		u32 t0 = src[0], t1 = src[1], t2 = src[2], t3 = src[3];
-		dst[0] = t0; dst[1] = t1; dst[2] = t2; dst[3] = t3;
-		dst -= 4;
-		src -= 4;
-	} while (src > (u32 *)&_intv_copy_src_start);
-
-	AS85EP1_IRAMM = 0x0;	/* "read-mode" for the internal instruction memory */
-#endif /* !CONFIG_ROM_KERNEL */
-
-	v850e_intc_disable_irqs ();
-}
-
-void __init mach_setup (char **cmdline)
-{
-	AS85EP1_PORT_PMC (LEDS_PORT) = 0; /* Make the LEDs port an I/O port. */
-	AS85EP1_PORT_PM (LEDS_PORT) = 0; /* Make all the bits output pins.  */
-	mach_tick = as85ep1_led_tick;
-}
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-/* Convenience macros.  */
-#define SRAM_END	(SRAM_ADDR + SRAM_SIZE)
-#define SDRAM_END	(SDRAM_ADDR + SDRAM_SIZE)
-
-void __init mach_reserve_bootmem ()
-{
-	if (SDRAM_ADDR < RAM_END && SDRAM_ADDR > RAM_START)
-		/* We can't use the space between SRAM and SDRAM, so
-		   prevent the kernel from trying.  */
-		reserve_bootmem(SRAM_END, SDRAM_ADDR - SRAM_END,
-				BOOTMEM_DEFAULT);
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, 		NUM_MACH_IRQS,	1, 7 },
-	{ "CCC", IRQ_INTCCC(0),	IRQ_INTCCC_NUM, 1, 5 },
-	{ "CMD", IRQ_INTCMD(0), IRQ_INTCMD_NUM,	1, 5 },
-	{ "SRE", IRQ_INTSRE(0), IRQ_INTSRE_NUM,	3, 3 },
-	{ "SR",	 IRQ_INTSR(0),	IRQ_INTSR_NUM, 	3, 4 },
-	{ "ST",  IRQ_INTST(0), 	IRQ_INTST_NUM, 	3, 5 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-void __init mach_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-void machine_restart (char *__unused)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	asm ("jmp r0"); /* Jump to the reset vector.  */
-}
-
-void machine_halt (void)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	local_irq_disable ();	/* Ignore all interrupts.  */
-	AS85EP1_PORT_IO (LEDS_PORT) = 0xAA;	/* Note that we halted.  */
-	for (;;)
-		asm ("halt; nop; nop; nop; nop; nop");
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
-/* Called before configuring an on-chip UART.  */
-void as85ep1_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	/* Make the shared uart/port pins be uart pins.  */
-	AS85EP1_PORT_PMC(3) |= (0x5 << chan);
-
-	/* The AS85EP1 connects some general-purpose I/O pins on the CPU to
-	   the RTS/CTS lines of UART 1's serial connection.  I/O pins P53
-	   and P54 are RTS and CTS respectively.  */
-	if (chan == 1) {
-		/* Put P53 & P54 in I/O port mode.  */
-		AS85EP1_PORT_PMC(5) &= ~0x18;
-		/* Make P53 an output, and P54 an input.  */
-		AS85EP1_PORT_PM(5) |=  0x10;
-	}
-}
-
-/* Minimum and maximum bounds for the moving upper LED boundary in the
-   clock tick display.  */
-#define MIN_MAX_POS 0
-#define MAX_MAX_POS 7
-
-/* There are MAX_MAX_POS^2 - MIN_MAX_POS^2 cycles in the animation, so if
-   we pick 6 and 0 as above, we get 49 cycles, which is when divided into
-   the standard 100 value for HZ, gives us an almost 1s total time.  */
-#define TICKS_PER_FRAME \
-	(HZ / (MAX_MAX_POS * MAX_MAX_POS - MIN_MAX_POS * MIN_MAX_POS))
-
-static void as85ep1_led_tick ()
-{
-	static unsigned counter = 0;
-	
-	if (++counter == TICKS_PER_FRAME) {
-		static int pos = 0, max_pos = MAX_MAX_POS, dir = 1;
-
-		if (dir > 0 && pos == max_pos) {
-			dir = -1;
-			if (max_pos == MIN_MAX_POS)
-				max_pos = MAX_MAX_POS;
-			else
-				max_pos--;
-		} else {
-			if (dir < 0 && pos == 0)
-				dir = 1;
-
-			if (pos + dir <= max_pos) {
-				/* Each bit of port 0 has a LED. */
-				set_bit (pos, &AS85EP1_PORT_IO(LEDS_PORT));
-				pos += dir;
-				clear_bit (pos, &AS85EP1_PORT_IO(LEDS_PORT));
-			}
-		}
-
-		counter = 0;
-	}
-}
diff --git a/arch/v850/kernel/as85ep1.ld b/arch/v850/kernel/as85ep1.ld
deleted file mode 100644
index ef2c4399063..00000000000
--- a/arch/v850/kernel/as85ep1.ld
+++ /dev/null
@@ -1,49 +0,0 @@
-/* Linker script for the NEC AS85EP1 V850E evaluation board
-   (CONFIG_V850E_AS85EP1).  */
-
-MEMORY {
-	/* 1MB of internal instruction memory. */
-	iMEM0 : ORIGIN = 0,	     LENGTH = 0x00100000
-
-	/* 1MB of static RAM.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-
-	/* About 58MB of DRAM.  This can actually be at one of two
-	   positions, determined by jumper JP3; we have to use the first
-	   position because the second is partially out of processor
-	   instruction addressing range (though in the second position
-	   there's actually 64MB available).  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	.resetv : {
-		__intv_start = . ;
-			*(.intv.reset)	/* Reset vector */
-	} > iMEM0
-
-	.sram : {
-		RAMK_KRAM_CONTENTS
-
-		/* We stick most of the interrupt vectors here; they'll be
-		   copied into the proper location by the early init code (we
-		   can't put them directly in the right place because of
-		   hardware bugs).  The vectors shouldn't need to be
-		   relocated, so we don't have to use `> ...  AT> ...' to
-		   split the load/vm addresses (and we can't because of
-		   problems with the loader).  */
-		. = ALIGN (0x10) ;
-		__intv_copy_src_start = . ;
-			*(.intv.common)	/* Vectors common to all v850e proc. */
-			*(.intv.mach)	/* Machine-specific int. vectors.  */
-		. = ALIGN (0x10) ;
-		__intv_copy_src_end = . ;
-	} > SRAM
-
-	/* Where we end up putting the vectors.  */
-	__intv_copy_dst_start = 0x10 ;
-	__intv_copy_dst_end = __intv_copy_dst_start + (__intv_copy_src_end - __intv_copy_src_start) ;
-	__intv_end = __intv_copy_dst_end ;
-
-	.root : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/asm-offsets.c b/arch/v850/kernel/asm-offsets.c
deleted file mode 100644
index 581e6986a77..00000000000
--- a/arch/v850/kernel/asm-offsets.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * This program is used to generate definitions needed by
- * assembly language modules.
- *
- * We use the technique used in the OSF Mach kernel code:
- * generate asm statements containing #defines,
- * compile this file to assembler, and then extract the
- * #defines from the assembly-language output.
- */
-
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/ptrace.h>
-#include <linux/hardirq.h>
-#include <linux/kbuild.h>
-
-#include <asm/irq.h>
-#include <asm/errno.h>
-
-int main (void)
-{
-	/* offsets into the task struct */
-	DEFINE (TASK_STATE, offsetof (struct task_struct, state));
-	DEFINE (TASK_FLAGS, offsetof (struct task_struct, flags));
-	DEFINE (TASK_PTRACE, offsetof (struct task_struct, ptrace));
-	DEFINE (TASK_BLOCKED, offsetof (struct task_struct, blocked));
-	DEFINE (TASK_THREAD, offsetof (struct task_struct, thread));
-	DEFINE (TASK_THREAD_INFO, offsetof (struct task_struct, stack));	/* note: this is the offset of the `stack' member */
-	DEFINE (TASK_MM, offsetof (struct task_struct, mm));
-	DEFINE (TASK_ACTIVE_MM, offsetof (struct task_struct, active_mm));
-	DEFINE (TASK_PID, offsetof (struct task_struct, pid));
-
-	/* offsets into the kernel_stat struct */
-	DEFINE (STAT_IRQ, offsetof (struct kernel_stat, irqs));
-
-
-	/* signal defines (re-exported under their own names for assembly code) */
-	DEFINE (SIGSEGV, SIGSEGV);
-	DEFINE (SEGV_MAPERR, SEGV_MAPERR);
-	DEFINE (SIGTRAP, SIGTRAP);
-	DEFINE (SIGCHLD, SIGCHLD);
-	DEFINE (SIGILL, SIGILL);
-	DEFINE (TRAP_TRACE, TRAP_TRACE);
-
-	/* ptrace flag bits */
-	DEFINE (PT_PTRACED, PT_PTRACED);
-	DEFINE (PT_DTRACE, PT_DTRACE);
-
-	/* error values */
-	DEFINE (ENOSYS, ENOSYS);
-
-	/* clone flag bits */
-	DEFINE (CLONE_VFORK, CLONE_VFORK);
-	DEFINE (CLONE_VM, CLONE_VM);
-
-	return 0;	/* Only the assembly output generated for this file is used (see the header comment).  */
-}
diff --git a/arch/v850/kernel/bug.c b/arch/v850/kernel/bug.c
deleted file mode 100644
index c78cf750915..00000000000
--- a/arch/v850/kernel/bug.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * arch/v850/kernel/bug.c -- Bug reporting functions
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/reboot.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-
-#include <asm/errno.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/current.h>
-
-/* We should use __builtin_return_address, but it doesn't work in gcc-2.90
-   (which is currently our standard compiler on the v850).  */
-#define ret_addr() ({ register u32 lp asm ("lp"); lp; })
-#define stack_addr() ({ register u32 sp asm ("sp"); sp; })
-
-void __bug ()
-{
-	printk (KERN_CRIT "kernel BUG at PC 0x%x (SP ~0x%x)!\n",
-		ret_addr() - 4, /* LP is read as the return address; - 4 for `jarl' */
-		stack_addr()); /* SP as read inside this function (presumably why the message says `~') */
-	machine_halt ();	/* After reporting, halt the machine.  */
-}
-
-int bad_trap (int trap_num, struct pt_regs *regs)
-{
-	printk (KERN_CRIT
-		"unimplemented trap %d called at 0x%08lx, pid %d!\n",
-		trap_num, regs->pc, current->pid);	/* PC is taken from the saved registers in REGS.  */
-	return -ENOSYS;	/* Always reports failure, whatever TRAP_NUM is.  */
-}
-
-#ifdef CONFIG_RESET_GUARD
-void unexpected_reset (unsigned long ret_addr, unsigned long kmode,
-		       struct task_struct *task, unsigned long sp)
-{
-	printk (KERN_CRIT
-		"unexpected reset in %s mode, pid %d"
-		" (ret_addr = 0x%lx, sp = 0x%lx)\n",
-		kmode ? "kernel" : "user",	/* Non-zero KMODE is reported as kernel mode.  */
-		task ? task->pid : -1,		/* The pid is reported as -1 if TASK is null.  */
-		ret_addr, sp);
-
-	machine_halt ();	/* After reporting, halt the machine.  */
-}
-#endif /* CONFIG_RESET_GUARD */
-
-
-
-struct spec_reg_name {
-	const char *name;	/* Name that show_regs prints in place of `rNN'.  */
-	int gpr;		/* Number of the general register the name belongs to.  */
-};
-
-struct spec_reg_name spec_reg_names[] = {
-	{ "sp", GPR_SP },
-	{ "gp", GPR_GP },
-	{ "tp", GPR_TP },
-	{ "ep", GPR_EP },
-	{ "lp", GPR_LP },
-	{ 0, 0 }		/* End marker: the null name stops the search in show_regs.  */
-};
-
-void show_regs (struct pt_regs *regs)
-{
-	int row, col;
-
-	printk ("     pc 0x%08lx    psw 0x%08lx                       kernel_mode %d\n",
-		regs->pc, regs->psw, regs->kernel_mode);
-	printk ("   ctpc 0x%08lx  ctpsw 0x%08lx   ctbp 0x%08lx\n",
-		regs->ctpc, regs->ctpsw, regs->ctbp);
-
-	/* Then the general registers, four to an output line.  */
-	for (row = 0; row < NUM_GPRS; row += 4) {
-		for (col = 0; col < 4; col++) {
-			struct spec_reg_name *alias = spec_reg_names;
-			int regno = row + col;
-			long contents = regs->gpr[regno];
-
-			/* Look for a special name for this register.  */
-			while (alias->name && alias->gpr != regno)
-				alias++;
-
-			if (! alias->name)
-				printk ("    r%02d 0x%08lx", regno, contents);
-			else
-				printk ("%7s 0x%08lx", alias->name, contents);
-		}
-		printk ("\n");
-	}
-}
-
-/*
- * Hex-dump stack memory.  The dump starts at SP if that is non-null,
- * otherwise at TASK's stack pointer (task_sp) if TASK is non-null,
- * otherwise at the current stack pointer; it runs up to a limit derived
- * from THREAD_SIZE and THREAD_MASK (presumably the top of the stack area).
- */
-void show_stack (struct task_struct *task, unsigned long *sp)
-{
-	unsigned long cur, limit;
-
-	/* Choose the starting address.  */
-	if (sp)
-		cur = (unsigned long)sp;
-	else if (task)
-		cur = task_sp (task);
-	else
-		cur = stack_addr ();
-
-	cur &= ~3;	/* Clear the low two bits.  */
-	limit = (cur + THREAD_SIZE - 1) & THREAD_MASK;
-
-	/* Each output line ends once the address reaches a multiple of 16.  */
-	while (cur < limit) {
-		printk ("%8lX: ", cur);
-		do {
-			printk (" %8lX", *(unsigned long *)cur);
-			cur += sizeof (unsigned long);
-		} while (cur < limit && (cur & 0xF));
-		printk ("\n");
-	}
-}
-
-void dump_stack ()
-{
-	show_stack (0, 0);	/* No task and no SP: show_stack starts at the current stack pointer.  */
-}
-
-EXPORT_SYMBOL(dump_stack);
diff --git a/arch/v850/kernel/entry.S b/arch/v850/kernel/entry.S
deleted file mode 100644
index e4327a8d6bc..00000000000
--- a/arch/v850/kernel/entry.S
+++ /dev/null
@@ -1,1121 +0,0 @@
-/*
- * arch/v850/kernel/entry.S -- Low-level system-call handling, trap handlers,
- *	and context-switching
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/sys.h>
-
-#include <asm/entry.h>
-#include <asm/current.h>
-#include <asm/thread_info.h>
-#include <asm/clinkage.h>
-#include <asm/processor.h>
-#include <asm/irq.h>
-#include <asm/errno.h>
-
-#include <asm/asm-offsets.h>
-
-
-/* Make a slightly more convenient alias for C_SYMBOL_NAME.  */
-#define CSYM	C_SYMBOL_NAME
-
-
-/* The offset of the struct pt_regs in a state-save-frame on the stack.  */
-#define PTO	STATE_SAVE_PT_OFFSET
-
-
-/* Save argument registers to the state-save-frame pointed to by EP.  */
-#define SAVE_ARG_REGS							      \
-	sst.w	r6, PTO+PT_GPR(6)[ep];					      \
-	sst.w	r7, PTO+PT_GPR(7)[ep];					      \
-	sst.w	r8, PTO+PT_GPR(8)[ep];					      \
-	sst.w	r9, PTO+PT_GPR(9)[ep]
-/* Restore argument registers from the state-save-frame pointed to by EP.  */
-#define RESTORE_ARG_REGS						      \
-	sld.w	PTO+PT_GPR(6)[ep], r6;					      \
-	sld.w	PTO+PT_GPR(7)[ep], r7;					      \
-	sld.w	PTO+PT_GPR(8)[ep], r8;					      \
-	sld.w	PTO+PT_GPR(9)[ep], r9
-
-/* Save value return registers to the state-save-frame pointed to by EP.  */
-#define SAVE_RVAL_REGS							      \
-	sst.w	r10, PTO+PT_GPR(10)[ep];				      \
-	sst.w	r11, PTO+PT_GPR(11)[ep]
-/* Restore value return registers from the state-save-frame pointed to by EP.  */
-#define RESTORE_RVAL_REGS						      \
-	sld.w	PTO+PT_GPR(10)[ep], r10;				      \
-	sld.w	PTO+PT_GPR(11)[ep], r11
-
-
-#define SAVE_CALL_CLOBBERED_REGS_BEFORE_ARGS				      \
-	sst.w	r1, PTO+PT_GPR(1)[ep];					      \
-	sst.w	r5, PTO+PT_GPR(5)[ep]
-#define SAVE_CALL_CLOBBERED_REGS_AFTER_RVAL				      \
-	sst.w	r12, PTO+PT_GPR(12)[ep];				      \
-	sst.w	r13, PTO+PT_GPR(13)[ep];				      \
-	sst.w	r14, PTO+PT_GPR(14)[ep];				      \
-	sst.w	r15, PTO+PT_GPR(15)[ep];				      \
-	sst.w	r16, PTO+PT_GPR(16)[ep];				      \
-	sst.w	r17, PTO+PT_GPR(17)[ep];				      \
-	sst.w	r18, PTO+PT_GPR(18)[ep];				      \
-	sst.w	r19, PTO+PT_GPR(19)[ep]
-#define RESTORE_CALL_CLOBBERED_REGS_BEFORE_ARGS				      \
-	sld.w	PTO+PT_GPR(1)[ep], r1;					      \
-	sld.w	PTO+PT_GPR(5)[ep], r5
-#define RESTORE_CALL_CLOBBERED_REGS_AFTER_RVAL				      \
-	sld.w	PTO+PT_GPR(12)[ep], r12;				      \
-	sld.w	PTO+PT_GPR(13)[ep], r13;				      \
-	sld.w	PTO+PT_GPR(14)[ep], r14;				      \
-	sld.w	PTO+PT_GPR(15)[ep], r15;				      \
-	sld.w	PTO+PT_GPR(16)[ep], r16;				      \
-	sld.w	PTO+PT_GPR(17)[ep], r17;				      \
-	sld.w	PTO+PT_GPR(18)[ep], r18;				      \
-	sld.w	PTO+PT_GPR(19)[ep], r19
-
-/* Save `call clobbered' registers to the state-save-frame pointed to by EP.  */
-#define SAVE_CALL_CLOBBERED_REGS					      \
-	SAVE_CALL_CLOBBERED_REGS_BEFORE_ARGS;				      \
-	SAVE_ARG_REGS;							      \
-	SAVE_RVAL_REGS;							      \
-	SAVE_CALL_CLOBBERED_REGS_AFTER_RVAL
-/* Restore `call clobbered' registers from the state-save-frame pointed to
-   by EP.  */
-#define RESTORE_CALL_CLOBBERED_REGS					      \
-	RESTORE_CALL_CLOBBERED_REGS_BEFORE_ARGS;			      \
-	RESTORE_ARG_REGS;						      \
-	RESTORE_RVAL_REGS;						      \
-	RESTORE_CALL_CLOBBERED_REGS_AFTER_RVAL
-
-/* Save `call clobbered' registers except for the return-value registers
-   to the state-save-frame pointed to by EP.  */
-#define SAVE_CALL_CLOBBERED_REGS_NO_RVAL				      \
-	SAVE_CALL_CLOBBERED_REGS_BEFORE_ARGS;				      \
-	SAVE_ARG_REGS;							      \
-	SAVE_CALL_CLOBBERED_REGS_AFTER_RVAL
-/* Restore `call clobbered' registers except for the return-value registers
-   from the state-save-frame pointed to by EP.  */
-#define RESTORE_CALL_CLOBBERED_REGS_NO_RVAL				      \
-	RESTORE_CALL_CLOBBERED_REGS_BEFORE_ARGS;			      \
-	RESTORE_ARG_REGS;						      \
-	RESTORE_CALL_CLOBBERED_REGS_AFTER_RVAL
-
-/* Save `call saved' registers to the state-save-frame pointed to by EP.  */
-#define SAVE_CALL_SAVED_REGS						      \
-	sst.w	r2, PTO+PT_GPR(2)[ep];					      \
-	sst.w	r20, PTO+PT_GPR(20)[ep];				      \
-	sst.w	r21, PTO+PT_GPR(21)[ep];				      \
-	sst.w	r22, PTO+PT_GPR(22)[ep];				      \
-	sst.w	r23, PTO+PT_GPR(23)[ep];				      \
-	sst.w	r24, PTO+PT_GPR(24)[ep];				      \
-	sst.w	r25, PTO+PT_GPR(25)[ep];				      \
-	sst.w	r26, PTO+PT_GPR(26)[ep];				      \
-	sst.w	r27, PTO+PT_GPR(27)[ep];				      \
-	sst.w	r28, PTO+PT_GPR(28)[ep];				      \
-	sst.w	r29, PTO+PT_GPR(29)[ep]
-/* Restore `call saved' registers from the state-save-frame pointed to by EP.  */
-#define RESTORE_CALL_SAVED_REGS						      \
-	sld.w	PTO+PT_GPR(2)[ep], r2;					      \
-	sld.w	PTO+PT_GPR(20)[ep], r20;				      \
-	sld.w	PTO+PT_GPR(21)[ep], r21;				      \
-	sld.w	PTO+PT_GPR(22)[ep], r22;				      \
-	sld.w	PTO+PT_GPR(23)[ep], r23;				      \
-	sld.w	PTO+PT_GPR(24)[ep], r24;				      \
-	sld.w	PTO+PT_GPR(25)[ep], r25;				      \
-	sld.w	PTO+PT_GPR(26)[ep], r26;				      \
-	sld.w	PTO+PT_GPR(27)[ep], r27;				      \
-	sld.w	PTO+PT_GPR(28)[ep], r28;				      \
-	sld.w	PTO+PT_GPR(29)[ep], r29
-
-
-/* Save the PC stored in the special register SAVEREG to the state-save-frame
-   pointed to by EP.  r19 is clobbered.  */
-#define SAVE_PC(savereg)						      \
-	stsr	SR_ ## savereg, r19;					      \
-	sst.w	r19, PTO+PT_PC[ep]
-/* Restore the PC from the state-save-frame pointed to by EP, to the special
-   register SAVEREG.  LP is clobbered (it is used as a scratch register
-   because the POP_STATE macro restores it, and this macro is usually used
-   inside POP_STATE).  */
-#define RESTORE_PC(savereg)						      \
-	sld.w	PTO+PT_PC[ep], lp;					      \
-	ldsr	lp, SR_ ## savereg
-/* Save the PSW register stored in the special register SAVEREG to the
-   state-save-frame pointed to by EP.  r19 is clobbered.  */
-#define SAVE_PSW(savereg)						      \
-	stsr	SR_ ## savereg, r19;					      \
-	sst.w	r19, PTO+PT_PSW[ep]
-/* Restore the PSW register from the state-save-frame pointed to by EP, to
-   the special register SAVEREG.  LP is clobbered (it is used as a scratch
-   register because the POP_STATE macro restores it, and this macro is
-   usually used inside POP_STATE).  */
-#define RESTORE_PSW(savereg)						      \
-	sld.w	PTO+PT_PSW[ep], lp;					      \
-	ldsr	lp, SR_ ## savereg
-
-/* Save CTPC/CTPSW/CTBP registers to the state-save-frame pointed to by EP.
-   r19 is clobbered.  */
-#define SAVE_CT_REGS							      \
-	stsr	SR_CTPC, r19;						      \
-	sst.w	r19, PTO+PT_CTPC[ep];					      \
-	stsr	SR_CTPSW, r19;						      \
-	sst.w	r19, PTO+PT_CTPSW[ep];					      \
-	stsr	SR_CTBP, r19;						      \
-	sst.w	r19, PTO+PT_CTBP[ep]
-/* Restore CTPC/CTPSW/CTBP registers from the state-save-frame pointed to by EP.
-   LP is clobbered (it is used as a scratch register because the POP_STATE
-   macro restores it, and this macro is usually used inside POP_STATE).  */
-#define RESTORE_CT_REGS							      \
-	sld.w	PTO+PT_CTPC[ep], lp;					      \
-	ldsr	lp, SR_CTPC;						      \
-	sld.w	PTO+PT_CTPSW[ep], lp;					      \
-	ldsr	lp, SR_CTPSW;						      \
-	sld.w	PTO+PT_CTBP[ep], lp;					      \
-	ldsr	lp, SR_CTBP
-
-
-/* Push register state, except for the stack pointer, on the stack in the
-   form of a state-save-frame (plus some extra padding), in preparation for
-   a system call.  This macro makes sure that the EP, GP, and LP
-   registers are saved, and TYPE identifies the set of extra registers to
-   be saved as well.  Also copies (the new value of) SP to EP.  */
-#define PUSH_STATE(type)						      \
-	addi	-STATE_SAVE_SIZE, sp, sp; /* Make room on the stack.  */      \
-	st.w	ep, PTO+PT_GPR(GPR_EP)[sp];				      \
-	mov	sp, ep;							      \
-	sst.w	gp, PTO+PT_GPR(GPR_GP)[ep];				      \
-	sst.w	lp, PTO+PT_GPR(GPR_LP)[ep];				      \
-	type ## _STATE_SAVER
-/* Pop a register state pushed by PUSH_STATE, except for the stack pointer,
-   from the stack.  */
-#define POP_STATE(type)							      \
-	mov	sp, ep;							      \
-	type ## _STATE_RESTORER;					      \
-	sld.w	PTO+PT_GPR(GPR_GP)[ep], gp;				      \
-	sld.w	PTO+PT_GPR(GPR_LP)[ep], lp;				      \
-	sld.w	PTO+PT_GPR(GPR_EP)[ep], ep;				      \
-	addi	STATE_SAVE_SIZE, sp, sp /* Clean up our stack space.  */
-
-
-/* Switch to the kernel stack if necessary, and push register state on the
-   stack in the form of a state-save-frame.  Also load the current task
-   pointer if switching from user mode.  The stack-pointer (r3) should have
-   already been saved to the memory location SP_SAVE_LOC (the reason for
-   this is that the interrupt vectors may be beyond a 22-bit signed offset
-   jump from the actual interrupt handler, and this allows them to save the
-   stack-pointer and use that register to do an indirect jump).  This macro
-   makes sure that `special' registers, system registers, and the stack
-   pointer are saved; TYPE identifies the set of extra registers to be
-   saved as well.  SYSCALL_NUM is the register in which the system-call
-   number this state is for is stored (r0 if this isn't a system call).
-   Interrupts should already be disabled when calling this.  */
-#define SAVE_STATE(type, syscall_num, sp_save_loc)			      \
-	tst1	0, KM;			/* See if already in kernel mode.  */ \
-	bz	1f;							      \
-	ld.w	sp_save_loc, sp;	/* ... yes, use saved SP.  */	      \
-	br	2f;							      \
-1:	ld.w	KSP, sp;		/* ... no, switch to kernel stack. */ \
-2:	PUSH_STATE(type);						      \
-	ld.b	KM, r19;		/* Remember old kernel-mode.  */      \
-	sst.w	r19, PTO+PT_KERNEL_MODE[ep];				      \
-	ld.w	sp_save_loc, r19;	/* Remember old SP.  */		      \
-	sst.w	r19, PTO+PT_GPR(GPR_SP)[ep];				      \
-	mov	1, r19;			/* Now definitely in kernel-mode. */  \
-	st.b	r19, KM;						      \
-	GET_CURRENT_TASK(CURRENT_TASK);	/* Fetch the current task pointer. */ \
-	/* Save away the syscall number.  */				      \
-	sst.w	syscall_num, PTO+PT_CUR_SYSCALL[ep]
-
-
-/* Save register state not normally saved by PUSH_STATE for TYPE, to the
-   state-save-frame on the stack; also copies SP to EP.  r19 may be trashed. */
-#define SAVE_EXTRA_STATE(type)						      \
-	mov	sp, ep;							      \
-	type ## _EXTRA_STATE_SAVER
-/* Restore register state not normally restored by POP_STATE for TYPE,
-   from the state-save-frame on the stack; also copies SP to EP.
-   r19 may be trashed.  */
-#define RESTORE_EXTRA_STATE(type)					      \
-	mov	sp, ep;							      \
-	type ## _EXTRA_STATE_RESTORER
-
-/* Save any call-clobbered registers not normally saved by PUSH_STATE for
-   TYPE, to the state-save-frame on the stack.
-   EP may be trashed, but is not guaranteed to contain a copy of SP
-   (unlike after most SAVE_... macros).  r19 may be trashed.  */
-#define SAVE_EXTRA_STATE_FOR_SCHEDULE(type)				      \
-	type ## _SCHEDULE_EXTRA_STATE_SAVER
-/* Restore any call-clobbered registers not normally restored by
-   POP_STATE for TYPE, from the state-save-frame on the stack.
-   EP may be trashed, but is not guaranteed to contain a copy of SP
-   (unlike after most RESTORE_... macros).  r19 may be trashed.  */
-#define RESTORE_EXTRA_STATE_FOR_SCHEDULE(type)				      \
-	type ## _SCHEDULE_EXTRA_STATE_RESTORER
-
-
-/* These are extra_state_saver/restorer values for a user trap.  Note
-   that we save the argument registers so that restarted syscalls will
-   function properly (otherwise it wouldn't be necessary), and we must
-   _not_ restore the return-value registers (so that traps can return a
-   value!), but call-clobbered registers are not saved at all, as the
-   caller of the syscall function should have saved them.  */
-
-#define TRAP_RET reti
-/* Traps don't save call-clobbered registers (but do still save arg regs).
-   We preserve PSW to keep long-term state, namely interrupt status (for traps
-   from kernel-mode), and the single-step flag (for user traps).  */
-#define TRAP_STATE_SAVER						      \
-	SAVE_ARG_REGS;							      \
-	SAVE_PC(EIPC);							      \
-	SAVE_PSW(EIPSW)
-/* When traps return, they just leave call-clobbered registers (except for arg
-   regs) with whatever value they have from the kernel.  EIPC and EIPSW are
-   reloaded from the PC and PSW slots of the state-save-frame; note that
-   EIPSW is not zeroed, so a saved single-step flag is restored as well.  */
-#define TRAP_STATE_RESTORER						      \
-	RESTORE_ARG_REGS;						      \
-	RESTORE_PC(EIPC);						      \
-	RESTORE_PSW(EIPSW)
-/* Save registers not normally saved by traps.  We need to save r12, even
-   though it's nominally call-clobbered, because it's used when restarting
-   a system call (the signal-handling path uses SAVE_EXTRA_STATE, and
-   expects r12 to be restored when the trap returns).  */
-#define TRAP_EXTRA_STATE_SAVER						      \
-	SAVE_RVAL_REGS;							      \
-	sst.w	r12, PTO+PT_GPR(12)[ep];				      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_CT_REGS
-#define TRAP_EXTRA_STATE_RESTORER					      \
-	RESTORE_RVAL_REGS;						      \
-	sld.w	PTO+PT_GPR(12)[ep], r12;				      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_CT_REGS
-/* Save registers prior to calling scheduler (just before trap returns).
-   We have to save the return-value registers to preserve the trap's return
-   value.  Note that ..._SCHEDULE_EXTRA_STATE_SAVER, unlike most ..._SAVER
-   macros, is required to setup EP itself if EP is needed (this is because
-   in many cases, the macro is empty).  */
-#define TRAP_SCHEDULE_EXTRA_STATE_SAVER					      \
-	mov sp, ep;							      \
-	SAVE_RVAL_REGS
-/* Note that ..._SCHEDULE_EXTRA_STATE_RESTORER, unlike most ..._RESTORER
-   macros, is required to setup EP itself if EP is needed (this is because
-   in many cases, the macro is empty).  */
-#define TRAP_SCHEDULE_EXTRA_STATE_RESTORER				      \
-	mov sp, ep;							      \
-	RESTORE_RVAL_REGS
-
-/* Register saving/restoring for maskable interrupts.  */
-#define IRQ_RET reti
-#define IRQ_STATE_SAVER							      \
-	SAVE_CALL_CLOBBERED_REGS;					      \
-	SAVE_PC(EIPC);							      \
-	SAVE_PSW(EIPSW)
-#define IRQ_STATE_RESTORER						      \
-	RESTORE_CALL_CLOBBERED_REGS;					      \
-	RESTORE_PC(EIPC);						      \
-	RESTORE_PSW(EIPSW)
-#define IRQ_EXTRA_STATE_SAVER						      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_CT_REGS
-#define IRQ_EXTRA_STATE_RESTORER					      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_CT_REGS
-#define IRQ_SCHEDULE_EXTRA_STATE_SAVER	     /* nothing */
-#define IRQ_SCHEDULE_EXTRA_STATE_RESTORER    /* nothing */
-
-/* Register saving/restoring for non-maskable interrupts (the PC/PSW are saved from and restored to FEPC/FEPSW).  */
-#define NMI_RET reti
-#define NMI_STATE_SAVER							      \
-	SAVE_CALL_CLOBBERED_REGS;					      \
-	SAVE_PC(FEPC);							      \
-	SAVE_PSW(FEPSW)
-#define NMI_STATE_RESTORER						      \
-	RESTORE_CALL_CLOBBERED_REGS;					      \
-	RESTORE_PC(FEPC);						      \
-	RESTORE_PSW(FEPSW)
-#define NMI_EXTRA_STATE_SAVER						      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_CT_REGS
-#define NMI_EXTRA_STATE_RESTORER					      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_CT_REGS
-#define NMI_SCHEDULE_EXTRA_STATE_SAVER	     /* nothing */
-#define NMI_SCHEDULE_EXTRA_STATE_RESTORER    /* nothing */
-
-/* Register saving/restoring for debug traps.  */
-#define DBTRAP_RET .long 0x014607E0 /* `dbret', but gas doesn't support it. */
-#define DBTRAP_STATE_SAVER						      \
-	SAVE_CALL_CLOBBERED_REGS;					      \
-	SAVE_PC(DBPC);							      \
-	SAVE_PSW(DBPSW)
-#define DBTRAP_STATE_RESTORER						      \
-	RESTORE_CALL_CLOBBERED_REGS;					      \
-	RESTORE_PC(DBPC);						      \
-	RESTORE_PSW(DBPSW)
-#define DBTRAP_EXTRA_STATE_SAVER					      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_CT_REGS
-#define DBTRAP_EXTRA_STATE_RESTORER					      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_CT_REGS
-#define DBTRAP_SCHEDULE_EXTRA_STATE_SAVER	/* nothing */
-#define DBTRAP_SCHEDULE_EXTRA_STATE_RESTORER	/* nothing */
-
-/* Register saving/restoring for a context switch.  We don't need to save
-   too many registers, because context-switching looks like a function call
-   (via the function `switch_thread'), so callers will save any
-   call-clobbered registers themselves.  We do need to save the CT regs, as
-   they're normally not saved during kernel entry (the kernel doesn't use
-   them).  We save PSW so that interrupt-status state will correctly follow
-   each thread (mostly NMI vs. normal-IRQ/trap), though for the most part
-   it doesn't matter since threads are always in almost exactly the same
-   processor state during a context switch.  The stack pointer and return
-   value are handled by switch_thread itself.  */
-#define SWITCH_STATE_SAVER						      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_PSW(PSW);							      \
-	SAVE_CT_REGS
-#define SWITCH_STATE_RESTORER						      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_PSW(PSW);						      \
-	RESTORE_CT_REGS
-
-
-/* Restore register state from the state-save-frame on the stack, switch back
-   to the user stack if necessary, and return from the trap/interrupt.
-   EXTRA_STATE_RESTORER is a sequence of assembly language statements to
-   restore anything not restored by this macro.  Only registers not saved by
-   the C compiler are restored (that is, R3(sp), R4(gp), R31(lp), and
-   anything restored by EXTRA_STATE_RESTORER).  */
-#define RETURN(type)							      \
-	ld.b	PTO+PT_KERNEL_MODE[sp], r19;				      \
-	di;				/* Disable interrupts */	      \
-	cmp	r19, r0;		/* See if returning to kernel mode, */\
-	bne	2f;			/* ... if so, skip resched &c.  */    \
-									      \
-	/* We're returning to user mode, so check for various conditions that \
-	   trigger rescheduling. */					      \
-	GET_CURRENT_THREAD(r18);					      \
-	ld.w	TI_FLAGS[r18], r19;					      \
-	andi	_TIF_NEED_RESCHED, r19, r0;				      \
-	bnz	3f;			/* Call the scheduler.  */	      \
-5:	andi	_TIF_SIGPENDING, r19, r18;				      \
-	ld.w	TASK_PTRACE[CURRENT_TASK], r19; /* ptrace flags */	      \
-	or	r18, r19;		/* see if either is non-zero */	      \
-	bnz	4f;			/* if so, handle them */	      \
-									      \
-/* Return to user state.  */						      \
-1:	st.b	r0, KM;			/* Now officially in user state. */   \
-									      \
-/* Final return.  The stack-pointer fiddling is not needed when returning     \
-   to kernel-mode, but they don't hurt, and this way we can share the	      \
-   (sometimes rather lengthy) POP_STATE macro.  */			      \
-2:	POP_STATE(type);						      \
-	st.w	sp, KSP;		/* Save the kernel stack pointer. */  \
-	ld.w	PT_GPR(GPR_SP)-PT_SIZE[sp], sp; /* Restore stack pointer. */  \
-	type ## _RET;			/* Return from the trap/interrupt. */ \
-									      \
-/* Call the scheduler before returning from a syscall/trap. */		      \
-3:	SAVE_EXTRA_STATE_FOR_SCHEDULE(type); /* Prepare to call scheduler. */ \
-	jarl	call_scheduler, lp;	/* Call scheduler */		      \
-	di;				/* The scheduler enables interrupts */\
-	RESTORE_EXTRA_STATE_FOR_SCHEDULE(type);				      \
-	GET_CURRENT_THREAD(r18);					      \
-	ld.w	TI_FLAGS[r18], r19;					      \
-	br	5b;			/* Continue with return path. */      \
-									      \
-/* Handle a signal or ptraced process return.				      \
-   r18 should be non-zero if there are pending signals.  */		      \
-4:	/* Not all registers are saved by the normal trap/interrupt entry     \
-	   points (for instance, call-saved registers (because the normal     \
-	   C-compiler calling sequence in the kernel makes sure they're	      \
-	   preserved), and call-clobbered registers in the case of	      \
-	   traps), but signal handlers may want to examine or change the      \
-	   complete register state.  Here we save anything not saved by	      \
-	   the normal entry sequence, so that it may be safely restored	      \
-	   (in a possibly modified form) after do_signal returns.  */	      \
-	SAVE_EXTRA_STATE(type);		/* Save state not saved by entry. */  \
-	jarl	handle_signal_or_ptrace_return, lp;			      \
-	RESTORE_EXTRA_STATE(type);	/* Restore extra regs.  */	      \
-	br	1b
-
-
-/* Jump to the appropriate function for the system call number in r12
-   (r12 is not preserved), or return an error if r12 is not valid.  The
-   LP register should point to the location where the called function
-   should return.  [note that MAKE_SYS_CALL uses label 1]  */
-#define MAKE_SYS_CALL							      \
-	/* Figure out which function to use for this system call.  */	      \
-	shl	2, r12;							      \
-	/* See if the system call number is valid.  */			      \
-	addi	lo(CSYM(sys_call_table) - sys_call_table_end), r12, r0;	      \
-	bnh	1f;							      \
-	mov	hilo(CSYM(sys_call_table)), r19;			      \
-	add	r19, r12;						      \
-	ld.w	0[r12], r12;						      \
-	/* Make the system call.  */					      \
-	jmp	[r12];							      \
-	/* The syscall number is invalid, return an error.  */		      \
-1:	addi	-ENOSYS, r0, r10;					      \
-	jmp	[lp]
-
-
-	.text
-
-/*
- * User trap.
- *
- * Trap 0 system calls are also handled here.
- *
- * The stack-pointer (r3) should have already been saved to the memory
- * location ENTRY_SP (the reason for this is that the interrupt vectors may be
- * beyond a 22-bit signed offset jump from the actual interrupt handler, and
- * this allows them to save the stack-pointer and use that register to do an
- * indirect jump).
- *
- * Syscall protocol:
- *   Syscall number in r12, args in r6-r9
- *   Return value in r10
- */
-G_ENTRY(trap):
-	SAVE_STATE (TRAP, r12, ENTRY_SP) // Save registers.
-	stsr	SR_ECR, r19		// Find out which trap it was.
-	ei				// Enable interrupts.
-	mov	hilo(ret_from_trap), lp	// where the trap should return
-
-	// The following two shifts (1) clear out extraneous NMI data in the
-	// upper 16-bits, (2) convert the 0x40 - 0x5f range of trap ECR
-	// numbers into the (0-31) << 2 range we want, (3) set the flags.
-	shl	27, r19			// chop off all high bits
-	shr	25, r19			// scale back down and then << 2
-	bnz	2f			// See if not trap 0.
-
-	// Trap 0 is a `short' system call, skip general trap table.
-	MAKE_SYS_CALL			// Jump to the syscall function.
-
-2:	// For other traps, use a table lookup.
-	mov	hilo(CSYM(trap_table)), r18
-	add	r19, r18
-	ld.w	0[r18], r18
-	jmp	[r18]			// Jump to the trap handler.
-END(trap)
-
-/* This is just like ret_from_trap, but first restores extra registers
-   saved by some wrappers.  */
-L_ENTRY(restore_extra_regs_and_ret_from_trap):
-	RESTORE_EXTRA_STATE(TRAP)
-	// fall through
-END(restore_extra_regs_and_ret_from_trap)
-
-/* Entry point used to return from a syscall/trap.  */
-L_ENTRY(ret_from_trap):
-	RETURN(TRAP)
-END(ret_from_trap)
-
-
-/* This is the initial entry point for a new child thread, with an appropriate
-   stack in place that makes it look like the child is in the middle of a
-   syscall.  This function is actually `returned to' from switch_thread
-   (copy_thread makes ret_from_fork the return address in each new thread's
-   saved context).  */
-C_ENTRY(ret_from_fork):
-	mov	r10, r6			// switch_thread returns the prev task.
-	jarl	CSYM(schedule_tail), lp	// ...which is schedule_tail's arg
-	mov	r0, r10			// Child's fork call should return 0.
-	br	ret_from_trap		// Do normal trap return.
-C_END(ret_from_fork)
-
-
-/*
- * Trap 1: `long' system calls
- * `Long' syscall protocol:
- *   Syscall number in r12, args in r6-r9, r13-r14
- *   Return value in r10
- */
-L_ENTRY(syscall_long):
-	// Push extra arguments on the stack.  Note that by default, the trap
-	// handler reserves enough stack space for 6 arguments, so we don't
-	// have to make any additional room.
-	st.w	r13, 16[sp]		// arg 5
-	st.w	r14, 20[sp]		// arg 6
-
-	// Make sure r13 and r14 are preserved, in case we have to restart a
-	// system call because of a signal (ep has already been set by caller).
-	st.w	r13, PTO+PT_GPR(13)[sp]	// r13 goes to its own pt_regs slot...
-	st.w	r14, PTO+PT_GPR(14)[sp]	// ...and r14 to its own (not r13's).
-	mov	hilo(ret_from_long_syscall), lp	// syscall returns to the code below
-
-	MAKE_SYS_CALL			// Jump to the syscall function.
-END(syscall_long)
-
-/* Entry point used to return from a long syscall.  Only needed to restore
-   r13/r14 if the general trap mechanism doesn't do so.  */
-L_ENTRY(ret_from_long_syscall):
-	ld.w	PTO+PT_GPR(13)[sp], r13 // Restore the extra registers
-	ld.w	PTO+PT_GPR(14)[sp], r14 // (each from the slot it was saved in)
-	br	ret_from_trap		// The rest is the same as other traps
-END(ret_from_long_syscall)
-
-
-/* These syscalls need access to the struct pt_regs on the stack, so we
-   implement them in assembly (they're basically all wrappers anyway).  */
-
-L_ENTRY(sys_fork_wrapper):
-#ifdef CONFIG_MMU
-	addi	SIGCHLD, r0, r6		   // Arg 0: flags
-	ld.w	PTO+PT_GPR(GPR_SP)[sp], r7 // Arg 1: child SP (use parent's)
-	movea	PTO, sp, r8		   // Arg 2: parent context
-	mov	r0, r9			   // Arg 3: 0
-	st.w	r0, 16[sp]		   // Arg 4: 0 (passed on the stack)
-	st.w	r0, 20[sp]		   // Arg 5: 0 (passed on the stack)
-	mov	hilo(CSYM(do_fork)), r18   // Where the real work gets done
-	br	save_extra_state_tramp	   // Save state and go there
-#else
-	// fork almost works, enough to trick you into looking elsewhere :-(
-	addi	-EINVAL, r0, r10	   // Without CONFIG_MMU: put -EINVAL in r10
-	jmp	[lp]			   // ...and return at once
-#endif
-END(sys_fork_wrapper)
-
-L_ENTRY(sys_vfork_wrapper):
-	addi	CLONE_VFORK | CLONE_VM | SIGCHLD, r0, r6 // Arg 0: flags
-	ld.w	PTO+PT_GPR(GPR_SP)[sp], r7 // Arg 1: child SP (use parent's)
-	movea	PTO, sp, r8		   // Arg 2: parent context
-	mov	r0, r9			   // Arg 3/4/5: 0
-	st.w	r0, 16[sp]
-	st.w	r0, 20[sp]
-	mov	hilo(CSYM(do_fork)), r18   // Where the real work gets done
-	br	save_extra_state_tramp	   // Save state and go there
-END(sys_vfork_wrapper)
-
-L_ENTRY(sys_clone_wrapper):
-	ld.w	PTO+PT_GPR(GPR_SP)[sp], r19// parent's stack pointer
-	cmp	r7, r0			   // See if child SP arg (arg 1) is 0.
-	cmov	z, r19, r7, r7		   // ... and use the parent's if so.
-	movea	PTO, sp, r8		   // Arg 2: parent context
-	mov	r0, r9			   // Arg 3/4/5: 0
-	st.w	r0, 16[sp]
-	st.w	r0, 20[sp]
-	mov	hilo(CSYM(do_fork)), r18   // Where the real work gets done
-	br	save_extra_state_tramp	   // Save state and go there
-END(sys_clone_wrapper)
-
-
-L_ENTRY(sys_execve_wrapper):
-	movea	PTO, sp, r9		// add user context as 4th arg
-	jr	CSYM(sys_execve)	// Do real work (tail-call).
-END(sys_execve_wrapper)
-
-
-L_ENTRY(sys_sigsuspend_wrapper):
-	movea	PTO, sp, r7		// add user context as 2nd arg
-	mov	hilo(CSYM(sys_sigsuspend)), r18	// syscall function
-	jarl	save_extra_state_tramp, lp	// Save state and do it
-	br	restore_extra_regs_and_ret_from_trap
-END(sys_sigsuspend_wrapper)
-L_ENTRY(sys_rt_sigsuspend_wrapper):
-	movea	PTO, sp, r8		// add user context as 3rd arg
-	mov	hilo(CSYM(sys_rt_sigsuspend)), r18 // syscall function
-	jarl	save_extra_state_tramp, lp	   // Save state and do it
-	br	restore_extra_regs_and_ret_from_trap
-END(sys_rt_sigsuspend_wrapper)
-
-L_ENTRY(sys_sigreturn_wrapper):
-	movea	PTO, sp, r6		// add user context as 1st arg
-	mov	hilo(CSYM(sys_sigreturn)), r18	// syscall function
-	jarl	save_extra_state_tramp, lp	// Save state and do it
-	br	restore_extra_regs_and_ret_from_trap
-END(sys_sigreturn_wrapper)
-L_ENTRY(sys_rt_sigreturn_wrapper):
-	movea	PTO, sp, r6		// add user context as 1st arg
-	mov	hilo(CSYM(sys_rt_sigreturn)), r18// syscall function
-	jarl	save_extra_state_tramp, lp	 // Save state and do it
-	br	restore_extra_regs_and_ret_from_trap
-END(sys_rt_sigreturn_wrapper)
-
-
-/* Save any state not saved by SAVE_STATE(TRAP), and jump to r18.
-   Its main purpose is to share the rather lengthy code sequence that
-   SAVE_EXTRA_STATE expands into among the above wrapper functions.  */
-L_ENTRY(save_extra_state_tramp):
-	SAVE_EXTRA_STATE(TRAP)		// Save state not saved by entry.
-	jmp	[r18]			// Do the work the caller wants
-END(save_extra_state_tramp)
-
-
-/*
- * Hardware maskable interrupts.
- *
- * The stack-pointer (r3) should have already been saved to the memory
- * location ENTRY_SP (the reason for this is that the interrupt vectors may be
- * beyond a 22-bit signed offset jump from the actual interrupt handler, and
- * this allows them to save the stack-pointer and use that register to do an
- * indirect jump).
- */
-G_ENTRY(irq):
-	SAVE_STATE (IRQ, r0, ENTRY_SP)	// Save registers.
-
-	stsr	SR_ECR, r6		// Find out which interrupt it was.
-	movea	PTO, sp, r7		// User regs are arg2
-
-	// All v850 implementations I know about encode their interrupts as
-	// multiples of 0x10, starting at 0x80 (after NMIs and software
-	// interrupts).  Convert this number into a simple IRQ index for the
-	// rest of the kernel.  We also clear the upper 16 bits, which hold
-	// NMI info, and don't appear to be cleared when a NMI returns.
-	shl	16, r6			// clear upper 16 bits
-	shr	20, r6			// shift back, and remove lower nibble
-	add	-8, r6			// remove bias for irqs
-
-	// Call the high-level interrupt handling code.
-	jarl	CSYM(handle_irq), lp
-
-	RETURN(IRQ)
-END(irq)
-
-
-/*
- * Debug trap / illegal-instruction exception
- *
- * The stack-pointer (r3) should have already been saved to the memory
- * location ENTRY_SP (the reason for this is that the interrupt vectors may be
- * beyond a 22-bit signed offset jump from the actual interrupt handler, and
- * this allows them to save the stack-pointer and use that register to do an
- * indirect jump).
- */
-G_ENTRY(dbtrap):
-	SAVE_STATE (DBTRAP, r0, ENTRY_SP)// Save registers.
-
-	/* First see if we came from kernel mode; if so, the dbtrap
-	   instruction has a special meaning, to set the DIR (`debug
-	   information register') register.  This is because the DIR register
-	   can _only_ be manipulated/read while in `debug mode,' and debug
-	   mode is only active while we're inside the dbtrap handler.  The
-	   exact functionality is:  { DIR = (DIR | r6) & ~r7; return DIR; }. */
-	ld.b	PTO+PT_KERNEL_MODE[sp], r19
-	cmp	r19, r0
-	bz	1f			// ... zero, so not from kernel mode
-
-	stsr	SR_DIR, r10
-	or	r6, r10			// DIR | r6
-	not	r7, r7
-	and	r7, r10			// (DIR | r6) & ~r7
-	ldsr	r10, SR_DIR
-	stsr	SR_DIR, r10		// Confirm the value we set
-	st.w	r10, PTO+PT_GPR(10)[sp]	// return it
-	br	3f
-
-1:	ei				// Enable interrupts.
-
-	/* The default signal type we raise.  */
-	mov	SIGTRAP, r6
-
-	/* See if it's a single-step trap.  */
-	stsr	SR_DBPSW, r19
-	andi	0x0800, r19, r19
-	bnz	2f			// ... yes, keep the default signal
-
-	/* Look to see if the preceding instruction was a dbtrap or not,
-	   to decide which signal we should use.  */
-	stsr	SR_DBPC, r19		// PC following trapping insn
-	ld.hu	-2[r19], r19		// Halfword just before that PC
-	ori	0xf840, r0, r20		// DBTRAP insn
-	cmp	r19, r20		// Was this trap caused by DBTRAP?
-	cmov	ne, SIGILL, r6, r6	// Choose signal appropriately
-
-	/* Raise the desired signal.  */
-2:	mov	CURRENT_TASK, r7	// Arg 1: task
-	jarl	CSYM(send_sig), lp	// Arg 0 (the signal) is already in r6
-
-3:	RETURN(DBTRAP)
-END(dbtrap)
-
-
-/*
- * Hardware non-maskable interrupts.
- *
- * The stack-pointer (r3) should have already been saved to the memory
- * location ENTRY_SP (the reason for this is that the interrupt vectors may be
- * beyond a 22-bit signed offset jump from the actual interrupt handler, and
- * this allows them to save the stack-pointer and use that register to do an
- * indirect jump).
- */
-G_ENTRY(nmi):
-	SAVE_STATE (NMI, r0, NMI_ENTRY_SP); /* Save registers.  */
-
-	stsr	SR_ECR, r6;		/* Find out which nmi it was.  */
-	shr	20, r6;			/* Extract NMI code in bits 20-24. */
-	movea	PTO, sp, r7;		/* User regs are arg2.  */
-
-	/* Non-maskable interrupts always lie right after maskable interrupts.
-	   Call the generic IRQ handler, with two arguments, the IRQ number,
-	   and a pointer to the user registers, to handle the specifics.
-	   (we subtract one because the first NMI has code 1).  */
-	addi	FIRST_NMI - 1, r6, r6
-	jarl	CSYM(handle_irq), lp
-
-	RETURN(NMI)
-END(nmi)
-
-
-/*
- * Trap with no handler
- */
-L_ENTRY(bad_trap_wrapper):
-	mov	r19, r6			// Arg 0: trap number
-	movea	PTO, sp, r7		// Arg 1: user regs
-	jr	CSYM(bad_trap)		// tail call handler
-END(bad_trap_wrapper)
-
-
-/*
- * Invoke the scheduler, called from the trap/irq kernel exit path.
- *
- * This basically just calls `schedule', but also arranges for extra
- * registers to be saved for ptrace'd processes, so ptrace can modify them.
- */
-L_ENTRY(call_scheduler):
-	ld.w	TASK_PTRACE[CURRENT_TASK], r19	// See if task is ptrace'd
-	cmp	r19, r0
-	bnz	1f			// ... yes, do special stuff
-	jr	CSYM(schedule)		// ... no, just tail-call scheduler
-
-	// Save extra regs for ptrace'd task.  We want to save anything
-	// that would otherwise only be `implicitly' saved by the normal
-	// compiler calling-convention.
-1:	mov	sp, ep			// Setup EP for SAVE_CALL_SAVED_REGS
-	SAVE_CALL_SAVED_REGS		// Save call-saved registers to stack
-	mov	lp, r20			// Save LP in a callee-saved register
-
-	jarl	CSYM(schedule), lp	// Call scheduler
-
-	mov	r20, lp
-	mov	sp, ep			// We can't rely on EP after return
-	RESTORE_CALL_SAVED_REGS		// Restore (possibly modified) regs
-	jmp	[lp]			// Return to the return path
-END(call_scheduler)
-
-
-/*
- * This is an out-of-line handler for two special cases during the kernel
- * trap/irq exit sequence:
- *
- *  (1) If r18 is non-zero then a signal needs to be handled, which is
- *	done, and then the caller returned to.
- *
- *  (2) If r18 is zero then we're returning to a ptraced process, which
- *	has several special cases -- single-stepping and trap tracing, both
- *	of which require using the `dbret' instruction to exit the kernel
- *	instead of the normal `reti' (this is because the CPU does not
- *	correctly single-step after a reti).  In this case, of course, this
- *	handler never returns to the caller.
- *
- * In either case, all registers should have been saved to the current
- * state-save-frame on the stack, except for callee-saved registers.
- *
- * [These two different cases are combined merely to avoid bloating the
- * macro-inlined code, not because they really make much sense together!]
- */
-L_ENTRY(handle_signal_or_ptrace_return):
-	cmp	r18, r0			// See if handling a signal
-	bz	1f			// ... nope, go do ptrace return
-
-	// Handle a signal
-	mov	lp, r20			// Save link-pointer
-	mov	r10, r21		// Save return-values (for trap)
-	mov	r11, r22
-
-	movea	PTO, sp, r6		// Arg 1: struct pt_regs *regs
-	mov	r0, r7			// Arg 2: sigset_t *oldset
-	jarl	CSYM(do_signal), lp	// Handle the signal
-	di				// sig handling enables interrupts
-
-	mov	r20, lp			// Restore link-pointer
-	mov	r21, r10		// Restore return-values (for trap)
-	mov	r22, r11
-	ld.w	TASK_PTRACE[CURRENT_TASK], r19  // check ptrace flags too
-	cmp	r19, r0
-	bnz	1f			// ... some set, so look more
-2:	jmp	[lp]			// ... none set, so return normally
-
-	// ptrace return
-1:	ld.w	PTO+PT_PSW[sp], r19	// Look at user-processes's flags
-	andi	0x0800, r19, r19	// See if single-step flag is set
-	bz	2b			// ... nope, return normally
-
-	// Return as if from a dbtrap insn
-	st.b	r0, KM			// Now officially in user state.
-	POP_STATE(DBTRAP)		// Restore regs
-	st.w	sp, KSP			// Save the kernel stack pointer.
-	ld.w	PT_GPR(GPR_SP)-PT_SIZE[sp], sp // Restore user stack pointer.
-	DBTRAP_RET			// Return from the trap/interrupt.
-END(handle_signal_or_ptrace_return)
-
-
-/*
- * This is where we switch between two threads.  The arguments are:
- *   r6 -- pointer to the struct thread for the `current' process
- *   r7 -- pointer to the struct thread for the `new' process.
- * when this function returns, it will return to the new thread.
- */
-C_ENTRY(switch_thread):
-	// Return the previous task (r10 is not clobbered by restore below)
-	mov	CURRENT_TASK, r10
-	// First, push the current processor state on the stack
-	PUSH_STATE(SWITCH)
-	// Now save the location of the kernel stack pointer for this thread;
-	// since we've pushed all other state on the stack, this is enough to
-	// restore it all later.
-	st.w	sp, THREAD_KSP[r6]
-	// Now restore the stack pointer from the new process
-	ld.w	THREAD_KSP[r7], sp
-	// ... and restore all state from that
-	POP_STATE(SWITCH)
-	// Update the current task pointer
-	GET_CURRENT_TASK(CURRENT_TASK)
-	// Now return into the new thread
-	jmp	[lp]
-C_END(switch_thread)
-
-
-	.data
-
-	.align 4
-C_DATA(trap_table):
-	.long bad_trap_wrapper		// trap 0, doesn't use trap table.
-	.long syscall_long		// trap 1, `long' syscall.
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-C_END(trap_table)
-
-
-	.section .rodata
-
-	.align 4
-C_DATA(sys_call_table):
-	.long CSYM(sys_restart_syscall)	// 0
-	.long CSYM(sys_exit)
-	.long sys_fork_wrapper
-	.long CSYM(sys_read)
-	.long CSYM(sys_write)
-	.long CSYM(sys_open)		// 5
-	.long CSYM(sys_close)
-	.long CSYM(sys_waitpid)
-	.long CSYM(sys_creat)
-	.long CSYM(sys_link)
-	.long CSYM(sys_unlink)		// 10
-	.long sys_execve_wrapper
-	.long CSYM(sys_chdir)
-	.long CSYM(sys_time)
-	.long CSYM(sys_mknod)
-	.long CSYM(sys_chmod)		// 15
-	.long CSYM(sys_chown)
-	.long CSYM(sys_ni_syscall)	// was: break
-	.long CSYM(sys_ni_syscall)	// was: oldstat (aka stat)
-	.long CSYM(sys_lseek)
-	.long CSYM(sys_getpid)		// 20
-	.long CSYM(sys_mount)
-	.long CSYM(sys_oldumount)
-	.long CSYM(sys_setuid)
-	.long CSYM(sys_getuid)
-	.long CSYM(sys_stime)		// 25
-	.long CSYM(sys_ptrace)
-	.long CSYM(sys_alarm)
-	.long CSYM(sys_ni_syscall)	// was: oldfstat (aka fstat)
-	.long CSYM(sys_pause)
-	.long CSYM(sys_utime)		// 30
-	.long CSYM(sys_ni_syscall)	// was: stty
-	.long CSYM(sys_ni_syscall)	// was: gtty
-	.long CSYM(sys_access)
-	.long CSYM(sys_nice)
-	.long CSYM(sys_ni_syscall)	// 35, was: ftime
-	.long CSYM(sys_sync)
-	.long CSYM(sys_kill)
-	.long CSYM(sys_rename)
-	.long CSYM(sys_mkdir)
-	.long CSYM(sys_rmdir)		// 40
-	.long CSYM(sys_dup)
-	.long CSYM(sys_pipe)
-	.long CSYM(sys_times)
-	.long CSYM(sys_ni_syscall)	// was: prof
-	.long CSYM(sys_brk)		// 45
-	.long CSYM(sys_setgid)
-	.long CSYM(sys_getgid)
-	.long CSYM(sys_signal)
-	.long CSYM(sys_geteuid)
-	.long CSYM(sys_getegid)		// 50
-	.long CSYM(sys_acct)
-	.long CSYM(sys_umount)		// recycled never used phys()
-	.long CSYM(sys_ni_syscall)	// was: lock
-	.long CSYM(sys_ioctl)
-	.long CSYM(sys_fcntl)		// 55
-	.long CSYM(sys_ni_syscall)	// was: mpx
-	.long CSYM(sys_setpgid)
-	.long CSYM(sys_ni_syscall)	// was: ulimit
-	.long CSYM(sys_ni_syscall)
-	.long CSYM(sys_umask)		// 60
-	.long CSYM(sys_chroot)
-	.long CSYM(sys_ustat)
-	.long CSYM(sys_dup2)
-	.long CSYM(sys_getppid)
-	.long CSYM(sys_getpgrp)		// 65
-	.long CSYM(sys_setsid)
-	.long CSYM(sys_sigaction)
-	.long CSYM(sys_sgetmask)
-	.long CSYM(sys_ssetmask)
-	.long CSYM(sys_setreuid)	// 70
-	.long CSYM(sys_setregid)
-	.long sys_sigsuspend_wrapper
-	.long CSYM(sys_sigpending)
-	.long CSYM(sys_sethostname)
-	.long CSYM(sys_setrlimit)	// 75
-	.long CSYM(sys_getrlimit)
-	.long CSYM(sys_getrusage)
-	.long CSYM(sys_gettimeofday)
-	.long CSYM(sys_settimeofday)
-	.long CSYM(sys_getgroups)	// 80
-	.long CSYM(sys_setgroups)
-	.long CSYM(sys_select)
-	.long CSYM(sys_symlink)
-	.long CSYM(sys_ni_syscall)	// was: oldlstat (aka lstat)
-	.long CSYM(sys_readlink)	// 85
-	.long CSYM(sys_uselib)
-	.long CSYM(sys_swapon)
-	.long CSYM(sys_reboot)
-	.long CSYM(old_readdir)
-	.long CSYM(sys_mmap)		// 90
-	.long CSYM(sys_munmap)
-	.long CSYM(sys_truncate)
-	.long CSYM(sys_ftruncate)
-	.long CSYM(sys_fchmod)
-	.long CSYM(sys_fchown)		// 95
-	.long CSYM(sys_getpriority)
-	.long CSYM(sys_setpriority)
-	.long CSYM(sys_ni_syscall)	// was: profil
-	.long CSYM(sys_statfs)
-	.long CSYM(sys_fstatfs)		// 100
-	.long CSYM(sys_ni_syscall)	// i386: ioperm
-	.long CSYM(sys_socketcall)
-	.long CSYM(sys_syslog)
-	.long CSYM(sys_setitimer)
-	.long CSYM(sys_getitimer)	// 105
-	.long CSYM(sys_newstat)
-	.long CSYM(sys_newlstat)
-	.long CSYM(sys_newfstat)
-	.long CSYM(sys_ni_syscall)	// was: olduname (aka uname)
-	.long CSYM(sys_ni_syscall)	// 110, i386: iopl
-	.long CSYM(sys_vhangup)
-	.long CSYM(sys_ni_syscall)	// was: idle
-	.long CSYM(sys_ni_syscall)	// i386: vm86old
-	.long CSYM(sys_wait4)
-	.long CSYM(sys_swapoff)		// 115
-	.long CSYM(sys_sysinfo)
-	.long CSYM(sys_ipc)
-	.long CSYM(sys_fsync)
-	.long sys_sigreturn_wrapper
-	.long sys_clone_wrapper		// 120
-	.long CSYM(sys_setdomainname)
-	.long CSYM(sys_newuname)
-	.long CSYM(sys_ni_syscall)	// i386: modify_ldt, m68k: cacheflush
-	.long CSYM(sys_adjtimex)
-	.long CSYM(sys_ni_syscall)	// 125 - sys_mprotect
-	.long CSYM(sys_sigprocmask)
-	.long CSYM(sys_ni_syscall)	// sys_create_module
-	.long CSYM(sys_init_module)
-	.long CSYM(sys_delete_module)
-	.long CSYM(sys_ni_syscall)	// 130 - sys_get_kernel_syms
-	.long CSYM(sys_quotactl)
-	.long CSYM(sys_getpgid)
-	.long CSYM(sys_fchdir)
-	.long CSYM(sys_bdflush)
-	.long CSYM(sys_sysfs)		// 135
-	.long CSYM(sys_personality)
-	.long CSYM(sys_ni_syscall)	// for afs_syscall
-	.long CSYM(sys_setfsuid)
-	.long CSYM(sys_setfsgid)
-	.long CSYM(sys_llseek)		// 140
-	.long CSYM(sys_getdents)
-	.long CSYM(sys_select)		// for backward compat; remove someday
-	.long CSYM(sys_flock)
-	.long CSYM(sys_ni_syscall)	// sys_msync
-	.long CSYM(sys_readv)		// 145
-	.long CSYM(sys_writev)
-	.long CSYM(sys_getsid)
-	.long CSYM(sys_fdatasync)
-	.long CSYM(sys_sysctl)
-	.long CSYM(sys_ni_syscall)	// 150 - sys_mlock
-	.long CSYM(sys_ni_syscall)	// sys_munlock
-	.long CSYM(sys_ni_syscall)	// sys_mlockall
-	.long CSYM(sys_ni_syscall)	// sys_munlockall
-	.long CSYM(sys_sched_setparam)
-	.long CSYM(sys_sched_getparam)	// 155
-	.long CSYM(sys_sched_setscheduler)
-	.long CSYM(sys_sched_getscheduler)
-	.long CSYM(sys_sched_yield)
-	.long CSYM(sys_sched_get_priority_max)
-	.long CSYM(sys_sched_get_priority_min)	// 160
-	.long CSYM(sys_sched_rr_get_interval)
-	.long CSYM(sys_nanosleep)
-	.long CSYM(sys_ni_syscall)	// sys_mremap
-	.long CSYM(sys_setresuid)
-	.long CSYM(sys_getresuid)	// 165
-	.long CSYM(sys_ni_syscall)	// for vm86
-	.long CSYM(sys_ni_syscall)	// sys_query_module
-	.long CSYM(sys_poll)
-	.long CSYM(sys_nfsservctl)
-	.long CSYM(sys_setresgid)	// 170
-	.long CSYM(sys_getresgid)
-	.long CSYM(sys_prctl)
-	.long sys_rt_sigreturn_wrapper
-	.long CSYM(sys_rt_sigaction)
-	.long CSYM(sys_rt_sigprocmask)	// 175
-	.long CSYM(sys_rt_sigpending)
-	.long CSYM(sys_rt_sigtimedwait)
-	.long CSYM(sys_rt_sigqueueinfo)
-	.long sys_rt_sigsuspend_wrapper
-	.long CSYM(sys_pread64)		// 180
-	.long CSYM(sys_pwrite64)
-	.long CSYM(sys_lchown)
-	.long CSYM(sys_getcwd)
-	.long CSYM(sys_capget)
-	.long CSYM(sys_capset)		// 185
-	.long CSYM(sys_sigaltstack)
-	.long CSYM(sys_sendfile)
-	.long CSYM(sys_ni_syscall)	// streams1
-	.long CSYM(sys_ni_syscall)	// streams2
-	.long sys_vfork_wrapper		// 190
-	.long CSYM(sys_ni_syscall)
-	.long CSYM(sys_mmap2)
-	.long CSYM(sys_truncate64)
-	.long CSYM(sys_ftruncate64)
-	.long CSYM(sys_stat64)		// 195
-	.long CSYM(sys_lstat64)
-	.long CSYM(sys_fstat64)
-	.long CSYM(sys_fcntl64)
-	.long CSYM(sys_getdents64)
-	.long CSYM(sys_pivot_root)	// 200
-	.long CSYM(sys_gettid)
-	.long CSYM(sys_tkill)
-sys_call_table_end:
-C_END(sys_call_table)
diff --git a/arch/v850/kernel/fpga85e2c.c b/arch/v850/kernel/fpga85e2c.c
deleted file mode 100644
index ab9cf16a85c..00000000000
--- a/arch/v850/kernel/fpga85e2c.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * arch/v850/kernel/fpga85e2c.c -- Machine-dependent defs for
- *	FPGA implementation of V850E2/NA85E2C
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/bitops.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-
-#include "mach.h"
-
-extern void memcons_setup (void);
-
-
-#define REG_DUMP_ADDR		0x220000
-
-
-static struct irqaction reg_snap_action; /* fwd decl; defined static below */
-
-/* Early board setup: bus sizes, panic timeout, interrupt-vector copy.  */
-void __init mach_early_init (void)
-{
-	int i;
-	const u32 *src;
-	register u32 *dst asm ("ep");
-	extern u32 _intv_end, _intv_load_start;
-
-	/* Set bus sizes: CS0 32-bit, CS1 16-bit, CS7 8-bit,
-	   everything else 32-bit.  */
-	V850E2_BSC = 0x2AA6;
-	for (i = 2; i <= 6; i++)
-		CSDEV(i) = 0;	/* 32 bit */
-
-	/* Ensure that the simulator halts on a panic, instead of going
-	   into an infinite loop inside the panic function.  */
-	panic_timeout = -1;
-
-	/* Move the interrupt vectors into their real location.  Note that
-	   any relocations there are relative to the real location, so we
-	   don't have to fix anything up.  We use a loop instead of calling
-	   memcpy to keep this a leaf function (to avoid a function
-	   prologue being generated).  */
-	dst = (u32 *)0x10;	/* &_intv_start + 0x10.  */
-	src = &_intv_load_start;
-	do {
-		u32 t0 = src[0], t1 = src[1], t2 = src[2], t3 = src[3];
-		u32 t4 = src[4], t5 = src[5], t6 = src[6], t7 = src[7];
-		dst[0] = t0; dst[1] = t1; dst[2] = t2; dst[3] = t3;
-		dst[4] = t4; dst[5] = t5; dst[6] = t6; dst[7] = t7;
-		dst += 8;
-		src += 8;
-	} while (dst < &_intv_end);
-}
-
-void __init mach_setup (char **cmdline)
-{
-	memcons_setup ();
-
-	/* Set up NMI0 to copy the registers to a known memory location.
-	   The FPGA board has a button that produces NMI0 when pressed, so
-	   this allows us to push the button, and then look at memory to see
-	   what's in the registers (there's no other way to easily do so).
-	   We have to use `setup_irq' instead of `request_irq' because it's
-	   still too early to do memory allocation.  */
-	setup_irq (IRQ_NMI (0), &reg_snap_action);
-}
-
-void mach_get_physical_ram (unsigned long *ram_start, unsigned long *ram_len)
-{
-	*ram_start = ERAM_ADDR;
-	*ram_len = ERAM_SIZE;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Set up the timer interrupt.  The FPGA peripheral control
-	   registers _only_ work with single-bit writes (set1/clr1)!  */
-	__clear_bit (RPU_GTMC_CE_BIT, &RPU_GTMC);
-	__clear_bit (RPU_GTMC_CLK_BIT, &RPU_GTMC);
-	__set_bit (RPU_GTMC_CE_BIT, &RPU_GTMC);
-
-	/* We use the first RPU interrupt, which occurs every 8.192ms.  */
-	setup_irq (IRQ_RPU (0), timer_action);
-}
-
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-void machine_halt (void) __attribute__ ((noreturn));
-void machine_halt (void)
-{
-	for (;;) {
-		DWC(0) = 0x7777;
-		DWC(1) = 0x7777;
-		ASC = 0xffff;
-		FLGREG(0) = 1;	/* Halt immediately.  */
-		asm ("di; halt; nop; nop; nop; nop; nop");
-	}
-}
-
-void machine_restart (char *__unused)
-{
-	machine_halt ();
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
-
-/* Interrupts */
-
-struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, 		NUM_MACH_IRQS,	1, 7 },
-	{ "RPU", IRQ_RPU(0),	IRQ_RPU_NUM,	1, 6 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-/* Initialize interrupts.  */
-void __init mach_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-
-/* An interrupt handler that copies the registers to a known memory location,
-   for debugging purposes.  */
-
-static void make_reg_snap (int irq, void *dummy, struct pt_regs *regs)
-{
-	(*(unsigned *)REG_DUMP_ADDR)++;
-	(*(struct pt_regs *)(REG_DUMP_ADDR + sizeof (unsigned))) = *regs;
-}
-
-static int reg_snap_dev_id;
-static struct irqaction reg_snap_action = {
-	.handler = make_reg_snap,
-	.mask = CPU_MASK_NONE,
-	.name = "reg_snap",
-	.dev_id = &reg_snap_dev_id,
-};
diff --git a/arch/v850/kernel/fpga85e2c.ld b/arch/v850/kernel/fpga85e2c.ld
deleted file mode 100644
index b5d4578ae41..00000000000
--- a/arch/v850/kernel/fpga85e2c.ld
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Linker script for the FPGA implementation of the V850E2 NA85E2C cpu core
-   (CONFIG_V850E2_FPGA85E2C).  */
-
-MEMORY {
-	/* Reset vector.  */
-	RESET	 : ORIGIN = 0, LENGTH = 0x10
-	/* Interrupt vectors.  */
-	INTV      : ORIGIN = 0x10, LENGTH = 0x470
-	/* The `window' in RAM where we're allowed to load stuff.  */
-	RAM_LOW   : ORIGIN = 0x480, LENGTH = 0x0005FB80
-	/* Some more ram above the window where we can put bss &c.  */
-	RAM_HIGH  : ORIGIN = 0x00060000, LENGTH = 0x000A0000
-	/* This is the area visible from the outside world (we can use
-	   this only for uninitialized data).  */
-	VISIBLE   : ORIGIN = 0x00200000, LENGTH = 0x00060000
-}
-
-SECTIONS {
-	.reset : {
-		__kram_start = . ;
-		__intv_start = . ;
-	        	*(.intv.reset)	/* Reset vector */
-	} > RESET
-
-	.ram_low : {
-		__r0_ram = . ;		/* Must be near address 0.  */
-		. = . + 32 ;
-
-		TEXT_CONTENTS
-		DATA_CONTENTS
-		ROOT_FS_CONTENTS
-		RAMK_INIT_CONTENTS_NO_END
-		INITRAMFS_CONTENTS
-	} > RAM_LOW
-
-        /* Where the interrupt vectors are initially loaded.  */
-	__intv_load_start = . ;
-
-	.intv : {
-			*(.intv.common)	/* Vectors common to all v850e proc. */
-			*(.intv.mach)	/* Machine-specific int. vectors.  */
-		__intv_end = . ;
-	} > INTV  AT> RAM_LOW
-
-	.ram_high : {
-		/* This is here so that when we free init memory the
-		   load-time copy of the interrupt vectors and any empty
-		   space at the end of the `RAM_LOW' area is freed too.  */
-		. = ALIGN (4096);
-		__init_end = . ;
-
-		BSS_CONTENTS
-		__kram_end = . ;
-		BOOTMAP_CONTENTS
-	} > RAM_HIGH
-
-	.visible : {
-		_memcons_output = . ;
-		. = . + 0x8000 ;
-		_memcons_output_end = . ;
-	} > VISIBLE
-}
diff --git a/arch/v850/kernel/gbus_int.c b/arch/v850/kernel/gbus_int.c
deleted file mode 100644
index b2bcc251f65..00000000000
--- a/arch/v850/kernel/gbus_int.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * arch/v850/kernel/gbus_int.c -- Midas labs GBUS interrupt support
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-
-#include <asm/machdep.h>
-
-
-/* The number of shared GINT interrupts. */
-#define NUM_GINTS   	4
-
-/* For each GINT interrupt, how many GBUS interrupts are using it.  */
-static unsigned gint_num_active_irqs[NUM_GINTS] = { 0 };
-
-/* A table of GINTn interrupts we actually use.
-   Note that we don't use GINT0 because all the boards we support treat it
-   specially.  */
-struct used_gint {
-	unsigned gint;
-	unsigned priority;
-} used_gint[] = {
-	{ 1, GBUS_INT_PRIORITY_HIGH },
-	{ 3, GBUS_INT_PRIORITY_LOW }
-};
-#define NUM_USED_GINTS ARRAY_SIZE(used_gint)
-
-/* A table of which GINT is used by each GBUS interrupt (they are
-   assigned based on priority).  */
-static unsigned char gbus_int_gint[IRQ_GBUS_INT_NUM];
-
-
-/* Interrupt enabling/disabling.  */
-
-/* Enable interrupt handling for interrupt IRQ.  */
-void gbus_int_enable_irq (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-	GBUS_INT_ENABLE (GBUS_INT_IRQ_WORD(irq), gint)
-		|= GBUS_INT_IRQ_MASK (irq);
-}
-
-/* Disable interrupt handling for interrupt IRQ.  Note that any
-   interrupts received while disabled will be delivered once the
-   interrupt is enabled again, unless they are explicitly cleared using
-   `gbus_int_clear_pending_irq'.  */
-void gbus_int_disable_irq (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-	GBUS_INT_ENABLE (GBUS_INT_IRQ_WORD(irq), gint)
-		&= ~GBUS_INT_IRQ_MASK (irq);
-}
-
-/* Return true if interrupt handling for interrupt IRQ is enabled.  */
-int gbus_int_irq_enabled (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-	return (GBUS_INT_ENABLE (GBUS_INT_IRQ_WORD(irq), gint)
-		& GBUS_INT_IRQ_MASK(irq));
-}
-
-/* Disable all GBUS irqs (clears every enable word for every GINT).  */
-void gbus_int_disable_irqs (void)
-{
-	unsigned w, n;
-	for (w = 0; w < GBUS_INT_NUM_WORDS; w++)
-		for (n = 0; n < IRQ_GINT_NUM; n++)
-			GBUS_INT_ENABLE (w, n) = 0;
-}
-
-/* Clear any pending interrupts for IRQ.  */
-void gbus_int_clear_pending_irq (unsigned irq)
-{
-	GBUS_INT_CLEAR (GBUS_INT_IRQ_WORD(irq)) = GBUS_INT_IRQ_MASK (irq);
-}
-
-/* Return true if interrupt IRQ is pending (but disabled).  */
-int gbus_int_irq_pending (unsigned irq)
-{
-	return (GBUS_INT_STATUS (GBUS_INT_IRQ_WORD(irq))
-		& GBUS_INT_IRQ_MASK(irq));
-}
-
-
-/* Delegating interrupts.  */
-
-/* Handle a shared GINT interrupt by passing to the appropriate GBUS
-   interrupt handler.  */
-static irqreturn_t gbus_int_handle_irq (int irq, void *dev_id,
-					struct pt_regs *regs)
-{
-	unsigned w;
-	irqreturn_t rval = IRQ_NONE;
-	unsigned gint = irq - IRQ_GINT (0);
-
-	for (w = 0; w < GBUS_INT_NUM_WORDS; w++) {
-		unsigned status = GBUS_INT_STATUS (w);
-		unsigned enable = GBUS_INT_ENABLE (w, gint);
-
-		/* Only pay attention to enabled interrupts.  */
-		status &= enable;
-		if (status) {
-			irq = IRQ_GBUS_INT (w * GBUS_INT_BITS_PER_WORD);
-			do {
-				/* There's an active interrupt in word
-				   W, find out which one, and call its
-				   handler.  */
-
-				while (! (status & 0x1)) {
-					irq++;
-					status >>= 1;
-				}
-				status &= ~0x1;
-
-				/* Recursively call handle_irq to handle it. */
-				handle_irq (irq, regs);
-				rval = IRQ_HANDLED;
-			} while (status);
-		}
-	}
-
-	/* Toggle the `all enable' bit back and forth, which should cause
-	   another edge transition if there are any other interrupts
-	   still pending, and so result in another CPU interrupt.  */
-	GBUS_INT_ENABLE (0, gint) &= ~0x1;
-	GBUS_INT_ENABLE (0, gint) |=  0x1;
-
-	return rval;
-}
-
-
-/* Initialize GBUS interrupt sources.  */
-
-static void irq_nop (unsigned irq) { }
-
-static unsigned gbus_int_startup_irq (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-
-	if (gint_num_active_irqs[gint] == 0) {
-		/* First enable the CPU interrupt.  */
-		int rval =
-			request_irq (IRQ_GINT(gint), gbus_int_handle_irq,
-				     IRQF_DISABLED,
-				     "gbus_int_handler",
-				     &gint_num_active_irqs[gint]);
-		if (rval != 0)
-			return rval;
-	}
-
-	gint_num_active_irqs[gint]++;
-
-	gbus_int_clear_pending_irq (irq);
-	gbus_int_enable_irq (irq);
-
-	return 0;
-}
-
-static void gbus_int_shutdown_irq (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-
-	gbus_int_disable_irq (irq);
-
-	if (--gint_num_active_irqs[gint] == 0)
-		/* Disable the CPU interrupt.  */
-		free_irq (IRQ_GINT(gint), &gint_num_active_irqs[gint]);
-}
-
-/* Initialize HW_IRQ_TYPES for the GBUS irqs described in array INITS
-   (which is terminated by an entry with the name field == 0).  */
-void __init gbus_int_init_irq_types (struct gbus_int_irq_init *inits,
-				     struct hw_interrupt_type *hw_irq_types)
-{
-	struct gbus_int_irq_init *init;
-	for (init = inits; init->name; init++) {
-		unsigned i, j, gint;
-		struct hw_interrupt_type *hwit = hw_irq_types++;
-
-		hwit->typename = init->name;
-
-		hwit->startup  = gbus_int_startup_irq;
-		hwit->shutdown = gbus_int_shutdown_irq;
-		hwit->enable   = gbus_int_enable_irq;
-		hwit->disable  = gbus_int_disable_irq;
-		hwit->ack      = irq_nop;
-		hwit->end      = irq_nop;
-
-		/* Initialize kernel IRQ infrastructure for this interrupt.  */
-		init_irq_handlers(init->base, init->num, init->interval, hwit);
-
-		/* Pick the GINT for this priority.  The choice is the same
-		   for every irq in INIT, so look it up once: wherever we
-		   stop looking is one past the GINT we want.  */
-		for (j = 0; j < NUM_USED_GINTS; j++)
-			if (used_gint[j].priority > init->priority)
-				break;
-		gint = used_gint[j > 0 ? j - 1 : 0].gint;
-
-		/* Record that GINT for each irq described by INIT.  */
-		for (i = 0; i < init->num; i++)
-			gbus_int_gint[init->base + i * init->interval
-				      - GBUS_INT_BASE_IRQ] = gint;
-	}
-}
-
-
-/* Initialize IRQS.  */
-
-/* Chip interrupts (GINTn) shared among GBUS interrupts.  */
-static struct hw_interrupt_type gint_hw_itypes[NUM_USED_GINTS];
-
-
-/* GBUS interrupts themselves.  */
-
-struct gbus_int_irq_init gbus_irq_inits[] __initdata = {
-	/* First set defaults.  */
-	{ "GBUS_INT", IRQ_GBUS_INT(0), IRQ_GBUS_INT_NUM, 1, 6},
-	{ 0 }
-};
-#define NUM_GBUS_IRQ_INITS (ARRAY_SIZE(gbus_irq_inits) - 1)
-
-static struct hw_interrupt_type gbus_hw_itypes[NUM_GBUS_IRQ_INITS];
-
-
-/* Initialize GBUS interrupts.  */
-void __init gbus_int_init_irqs (void)
-{
-	unsigned i;
-
-	/* First initialize the shared gint interrupts.  */
-	for (i = 0; i < NUM_USED_GINTS; i++) {
-		unsigned gint = used_gint[i].gint;
-		struct v850e_intc_irq_init gint_irq_init[2];
-
-		/* We initialize one GINT interrupt at a time.  */
-		gint_irq_init[0].name = "GINT";
-		gint_irq_init[0].base = IRQ_GINT (gint);
-		gint_irq_init[0].num = 1;
-		gint_irq_init[0].interval = 1;
-		gint_irq_init[0].priority = used_gint[i].priority;
-
-		gint_irq_init[1].name = 0; /* Terminate the vector.  */
-
-		v850e_intc_init_irq_types (gint_irq_init, gint_hw_itypes);
-	}
-
-	/* Then the GBUS interrupts.  */
-	gbus_int_disable_irqs ();
-	gbus_int_init_irq_types (gbus_irq_inits, gbus_hw_itypes);
-	/* Turn on the `all enable' bits, which are ANDed with
-	   individual interrupt enable bits; we only want to bother with
-	   the latter.  They are the first bit in the first word of each
-	   interrupt-enable area.  */
-	for (i = 0; i < NUM_USED_GINTS; i++)
-		GBUS_INT_ENABLE (0, used_gint[i].gint) = 0x1;
-}
diff --git a/arch/v850/kernel/head.S b/arch/v850/kernel/head.S
deleted file mode 100644
index c490b937ef1..00000000000
--- a/arch/v850/kernel/head.S
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * arch/v850/kernel/head.S -- Lowest-level startup code
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/clinkage.h>
-#include <asm/current.h>
-#include <asm/entry.h>
-#include <asm/thread_info.h>
-#include <asm/irq.h>
-
-
-/* Make a slightly more convenient alias for C_SYMBOL_NAME.  */
-#define CSYM	C_SYMBOL_NAME
-
-
-	.text
-
-	// Define `mach_early_init' as a weak symbol
-	.global	CSYM(mach_early_init)
-	.weak	CSYM(mach_early_init)
-
-C_ENTRY(start):
-	// Make sure interrupts are turned off, just in case
-	di
-
-#ifdef CONFIG_RESET_GUARD
-	// See if we got here via an unexpected reset
-	ld.w	RESET_GUARD, r19	// Check current value of reset guard
-	mov	RESET_GUARD_ACTIVE, r20
-	cmp	r19, r20
-	bne	1f			// Guard was not active
-
-	// If we get here, the reset guard was active.  Load up some
-	// interesting values as arguments, and jump to the handler.
-	st.w	r0, RESET_GUARD		// Allow further resets to succeed
-	mov	lp, r6			// Arg 0: return address
-	ld.b	KM, r7			// Arg 1: kernel mode
-	mov	sp, r9			// Arg 3: stack pointer
-	ld.w	KSP, r19		// maybe switch to kernel stack
-	cmp	r7, r0			// see if already in kernel mode
-	cmov	z, r19, sp, sp		//  and switch to kernel stack if not
-	GET_CURRENT_TASK(r8)		// Arg 2: task pointer
-	jr	CSYM(unexpected_reset)
-
-1:	st.w	r20, RESET_GUARD	// Turn on reset guard
-#endif /* CONFIG_RESET_GUARD */
-
-	// Setup a temporary stack for doing pre-initialization function calls.
-	// 
-	// We can't use the initial kernel stack, because (1) it may be
-	// located in memory we're not allowed to touch, and (2) since
-	// it's in the data segment, calling memcpy to initialize that
-	// area from ROM will overwrite memcpy's return address.
-	mov	hilo(CSYM(_init_stack_end) - 4), sp
-
-	// See if there's a platform-specific early-initialization routine
-	// defined; it's a weak symbol, so it will have an address of zero if
-	// there's not.
-	mov	hilo(CSYM(mach_early_init)), r6
-	cmp	r6, r0
-	bz	3f
-
-	// There is one, so call it.  If this function is written in C, it
-	// should be very careful -- the stack pointer is valid, but very
-	// little else is (e.g., bss is not zeroed yet, and initialized data
-	// hasn't been).
-	jarl	2f, lp			// first figure out return address
-2:	add	3f - ., lp
-	jmp	[r6]			// do call
-3:
-
-#ifdef CONFIG_ROM_KERNEL
-	// Copy the data area from ROM to RAM
-	mov	hilo(CSYM(_rom_copy_dst_start)), r6
-	mov	hilo(CSYM(_rom_copy_src_start)), r7
-	mov	hilo(CSYM(_rom_copy_dst_end)), r8
-	sub	r6, r8
-	jarl	CSYM(memcpy), lp
-#endif
-
-	// Load the initial thread's stack, and current task pointer (in r16)
-	mov	hilo(CSYM(init_thread_union)), r19
-	movea	THREAD_SIZE, r19, sp
-	ld.w	TI_TASK[r19], CURRENT_TASK
-
-#ifdef CONFIG_TIME_BOOTUP
-	/* This stuff must come after mach_early_init, because interrupts may
-	   not work until after its been called.  */
-	jarl	CSYM(highres_timer_reset), lp
-	jarl	CSYM(highres_timer_start), lp
-#endif
-
-	// Kernel stack pointer save location
-	st.w	sp, KSP
-
-	// Assert that we're in `kernel mode'
-	mov	1, r19
-	st.w	r19, KM
-
-#ifdef CONFIG_ZERO_BSS
-	// Zero bss area, since we can't rely upon any loader to do so
-	mov	hilo(CSYM(_sbss)), r6
-	mov	r0, r7
-	mov	hilo(CSYM(_ebss)), r8
-	sub	r6, r8
-	jarl	CSYM(memset), lp
-#endif
-
-	// What happens if the main kernel function returns (it shouldn't)
-	mov	hilo(CSYM(machine_halt)), lp
-
-	// Start the linux kernel.  We use an indirect jump to get extra
-	// range, because on some platforms this initial startup code
-	// (and the associated platform-specific code in mach_early_init)
-	// are located far away from the main kernel, e.g. so that they
-	// can initialize RAM first and copy the kernel or something.
-	mov	hilo(CSYM(start_kernel)), r12
-	jmp	[r12]
-C_END(start)
diff --git a/arch/v850/kernel/highres_timer.c b/arch/v850/kernel/highres_timer.c
deleted file mode 100644
index b16ad1eaf96..00000000000
--- a/arch/v850/kernel/highres_timer.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * arch/v850/kernel/highres_timer.c -- High resolution timing routines
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/system.h>
-#include <asm/v850e_timer_d.h>
-#include <asm/highres_timer.h>
-
-#define HIGHRES_TIMER_USEC_SHIFT   12
-
-/* Pre-calculated constant used for converting ticks to real time
-   units.  We initialize it to prevent it being put into BSS.  */
-static u32 highres_timer_usec_prescale = 1;
-
-void highres_timer_slow_tick_irq (void) __attribute__ ((noreturn));
-void highres_timer_slow_tick_irq (void)
-{
-	/* This is an interrupt handler, so it must be very careful to
-	   not to trash any registers.  At this point, the stack-pointer
-	   (r3) has been saved in the chip ram location ENTRY_SP by the
-	   interrupt vector, so we can use it as a scratch register; we
-	   must also restore it before returning.  */
-	asm ("ld.w	%0[r0], sp;"
-	     "add	1, sp;"
-	     "st.w	sp, %0[r0];"
-	     "ld.w	%1[r0], sp;" /* restore pre-irq stack-pointer */
-	     "reti"
-	     ::
-	      "i" (HIGHRES_TIMER_SLOW_TICKS_ADDR),
-	      "i" (ENTRY_SP_ADDR)
-	     : "memory");
-}
-
-void highres_timer_reset (void)
-{
-	V850E_TIMER_D_TMD (HIGHRES_TIMER_TIMER_D_UNIT) = 0;
-	HIGHRES_TIMER_SLOW_TICKS = 0;
-}
-
-void highres_timer_start (void)
-{
-	u32 fast_tick_rate;
-
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (HIGHRES_TIMER_TIMER_D_UNIT,
-				 HIGHRES_TIMER_SLOW_TICK_RATE);
-
-	fast_tick_rate =
-		(V850E_TIMER_D_BASE_FREQ
-		 >> V850E_TIMER_D_DIVLOG2 (HIGHRES_TIMER_TIMER_D_UNIT));
-
-	/* The obvious way of calculating microseconds from fast ticks
-	   is to do:
-
-	     usec = fast_ticks * 10^6 / fast_tick_rate
-
-	   However, divisions are much slower than multiplications, and
-	   the above calculation can overflow, so we do this instead:
-
-	     usec = fast_ticks * (10^6 * 2^12 / fast_tick_rate) / 2^12
-
-           since we can pre-calculate (10^6 * (2^12 / fast_tick_rate))
-	   and use a shift for dividing by 2^12, this avoids division,
-	   and is almost as accurate (it differs by about 2 microseconds
-	   at the extreme value of the fast-tick counter's ranger).  */
-	highres_timer_usec_prescale = ((1000000 << HIGHRES_TIMER_USEC_SHIFT)
-				       / fast_tick_rate);
-
-	/* Enable the interrupt (which is hardwired to this use), and
-	   give it the highest priority.  */
-	V850E_INTC_IC (IRQ_INTCMD (HIGHRES_TIMER_TIMER_D_UNIT)) = 0;
-}
-
-void highres_timer_stop (void)
-{
-	/* Stop the timer.  */
-	V850E_TIMER_D_TMCD (HIGHRES_TIMER_TIMER_D_UNIT) =
-		V850E_TIMER_D_TMCD_CAE;
-	/* Disable its interrupt, just in case.  */
-	v850e_intc_disable_irq (IRQ_INTCMD (HIGHRES_TIMER_TIMER_D_UNIT));
-}
-
-inline void highres_timer_read_ticks (u32 *slow_ticks, u32 *fast_ticks)
-{
-	int flags;
-	u32 fast_ticks_1, fast_ticks_2, _slow_ticks;
-
-	local_irq_save (flags);
-	fast_ticks_1 = V850E_TIMER_D_TMD (HIGHRES_TIMER_TIMER_D_UNIT);
-	_slow_ticks = HIGHRES_TIMER_SLOW_TICKS;
-	fast_ticks_2 = V850E_TIMER_D_TMD (HIGHRES_TIMER_TIMER_D_UNIT);
-	local_irq_restore (flags);
-
-	if (fast_ticks_2 < fast_ticks_1)
-		_slow_ticks++;
-
-	*slow_ticks = _slow_ticks;
-	*fast_ticks = fast_ticks_2;
-}
-
-inline void highres_timer_ticks_to_timeval (u32 slow_ticks, u32 fast_ticks,
-					    struct timeval *tv)
-{
-	unsigned long sec, sec_rem, usec;
-
-	usec = ((fast_ticks * highres_timer_usec_prescale)
-		>> HIGHRES_TIMER_USEC_SHIFT);
-
-	sec = slow_ticks / HIGHRES_TIMER_SLOW_TICK_RATE;
-	sec_rem = slow_ticks % HIGHRES_TIMER_SLOW_TICK_RATE;
-
-	usec += sec_rem * (1000000 / HIGHRES_TIMER_SLOW_TICK_RATE);
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-void highres_timer_read (struct timeval *tv)
-{
-	u32 fast_ticks, slow_ticks;
-	highres_timer_read_ticks (&slow_ticks, &fast_ticks);
-	highres_timer_ticks_to_timeval (slow_ticks, fast_ticks, tv);
-}
diff --git a/arch/v850/kernel/init_task.c b/arch/v850/kernel/init_task.c
deleted file mode 100644
index 44b274dff33..00000000000
--- a/arch/v850/kernel/init_task.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * arch/v850/kernel/init_task.c -- Initial task/thread structures
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct signal_struct init_signals = INIT_SIGNALS (init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM (init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK (init_task);
-
-EXPORT_SYMBOL(init_task);
-
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry.
- */
-union thread_union init_thread_union 
-	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_THREAD_INFO(init_task) };
diff --git a/arch/v850/kernel/intv.S b/arch/v850/kernel/intv.S
deleted file mode 100644
index 671e4c6150d..00000000000
--- a/arch/v850/kernel/intv.S
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * arch/v850/kernel/intv.S -- Interrupt vectors
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/clinkage.h>
-#include <asm/irq.h>
-#include <asm/machdep.h>
-#include <asm/entry.h>
-
-#ifdef CONFIG_V850E_HIGHRES_TIMER
-#include <asm/highres_timer.h>
-#endif
-
-/* Jump to an interrupt/trap handler.  These handlers (defined in entry.S)
-   expect the stack-pointer to be saved in ENTRY_SP, so we use sp to do an
-   indirect jump (which avoids problems when the handler is more than a signed
-   22-bit offset away).  */
-#define JUMP_TO_HANDLER(name, sp_save_loc)				      \
-	st.w	sp, sp_save_loc;					      \
-	mov	hilo(name), sp;						      \
-	jmp	[sp]
-
-
-	/* Reset vector.  */
-	.section	.intv.reset, "ax"
-	.org	0x0
-	mov	hilo(C_SYMBOL_NAME(start)), r1;
-	jmp	[r1]
-
-
-	/* Generic interrupt vectors.  */
-	.section	.intv.common, "ax"
-	.balign	0x10
-	JUMP_TO_HANDLER (nmi, NMI_ENTRY_SP)	// 0x10 - NMI0
-	.balign	0x10
-	JUMP_TO_HANDLER (nmi, NMI_ENTRY_SP)	// 0x20 - NMI1
-	.balign	0x10
-	JUMP_TO_HANDLER (nmi, NMI_ENTRY_SP)	// 0x30 - NMI2
-	
-	.balign	0x10
-	JUMP_TO_HANDLER (trap, ENTRY_SP)	// 0x40 - TRAP0n
-	.balign	0x10
-	JUMP_TO_HANDLER (trap, ENTRY_SP)	// 0x50 - TRAP1n
-
-	.balign	0x10
-	JUMP_TO_HANDLER (dbtrap, ENTRY_SP)	// 0x60 - Illegal op / DBTRAP insn
-
-
-	/* Hardware interrupt vectors.  */
-	.section	.intv.mach, "ax"
-	.org	0x0
-
-#if defined (CONFIG_V850E_HIGHRES_TIMER) && defined (IRQ_INTCMD)
-
-	/* Interrupts before the highres timer interrupt.  */
-	.rept	IRQ_INTCMD (HIGHRES_TIMER_TIMER_D_UNIT)
-	.balign	0x10
-	JUMP_TO_HANDLER (irq, ENTRY_SP)
-	.endr
-
-	/* The highres timer interrupt.  */
-	.balign	0x10
-	JUMP_TO_HANDLER (C_SYMBOL_NAME (highres_timer_slow_tick_irq), ENTRY_SP)
-
-	/* Interrupts after the highres timer interrupt.  */
-	.rept	NUM_CPU_IRQS - IRQ_INTCMD (HIGHRES_TIMER_TIMER_D_UNIT) - 1
-	.balign	0x10
-	JUMP_TO_HANDLER (irq, ENTRY_SP)
-	.endr
-
-#else /* No highres timer */
-
-	.rept	NUM_CPU_IRQS
-	.balign	0x10
-	JUMP_TO_HANDLER (irq, ENTRY_SP)
-	.endr
-
-#endif /* Highres timer */
diff --git a/arch/v850/kernel/irq.c b/arch/v850/kernel/irq.c
deleted file mode 100644
index 858c45819aa..00000000000
--- a/arch/v850/kernel/irq.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * arch/v850/kernel/irq.c -- High-level interrupt handling
- *
- *  Copyright (C) 2001,02,03,04,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,04,05  Miles Bader <miles@gnu.org>
- *  Copyright (C) 1994-2000  Ralf Baechle
- *  Copyright (C) 1992  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file was was derived from the mips version, arch/mips/kernel/irq.c
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/irq.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel_stat.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/seq_file.h>
-
-#include <asm/system.h>
-
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves, it doesn't deserve
- * a generic callback i think.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk("received IRQ %d with unknown interrupt type\n", irq);
-}
-
-volatile unsigned long irq_err_count, spurious_count;
-
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int irq = *(loff_t *) v;
-
-	if (irq == 0) {
-		int cpu;
-		seq_puts(p, "           ");
-		for (cpu=0; cpu < 1 /*smp_num_cpus*/; cpu++)
-			seq_printf(p, "CPU%d       ", cpu);
-		seq_putc(p, '\n');
-	}
-
-	if (irq < NR_IRQS) {
-		unsigned long flags;
-		struct irqaction *action;
-
-		spin_lock_irqsave(&irq_desc[irq].lock, flags);
-
-		action = irq_desc[irq].action;
-		if (action) {
-			int j;
-			int count = 0;
-			int num = -1;
-			const char *type_name = irq_desc[irq].chip->typename;
-
-			for (j = 0; j < NR_IRQS; j++)
-				if (irq_desc[j].chip->typename == type_name){
-					if (irq == j)
-						num = count;
-					count++;
-				}
-
-			seq_printf(p, "%3d: ",irq);
-			seq_printf(p, "%10u ", kstat_irqs(irq));
-			if (count > 1) {
-				int prec = (num >= 100 ? 3 : num >= 10 ? 2 : 1);
-				seq_printf(p, " %*s%d", 14 - prec,
-					   type_name, num);
-			} else
-				seq_printf(p, " %14s", type_name);
-		
-			seq_printf(p, "  %s", action->name);
-			for (action=action->next; action; action = action->next)
-				seq_printf(p, ", %s", action->name);
-			seq_putc(p, '\n');
-		}
-
-		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
-	} else if (irq == NR_IRQS)
-		seq_printf(p, "ERR: %10lu\n", irq_err_count);
-
-	return 0;
-}
-
-/* Handle interrupt IRQ.  REGS are the registers at the time of ther
-   interrupt.  */
-unsigned int handle_irq (int irq, struct pt_regs *regs)
-{
-	irq_enter();
-	__do_IRQ(irq, regs);
-	irq_exit();
-	return 1;
-}
-
-/* Initialize irq handling for IRQs.
-   BASE_IRQ, BASE_IRQ+INTERVAL, ..., BASE_IRQ+NUM*INTERVAL
-   to IRQ_TYPE.  An IRQ_TYPE of 0 means to use a generic interrupt type.  */
-void __init
-init_irq_handlers (int base_irq, int num, int interval,
-		   struct hw_interrupt_type *irq_type)
-{
-	while (num-- > 0) {
-		irq_desc[base_irq].status  = IRQ_DISABLED;
-		irq_desc[base_irq].action  = NULL;
-		irq_desc[base_irq].depth   = 1;
-		irq_desc[base_irq].chip = irq_type;
-		base_irq += interval;
-	}
-}
diff --git a/arch/v850/kernel/ma.c b/arch/v850/kernel/ma.c
deleted file mode 100644
index 143774de75e..00000000000
--- a/arch/v850/kernel/ma.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * arch/v850/kernel/ma.c -- V850E/MA series of cpu chips
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-#include <asm/v850e_timer_d.h>
-
-#include "mach.h"
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, 		NUM_MACH_IRQS,	1, 7 },
-	{ "CMD", IRQ_INTCMD(0), IRQ_INTCMD_NUM,	1, 5 },
-	{ "DMA", IRQ_INTDMA(0), IRQ_INTDMA_NUM,	1, 2 },
-	{ "CSI", IRQ_INTCSI(0), IRQ_INTCSI_NUM,	4, 4 },
-	{ "SER", IRQ_INTSER(0), IRQ_INTSER_NUM,	4, 3 },
-	{ "SR",	 IRQ_INTSR(0),	IRQ_INTSR_NUM, 	4, 4 },
-	{ "ST",  IRQ_INTST(0), 	IRQ_INTST_NUM, 	4, 5 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-/* Initialize MA chip interrupts.  */
-void __init ma_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-/* Called before configuring an on-chip UART.  */
-void ma_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	/* We only know about the first two UART channels (though
-	   specific chips may have more).  */
-	if (chan < 2) {
-		unsigned bits = 0x3 << (chan * 3);
-		/* Specify that the relevant pins on the chip should do
-		   serial I/O, not direct I/O.  */
-		MA_PORT4_PMC |= bits;
-		/* Specify that we're using the UART, not the CSI device.  */
-		MA_PORT4_PFC |= bits;
-	}
-}
diff --git a/arch/v850/kernel/mach.c b/arch/v850/kernel/mach.c
deleted file mode 100644
index b9db278d2b7..00000000000
--- a/arch/v850/kernel/mach.c
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * arch/v850/kernel/mach.c -- Defaults for some things defined by "mach.h"
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include "mach.h"
-
-/* Called with each timer tick, if non-zero.  */
-void (*mach_tick)(void) = 0;
diff --git a/arch/v850/kernel/mach.h b/arch/v850/kernel/mach.h
deleted file mode 100644
index 9e0e4816ec5..00000000000
--- a/arch/v850/kernel/mach.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * arch/v850/kernel/mach.h -- Machine-dependent functions used by v850 port
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MACH_H__
-#define __V850_MACH_H__
-
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/fs.h>
-#include <linux/seq_file.h>
-
-#include <asm/ptrace.h>
-#include <asm/entry.h>
-#include <asm/clinkage.h>
-
-void mach_setup (char **cmdline);
-void mach_gettimeofday (struct timespec *tv);
-void mach_sched_init (struct irqaction *timer_action);
-void mach_get_physical_ram (unsigned long *ram_start, unsigned long *ram_len);
-void mach_init_irqs (void);
-
-/* If defined, is called very early in the kernel initialization.  The
-   stack pointer is valid, but very little has been initialized (e.g.,
-   bss is not zeroed yet) when this is called, so care must taken.  */
-void mach_early_init (void);
-
-/* If defined, called after the bootmem allocator has been initialized,
-   to allow the platform-dependent code to reserve any areas of RAM that
-   the kernel shouldn't touch.  */
-void mach_reserve_bootmem (void) __attribute__ ((__weak__));
-
-/* Called with each timer tick, if non-zero.  */
-extern void (*mach_tick) (void);
-
-/* The following establishes aliases for various mach_ functions to the
-   name by which the rest of the kernel calls them.  These statements
-   should only have an effect in the file that defines the actual functions. */
-#define MACH_ALIAS(to, from)						      \
-   asm (".global " macrology_stringify (C_SYMBOL_NAME (to)) ";"		      \
-	macrology_stringify (C_SYMBOL_NAME (to))			      \
-	" = " macrology_stringify (C_SYMBOL_NAME (from)))
-/* e.g.: MACH_ALIAS (kernel_name,	arch_spec_name);  */
-
-#endif /* __V850_MACH_H__ */
diff --git a/arch/v850/kernel/me2.c b/arch/v850/kernel/me2.c
deleted file mode 100644
index 007115dc9ce..00000000000
--- a/arch/v850/kernel/me2.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * arch/v850/kernel/me2.c -- V850E/ME2 chip-specific support
- *
- *  Copyright (C) 2003  NEC Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-#include <asm/v850e_timer_d.h>
-
-#include "mach.h"
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ",    0,                NUM_CPU_IRQS,      1, 7 },
-	{ "INTP",   IRQ_INTP(0),      IRQ_INTP_NUM,      1, 5 },
-	{ "CMD",    IRQ_INTCMD(0),    IRQ_INTCMD_NUM,    1, 3 },
-	{ "UBTIRE", IRQ_INTUBTIRE(0), IRQ_INTUBTIRE_NUM, 5, 4 },
-	{ "UBTIR",  IRQ_INTUBTIR(0),  IRQ_INTUBTIR_NUM,  5, 4 },
-	{ "UBTIT",  IRQ_INTUBTIT(0),  IRQ_INTUBTIT_NUM,  5, 4 },
-	{ "UBTIF",  IRQ_INTUBTIF(0),  IRQ_INTUBTIF_NUM,  5, 4 },
-	{ "UBTITO", IRQ_INTUBTITO(0), IRQ_INTUBTITO_NUM, 5, 4 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-/* Initialize V850E/ME2 chip interrupts.  */
-void __init me2_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-/* Called before configuring an on-chip UART.  */
-void me2_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	if (chan == 0) {
-		/* Specify that the relevant pins on the chip should do
-		   serial I/O, not direct I/O.  */
-		ME2_PORT1_PMC |= 0xC;
-		/* Specify that we're using the UART, not the CSI device. */
-		ME2_PORT1_PFC |= 0xC;
-	} else if (chan == 1) {
-		/* Specify that the relevant pins on the chip should do
-		   serial I/O, not direct I/O.  */
-		ME2_PORT2_PMC |= 0x6;
-		/* Specify that we're using the UART, not the CSI device. */
-		ME2_PORT2_PFC |= 0x6;
-	}
-}
diff --git a/arch/v850/kernel/memcons.c b/arch/v850/kernel/memcons.c
deleted file mode 100644
index 92f514fdcc7..00000000000
--- a/arch/v850/kernel/memcons.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * arch/v850/kernel/memcons.c -- Console I/O to a memory buffer
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/console.h>
-#include <linux/tty.h>
-#include <linux/tty_driver.h>
-#include <linux/init.h>
-
-/* If this device is enabled, the linker map should define start and
-   end points for its buffer. */
-extern char memcons_output[], memcons_output_end;
-
-/* Current offset into the buffer.  */
-static unsigned long memcons_offs = 0;
-
-/* Spinlock protecting memcons_offs.  */
-static DEFINE_SPINLOCK(memcons_lock);
-
-
-static size_t write (const char *buf, size_t len)
-{
-	unsigned long flags;
-	char *point;
-
-	spin_lock_irqsave (memcons_lock, flags);
-
-	point = memcons_output + memcons_offs;
-	if (point + len >= &memcons_output_end) {
-		len = &memcons_output_end - point;
-		memcons_offs = 0;
-	} else
-		memcons_offs += len;
-
-	spin_unlock_irqrestore (memcons_lock, flags);
-
-	memcpy (point, buf, len);
-
-	return len;
-}
-
-
-/*  Low-level console. */
-
-static void memcons_write (struct console *co, const char *buf, unsigned len)
-{
-	while (len > 0)
-		len -= write (buf, len);
-}
-
-static struct tty_driver *tty_driver;
-
-static struct tty_driver *memcons_device (struct console *co, int *index)
-{
-	*index = co->index;
-	return tty_driver;
-}
-
-static struct console memcons =
-{
-    .name	= "memcons",
-    .write	= memcons_write,
-    .device	= memcons_device,
-    .flags	= CON_PRINTBUFFER,
-    .index	= -1,
-};
-
-void memcons_setup (void)
-{
-	register_console (&memcons);
-	printk (KERN_INFO "Console: static memory buffer (memcons)\n");
-}
-
-/* Higher level TTY interface.  */
-
-int memcons_tty_open (struct tty_struct *tty, struct file *filp)
-{
-	return 0;
-}
-
-int memcons_tty_write (struct tty_struct *tty, const unsigned char *buf, int len)
-{
-	return write (buf, len);
-}
-
-int memcons_tty_write_room (struct tty_struct *tty)
-{
-	return &memcons_output_end - (memcons_output + memcons_offs);
-}
-
-int memcons_tty_chars_in_buffer (struct tty_struct *tty)
-{
-	/* We have no buffer.  */
-	return 0;
-}
-
-static const struct tty_operations ops = {
-	.open = memcons_tty_open,
-	.write = memcons_tty_write,
-	.write_room = memcons_tty_write_room,
-	.chars_in_buffer = memcons_tty_chars_in_buffer,
-};
-
-int __init memcons_tty_init (void)
-{
-	int err;
-	struct tty_driver *driver = alloc_tty_driver(1);
-	if (!driver)
-		return -ENOMEM;
-
-	driver->name = "memcons";
-	driver->major = TTY_MAJOR;
-	driver->minor_start = 64;
-	driver->type = TTY_DRIVER_TYPE_SYSCONS;
-	driver->init_termios = tty_std_termios;
-	tty_set_operations(driver, &ops);
-	err = tty_register_driver(driver);
-	if (err) {
-		put_tty_driver(driver);
-		return err;
-	}
-	tty_driver = driver;
-	return 0;
-}
-__initcall (memcons_tty_init);
diff --git a/arch/v850/kernel/module.c b/arch/v850/kernel/module.c
deleted file mode 100644
index 64aeb3e37c5..00000000000
--- a/arch/v850/kernel/module.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * arch/v850/kernel/module.c -- Architecture-specific module functions
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *  Copyright (C) 2001,03  Rusty Russell
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- *
- * Derived in part from arch/ppc/kernel/module.c
- */
-
-#include <linux/kernel.h>
-#include <linux/vmalloc.h>
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt , ...)
-#endif
-
-void *module_alloc (unsigned long size)
-{
-	return size == 0 ? 0 : vmalloc (size);
-}
-
-void module_free (struct module *mod, void *module_region)
-{
-	vfree (module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
-}
-
-int module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
-		     struct module *mod)
-{
-	return 0;
-}
-
-/* Count how many different relocations (different symbol, different
-   addend) */
-static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
-{
-	unsigned int i, j, ret = 0;
-
-	/* Sure, this is order(n^2), but it's usually short, and not
-           time critical */
-	for (i = 0; i < num; i++) {
-		for (j = 0; j < i; j++) {
-			/* If this addend appeared before, it's
-                           already been counted */
-			if (ELF32_R_SYM(rela[i].r_info)
-			    == ELF32_R_SYM(rela[j].r_info)
-			    && rela[i].r_addend == rela[j].r_addend)
-				break;
-		}
-		if (j == i) ret++;
-	}
-	return ret;
-}
-
-/* Get the potential trampolines size required of the init and
-   non-init sections */
-static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
-				  const Elf32_Shdr *sechdrs,
-				  const char *secstrings,
-				  int is_init)
-{
-	unsigned long ret = 0;
-	unsigned i;
-
-	/* Everything marked ALLOC (this includes the exported
-           symbols) */
-	for (i = 1; i < hdr->e_shnum; i++) {
-		/* If it's called *.init*, and we're not init, we're
-                   not interested */
-		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
-		    != is_init)
-			continue;
-
-		if (sechdrs[i].sh_type == SHT_RELA) {
-			DEBUGP("Found relocations in section %u\n", i);
-			DEBUGP("Ptr: %p.  Number: %u\n",
-			       (void *)hdr + sechdrs[i].sh_offset,
-			       sechdrs[i].sh_size / sizeof(Elf32_Rela));
-			ret += count_relocs((void *)hdr
-					     + sechdrs[i].sh_offset,
-					     sechdrs[i].sh_size
-					     / sizeof(Elf32_Rela))
-				* sizeof(struct v850_plt_entry);
-		}
-	}
-
-	return ret;
-}
-
-int module_frob_arch_sections(Elf32_Ehdr *hdr,
-			      Elf32_Shdr *sechdrs,
-			      char *secstrings,
-			      struct module *me)
-{
-	unsigned int i;
-
-	/* Find .plt and .pltinit sections */
-	for (i = 0; i < hdr->e_shnum; i++) {
-		if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0)
-			me->arch.init_plt_section = i;
-		else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
-			me->arch.core_plt_section = i;
-	}
-	if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
-		printk("Module doesn't contain .plt or .plt.init sections.\n");
-		return -ENOEXEC;
-	}
-
-	/* Override their sizes */
-	sechdrs[me->arch.core_plt_section].sh_size
-		= get_plt_size(hdr, sechdrs, secstrings, 0);
-	sechdrs[me->arch.init_plt_section].sh_size
-		= get_plt_size(hdr, sechdrs, secstrings, 1);
-	return 0;
-}
-
-int apply_relocate (Elf32_Shdr *sechdrs, const char *strtab,
-		    unsigned int symindex, unsigned int relsec,
-		    struct module *mod)
-{
-	printk ("Barf\n");
-	return -ENOEXEC;
-}
-
-/* Set up a trampoline in the PLT to bounce us to the distant function */
-static uint32_t do_plt_call (void *location, Elf32_Addr val,
-			     Elf32_Shdr *sechdrs, struct module *mod)
-{
-	struct v850_plt_entry *entry;
-	/* Instructions used to do the indirect jump.  */
-	uint32_t tramp[2];
-
-	/* We have to trash a register, so we assume that any control
-	   transfer more than 21-bits away must be a function call
-	   (so we can use a call-clobbered register).  */
-	tramp[0] = 0x0621 + ((val & 0xffff) << 16);   /* mov sym, r1 ... */
-	tramp[1] = ((val >> 16) & 0xffff) + 0x610000; /* ...; jmp r1 */
-
-	/* Init, or core PLT? */
-	if (location >= mod->module_core
-	    && location < mod->module_core + mod->core_size)
-		entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
-	else
-		entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
-
-	/* Find this entry, or if that fails, the next avail. entry */
-	while (entry->tramp[0])
-		if (entry->tramp[0] == tramp[0] && entry->tramp[1] == tramp[1])
-			return (uint32_t)entry;
-		else
-			entry++;
-
-	entry->tramp[0] = tramp[0];
-	entry->tramp[1] = tramp[1];
-
-	return (uint32_t)entry;
-}
-
-int apply_relocate_add (Elf32_Shdr *sechdrs, const char *strtab,
-			unsigned int symindex, unsigned int relsec,
-			struct module *mod)
-{
-	unsigned int i;
-	Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
-
-	DEBUGP ("Applying relocate section %u to %u\n", relsec,
-		sechdrs[relsec].sh_info);
-
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof (*rela); i++) {
-		/* This is where to make the change */
-		uint32_t *loc
-			= ((void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			   + rela[i].r_offset);
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		Elf32_Sym *sym
-			= ((Elf32_Sym *)sechdrs[symindex].sh_addr
-			   + ELF32_R_SYM (rela[i].r_info));
-		uint32_t val = sym->st_value + rela[i].r_addend;
-
-		switch (ELF32_R_TYPE (rela[i].r_info)) {
-		case R_V850_32:
-			/* We write two shorts instead of a long because even
-			   32-bit insns only need half-word alignment, but
-			   32-bit data writes need to be long-word aligned.  */
-			val += ((uint16_t *)loc)[0];
-			val += ((uint16_t *)loc)[1] << 16;
-			((uint16_t *)loc)[0] = val & 0xffff;
-			((uint16_t *)loc)[1] = (val >> 16) & 0xffff;
-			break;
-
-		case R_V850_22_PCREL:
-			/* Maybe jump indirectly via a PLT table entry.  */
-			if ((int32_t)(val - (uint32_t)loc) > 0x1fffff
-			    || (int32_t)(val - (uint32_t)loc) < -0x200000)
-				val = do_plt_call (loc, val, sechdrs, mod);
-
-			val -= (uint32_t)loc;
-
-			/* We write two shorts instead of a long because
-			   even 32-bit insns only need half-word alignment,
-			   but 32-bit data writes need to be long-word
-			   aligned.  */
-			((uint16_t *)loc)[0] =
-				(*(uint16_t *)loc & 0xffc0) /* opcode + reg */
-				| ((val >> 16) & 0xffc03f); /* offs high */
-			((uint16_t *)loc)[1] =
-				(val & 0xffff);		    /* offs low */
-			break;
-
-		default:
-			printk (KERN_ERR "module %s: Unknown reloc: %u\n",
-				mod->name, ELF32_R_TYPE (rela[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-
-	return 0;
-}
-
-void
-module_arch_cleanup(struct module *mod)
-{
-}
diff --git a/arch/v850/kernel/process.c b/arch/v850/kernel/process.c
deleted file mode 100644
index e4a4b8e7d5a..00000000000
--- a/arch/v850/kernel/process.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * arch/v850/kernel/process.c -- Arch-dependent process handling
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/reboot.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/pgtable.h>
-
-void (*pm_power_off)(void) = NULL;
-EXPORT_SYMBOL(pm_power_off);
-
-extern void ret_from_fork (void);
-
-
-/* The idle loop.  */
-static void default_idle (void)
-{
-	while (! need_resched ())
-		asm ("halt; nop; nop; nop; nop; nop" ::: "cc");
-}
-
-void (*idle)(void) = default_idle;
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle (void)
-{
-	/* endless idle loop with no priority at all */
-	while (1) {
-		while (!need_resched())
-			(*idle) ();
-
-		preempt_enable_no_resched();
-		schedule();
-		preempt_disable();
-	}
-}
-
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process (ie the swapper or direct descendants who
- * haven't done an "execve()") should use this: it will work within a system
- * call from a "real" process, but the process memory space will not be free'd
- * until both the parent and the child have exited.
- */
-int kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
-{
-	register mm_segment_t fs = get_fs ();
-	register unsigned long syscall asm (SYSCALL_NUM);
-	register unsigned long arg0 asm (SYSCALL_ARG0);
-	register unsigned long ret asm (SYSCALL_RET);
-
-	set_fs (KERNEL_DS);
-
-	/* Clone this thread.  Note that we don't pass the clone syscall's
-	   second argument -- it's ignored for calls from kernel mode (the
-	   child's SP is always set to the top of the kernel stack).  */
-	arg0 = flags | CLONE_VM;
-	syscall = __NR_clone;
-	asm volatile ("trap " SYSCALL_SHORT_TRAP
-		      : "=r" (ret), "=r" (syscall)
-		      : "1" (syscall), "r" (arg0)
-		      : SYSCALL_SHORT_CLOBBERS);
-
-	if (ret == 0) {
-		/* In child thread, call FN and exit.  */
-		arg0 = (*fn) (arg);
-		syscall = __NR_exit;
-		asm volatile ("trap " SYSCALL_SHORT_TRAP
-			      : "=r" (ret), "=r" (syscall)
-			      : "1" (syscall), "r" (arg0)
-			      : SYSCALL_SHORT_CLOBBERS);
-	}
-
-	/* In parent.  */
-	set_fs (fs);
-
-	return ret;
-}
-
-void flush_thread (void)
-{
-	set_fs (USER_DS);
-}
-
-int copy_thread (int nr, unsigned long clone_flags,
-		 unsigned long stack_start, unsigned long stack_size,
-		 struct task_struct *p, struct pt_regs *regs)
-{
-	/* Start pushing stuff from the top of the child's kernel stack.  */
-	unsigned long orig_ksp = task_tos(p);
-	unsigned long ksp = orig_ksp;
-	/* We push two `state save' stack fames (see entry.S) on the new
-	   kernel stack:
-	     1) The innermost one is what switch_thread would have
-	        pushed, and is used when we context switch to the child
-		thread for the first time.  It's set up to return to
-		ret_from_fork in entry.S.
-	     2) The outermost one (nearest the top) is what a syscall
-	        trap would have pushed, and is set up to return to the
-		same location as the parent thread, but with a return
-		value of 0. */
-	struct pt_regs *child_switch_regs, *child_trap_regs;
-
-	/* Trap frame.  */
-	ksp -= STATE_SAVE_SIZE;
-	child_trap_regs = (struct pt_regs *)(ksp + STATE_SAVE_PT_OFFSET);
-	/* Switch frame.  */
-	ksp -= STATE_SAVE_SIZE;
-	child_switch_regs = (struct pt_regs *)(ksp + STATE_SAVE_PT_OFFSET);
-
-	/* First copy parent's register state to child.  */
-	*child_switch_regs = *regs;
-	*child_trap_regs = *regs;
-
-	/* switch_thread returns to the restored value of the lp
-	   register (r31), so we make that the place where we want to
-	   jump when the child thread begins running.  */
-	child_switch_regs->gpr[GPR_LP] = (v850_reg_t)ret_from_fork;
-
-	if (regs->kernel_mode)
-		/* Since we're returning to kernel-mode, make sure the child's
-		   stored kernel stack pointer agrees with what the actual
-		   stack pointer will be at that point (the trap return code
-		   always restores the SP, even when returning to
-		   kernel-mode).  */
-		child_trap_regs->gpr[GPR_SP] = orig_ksp;
-	else
-		/* Set the child's user-mode stack-pointer (the name
-		   `stack_start' is a misnomer, it's just the initial SP
-		   value).  */
-		child_trap_regs->gpr[GPR_SP] = stack_start;
-
-	/* Thread state for the child (everything else is on the stack).  */
-	p->thread.ksp = ksp;
-
-	return 0;
-}
-
-/*
- * sys_execve() executes a new program.
- */
-int sys_execve (char *name, char **argv, char **envp, struct pt_regs *regs)
-{
-	char *filename = getname (name);
-	int error = PTR_ERR (filename);
-
-	if (! IS_ERR (filename)) {
-		error = do_execve (filename, argv, envp, regs);
-		putname (filename);
-	}
-
-	return error;
-}
-
-
-/*
- * These bracket the sleeping functions..
- */
-#define first_sched	((unsigned long)__sched_text_start)
-#define last_sched	((unsigned long)__sched_text_end)
-
-unsigned long get_wchan (struct task_struct *p)
-{
-#if 0  /* Barf.  Figure out the stack-layout later.  XXX  */
-	unsigned long fp, pc;
-	int count = 0;
-
-	if (!p || p == current || p->state == TASK_RUNNING)
-		return 0;
-
-	pc = thread_saved_pc (p);
-
-	/* This quite disgusting function walks up the stack, following
-	   saved return address, until it something that's out of bounds
-	   (as defined by `first_sched' and `last_sched').  It then
-	   returns the last PC that was in-bounds.  */
-	do {
-		if (fp < stack_page + sizeof (struct task_struct) ||
-		    fp >= 8184+stack_page)
-			return 0;
-		pc = ((unsigned long *)fp)[1];
-		if (pc < first_sched || pc >= last_sched)
-			return pc;
-		fp = *(unsigned long *) fp;
-	} while (count++ < 16);
-#endif
-
-	return 0;
-}
diff --git a/arch/v850/kernel/procfs.c b/arch/v850/kernel/procfs.c
deleted file mode 100644
index e433cde789b..00000000000
--- a/arch/v850/kernel/procfs.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * arch/v850/kernel/procfs.c -- Introspection functions for /proc filesystem
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include "mach.h"
-
-static int cpuinfo_print (struct seq_file *m, void *v)
-{
-	extern unsigned long loops_per_jiffy;
-	
-	seq_printf (m, "CPU-Family:	v850\nCPU-Arch:	%s\n", CPU_ARCH);
-
-#ifdef CPU_MODEL_LONG
-	seq_printf (m, "CPU-Model:	%s (%s)\n", CPU_MODEL, CPU_MODEL_LONG);
-#else
-	seq_printf (m, "CPU-Model:	%s\n", CPU_MODEL);
-#endif
-
-#ifdef CPU_CLOCK_FREQ
-	seq_printf (m, "CPU-Clock:	%ld (%ld MHz)\n",
-		    (long)CPU_CLOCK_FREQ,
-		    (long)CPU_CLOCK_FREQ / 1000000);
-#endif
-
-	seq_printf (m, "BogoMips:	%lu.%02lu\n",
-		    loops_per_jiffy/(500000/HZ),
-		    (loops_per_jiffy/(5000/HZ)) % 100);
-
-#ifdef PLATFORM_LONG
-	seq_printf (m, "Platform:	%s (%s)\n", PLATFORM, PLATFORM_LONG);
-#elif defined (PLATFORM)
-	seq_printf (m, "Platform:	%s\n", PLATFORM);
-#endif
-
-	return 0;
-}
-
-static void *cpuinfo_start (struct seq_file *m, loff_t *pos)
-{
-	return *pos < NR_CPUS ? ((void *) 0x12345678) : NULL;
-}
-
-static void *cpuinfo_next (struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return cpuinfo_start (m, pos);
-}
-
-static void cpuinfo_stop (struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-	.start	= cpuinfo_start,
-	.next	= cpuinfo_next,
-	.stop	= cpuinfo_stop,
-	.show	= cpuinfo_print
-};
diff --git a/arch/v850/kernel/ptrace.c b/arch/v850/kernel/ptrace.c
deleted file mode 100644
index a458ac941b2..00000000000
--- a/arch/v850/kernel/ptrace.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * arch/v850/kernel/ptrace.c -- `ptrace' system call
- *
- *  Copyright (C) 2002,03,04  NEC Electronics Corporation
- *  Copyright (C) 2002,03,04  Miles Bader <miles@gnu.org>
- *
- * Derived from arch/mips/kernel/ptrace.c:
- *
- *  Copyright (C) 1992 Ross Biro
- *  Copyright (C) Linus Torvalds
- *  Copyright (C) 1994, 95, 96, 97, 98, 2000 Ralf Baechle
- *  Copyright (C) 1996 David S. Miller
- *  Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
- *  Copyright (C) 1999 MIPS Technologies, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <linux/signal.h>
-
-#include <asm/errno.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-
-/* Returns the address where the register at REG_OFFS in P is stashed away.  */
-static v850_reg_t *reg_save_addr (unsigned reg_offs, struct task_struct *t)
-{
-	struct pt_regs *regs;
-
-	/* Three basic cases:
-
-	   (1) A register normally saved before calling the scheduler, is
-	       available in the kernel entry pt_regs structure at the top
-	       of the kernel stack.  The kernel trap/irq exit path takes
-	       care to save/restore almost all registers for ptrace'd
-	       processes.
-
-	   (2) A call-clobbered register, where the process P entered the
-	       kernel via [syscall] trap, is not stored anywhere; that's
-	       OK, because such registers are not expected to be preserved
-	       when the trap returns anyway (so we don't actually bother to
-	       test for this case).
-
-	   (3) A few registers not used at all by the kernel, and so
-	       normally never saved except by context-switches, are in the
-	       context switch state.  */
-
-	if (reg_offs == PT_CTPC || reg_offs == PT_CTPSW || reg_offs == PT_CTBP)
-		/* Register saved during context switch.  */
-		regs = thread_saved_regs (t);
-	else
-		/* Register saved during kernel entry (or not available).  */
-		regs = task_pt_regs (t);
-
-	return (v850_reg_t *)((char *)regs + reg_offs);
-}
-
-/* Set the bits SET and clear the bits CLEAR in the v850e DIR
-   (`debug information register').  Returns the new value of DIR.  */
-static inline v850_reg_t set_dir (v850_reg_t set, v850_reg_t clear)
-{
-	register v850_reg_t rval asm ("r10");
-	register v850_reg_t arg0 asm ("r6") = set;
-	register v850_reg_t arg1 asm ("r7") = clear;
-
-	/* The dbtrap handler has exactly this functionality when called
-	   from kernel mode.  0xf840 is a `dbtrap' insn.  */
-	asm (".short 0xf840" : "=r" (rval) : "r" (arg0), "r" (arg1));
-
-	return rval;
-}
-
-/* Makes sure hardware single-stepping is (globally) enabled.
-   Returns true if successful.  */
-static inline int enable_single_stepping (void)
-{
-	static int enabled = 0;	/* Remember whether we already did it.  */
-	if (! enabled) {
-		/* Turn on the SE (`single-step enable') bit, 0x100, in the
-		   DIR (`debug information register').  This may fail if a
-		   processor doesn't support it or something.  We also try
-		   to clear bit 0x40 (`INI'), which is necessary to use the
-		   debug stuff on the v850e2; on the v850e, clearing 0x40
-		   shouldn't cause any problem.  */
-		v850_reg_t dir = set_dir (0x100, 0x40);
-		/* Make sure it really got set.  */
-		if (dir & 0x100)
-			enabled = 1;
-	}
-	return enabled;
-}
-
-/* Try to set CHILD's single-step flag to VAL.  Returns true if successful.  */
-static int set_single_step (struct task_struct *t, int val)
-{
-	v850_reg_t *psw_addr = reg_save_addr(PT_PSW, t);
-	if (val) {
-		/* Make sure single-stepping is enabled.  */
-		if (! enable_single_stepping ())
-			return 0;
-		/* Set T's single-step flag.  */
-		*psw_addr |= 0x800;
-	} else
-		*psw_addr &= ~0x800;
-	return 1;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
-	int rval;
-
-	switch (request) {
-		unsigned long val;
-
-	case PTRACE_PEEKTEXT: /* read word at location addr. */
-	case PTRACE_PEEKDATA:
-		rval = generic_ptrace_peekdata(child, addr, data);
-		goto out;
-
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		rval = generic_ptrace_pokedata(child, addr, data);
-		goto out;
-
-	/* Read/write the word at location ADDR in the registers.  */
-	case PTRACE_PEEKUSR:
-	case PTRACE_POKEUSR:
-		rval = 0;
-		if (addr >= PT_SIZE && request == PTRACE_PEEKUSR) {
-			/* Special requests that don't actually correspond
-			   to offsets in struct pt_regs.  */
-			if (addr == PT_TEXT_ADDR)
-				val = child->mm->start_code;
-			else if (addr == PT_DATA_ADDR)
-				val = child->mm->start_data;
-			else if (addr == PT_TEXT_LEN)
-				val = child->mm->end_code
-					- child->mm->start_code;
-			else
-				rval = -EIO;
-		} else if (addr >= 0 && addr < PT_SIZE && (addr & 0x3) == 0) {
-			v850_reg_t *reg_addr = reg_save_addr(addr, child);
-			if (request == PTRACE_PEEKUSR)
-				val = *reg_addr;
-			else
-				*reg_addr = data;
-		} else
-			rval = -EIO;
-
-		if (rval == 0 && request == PTRACE_PEEKUSR)
-			rval = put_user (val, (unsigned long *)data);
-		goto out;
-
-	/* Continue and stop at next (return from) syscall */
-	case PTRACE_SYSCALL:
-	/* Restart after a signal.  */
-	case PTRACE_CONT:
-	/* Execute a single instruction. */
-	case PTRACE_SINGLESTEP:
-		rval = -EIO;
-		if (!valid_signal(data))
-			break;
-
-		/* Turn CHILD's single-step flag on or off.  */
-		if (! set_single_step (child, request == PTRACE_SINGLESTEP))
-			break;
-
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-
-		child->exit_code = data;
-		wake_up_process(child);
-		rval = 0;
-		break;
-
-	/*
-	 * make the child exit.  Best I can do is send it a sigkill.
-	 * perhaps it should be put in the status that it wants to
-	 * exit.
-	 */
-	case PTRACE_KILL:
-		rval = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		wake_up_process(child);
-		break;
-
-	case PTRACE_DETACH: /* detach a process that was attached. */
-		set_single_step (child, 0);  /* Clear single-step flag */
-		rval = ptrace_detach(child, data);
-		break;
-
-	default:
-		rval = -EIO;
-		goto out;
-	}
- out:
-	return rval;
-}
-
-asmlinkage void syscall_trace(void)
-{
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-	/* The 0x80 provides a way for the tracing parent to distinguish
-	   between a syscall stop and SIGTRAP delivery */
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				 ? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
-
-void ptrace_disable (struct task_struct *child)
-{
-	/* nothing to do */
-}
diff --git a/arch/v850/kernel/rte_cb.c b/arch/v850/kernel/rte_cb.c
deleted file mode 100644
index 43018e1edeb..00000000000
--- a/arch/v850/kernel/rte_cb.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * include/asm-v850/rte_cb.c -- Midas lab RTE-CB series of evaluation boards
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include <asm/machdep.h>
-#include <asm/v850e_uart.h>
-
-#include "mach.h"
-
-static void led_tick (void);
-
-/* LED access routines.  */
-extern unsigned read_leds (int pos, char *buf, int len);
-extern unsigned write_leds (int pos, const char *buf, int len);
-
-#ifdef CONFIG_RTE_CB_MULTI
-extern void multi_init (void);
-#endif
-
-
-void __init rte_cb_early_init (void)
-{
-	v850e_intc_disable_irqs ();
-
-#ifdef CONFIG_RTE_CB_MULTI
-	multi_init ();
-#endif
-}
-
-void __init mach_setup (char **cmdline)
-{
-#ifdef CONFIG_RTE_MB_A_PCI
-	/* Probe for Mother-A, and print a message if we find it.  */
-	*(volatile unsigned long *)MB_A_SRAM_ADDR = 0xDEADBEEF;
-	if (*(volatile unsigned long *)MB_A_SRAM_ADDR == 0xDEADBEEF) {
-		*(volatile unsigned long *)MB_A_SRAM_ADDR = 0x12345678;
-		if (*(volatile unsigned long *)MB_A_SRAM_ADDR == 0x12345678)
-			printk (KERN_INFO
-				"          NEC SolutionGear/Midas lab"
-				" RTE-MOTHER-A motherboard\n");
-	}
-#endif /* CONFIG_RTE_MB_A_PCI */
-
-	mach_tick = led_tick;
-}
-
-void machine_restart (char *__unused)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	asm ("jmp r0"); /* Jump to the reset vector.  */
-}
-
-/* This says `HALt.' in LEDese.  */
-static unsigned char halt_leds_msg[] = { 0x76, 0x77, 0x38, 0xF8 };
-
-void machine_halt (void)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-
-	/* Ignore all interrupts.  */
-	local_irq_disable ();
-
-	/* Write a little message.  */
-	write_leds (0, halt_leds_msg, sizeof halt_leds_msg);
-
-	/* Really halt.  */
-	for (;;)
-		asm ("halt; nop; nop; nop; nop; nop");
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
-
-/* Animated LED display for timer tick.  */
-
-#define TICK_UPD_FREQ	6
-static int tick_frames[][10] = {
-	{ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, -1 },
-	{ 0x63, 0x5c, -1 },
-	{ 0x5c, 0x00, -1 },
-	{ 0x63, 0x00, -1 },
-	{ -1 }
-};
-
-static void led_tick ()
-{
-	static unsigned counter = 0;
-	
-	if (++counter == (HZ / TICK_UPD_FREQ)) {
-		/* Which frame we're currently displaying for each digit.  */
-		static unsigned frame_nums[LED_NUM_DIGITS] = { 0 };
-		/* Display image.  */
-		static unsigned char image[LED_NUM_DIGITS] = { 0 };
-		unsigned char prev_image[LED_NUM_DIGITS];
-		int write_to_leds = 1; /* true if we should actually display */
-		int digit;
-
-		/* We check to see if the physical LEDs contains what we last
-		   wrote to them; if not, we suppress display (this is so that
-		   users can write to the LEDs, and not have their output
-		   overwritten).  As a special case, we start writing again if
-		   all the LEDs are blank, or our display image is all zeros
-		   (indicating that this is the initial update, when the actual
-		   LEDs might contain random data).  */
-		read_leds (0, prev_image, LED_NUM_DIGITS);
-		for (digit = 0; digit < LED_NUM_DIGITS; digit++)
-			if (image[digit] != prev_image[digit]
-			    && image[digit] && prev_image[digit])
-			{
-				write_to_leds = 0;
-				break;
-			}
-
-		/* Update display image.  */
-		for (digit = 0;
-		     digit < LED_NUM_DIGITS && tick_frames[digit][0] >= 0;
-		     digit++)
-		{
-			int frame = tick_frames[digit][frame_nums[digit]];
-			if (frame < 0) {
-				image[digit] = tick_frames[digit][0];
-				frame_nums[digit] = 1;
-			} else {
-				image[digit] = frame;
-				frame_nums[digit]++;
-				break;
-			}
-		}
-
-		if (write_to_leds)
-			/* Write the display image to the physical LEDs.  */
-			write_leds (0, image, LED_NUM_DIGITS);
-
-		counter = 0;
-	}
-}
-
-
-/* Mother-A interrupts.  */
-
-#ifdef CONFIG_RTE_GBUS_INT
-
-#define L GBUS_INT_PRIORITY_LOW
-#define M GBUS_INT_PRIORITY_MEDIUM
-#define H GBUS_INT_PRIORITY_HIGH
-
-static struct gbus_int_irq_init gbus_irq_inits[] = {
-#ifdef CONFIG_RTE_MB_A_PCI
-	{ "MB_A_LAN",	IRQ_MB_A_LAN,		1,		     1, L },
-	{ "MB_A_PCI1",	IRQ_MB_A_PCI1(0),	IRQ_MB_A_PCI1_NUM,   1, L },
-	{ "MB_A_PCI2",	IRQ_MB_A_PCI2(0),	IRQ_MB_A_PCI2_NUM,   1, L },
-	{ "MB_A_EXT",	IRQ_MB_A_EXT(0),	IRQ_MB_A_EXT_NUM,    1, L },
-	{ "MB_A_USB_OC",IRQ_MB_A_USB_OC(0),	IRQ_MB_A_USB_OC_NUM, 1, L },
-	{ "MB_A_PCMCIA_OC",IRQ_MB_A_PCMCIA_OC,	1,		     1, L },
-#endif
-	{ 0 }
-};
-#define NUM_GBUS_IRQ_INITS (ARRAY_SIZE(gbus_irq_inits) - 1)
-
-static struct hw_interrupt_type gbus_hw_itypes[NUM_GBUS_IRQ_INITS];
-
-#endif /* CONFIG_RTE_GBUS_INT */
-
-
-void __init rte_cb_init_irqs (void)
-{
-#ifdef CONFIG_RTE_GBUS_INT
-	gbus_int_init_irqs ();
-	gbus_int_init_irq_types (gbus_irq_inits, gbus_hw_itypes);
-#endif /* CONFIG_RTE_GBUS_INT */
-}
diff --git a/arch/v850/kernel/rte_cb_leds.c b/arch/v850/kernel/rte_cb_leds.c
deleted file mode 100644
index aa47ab1dcd8..00000000000
--- a/arch/v850/kernel/rte_cb_leds.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * include/asm-v850/rte_cb_leds.c -- Midas lab RTE-CB board LED device support
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-
-#include <asm/uaccess.h>
-
-#define LEDS_MINOR	169	/* Minor device number, using misc major.  */
-
-/* The actual LED hardware is write-only, so we hold the contents here too.  */
-static unsigned char leds_image[LED_NUM_DIGITS] = { 0 };
-
-/* Spinlock protecting the above leds.  */
-static DEFINE_SPINLOCK(leds_lock);
-
-/* Common body of LED read/write functions, checks POS and LEN for
-   correctness, declares a variable using IMG_DECL, initialized pointing at
-   the POS position in the LED image buffer, and and iterates COPY_EXPR
-   until BUF is equal to the last buffer position; finally, sets LEN to be
-   the amount actually copied.  IMG should be a variable declaration
-   (without an initializer or a terminating semicolon); POS, BUF, and LEN
-   should all be simple variables.  */
-#define DO_LED_COPY(img_decl, pos, buf, len, copy_expr)			\
-do {									\
-	if (pos > LED_NUM_DIGITS)					\
-		len = 0;						\
-	else {								\
-		if (pos + len > LED_NUM_DIGITS)				\
-			len = LED_NUM_DIGITS - pos;			\
-									\
-		if (len > 0) {						\
-			unsigned long _flags;				\
-			const char *_end = buf + len;			\
-			img_decl = &leds_image[pos];			\
-									\
-			spin_lock_irqsave (leds_lock, _flags);		\
-			do						\
-				(copy_expr);				\
-			while (buf != _end);				\
-			spin_unlock_irqrestore (leds_lock, _flags);	\
-		}							\
-	}								\
-} while (0)
-
-/* Read LEN bytes from LEDs at position POS, into BUF.
-   Returns actual amount read.  */
-unsigned read_leds (unsigned pos, char *buf, unsigned len)
-{
-	DO_LED_COPY (const char *img, pos, buf, len, *buf++ = *img++);
-	return len;
-}
-
-/* Write LEN bytes to LEDs at position POS, from BUF.
-   Returns actual amount written.  */
-unsigned write_leds (unsigned pos, const char *buf, unsigned len)
-{
-	/* We write the actual LED values backwards, because
-	   increasing memory addresses reflect LEDs right-to-left. */
-	volatile char *led = &LED (LED_NUM_DIGITS - pos - 1);
-	/* We invert the value written to the hardware, because 1 = off,
-	   and 0 = on.  */
-	DO_LED_COPY (char *img, pos, buf, len,
-		     *led-- = 0xFF ^ (*img++ = *buf++));
-	return len;
-}
-
-
-/* Device functions.  */
-
-static ssize_t leds_dev_read (struct file *file, char *buf, size_t len,
-			      loff_t *pos)
-{
-	char temp_buf[LED_NUM_DIGITS];
-	len = read_leds (*pos, temp_buf, len);
-	if (copy_to_user (buf, temp_buf, len))
-		return -EFAULT;
-	*pos += len;
-	return len;
-}
-
-static ssize_t leds_dev_write (struct file *file, const char *buf, size_t len,
-			       loff_t *pos)
-{
-	char temp_buf[LED_NUM_DIGITS];
-	if (copy_from_user (temp_buf, buf, min_t(size_t, len, LED_NUM_DIGITS)))
-		return -EFAULT;
-	len = write_leds (*pos, temp_buf, len);
-	*pos += len;
-	return len;
-}
-
-static loff_t leds_dev_lseek (struct file *file, loff_t offs, int whence)
-{
-	if (whence == 1)
-		offs += file->f_pos; /* relative */
-	else if (whence == 2)
-		offs += LED_NUM_DIGITS; /* end-relative */
-
-	if (offs < 0 || offs > LED_NUM_DIGITS)
-		return -EINVAL;
-
-	file->f_pos = offs;
-
-	return 0;
-}
-
-static const struct file_operations leds_fops = {
-	.read		= leds_dev_read,
-	.write		= leds_dev_write,
-	.llseek		= leds_dev_lseek
-};
-
-static struct miscdevice leds_miscdev = {
-	.name		= "leds",
-	.minor		= LEDS_MINOR,
-	.fops		= &leds_fops
-};
-
-int __init leds_dev_init (void)
-{
-	return misc_register (&leds_miscdev);
-}
-
-__initcall (leds_dev_init);
diff --git a/arch/v850/kernel/rte_cb_multi.c b/arch/v850/kernel/rte_cb_multi.c
deleted file mode 100644
index 963d55ab34c..00000000000
--- a/arch/v850/kernel/rte_cb_multi.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * include/asm-v850/rte_multi.c -- Support for Multi debugger monitor ROM
- * 	on Midas lab RTE-CB series of evaluation boards
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/init.h>
-
-#include <asm/machdep.h>
-
-#define IRQ_ADDR(irq) (0x80 + (irq) * 0x10)
-
-/* A table of which interrupt vectors to install, since blindly
-   installing all of them makes the debugger stop working.  This is a
-   list of offsets in the interrupt vector area; each entry means to
-   copy that particular 16-byte vector.  An entry less than zero ends
-   the table.  */
-static long multi_intv_install_table[] = {
-	/* Trap vectors */
-	0x40, 0x50,		
-
-#ifdef CONFIG_RTE_CB_MULTI_DBTRAP
-	/* Illegal insn / dbtrap.  These are used by multi, so only handle
-	   them if configured to do so.  */
-	0x60,
-#endif
-
-	/* GINT1 - GINT3 (note, not GINT0!) */
-	IRQ_ADDR (IRQ_GINT(1)),
-	IRQ_ADDR (IRQ_GINT(2)),
-	IRQ_ADDR (IRQ_GINT(3)),
-
-	/* Timer D interrupts (up to 4 timers) */
-	IRQ_ADDR (IRQ_INTCMD(0)),
-#if IRQ_INTCMD_NUM > 1
-	IRQ_ADDR (IRQ_INTCMD(1)),
-#if IRQ_INTCMD_NUM > 2
-	IRQ_ADDR (IRQ_INTCMD(2)),
-#if IRQ_INTCMD_NUM > 3
-	IRQ_ADDR (IRQ_INTCMD(3)),
-#endif
-#endif
-#endif
-	
-	/* UART interrupts (up to 3 channels) */
-	IRQ_ADDR (IRQ_INTSER (0)), /* err */
-	IRQ_ADDR (IRQ_INTSR  (0)), /* rx */
-	IRQ_ADDR (IRQ_INTST  (0)), /* tx */
-#if IRQ_INTSR_NUM > 1
-	IRQ_ADDR (IRQ_INTSER (1)), /* err */
-	IRQ_ADDR (IRQ_INTSR  (1)), /* rx */
-	IRQ_ADDR (IRQ_INTST  (1)), /* tx */
-#if IRQ_INTSR_NUM > 2
-	IRQ_ADDR (IRQ_INTSER (2)), /* err */
-	IRQ_ADDR (IRQ_INTSR  (2)), /* rx */
-	IRQ_ADDR (IRQ_INTST  (2)), /* tx */
-#endif
-#endif
-
-	-1
-};
-
-/* Early initialization for kernel using Multi debugger ROM monitor.  */
-void __init multi_init (void)
-{
-	/* We're using the Multi debugger monitor, so we have to install
-	   the interrupt vectors.  The monitor doesn't allow them to be
-	   initially downloaded into their final destination because
-	   it's in the monitor's scratch-RAM area.  Unfortunately, Multi
-	   also doesn't deal correctly with ELF sections where the LMA
-	   and VMA differ -- it just ignores the LMA -- so we can't use
-	   that feature to work around the problem.  What we do instead
-	   is just put the interrupt vectors into a normal section, and
-	   do the necessary copying and relocation here.  Since the
-	   interrupt vector basically only contains `jr' instructions
-	   and no-ops, it's not that hard.  */
-	extern unsigned long _intv_load_start, _intv_start;
-	register unsigned long *src = &_intv_load_start;
-	register unsigned long *dst = (unsigned long *)INTV_BASE;
-	register unsigned long jr_fixup = (char *)&_intv_start - (char *)dst;
-	register long *ii;
-
-	/* Copy interrupt vectors as instructed by multi_intv_install_table. */
-	for (ii = multi_intv_install_table; *ii >= 0; ii++) {
-		/* Copy 16-byte interrupt vector at offset *ii.  */
-		int boffs;
-		for (boffs = 0; boffs < 0x10; boffs += sizeof *src) {
-			/* Copy a single word, fixing up the jump offs
-			   if it's a `jr' instruction.  */
-			int woffs = (*ii + boffs) / sizeof *src;
-			unsigned long word = src[woffs];
-
-			if ((word & 0xFC0) == 0x780) {
-				/* A `jr' insn, fix up its offset (and yes, the
-				   weird half-word swapping is intentional). */
-				unsigned short hi = word & 0xFFFF;
-				unsigned short lo = word >> 16;
-				unsigned long udisp22
-					= lo + ((hi & 0x3F) << 16);
-				long disp22 = (long)(udisp22 << 10) >> 10;
-
-				disp22 += jr_fixup;
-
-				hi = ((disp22 >> 16) & 0x3F) | 0x780;
-				lo = disp22 & 0xFFFF;
-
-				word = hi + (lo << 16);
-			}
-
-			dst[woffs] = word;
-		}
-	}
-}
diff --git a/arch/v850/kernel/rte_ma1_cb-rom.ld b/arch/v850/kernel/rte_ma1_cb-rom.ld
deleted file mode 100644
index 87b618f8253..00000000000
--- a/arch/v850/kernel/rte_ma1_cb-rom.ld
+++ /dev/null
@@ -1,14 +0,0 @@
-/* Linker script for the Midas labs RTE-V850E/MA1-CB evaluation board
-   (CONFIG_RTE_CB_MA1), with kernel in ROM.  */
-
-MEMORY {
-	ROM   : ORIGIN = 0x00000000, LENGTH = 0x00100000
-	/* 1MB of SRAM.  This memory is mirrored 4 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-	/* 32MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	ROMK_SECTIONS(ROM, SRAM)
-}
diff --git a/arch/v850/kernel/rte_ma1_cb.c b/arch/v850/kernel/rte_ma1_cb.c
deleted file mode 100644
index 08abf3d5f8d..00000000000
--- a/arch/v850/kernel/rte_ma1_cb.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * arch/v850/kernel/rte_ma1_cb.c -- Midas labs RTE-V850E/MA1-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/ma1.h>
-#include <asm/rte_ma1_cb.h>
-#include <asm/v850e_timer_c.h>
-
-#include "mach.h"
-
-
-/* SRAM and SDRAM are almost contiguous (with a small hole in between;
-   see mach_reserve_bootmem for details), so just use both as one big area.  */
-#define RAM_START 	SRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-
-
-void __init mach_early_init (void)
-{
-	rte_cb_early_init ();
-}
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-void __init mach_reserve_bootmem ()
-{
-#ifdef CONFIG_RTE_CB_MULTI
-	/* Prevent the kernel from touching the monitor's scratch RAM.  */
-	reserve_bootmem(MON_SCRATCH_ADDR, MON_SCRATCH_SIZE,
-			BOOTMEM_DEFAULT);
-#endif
-
-	/* The space between SRAM and SDRAM is filled with duplicate
-	   images of SRAM.  Prevent the kernel from using them.  */
-	reserve_bootmem (SRAM_ADDR + SRAM_SIZE,
-			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE),
-			 BOOTMEM_DEFAULT);
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-/* Called before configuring an on-chip UART.  */
-void rte_ma1_cb_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud)
-{
-	/* The RTE-MA1-CB connects some general-purpose I/O pins on the
-	   CPU to the RTS/CTS lines of UART 0's serial connection.
-	   I/O pins P42 and P43 are RTS and CTS respectively.  */
-	if (chan == 0) {
-		/* Put P42 & P43 in I/O port mode.  */
-		MA_PORT4_PMC &= ~0xC;
-		/* Make P42 an output, and P43 an input.  */
-		MA_PORT4_PM = (MA_PORT4_PM & ~0xC) | 0x8;
-	}
-
-	/* Do pre-configuration for the actual UART.  */
-	ma_uart_pre_configure (chan, cflags, baud);
-}
-
-void __init mach_init_irqs (void)
-{
-	unsigned tc;
-
-	/* Initialize interrupts.  */
-	ma_init_irqs ();
-	rte_cb_init_irqs ();
-
-	/* Use falling-edge-sensitivity for interrupts .  */
-	V850E_TIMER_C_SESC (0) &= ~0xC;
-	V850E_TIMER_C_SESC (1) &= ~0xF;
-
-	/* INTP000-INTP011 are shared with `Timer C', so we have to set
-	   up Timer C to pass them through as raw interrupts.  */
-	for (tc = 0; tc < 2; tc++)
-		/* Turn on the timer.  */
-		V850E_TIMER_C_TMCC0 (tc) |= V850E_TIMER_C_TMCC0_CAE;
-
-	/* Make sure the relevant port0/port1 pins are assigned
-	   interrupt duty.  We used INTP001-INTP011 (don't screw with
-	   INTP000 because the monitor uses it).  */
-	MA_PORT0_PMC |= 0x4;	/* P02 (INTP001) in IRQ mode.  */
-	MA_PORT1_PMC |= 0x6;	/* P11 (INTP010) & P12 (INTP011) in IRQ mode.*/
-}
diff --git a/arch/v850/kernel/rte_ma1_cb.ld b/arch/v850/kernel/rte_ma1_cb.ld
deleted file mode 100644
index c8e16d16be4..00000000000
--- a/arch/v850/kernel/rte_ma1_cb.ld
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Linker script for the Midas labs RTE-V850E/MA1-CB evaluation board
-   (CONFIG_RTE_CB_MA1), with kernel in SDRAM, under Multi debugger.  */
-
-MEMORY {
-	/* 1MB of SRAM; we can't use the last 32KB, because it's used by
-	   the monitor scratch-RAM.  This memory is mirrored 4 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = (SRAM_SIZE - MON_SCRATCH_SIZE)
-	/* Monitor scratch RAM; only the interrupt vectors should go here.  */
-	MRAM  : ORIGIN = MON_SCRATCH_ADDR,  LENGTH = MON_SCRATCH_SIZE
-	/* 32MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-#ifdef CONFIG_RTE_CB_MA1_KSRAM
-# define KRAM SRAM
-#else
-# define KRAM SDRAM
-#endif
-
-SECTIONS {
-	/* We can't use RAMK_KRAM_CONTENTS because that puts the whole
-	   kernel in a single ELF segment, and the Multi debugger (which
-	   we use to load the kernel) appears to have bizarre problems
-	   dealing with it.  */
-
-	.text : {
-		__kram_start = . ;
-		TEXT_CONTENTS
-	} > KRAM
-
-	.data : {
-		DATA_CONTENTS
-		BSS_CONTENTS
-		RAMK_INIT_CONTENTS
-		__kram_end = . ;
-		BOOTMAP_CONTENTS
-
-		/* The address at which the interrupt vectors are initially
-		   loaded by the loader.  We can't load the interrupt vectors
-		   directly into their target location, because the monitor
-		   ROM for the GHS Multi debugger barfs if we try.
-		   Unfortunately, Multi also doesn't deal correctly with ELF
-		   sections where the LMA and VMA differ (it just ignores the
-		   LMA), so we can't use that feature to work around the
-		   problem!  What we do instead is just put the interrupt
-		   vectors into a normal section, and have the
-		   `mach_early_init' function for Midas boards do the
-		   necessary copying and relocation at runtime (this section
-		   basically only contains `jr' instructions, so it's not
-		   that hard).  */
-		. = ALIGN (0x10) ;
-		__intv_load_start = . ;
-		INTV_CONTENTS
-	} > KRAM
-
-	.root ALIGN (4096) : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/rte_mb_a_pci.c b/arch/v850/kernel/rte_mb_a_pci.c
deleted file mode 100644
index 687e367d8b6..00000000000
--- a/arch/v850/kernel/rte_mb_a_pci.c
+++ /dev/null
@@ -1,819 +0,0 @@
-/*
- * arch/v850/kernel/mb_a_pci.c -- PCI support for Midas lab RTE-MOTHER-A board
- *
- *  Copyright (C) 2001,02,03,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-
-#include <asm/machdep.h>
-
-/* __nomods_init is like __devinit, but is a no-op when modules are enabled.
-   This is used by some routines that can be called either during boot
-   or by a module.  */
-#ifdef CONFIG_MODULES
-#define __nomods_init /*nothing*/
-#else
-#define __nomods_init __devinit
-#endif
-
-/* PCI devices on the Mother-A board can only do DMA to/from the MB SRAM
-   (the RTE-V850E/MA1-CB cpu board doesn't support PCI access to
-   CPU-board memory), and since linux DMA buffers are allocated in
-   normal kernel memory, we basically have to copy DMA blocks around
-   (this is like a `bounce buffer').  When a DMA block is `mapped', we
-   allocate an identically sized block in MB SRAM, and if we're doing
-   output to the device, copy the CPU-memory block to the MB-SRAM block.
-   When an active block is `unmapped', we will copy the block back to
-   CPU memory if necessary, and then deallocate the MB SRAM block.
-   Ack.  */
-
-/* Where the motherboard SRAM is in the PCI-bus address space (the
-   first 512K of it is also mapped at PCI address 0).  */
-#define PCI_MB_SRAM_ADDR 0x800000
-
-/* Convert CPU-view MB SRAM address to/from PCI-view addresses of the
-   same memory.  */
-#define MB_SRAM_TO_PCI(mb_sram_addr) \
-   ((dma_addr_t)mb_sram_addr - MB_A_SRAM_ADDR + PCI_MB_SRAM_ADDR)
-#define PCI_TO_MB_SRAM(pci_addr)     \
-   (void *)(pci_addr - PCI_MB_SRAM_ADDR + MB_A_SRAM_ADDR)
-
-static void pcibios_assign_resources (void);
-
-struct mb_pci_dev_irq {
-	unsigned dev;		/* PCI device number */
-	unsigned irq_base;	/* First IRQ  */
-	unsigned query_pin;	/* True if we should read the device's
-				   Interrupt Pin info, and allocate
-				   interrupt IRQ_BASE + PIN.  */
-};
-
-/* PCI interrupts are mapped statically to GBUS interrupts.  */
-static struct mb_pci_dev_irq mb_pci_dev_irqs[] = {
-	/* Motherboard SB82558 ethernet controller */
-	{ 10,	IRQ_MB_A_LAN,		0 },
-	/* PCI slot 1 */
-	{ 8, 	IRQ_MB_A_PCI1(0),	1 },
-	/* PCI slot 2 */
-	{ 9, 	IRQ_MB_A_PCI2(0),	1 }
-};
-#define NUM_MB_PCI_DEV_IRQS ARRAY_SIZE(mb_pci_dev_irqs)
-
-
-/* PCI configuration primitives.  */
-
-#define CONFIG_DMCFGA(bus, devfn, offs)					\
-   (0x80000000								\
-    | ((offs) & ~0x3)							\
-    | ((devfn) << 8)							\
-    | ((bus)->number << 16))
-
-static int
-mb_pci_read (struct pci_bus *bus, unsigned devfn, int offs, int size, u32 *rval)
-{
-	u32 addr;
-	int flags;
-
-	local_irq_save (flags);
-
-	MB_A_PCI_PCICR = 0x7;
-	MB_A_PCI_DMCFGA = CONFIG_DMCFGA (bus, devfn, offs);
-
-	addr = MB_A_PCI_IO_ADDR + (offs & 0x3);
-
-	switch (size) {
-	case 1:	*rval = *(volatile  u8 *)addr; break;
-	case 2:	*rval = *(volatile u16 *)addr; break;
-	case 4:	*rval = *(volatile u32 *)addr; break;
-	}
-
-        if (MB_A_PCI_PCISR & 0x2000) {
-		MB_A_PCI_PCISR = 0x2000;
-		*rval = ~0;
-        }
-
-	MB_A_PCI_DMCFGA = 0;
-
-	local_irq_restore (flags);
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static int
-mb_pci_write (struct pci_bus *bus, unsigned devfn, int offs, int size, u32 val)
-{
-	u32 addr;
-	int flags;
-
-	local_irq_save (flags);
-
-	MB_A_PCI_PCICR = 0x7;
-	MB_A_PCI_DMCFGA = CONFIG_DMCFGA (bus, devfn, offs);
-
-	addr = MB_A_PCI_IO_ADDR + (offs & 0x3);
-
-	switch (size) {
-	case 1: *(volatile  u8 *)addr = val; break;
-	case 2: *(volatile u16 *)addr = val; break;
-	case 4: *(volatile u32 *)addr = val; break;
-	}
-
-        if (MB_A_PCI_PCISR & 0x2000)
-		MB_A_PCI_PCISR = 0x2000;
-
-	MB_A_PCI_DMCFGA = 0;
-
-	local_irq_restore (flags);
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static struct pci_ops mb_pci_config_ops = {
-	.read	= mb_pci_read,
-	.write	= mb_pci_write,
-};
-
-
-/* PCI Initialization.  */
-
-static struct pci_bus *mb_pci_bus = 0;
-
-/* Do initial PCI setup.  */
-static int __devinit pcibios_init (void)
-{
-	u32 id = MB_A_PCI_PCIHIDR;
-	u16 vendor = id & 0xFFFF;
-	u16 device = (id >> 16) & 0xFFFF;
-
-	if (vendor == PCI_VENDOR_ID_PLX && device == PCI_DEVICE_ID_PLX_9080) {
-		printk (KERN_INFO
-			"PCI: PLX Technology PCI9080 HOST/PCI bridge\n");
-
-		MB_A_PCI_PCICR = 0x147;
-
-		MB_A_PCI_PCIBAR0 = 0x007FFF00;
-		MB_A_PCI_PCIBAR1 = 0x0000FF00;
-		MB_A_PCI_PCIBAR2 = 0x00800000;
-
-		MB_A_PCI_PCILTR = 0x20;
-
-		MB_A_PCI_PCIPBAM |= 0x3;
-
-		MB_A_PCI_PCISR =  ~0; /* Clear errors.  */
-
-		/* Reprogram the motherboard's IO/config address space,
-		   as we don't support the GCS7 address space that the
-		   default uses.  */
-
-		/* Significant address bits used for decoding PCI GCS5 space
-		   accesses.  */
-		MB_A_PCI_DMRR = ~(MB_A_PCI_MEM_SIZE - 1);
-
-		/* I don't understand this, but the SolutionGear example code
-		   uses such an offset, and it doesn't work without it.  XXX */
-#if GCS5_SIZE == 0x00800000
-#define GCS5_CFG_OFFS 0x00800000
-#else
-#define GCS5_CFG_OFFS 0
-#endif
-
-		/* Address bit values for matching.  Note that we have to give
-		   the address from the motherboard's point of view, which is
-		   different than the CPU's.  */
-		/* PCI memory space.  */
-		MB_A_PCI_DMLBAM = GCS5_CFG_OFFS + 0x0;
-		/* PCI I/O space.  */
-		MB_A_PCI_DMLBAI =
-			GCS5_CFG_OFFS + (MB_A_PCI_IO_ADDR - GCS5_ADDR);
-
-		mb_pci_bus = pci_scan_bus (0, &mb_pci_config_ops, 0);
-
-		pcibios_assign_resources ();
-	} else
-		printk (KERN_ERR "PCI: HOST/PCI bridge not found\n");
-
-	return 0;
-}
-
-subsys_initcall (pcibios_init);
-
-char __devinit *pcibios_setup (char *option)
-{
-	/* Don't handle any options. */
-	return option;
-}
-
-
-int __nomods_init pcibios_enable_device (struct pci_dev *dev, int mask)
-{
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx = 0; idx < 6; idx++) {
-		r = &dev->resource[idx];
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available because "
-			       "of resource collisions\n", pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (cmd != old_cmd) {
-		printk("PCI: Enabling device %s (%04x -> %04x)\n",
-		       pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
-
-/* Resource allocation.  */
-static void __devinit pcibios_assign_resources (void)
-{
-	struct pci_dev *dev = NULL;
-	struct resource *r;
-
-	for_each_pci_dev(dev) {
-		unsigned di_num;
-		unsigned class = dev->class >> 8;
-
-		if (class && class != PCI_CLASS_BRIDGE_HOST) {
-			unsigned r_num;
-			for(r_num = 0; r_num < 6; r_num++) {
-				r = &dev->resource[r_num];
-				if (!r->start && r->end)
-					pci_assign_resource (dev, r_num);
-			}
-		}
-
-		/* Assign interrupts.  */
-		for (di_num = 0; di_num < NUM_MB_PCI_DEV_IRQS; di_num++) {
-			struct mb_pci_dev_irq *di = &mb_pci_dev_irqs[di_num];
-
-			if (di->dev == PCI_SLOT (dev->devfn)) {
-				unsigned irq = di->irq_base;
-
-				if (di->query_pin) {
-					/* Find out which interrupt pin
-					   this device uses (each PCI
-					   slot has 4).  */
-					u8 irq_pin;
-
-					pci_read_config_byte (dev,
-							     PCI_INTERRUPT_PIN,
-							      &irq_pin);
-
-					if (irq_pin == 0)
-						/* Doesn't use interrupts.  */ 
-						continue;
-					else
-						irq += irq_pin - 1;
-				}
-
-				pcibios_update_irq (dev, irq);
-			}
-		}
-	}
-}
-
-void __devinit pcibios_update_irq (struct pci_dev *dev, int irq)
-{
-	dev->irq = irq;
-	pci_write_config_byte (dev, PCI_INTERRUPT_LINE, irq);
-}
-
-void __devinit
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-			struct resource *res)
-{
-	unsigned long offset = 0;
-
-	if (res->flags & IORESOURCE_IO) {
-		offset = MB_A_PCI_IO_ADDR;
-	} else if (res->flags & IORESOURCE_MEM) {
-		offset = MB_A_PCI_MEM_ADDR;
-	}
-
-	region->start = res->start - offset;
-	region->end = res->end - offset;
-}
-
-
-/* Stubs for things we don't use.  */
-
-/* Called after each bus is probed, but before its children are examined. */
-void pcibios_fixup_bus(struct pci_bus *b)
-{
-}
-
-void
-pcibios_align_resource (void *data, struct resource *res,
-			resource_size_t size, resource_size_t align)
-{
-}
-
-void pcibios_set_master (struct pci_dev *dev)
-{
-}
-
-
-/* Mother-A SRAM memory allocation.  This is a simple first-fit allocator.  */
-
-/* A memory free-list node.  */
-struct mb_sram_free_area {
-	void *mem;
-	unsigned long size;
-	struct mb_sram_free_area *next;
-};
-
-/* The tail of the free-list, which starts out containing all the SRAM.  */
-static struct mb_sram_free_area mb_sram_free_tail = {
-	(void *)MB_A_SRAM_ADDR, MB_A_SRAM_SIZE, 0
-};
-
-/* The free-list.  */
-static struct mb_sram_free_area *mb_sram_free_areas = &mb_sram_free_tail;
-
-/* The free-list of free free-list nodes. (:-)  */
-static struct mb_sram_free_area *mb_sram_free_free_areas = 0;
-
-/* Spinlock protecting the above globals.  */
-static DEFINE_SPINLOCK(mb_sram_lock);
-
-/* Allocate a memory block at least SIZE bytes long in the Mother-A SRAM
-   space.  */
-static void *alloc_mb_sram (size_t size)
-{
-	struct mb_sram_free_area *prev, *fa;
-	unsigned long flags;
-	void *mem = 0;
-
-	spin_lock_irqsave (mb_sram_lock, flags);
-
-	/* Look for a free area that can contain SIZE bytes.  */
-	for (prev = 0, fa = mb_sram_free_areas; fa; prev = fa, fa = fa->next)
-		if (fa->size >= size) {
-			/* Found one!  */
-			mem = fa->mem;
-
-			if (fa->size == size) {
-				/* In fact, it fits exactly, so remove
-				   this node from the free-list.  */
-				if (prev)
-					prev->next = fa->next;
-				else
-					mb_sram_free_areas = fa->next;
-				/* Put it on the free-list-entry-free-list. */
-				fa->next = mb_sram_free_free_areas;
-				mb_sram_free_free_areas = fa;
-			} else {
-				/* FA is bigger than SIZE, so just
-				   reduce its size to account for this
-				   allocation.  */
-				fa->mem += size;
-				fa->size -= size;
-			}
-
-			break;
-		}
-
-	spin_unlock_irqrestore (mb_sram_lock, flags);
-
-	return mem;
-}
-
-/* Return the memory area MEM of size SIZE to the MB SRAM free pool.  */
-static void free_mb_sram (void *mem, size_t size)
-{
-	struct mb_sram_free_area *prev, *fa, *new_fa;
-	unsigned long flags;
-	void *end = mem + size;
-
-	spin_lock_irqsave (mb_sram_lock, flags);
-
- retry:
-	/* Find an adjacent free-list entry.  */
-	for (prev = 0, fa = mb_sram_free_areas; fa; prev = fa, fa = fa->next)
-		if (fa->mem == end) {
-			/* FA is just after MEM, grow down to encompass it. */
-			fa->mem = mem;
-			fa->size += size;
-			goto done;
-		} else if (fa->mem + fa->size == mem) {
-			struct mb_sram_free_area *next_fa = fa->next;
-
-			/* FA is just before MEM, expand to encompass it. */
-			fa->size += size;
-
-			/* See if FA can now be merged with its successor. */
-			if (next_fa && fa->mem + fa->size == next_fa->mem) {
-				/* Yup; merge NEXT_FA's info into FA.  */
-				fa->size += next_fa->size;
-				fa->next = next_fa->next;
-				/* Free NEXT_FA.  */
-				next_fa->next = mb_sram_free_free_areas;
-				mb_sram_free_free_areas = next_fa;
-			}
-			goto done;
-		} else if (fa->mem > mem)
-			/* We've reached the right spot in the free-list
-			   without finding an adjacent free-area, so add
-			   a new free area to hold mem. */
-			break;
-
-	/* Make a new free-list entry.  */
-
-	/* First, get a free-list entry.  */
-	if (! mb_sram_free_free_areas) {
-		/* There are none, so make some.  */
-		void *block;
-		size_t block_size = sizeof (struct mb_sram_free_area) * 8;
-
-		/* Don't hold the lock while calling kmalloc (I'm not
-		   sure whether it would be a problem, since we use
-		   GFP_ATOMIC, but it makes me nervous).  */
-		spin_unlock_irqrestore (mb_sram_lock, flags);
-
-		block = kmalloc (block_size, GFP_ATOMIC);
-		if (! block)
-			panic ("free_mb_sram: can't allocate free-list entry");
-
-		/* Now get the lock back.  */
-		spin_lock_irqsave (mb_sram_lock, flags);
-
-		/* Add the new free free-list entries.  */
-		while (block_size > 0) {
-			struct mb_sram_free_area *nfa = block;
-			nfa->next = mb_sram_free_free_areas;
-			mb_sram_free_free_areas = nfa;
-			block += sizeof *nfa;
-			block_size -= sizeof *nfa;
-		}
-
-		/* Since we dropped the lock to call kmalloc, the
-		   free-list could have changed, so retry from the
-		   beginning.  */
-		goto retry;
-	}
-
-	/* Remove NEW_FA from the free-list of free-list entries.  */
-	new_fa = mb_sram_free_free_areas;
-	mb_sram_free_free_areas = new_fa->next;
-
-	/* NEW_FA initially holds only MEM.  */
-	new_fa->mem = mem;
-	new_fa->size = size;
-
-	/* Insert NEW_FA in the free-list between PREV and FA. */
-	new_fa->next = fa;
-	if (prev)
-		prev->next = new_fa;
-	else
-		mb_sram_free_areas = new_fa;
-
- done:
-	spin_unlock_irqrestore (mb_sram_lock, flags);
-}
-
-
-/* Maintainence of CPU -> Mother-A DMA mappings.  */
-
-struct dma_mapping {
-	void *cpu_addr;
-	void *mb_sram_addr;
-	size_t size;
-	struct dma_mapping *next;
-};
-
-/* A list of mappings from CPU addresses to MB SRAM addresses for active
-   DMA blocks (that have been `granted' to the PCI device).  */
-static struct dma_mapping *active_dma_mappings = 0;
-
-/* A list of free mapping objects.  */
-static struct dma_mapping *free_dma_mappings = 0;
-
-/* Spinlock protecting the above globals.  */
-static DEFINE_SPINLOCK(dma_mappings_lock);
-
-static struct dma_mapping *new_dma_mapping (size_t size)
-{
-	unsigned long flags;
-	struct dma_mapping *mapping;
-	void *mb_sram_block = alloc_mb_sram (size);
-
-	if (! mb_sram_block)
-		return 0;
-
-	spin_lock_irqsave (dma_mappings_lock, flags);
-
-	if (! free_dma_mappings) {
-		/* We're out of mapping structures, make more.  */
-		void *mblock;
-		size_t mblock_size = sizeof (struct dma_mapping) * 8;
-
-		/* Don't hold the lock while calling kmalloc (I'm not
-		   sure whether it would be a problem, since we use
-		   GFP_ATOMIC, but it makes me nervous).  */
-		spin_unlock_irqrestore (dma_mappings_lock, flags);
-
-		mblock = kmalloc (mblock_size, GFP_ATOMIC);
-		if (! mblock) {
-			free_mb_sram (mb_sram_block, size);
-			return 0;
-		}
-
-		/* Get the lock back.  */
-		spin_lock_irqsave (dma_mappings_lock, flags);
-
-		/* Add the new mapping structures to the free-list.  */
-		while (mblock_size > 0) {
-			struct dma_mapping *fm = mblock;
-			fm->next = free_dma_mappings;
-			free_dma_mappings = fm;
-			mblock += sizeof *fm;
-			mblock_size -= sizeof *fm;
-		}
-	}
-
-	/* Get a mapping struct from the freelist.  */
-	mapping = free_dma_mappings;
-	free_dma_mappings = mapping->next;
-
-	/* Initialize the mapping.  Other fields should be filled in by
-	   caller.  */
-	mapping->mb_sram_addr = mb_sram_block;
-	mapping->size = size;
-
-	/* Add it to the list of active mappings.  */
-	mapping->next = active_dma_mappings;
-	active_dma_mappings = mapping;
-
-	spin_unlock_irqrestore (dma_mappings_lock, flags);
-
-	return mapping;
-}
-
-static struct dma_mapping *find_dma_mapping (void *mb_sram_addr)
-{
-	unsigned long flags;
-	struct dma_mapping *mapping;
-
-	spin_lock_irqsave (dma_mappings_lock, flags);
-
-	for (mapping = active_dma_mappings; mapping; mapping = mapping->next)
-		if (mapping->mb_sram_addr == mb_sram_addr) {
-			spin_unlock_irqrestore (dma_mappings_lock, flags);
-			return mapping;
-		}
-
-	panic ("find_dma_mapping: unmapped PCI DMA addr 0x%x",
-	       MB_SRAM_TO_PCI (mb_sram_addr));
-}
-
-static struct dma_mapping *deactivate_dma_mapping (void *mb_sram_addr)
-{
-	unsigned long flags;
-	struct dma_mapping *mapping, *prev;
-
-	spin_lock_irqsave (dma_mappings_lock, flags);
-
-	for (prev = 0, mapping = active_dma_mappings;
-	     mapping;
-	     prev = mapping, mapping = mapping->next)
-	{
-		if (mapping->mb_sram_addr == mb_sram_addr) {
-			/* This is the MAPPING; deactivate it.  */
-			if (prev)
-				prev->next = mapping->next;
-			else
-				active_dma_mappings = mapping->next;
-
-			spin_unlock_irqrestore (dma_mappings_lock, flags);
-
-			return mapping;
-		}
-	}
-
-	panic ("deactivate_dma_mapping: unmapped PCI DMA addr 0x%x",
-	       MB_SRAM_TO_PCI (mb_sram_addr));
-}
-
-/* Return MAPPING to the freelist.  */
-static inline void
-free_dma_mapping (struct dma_mapping *mapping)
-{
-	unsigned long flags;
-
-	free_mb_sram (mapping->mb_sram_addr, mapping->size);
-
-	spin_lock_irqsave (dma_mappings_lock, flags);
-
-	mapping->next = free_dma_mappings;
-	free_dma_mappings = mapping;
-
-	spin_unlock_irqrestore (dma_mappings_lock, flags);
-}
-
-
-/* Single PCI DMA mappings.  */
-
-/* `Grant' to PDEV the memory block at CPU_ADDR, for doing DMA.  The
-   32-bit PCI bus mastering address to use is returned.  the device owns
-   this memory until either pci_unmap_single or pci_dma_sync_single is
-   performed.  */
-dma_addr_t
-pci_map_single (struct pci_dev *pdev, void *cpu_addr, size_t size, int dir)
-{
-	struct dma_mapping *mapping = new_dma_mapping (size);
-
-	if (! mapping)
-		return 0;
-
-	mapping->cpu_addr = cpu_addr;
-
-	if (dir == PCI_DMA_BIDIRECTIONAL || dir == PCI_DMA_TODEVICE)
-		memcpy (mapping->mb_sram_addr, cpu_addr, size);
-
-	return MB_SRAM_TO_PCI (mapping->mb_sram_addr);
-}
-
-/* Return to the CPU the PCI DMA memory block previously `granted' to
-   PDEV, at DMA_ADDR.  */
-void pci_unmap_single (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
-		       int dir)
-{
-	void *mb_sram_addr = PCI_TO_MB_SRAM (dma_addr);
-	struct dma_mapping *mapping = deactivate_dma_mapping (mb_sram_addr);
-
-	if (size != mapping->size)
-		panic ("pci_unmap_single: size (%d) doesn't match"
-		       " size of mapping at PCI DMA addr 0x%x (%d)\n",
-		       size, dma_addr, mapping->size);
-
-	/* Copy back the DMA'd contents if necessary.  */
-	if (dir == PCI_DMA_BIDIRECTIONAL || dir == PCI_DMA_FROMDEVICE)
-		memcpy (mapping->cpu_addr, mb_sram_addr, size);
-
-	/* Return mapping to the freelist.  */
-	free_dma_mapping (mapping);
-}
-
-/* Make physical memory consistent for a single streaming mode DMA
-   translation after a transfer.
-
-   If you perform a pci_map_single() but wish to interrogate the
-   buffer using the cpu, yet do not wish to teardown the PCI dma
-   mapping, you must call this function before doing so.  At the next
-   point you give the PCI dma address back to the card, you must first
-   perform a pci_dma_sync_for_device, and then the device again owns
-   the buffer.  */
-void
-pci_dma_sync_single_for_cpu (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
-		     int dir)
-{
-	void *mb_sram_addr = PCI_TO_MB_SRAM (dma_addr);
-	struct dma_mapping *mapping = find_dma_mapping (mb_sram_addr);
-
-	/* Synchronize the DMA buffer with the CPU buffer if necessary.  */
-	if (dir == PCI_DMA_FROMDEVICE)
-		memcpy (mapping->cpu_addr, mb_sram_addr, size);
-	else if (dir == PCI_DMA_TODEVICE)
-		; /* nothing to do */
-	else
-		panic("pci_dma_sync_single: unsupported sync dir: %d", dir);
-}
-
-void
-pci_dma_sync_single_for_device (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
-				int dir)
-{
-	void *mb_sram_addr = PCI_TO_MB_SRAM (dma_addr);
-	struct dma_mapping *mapping = find_dma_mapping (mb_sram_addr);
-
-	/* Synchronize the DMA buffer with the CPU buffer if necessary.  */
-	if (dir == PCI_DMA_FROMDEVICE)
-		; /* nothing to do */
-	else if (dir == PCI_DMA_TODEVICE)
-		memcpy (mb_sram_addr, mapping->cpu_addr, size);
-	else
-		panic("pci_dma_sync_single: unsupported sync dir: %d", dir);
-}
-
-
-/* Scatter-gather PCI DMA mappings.  */
-
-/* Do multiple DMA mappings at once.  */
-int
-pci_map_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len, int dir)
-{
-	BUG ();
-	return 0;
-}
-
-/* Unmap multiple DMA mappings at once.  */
-void
-pci_unmap_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len,int dir)
-{
-	BUG ();
-}
-
-/* Make physical memory consistent for a set of streaming mode DMA
-   translations after a transfer.  The same as pci_dma_sync_single_* but
-   for a scatter-gather list, same rules and usage.  */
-
-void
-pci_dma_sync_sg_for_cpu (struct pci_dev *dev,
-			 struct scatterlist *sg, int sg_len,
-			 int dir)
-{
-	BUG ();
-}
-
-void
-pci_dma_sync_sg_for_device (struct pci_dev *dev,
-			    struct scatterlist *sg, int sg_len,
-			    int dir)
-{
-	BUG ();
-}
-
-
-/* PCI mem mapping.  */
-
-/* Allocate and map kernel buffer using consistent mode DMA for PCI
-   device.  Returns non-NULL cpu-view pointer to the buffer if
-   successful and sets *DMA_ADDR to the pci side dma address as well,
-   else DMA_ADDR is undefined.  */
-void *
-pci_alloc_consistent (struct pci_dev *pdev, size_t size, dma_addr_t *dma_addr)
-{
-	void *mb_sram_mem = alloc_mb_sram (size);
-	if (mb_sram_mem)
-		*dma_addr = MB_SRAM_TO_PCI (mb_sram_mem);
-	return mb_sram_mem;
-}
-
-/* Free and unmap a consistent DMA buffer.  CPU_ADDR and DMA_ADDR must
-   be values that were returned from pci_alloc_consistent.  SIZE must be
-   the same as what as passed into pci_alloc_consistent.  References to
-   the memory and mappings associated with CPU_ADDR or DMA_ADDR past
-   this call are illegal.  */
-void
-pci_free_consistent (struct pci_dev *pdev, size_t size, void *cpu_addr,
-		     dma_addr_t dma_addr)
-{
-	void *mb_sram_mem = PCI_TO_MB_SRAM (dma_addr);
-	free_mb_sram (mb_sram_mem, size);
-}
-
-
-/* iomap/iomap */
-
-void __iomem *pci_iomap (struct pci_dev *dev, int bar, unsigned long max)
-{
-	resource_size_t start = pci_resource_start (dev, bar);
-	resource_size_t len = pci_resource_len (dev, bar);
-
-	if (!start || len == 0)
-		return 0;
-
-	/* None of the ioremap functions actually do anything, other than
-	   re-casting their argument, so don't bother differentiating them.  */
-	return ioremap (start, len);
-}
-
-void pci_iounmap (struct pci_dev *dev, void __iomem *addr)
-{
-	/* nothing */
-}
-
-
-/* symbol exports (for modules) */
-
-EXPORT_SYMBOL (pci_map_single);
-EXPORT_SYMBOL (pci_unmap_single);
-EXPORT_SYMBOL (pci_alloc_consistent);
-EXPORT_SYMBOL (pci_free_consistent);
-EXPORT_SYMBOL (pci_dma_sync_single_for_cpu);
-EXPORT_SYMBOL (pci_dma_sync_single_for_device);
-EXPORT_SYMBOL (pci_iomap);
-EXPORT_SYMBOL (pci_iounmap);
diff --git a/arch/v850/kernel/rte_me2_cb.c b/arch/v850/kernel/rte_me2_cb.c
deleted file mode 100644
index 46803d48dff..00000000000
--- a/arch/v850/kernel/rte_me2_cb.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * arch/v850/kernel/rte_me2_cb.c -- Midas labs RTE-V850E/ME2-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/fs.h>
-#include <linux/major.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/me2.h>
-#include <asm/rte_me2_cb.h>
-#include <asm/machdep.h>
-#include <asm/v850e_intc.h>
-#include <asm/v850e_cache.h>
-#include <asm/irq.h>
-
-#include "mach.h"
-
-extern unsigned long *_intv_start;
-extern unsigned long *_intv_end;
-
-/* LED access routines.  */
-extern unsigned read_leds (int pos, char *buf, int len);
-extern unsigned write_leds (int pos, const char *buf, int len);
-
-
-/* SDRAM are almost contiguous (with a small hole in between;
-   see mach_reserve_bootmem for details), so just use both as one big area.  */
-#define RAM_START 	SDRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-/* Called before configuring an on-chip UART.  */
-void rte_me2_cb_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud)
-{
-	/* The RTE-V850E/ME2-CB connects some general-purpose I/O
-	   pins on the CPU to the RTS/CTS lines of UARTB channel 0's
-	   serial connection.
-	   I/O pins P21 and P22 are RTS and CTS respectively.  */
-	if (chan == 0) {
-		/* Put P21 & P22 in I/O port mode.  */
-		ME2_PORT2_PMC &= ~0x6;
-		/* Make P21 and output, and P22 an input.  */
-		ME2_PORT2_PM = (ME2_PORT2_PM & ~0xC) | 0x4;
-	}
-
-	me2_uart_pre_configure (chan, cflags, baud);
-}
-
-void __init mach_init_irqs (void)
-{
-	/* Initialize interrupts.  */
-	me2_init_irqs ();
-	rte_me2_cb_init_irqs ();
-}
-
-#ifdef CONFIG_ROM_KERNEL
-/* Initialization for kernel in ROM.  */
-static inline rom_kernel_init (void)
-{
-	/* If the kernel is in ROM, we have to copy any initialized data
-	   from ROM into RAM.  */
-	extern unsigned long _data_load_start, _sdata, _edata;
-	register unsigned long *src = &_data_load_start;
-	register unsigned long *dst = &_sdata, *end = &_edata;
-
-	while (dst != end)
-		*dst++ = *src++;
-}
-#endif /* CONFIG_ROM_KERNEL */
-
-static void install_interrupt_vectors (void)
-{
-	unsigned long *p1, *p2;
-
-	ME2_IRAMM = 0x03; /* V850E/ME2 iRAM write mode */
-
-	/* vector copy to iRAM */
-	p1 = (unsigned long *)0; /* v85x vector start */
-	p2 = (unsigned long *)&_intv_start;
-	while (p2 < (unsigned long *)&_intv_end)
-		*p1++ = *p2++;
-
-	ME2_IRAMM = 0x00; /* V850E/ME2 iRAM read mode */
-}
-
-/* CompactFlash */
-
-static void cf_power_on (void)
-{
-	/* CF card detected? */
-	if (CB_CF_STS0 & 0x0030)
-		return;
-
-	CB_CF_REG0 = 0x0002; /* reest on */
-	mdelay (10);
-	CB_CF_REG0 = 0x0003; /* power on */
-	mdelay (10);
-	CB_CF_REG0 = 0x0001; /* reset off */
-	mdelay (10);
-}
-
-static void cf_power_off (void)
-{
-	CB_CF_REG0 = 0x0003; /* power on */
-	mdelay (10);
-	CB_CF_REG0 = 0x0002; /* reest on */
-	mdelay (10);
-}
-
-void __init mach_early_init (void)
-{
-	install_interrupt_vectors ();
-
-	/* CS1 SDRAM instruction cache enable */
-	v850e_cache_enable (0x04, 0x03, 0);
-
-	rte_cb_early_init ();
-
-	/* CompactFlash power on */
-	cf_power_on ();
-
-#if defined (CONFIG_ROM_KERNEL)
-	rom_kernel_init ();
-#endif
-}
-
-
-/* RTE-V850E/ME2-CB Programmable Interrupt Controller.  */
-
-static struct cb_pic_irq_init cb_pic_irq_inits[] = {
-	{ "CB_EXTTM0",       IRQ_CB_EXTTM0,       1, 1, 6 },
-	{ "CB_EXTSIO",       IRQ_CB_EXTSIO,       1, 1, 6 },
-	{ "CB_TOVER",        IRQ_CB_TOVER,        1, 1, 6 },
-	{ "CB_GINT0",        IRQ_CB_GINT0,        1, 1, 6 },
-	{ "CB_USB",          IRQ_CB_USB,          1, 1, 6 },
-	{ "CB_LANC",         IRQ_CB_LANC,         1, 1, 6 },
-	{ "CB_USB_VBUS_ON",  IRQ_CB_USB_VBUS_ON,  1, 1, 6 },
-	{ "CB_USB_VBUS_OFF", IRQ_CB_USB_VBUS_OFF, 1, 1, 6 },
-	{ "CB_EXTTM1",       IRQ_CB_EXTTM1,       1, 1, 6 },
-	{ "CB_EXTTM2",       IRQ_CB_EXTTM2,       1, 1, 6 },
-	{ 0 }
-};
-#define NUM_CB_PIC_IRQ_INITS (ARRAY_SIZE(cb_pic_irq_inits) - 1)
-
-static struct hw_interrupt_type cb_pic_hw_itypes[NUM_CB_PIC_IRQ_INITS];
-static unsigned char cb_pic_active_irqs = 0;
-
-void __init rte_me2_cb_init_irqs (void)
-{
-	cb_pic_init_irq_types (cb_pic_irq_inits, cb_pic_hw_itypes);
-
-	/* Initalize on board PIC1 (not PIC0) enable */
-	CB_PIC_INT0M  = 0x0000;
-	CB_PIC_INT1M  = 0x0000;
-	CB_PIC_INTR   = 0x0000;
-	CB_PIC_INTEN |= CB_PIC_INT1EN;
-
-	ME2_PORT2_PMC 	 |= 0x08;	/* INTP23/SCK1 mode */
-	ME2_PORT2_PFC 	 &= ~0x08;	/* INTP23 mode */
-	ME2_INTR(2) 	 &= ~0x08;	/* INTP23 falling-edge detect */
-	ME2_INTF(2) 	 &= ~0x08;	/*   " */
-
-	rte_cb_init_irqs ();	/* gbus &c */
-}
-
-
-/* Enable interrupt handling for interrupt IRQ.  */
-void cb_pic_enable_irq (unsigned irq)
-{
-	CB_PIC_INT1M |= 1 << (irq - CB_PIC_BASE_IRQ);
-}
-
-void cb_pic_disable_irq (unsigned irq)
-{
-	CB_PIC_INT1M &= ~(1 << (irq - CB_PIC_BASE_IRQ));
-}
-
-void cb_pic_shutdown_irq (unsigned irq)
-{
-	cb_pic_disable_irq (irq);
-
-	if (--cb_pic_active_irqs == 0)
-		free_irq (IRQ_CB_PIC, 0);
-
-	CB_PIC_INT1M &= ~(1 << (irq - CB_PIC_BASE_IRQ));
-}
-
-static irqreturn_t cb_pic_handle_irq (int irq, void *dev_id,
-				      struct pt_regs *regs)
-{
-	irqreturn_t rval = IRQ_NONE;
-	unsigned status = CB_PIC_INTR;
-	unsigned enable = CB_PIC_INT1M;
-
-	/* Only pay attention to enabled interrupts.  */
-	status &= enable;
-
-	CB_PIC_INTEN &= ~CB_PIC_INT1EN;
-
-	if (status) {
-		unsigned mask = 1;
-
-		irq = CB_PIC_BASE_IRQ;
-		do {
-			/* There's an active interrupt, find out which one,
-			   and call its handler.  */
-			while (! (status & mask)) {
-				irq++;
-				mask <<= 1;
-			}
-			status &= ~mask;
-
-			CB_PIC_INTR = mask;
-
-			/* Recursively call handle_irq to handle it. */
-			handle_irq (irq, regs);
-			rval = IRQ_HANDLED;
-		} while (status);
-	}
-
-	CB_PIC_INTEN |= CB_PIC_INT1EN;
-
-	return rval;
-}
-
-
-static void irq_nop (unsigned irq) { }
-
-static unsigned cb_pic_startup_irq (unsigned irq)
-{
-	int rval;
-
-	if (cb_pic_active_irqs == 0) {
-		rval = request_irq (IRQ_CB_PIC, cb_pic_handle_irq,
-				    IRQF_DISABLED, "cb_pic_handler", 0);
-		if (rval != 0)
-			return rval;
-	}
-
-	cb_pic_active_irqs++;
-
-	cb_pic_enable_irq (irq);
-
-	return 0;
-}
-
-/* Initialize HW_IRQ_TYPES for INTC-controlled irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-void __init cb_pic_init_irq_types (struct cb_pic_irq_init *inits,
-				   struct hw_interrupt_type *hw_irq_types)
-{
-	struct cb_pic_irq_init *init;
-	for (init = inits; init->name; init++) {
-		struct hw_interrupt_type *hwit = hw_irq_types++;
-
-		hwit->typename = init->name;
-
-		hwit->startup  = cb_pic_startup_irq;
-		hwit->shutdown = cb_pic_shutdown_irq;
-		hwit->enable   = cb_pic_enable_irq;
-		hwit->disable  = cb_pic_disable_irq;
-		hwit->ack      = irq_nop;
-		hwit->end      = irq_nop;
-
-		/* Initialize kernel IRQ infrastructure for this interrupt.  */
-		init_irq_handlers(init->base, init->num, init->interval, hwit);
-	}
-}
diff --git a/arch/v850/kernel/rte_me2_cb.ld b/arch/v850/kernel/rte_me2_cb.ld
deleted file mode 100644
index cf0766065ec..00000000000
--- a/arch/v850/kernel/rte_me2_cb.ld
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Linker script for the Midas labs RTE-V850E/ME2-CB evaluation board
-   (CONFIG_RTE_CB_ME2), with kernel in SDRAM.  */
-
-MEMORY {
-	/* 128Kbyte of IRAM */
-	IRAM : ORIGIN = 0x00000000, LENGTH = 0x00020000
-
-	/* 32MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-#define KRAM SDRAM
-
-SECTIONS {
-	.text : {
-		__kram_start = . ;
-		TEXT_CONTENTS
-		INTV_CONTENTS	/* copy to iRAM (0x0-0x620) */
-	} > KRAM
-
-	.data : {
-		DATA_CONTENTS
-		BSS_CONTENTS
-		RAMK_INIT_CONTENTS
-		__kram_end = . ;
-		BOOTMAP_CONTENTS
-	} > KRAM
-	
-	.root ALIGN (4096) : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/rte_nb85e_cb-multi.ld b/arch/v850/kernel/rte_nb85e_cb-multi.ld
deleted file mode 100644
index de347b4fffa..00000000000
--- a/arch/v850/kernel/rte_nb85e_cb-multi.ld
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Linker script for the Midas labs RTE-NB85E-CB evaluation board
-   (CONFIG_RTE_CB_NB85E), with the Multi debugger ROM monitor .  */
-
-MEMORY {
-	/* 1MB of SRAM; we can't use the last 96KB, because it's used by
-	   the monitor scratch-RAM.  This memory is mirrored 4 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = (SRAM_SIZE - MON_SCRATCH_SIZE)
-	/* Monitor scratch RAM; only the interrupt vectors should go here.  */
-	MRAM  : ORIGIN = MON_SCRATCH_ADDR,  LENGTH = MON_SCRATCH_SIZE
-	/* 16MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-#ifdef CONFIG_RTE_CB_NB85E_KSRAM
-# define KRAM SRAM
-#else
-# define KRAM SDRAM
-#endif
-
-SECTIONS {
-	/* We can't use RAMK_KRAM_CONTENTS because that puts the whole
-	   kernel in a single ELF segment, and the Multi debugger (which
-	   we use to load the kernel) appears to have bizarre problems
-	   dealing with it.  */
-
-	.text : {
-		__kram_start = . ;
-		TEXT_CONTENTS
-	} > KRAM
-
-	.data : {
-		DATA_CONTENTS
-		BSS_CONTENTS
-		RAMK_INIT_CONTENTS
-		__kram_end = . ;
-		BOOTMAP_CONTENTS
-
-		/* The address at which the interrupt vectors are initially
-		   loaded by the loader.  We can't load the interrupt vectors
-		   directly into their target location, because the monitor
-		   ROM for the GHS Multi debugger barfs if we try.
-		   Unfortunately, Multi also doesn't deal correctly with ELF
-		   sections where the LMA and VMA differ (it just ignores the
-		   LMA), so we can't use that feature to work around the
-		   problem!  What we do instead is just put the interrupt
-		   vectors into a normal section, and have the
-		   `mach_early_init' function for Midas boards do the
-		   necessary copying and relocation at runtime (this section
-		   basically only contains `jr' instructions, so it's not
-		   that hard).  */
-		. = ALIGN (0x10) ;
-		__intv_load_start = . ;
-		INTV_CONTENTS
-	} > KRAM
-
-	.root ALIGN (4096) : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/rte_nb85e_cb.c b/arch/v850/kernel/rte_nb85e_cb.c
deleted file mode 100644
index b4a045da5d7..00000000000
--- a/arch/v850/kernel/rte_nb85e_cb.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * arch/v850/kernel/rte_nb85e_cb.c -- Midas labs RTE-V850E/NB85E-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/v850e.h>
-#include <asm/rte_nb85e_cb.h>
-
-#include "mach.h"
-
-void __init mach_early_init (void)
-{
-	/* Configure caching; some possible settings:
-
-	     BHC = 0x0000, DCC = 0x0000	 -- all caching disabled
-	     BHC = 0x0040, DCC = 0x0000	 -- SDRAM: icache only
-	     BHC = 0x0080, DCC = 0x0C00	 -- SDRAM: write-back dcache only
-	     BHC = 0x00C0, DCC = 0x0C00	 -- SDRAM: icache + write-back dcache
-	     BHC = 0x00C0, DCC = 0x0800	 -- SDRAM: icache + write-thru dcache
-
-	   We can only cache SDRAM (we can't use cache SRAM because it's in
-	   the same memory region as the on-chip RAM and I/O space).
-
-	   Unfortunately, the dcache seems to be buggy, so we only use the
-	   icache for now.  */
-	v850e_cache_enable (0x0040 /*BHC*/, 0x0003 /*ICC*/, 0x0000 /*DCC*/);
-
-	rte_cb_early_init ();
-}
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	/* We just use SDRAM here.  */
-	*ram_start = SDRAM_ADDR;
-	*ram_len = SDRAM_SIZE;
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-/* Called before configuring an on-chip UART.  */
-void rte_nb85e_cb_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud)
-{
-	/* The RTE-NB85E-CB connects some general-purpose I/O pins on the
-	   CPU to the RTS/CTS lines the UART's serial connection, as follows:
-	   P00 = CTS (in), P01 = DSR (in), P02 = RTS (out), P03 = DTR (out). */
-
-	TEG_PORT0_PM = 0x03;	/* P00 and P01 inputs, P02 and P03 outputs */
-	TEG_PORT0_IO = 0x03;	/* Accept input */
-
-	/* Do pre-configuration for the actual UART.  */
-	teg_uart_pre_configure (chan, cflags, baud);
-}
-
-void __init mach_init_irqs (void)
-{
-	teg_init_irqs ();
-	rte_cb_init_irqs ();
-}
diff --git a/arch/v850/kernel/rte_nb85e_cb.ld b/arch/v850/kernel/rte_nb85e_cb.ld
deleted file mode 100644
index b672f484f08..00000000000
--- a/arch/v850/kernel/rte_nb85e_cb.ld
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Linker script for the Midas labs RTE-NB85E-CB evaluation board
-   (CONFIG_RTE_CB_NB85E).  */
-
-MEMORY {
-	LOW   : ORIGIN = 0x0,	     LENGTH = 0x00100000
-	/* 1MB of SRAM  This memory is mirrored 4 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-	/* 16MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-#ifdef CONFIG_RTE_CB_NB85E_KSRAM
-# define KRAM SRAM
-#else
-# define KRAM SDRAM
-#endif
-
-SECTIONS {
-	.intv : { INTV_CONTENTS } > LOW
-	.sram : { RAMK_KRAM_CONTENTS } > KRAM
-	.root : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/setup.c b/arch/v850/kernel/setup.c
deleted file mode 100644
index 10335cecf7b..00000000000
--- a/arch/v850/kernel/setup.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * arch/v850/kernel/setup.c -- Arch-dependent initialization functions
- *
- *  Copyright (C) 2001,02,03,05,06  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,05,06  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/swap.h>		/* we don't have swap, but for nr_free_pages */
-#include <linux/irq.h>
-#include <linux/reboot.h>
-#include <linux/personality.h>
-#include <linux/major.h>
-#include <linux/root_dev.h>
-#include <linux/mtd/mtd.h>
-#include <linux/init.h>
-
-#include <asm/irq.h>
-#include <asm/setup.h>
-
-#include "mach.h"
-
-/* These symbols are all defined in the linker map to delineate various
-   statically allocated regions of memory.  */
-
-extern char _intv_start, _intv_end;
-/* `kram' is only used if the kernel uses part of normal user RAM.  */
-extern char _kram_start __attribute__ ((__weak__));
-extern char _kram_end __attribute__ ((__weak__));
-extern char _init_start, _init_end;
-extern char _bootmap;
-extern char _stext, _etext, _sdata, _edata, _sbss, _ebss;
-/* Many platforms use an embedded root image.  */
-extern char _root_fs_image_start __attribute__ ((__weak__));
-extern char _root_fs_image_end __attribute__ ((__weak__));
-
-
-char __initdata command_line[COMMAND_LINE_SIZE];
-
-/* Memory not used by the kernel.  */
-static unsigned long total_ram_pages;
-
-/* System RAM.  */
-static unsigned long ram_start = 0, ram_len = 0;
-
-
-#define ADDR_TO_PAGE_UP(x)   ((((unsigned long)x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define ADDR_TO_PAGE(x)	     (((unsigned long)x) >> PAGE_SHIFT)
-#define PAGE_TO_ADDR(x)	     (((unsigned long)x) << PAGE_SHIFT)
-
-static void init_mem_alloc (unsigned long ram_start, unsigned long ram_len);
-
-void set_mem_root (void *addr, size_t len, char *cmd_line);
-
-
-void __init setup_arch (char **cmdline)
-{
-	/* Keep a copy of command line */
-	*cmdline = command_line;
-	memcpy (boot_command_line, command_line, COMMAND_LINE_SIZE);
-	boot_command_line[COMMAND_LINE_SIZE - 1] = '\0';
-
-	console_verbose ();
-
-	init_mm.start_code = (unsigned long) &_stext;
-	init_mm.end_code = (unsigned long) &_etext;
-	init_mm.end_data = (unsigned long) &_edata;
-	init_mm.brk = (unsigned long) &_kram_end;
-
-	/* Find out what mem this machine has.  */
-	mach_get_physical_ram (&ram_start, &ram_len);
-	/* ... and tell the kernel about it.  */
-	init_mem_alloc (ram_start, ram_len);
-
-	printk (KERN_INFO "CPU: %s\nPlatform: %s\n",
-		CPU_MODEL_LONG, PLATFORM_LONG);
-
-	/* do machine-specific setups.  */
-	mach_setup (cmdline);
-
-#ifdef CONFIG_MTD
-	if (!ROOT_DEV && &_root_fs_image_end > &_root_fs_image_start)
-		set_mem_root (&_root_fs_image_start,
-			      &_root_fs_image_end - &_root_fs_image_start,
-			      *cmdline);
-#endif
-}
-
-void __init trap_init (void)
-{
-}
-
-#ifdef CONFIG_MTD
-
-/* From drivers/mtd/devices/slram.c */
-#define SLRAM_BLK_SZ 0x4000
-
-/* Set the root filesystem to be the given memory region.
-   Some parameter may be appended to CMD_LINE.  */
-void set_mem_root (void *addr, size_t len, char *cmd_line)
-{
-	/* Some sort of idiocy in MTD means we must supply a length that's
-	   a multiple of SLRAM_BLK_SZ.  We just round up the real length,
-	   as the file system shouldn't attempt to access anything beyond
-	   the end of the image anyway.  */
-	len = (((len - 1) + SLRAM_BLK_SZ) / SLRAM_BLK_SZ) * SLRAM_BLK_SZ;
-
-	/* The only way to pass info to the MTD slram driver is via
-	   the command line.  */
-	if (*cmd_line) {
-		cmd_line += strlen (cmd_line);
-		*cmd_line++ = ' ';
-	}
-	sprintf (cmd_line, "slram=root,0x%x,+0x%x", (u32)addr, (u32)len);
-
-	ROOT_DEV = MKDEV (MTD_BLOCK_MAJOR, 0);
-}
-#endif
-
-
-static void irq_nop (unsigned irq) { }
-static unsigned irq_zero (unsigned irq) { return 0; }
-
-static void nmi_end (unsigned irq)
-{
-	if (irq != IRQ_NMI (0)) {
-		printk (KERN_CRIT "NMI %d is unrecoverable; restarting...",
-			irq - IRQ_NMI (0));
-		machine_restart (0);
-	}
-}
-
-static struct hw_interrupt_type nmi_irq_type = {
-	.typename = "NMI",
-	.startup = irq_zero,		/* startup */
-	.shutdown = irq_nop,		/* shutdown */
-	.enable = irq_nop,		/* enable */
-	.disable = irq_nop,		/* disable */
-	.ack = irq_nop,		/* ack */
-	.end = nmi_end,		/* end */
-};
-
-void __init init_IRQ (void)
-{
-	init_irq_handlers (0, NUM_MACH_IRQS, 1, 0);
-	init_irq_handlers (IRQ_NMI (0), NUM_NMIS, 1, &nmi_irq_type);
-	mach_init_irqs ();
-}
-
-
-void __init mem_init (void)
-{
-	max_mapnr = MAP_NR (ram_start + ram_len);
-
-	num_physpages = ADDR_TO_PAGE (ram_len);
-
-	total_ram_pages = free_all_bootmem ();
-
-	printk (KERN_INFO
-		"Memory: %luK/%luK available"
-		" (%luK kernel code, %luK data)\n",
-		PAGE_TO_ADDR (nr_free_pages()) / 1024,
-		ram_len / 1024,
-		((unsigned long)&_etext - (unsigned long)&_stext) / 1024,
-		((unsigned long)&_ebss - (unsigned long)&_sdata) / 1024);
-}
-
-void free_initmem (void)
-{
-	unsigned long ram_end = ram_start + ram_len;
-	unsigned long start = PAGE_ALIGN ((unsigned long)(&_init_start));
-
-	if (start >= ram_start && start < ram_end) {
-		unsigned long addr;
-		unsigned long end = PAGE_ALIGN ((unsigned long)(&_init_end));
-
-		if (end > ram_end)
-			end = ram_end;
-
-		printk("Freeing unused kernel memory: %ldK freed\n",
-		       (end - start) / 1024);
-
-		for (addr = start; addr < end; addr += PAGE_SIZE) {
-			struct page *page = virt_to_page (addr);
-			ClearPageReserved (page);
-			init_page_count (page);
-			__free_page (page);
-			total_ram_pages++;
-		}
-	}
-}
-
-
-/* Initialize the `bootmem allocator'.  RAM_START and RAM_LEN identify
-   what RAM may be used.  */
-static void __init
-init_bootmem_alloc (unsigned long ram_start, unsigned long ram_len)
-{
-	/* The part of the kernel that's in the same managed RAM space
-	   used for general allocation.  */
-	unsigned long kram_start = (unsigned long)&_kram_start;
-	unsigned long kram_end = (unsigned long)&_kram_end;
-	/* End of the managed RAM space.  */
-	unsigned long ram_end = ram_start + ram_len;
-	/* Address range of the interrupt vector table.  */
-	unsigned long intv_start = (unsigned long)&_intv_start;
-	unsigned long intv_end = (unsigned long)&_intv_end;
-	/* True if the interrupt vectors are in the managed RAM area.  */
-	int intv_in_ram = (intv_end > ram_start && intv_start < ram_end);
-	/* True if the interrupt vectors are inside the kernel's RAM.  */
-	int intv_in_kram = (intv_end > kram_start && intv_start < kram_end);
-	/* A pointer to an optional function that reserves platform-specific
-	   memory regions.  We declare the pointer `volatile' to avoid gcc
-	   turning the call into a static call (the problem is that since
-	   it's a weak symbol, a static call may end up trying to reference
-	   the location 0x0, which is not always reachable).  */
-	void (*volatile mrb) (void) = mach_reserve_bootmem;
-	/* The bootmem allocator's allocation bitmap.  */
-	unsigned long bootmap = (unsigned long)&_bootmap;
-	unsigned long bootmap_len;
-
-	/* Round bootmap location up to next page.  */
-	bootmap = PAGE_TO_ADDR (ADDR_TO_PAGE_UP (bootmap));
-
-	/* Initialize bootmem allocator.  */
-	bootmap_len = init_bootmem_node (NODE_DATA (0),
-					 ADDR_TO_PAGE (bootmap),
-					 ADDR_TO_PAGE (PAGE_OFFSET),
-					 ADDR_TO_PAGE (ram_end));
-
-	/* Now make the RAM actually allocatable (it starts out `reserved'). */
-	free_bootmem (ram_start, ram_len);
-
-	if (kram_end > kram_start)
-		/* Reserve the RAM part of the kernel's address space, so it
-		   doesn't get allocated.  */
-		reserve_bootmem(kram_start, kram_end - kram_start,
-				BOOTMEM_DEFAULT);
-	
-	if (intv_in_ram && !intv_in_kram)
-		/* Reserve the interrupt vector space.  */
-		reserve_bootmem(intv_start, intv_end - intv_start,
-				BOOTMEM_DEFAULT);
-
-	if (bootmap >= ram_start && bootmap < ram_end)
-		/* Reserve the bootmap space.  */
-		reserve_bootmem(bootmap, bootmap_len,
-				BOOTMEM_DEFAULT);
-
-	/* Reserve the memory used by the root filesystem image if it's
-	   in RAM.  */
-	if (&_root_fs_image_end > &_root_fs_image_start
-	    && (unsigned long)&_root_fs_image_start >= ram_start
-	    && (unsigned long)&_root_fs_image_start < ram_end)
-		reserve_bootmem ((unsigned long)&_root_fs_image_start,
-				 &_root_fs_image_end - &_root_fs_image_start,
-				 BOOTMEM_DEFAULT);
-
-	/* Let the platform-dependent code reserve some too.  */
-	if (mrb)
-		(*mrb) ();
-}
-
-/* Tell the kernel about what RAM it may use for memory allocation.  */
-static void __init
-init_mem_alloc (unsigned long ram_start, unsigned long ram_len)
-{
-	unsigned i;
-	unsigned long zones_size[MAX_NR_ZONES];
-
-	init_bootmem_alloc (ram_start, ram_len);
-
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		zones_size[i] = 0;
-
-	/* We stuff all the memory into one area, which includes the
-	   initial gap from PAGE_OFFSET to ram_start.  */
-	zones_size[ZONE_DMA]
-		= ADDR_TO_PAGE (ram_len + (ram_start - PAGE_OFFSET));
-
-	/* The allocator is very picky about the address of the first
-	   allocatable page -- it must be at least as aligned as the
-	   maximum allocation -- so try to detect cases where it will get
-	   confused and signal them at compile time (this is a common
-	   problem when porting to a new platform with ).  There is a
-	   similar runtime check in free_area_init_core.  */
-#if ((PAGE_OFFSET >> PAGE_SHIFT) & ((1UL << (MAX_ORDER - 1)) - 1))
-#error MAX_ORDER is too large for given PAGE_OFFSET (use CONFIG_FORCE_MAX_ZONEORDER to change it)
-#endif
-	NODE_DATA(0)->node_mem_map = NULL;
-	free_area_init_node(0, zones_size, ADDR_TO_PAGE (PAGE_OFFSET), 0);
-}
-
-
-
-/* Taken from m68knommu */
-void show_mem(void)
-{
-    unsigned long i;
-    int free = 0, total = 0, reserved = 0, shared = 0;
-    int cached = 0;
-
-    printk(KERN_INFO "\nMem-info:\n");
-    show_free_areas();
-    i = max_mapnr;
-    while (i-- > 0) {
-	total++;
-	if (PageReserved(mem_map+i))
-	    reserved++;
-	else if (PageSwapCache(mem_map+i))
-	    cached++;
-	else if (!page_count(mem_map+i))
-	    free++;
-	else
-	    shared += page_count(mem_map+i) - 1;
-    }
-    printk(KERN_INFO "%d pages of RAM\n",total);
-    printk(KERN_INFO "%d free pages\n",free);
-    printk(KERN_INFO "%d reserved pages\n",reserved);
-    printk(KERN_INFO "%d pages shared\n",shared);
-    printk(KERN_INFO "%d pages swap cached\n",cached);
-}
diff --git a/arch/v850/kernel/signal.c b/arch/v850/kernel/signal.c
deleted file mode 100644
index bf166e7e762..00000000000
--- a/arch/v850/kernel/signal.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * arch/v850/kernel/signal.c -- Signal handling
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *  Copyright (C) 1999,2000,2002  Niibe Yutaka & Kaz Kojima
- *  Copyright (C) 1991,1992  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * 1997-11-28  Modified for POSIX.1b signals by Richard Henderson
- *
- * This file was derived from the sh version, arch/sh/kernel/signal.c
- */
-
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/tty.h>
-
-#include <asm/ucontext.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/thread_info.h>
-#include <asm/cacheflush.h>
-
-#define DEBUG_SIG 0
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
-
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(old_sigset_t mask, struct pt_regs *regs)
-{
-	sigset_t saveset;
-
-	mask &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
-	saveset = current->blocked;
-	siginitset(&current->blocked, mask);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	regs->gpr[GPR_RVAL] = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (do_signal(regs, &saveset))
-			return -EINTR;
-	}
-}
-
-asmlinkage int
-sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize,
-		  struct pt_regs *regs)
-{
-	sigset_t saveset, newset;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-
-	if (copy_from_user(&newset, unewset, sizeof(newset)))
-		return -EFAULT;
-	sigdelsetmask(&newset, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	saveset = current->blocked;
-	current->blocked = newset;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	regs->gpr[GPR_RVAL] = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (do_signal(regs, &saveset))
-			return -EINTR;
-	}
-}
-
-asmlinkage int 
-sys_sigaction(int sig, const struct old_sigaction *act,
-	      struct old_sigaction *oact)
-{
-	struct k_sigaction new_ka, old_ka;
-	int ret;
-
-	if (act) {
-		old_sigset_t mask;
-		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
-		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
-			return -EFAULT;
-		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		__get_user(mask, &act->sa_mask);
-		siginitset(&new_ka.sa.sa_mask, mask);
-	}
-
-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-	if (!ret && oact) {
-		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
-			return -EFAULT;
-		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
-	}
-
-	return ret;
-}
-
-asmlinkage int
-sys_sigaltstack(const stack_t *uss, stack_t *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->gpr[GPR_SP]);
-}
-
-
-/*
- * Do a signal return; undo the signal stack.
- */
-
-struct sigframe
-{
-	struct sigcontext sc;
-	unsigned long extramask[_NSIG_WORDS-1];
-	unsigned long tramp[2];	/* signal trampoline */
-};
-
-struct rt_sigframe
-{
-	struct siginfo info;
-	struct ucontext uc;
-	unsigned long tramp[2];	/* signal trampoline */
-};
-
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *rval_p)
-{
-	unsigned int err = 0;
-
-#define COPY(x)		err |= __get_user(regs->x, &sc->regs.x)
-	COPY(gpr[0]);	COPY(gpr[1]);	COPY(gpr[2]);	COPY(gpr[3]);
-	COPY(gpr[4]);	COPY(gpr[5]);	COPY(gpr[6]);	COPY(gpr[7]);
-	COPY(gpr[8]);	COPY(gpr[9]);	COPY(gpr[10]);	COPY(gpr[11]);
-	COPY(gpr[12]);	COPY(gpr[13]);	COPY(gpr[14]);	COPY(gpr[15]);
-	COPY(gpr[16]);	COPY(gpr[17]);	COPY(gpr[18]);	COPY(gpr[19]);
-	COPY(gpr[20]);	COPY(gpr[21]);	COPY(gpr[22]);	COPY(gpr[23]);
-	COPY(gpr[24]);	COPY(gpr[25]);	COPY(gpr[26]);	COPY(gpr[27]);
-	COPY(gpr[28]);	COPY(gpr[29]);	COPY(gpr[30]);	COPY(gpr[31]);
-	COPY(pc);	COPY(psw);
-	COPY(ctpc);	COPY(ctpsw);	COPY(ctbp);
-#undef COPY
-
-	return err;
-}
-
-asmlinkage int sys_sigreturn(struct pt_regs *regs)
-{
-	struct sigframe *frame = (struct sigframe *)regs->gpr[GPR_SP];
-	sigset_t set;
-	int rval;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-
-	if (__get_user(set.sig[0], &frame->sc.oldmask)
-	    || (_NSIG_WORDS > 1
-		&& __copy_from_user(&set.sig[1], &frame->extramask,
-				    sizeof(frame->extramask))))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->sc, &rval))
-		goto badframe;
-	return rval;
-
-badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
-
-asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
-{
-	struct rt_sigframe *frame = (struct rt_sigframe *)regs->gpr[GPR_SP];
-	sigset_t set;
-	stack_t st;
-	int rval;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &rval))
-		goto badframe;
-
-	if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
-		goto badframe;
-	/* It is more difficult to avoid calling this function than to
-	   call it and ignore errors.  */
-	do_sigaltstack(&st, NULL, regs->gpr[GPR_SP]);
-
-	return rval;
-
-badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}	
-
-/*
- * Set up a signal frame.
- */
-
-static int
-setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
-		 unsigned long mask)
-{
-	int err = 0;
-
-#define COPY(x)		err |= __put_user(regs->x, &sc->regs.x)
-	COPY(gpr[0]);	COPY(gpr[1]);	COPY(gpr[2]);	COPY(gpr[3]);
-	COPY(gpr[4]);	COPY(gpr[5]);	COPY(gpr[6]);	COPY(gpr[7]);
-	COPY(gpr[8]);	COPY(gpr[9]);	COPY(gpr[10]);	COPY(gpr[11]);
-	COPY(gpr[12]);	COPY(gpr[13]);	COPY(gpr[14]);	COPY(gpr[15]);
-	COPY(gpr[16]);	COPY(gpr[17]);	COPY(gpr[18]);	COPY(gpr[19]);
-	COPY(gpr[20]);	COPY(gpr[21]);	COPY(gpr[22]);	COPY(gpr[23]);
-	COPY(gpr[24]);	COPY(gpr[25]);	COPY(gpr[26]);	COPY(gpr[27]);
-	COPY(gpr[28]);	COPY(gpr[29]);	COPY(gpr[30]);	COPY(gpr[31]);
-	COPY(pc);	COPY(psw);
-	COPY(ctpc);	COPY(ctpsw);	COPY(ctbp);
-#undef COPY
-
-	err |= __put_user(mask, &sc->oldmask);
-
-	return err;
-}
-
-/*
- * Determine which stack to use..
- */
-static inline void *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
-{
-	/* Default to using normal stack */
-	unsigned long sp = regs->gpr[GPR_SP];
-
-	if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! sas_ss_flags(sp))
-		sp = current->sas_ss_sp + current->sas_ss_size;
-
-	return (void *)((sp - frame_size) & -8UL);
-}
-
-static void setup_frame(int sig, struct k_sigaction *ka,
-			sigset_t *set, struct pt_regs *regs)
-{
-	struct sigframe *frame;
-	int err = 0;
-	int signal;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
-
-	if (_NSIG_WORDS > 1) {
-		err |= __copy_to_user(frame->extramask, &set->sig[1],
-				      sizeof(frame->extramask));
-	}
-
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		regs->gpr[GPR_LP] = (unsigned long) ka->sa.sa_restorer;
-	} else {
-		/* Note, these encodings are _little endian_!  */
-
-		/* addi  __NR_sigreturn, r0, r12  */
-		err |= __put_user(0x6600 | (__NR_sigreturn << 16),
-				  frame->tramp + 0);
-		/* trap 0 */
-		err |= __put_user(0x010007e0,
-				  frame->tramp + 1);
-
-		regs->gpr[GPR_LP] = (unsigned long)frame->tramp;
-
-		flush_cache_sigtramp (regs->gpr[GPR_LP]);
-	}
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler.  */
-	regs->pc = (v850_reg_t) ka->sa.sa_handler;
-	regs->gpr[GPR_SP] = (v850_reg_t)frame;
-	/* Signal handler args:  */
-	regs->gpr[GPR_ARG0] = signal; /* arg 0: signum */
-	regs->gpr[GPR_ARG1] = (v850_reg_t)&frame->sc;/* arg 1: sigcontext */
-
-	set_fs(USER_DS);
-
-#if DEBUG_SIG
-	printk("SIG deliver (%s:%d): sp=%p pc=%08lx ra=%08lx\n",
-		current->comm, current->pid, frame, regs->pc, );
-#endif
-
-	return;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-}
-
-static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			   sigset_t *set, struct pt_regs *regs)
-{
-	struct rt_sigframe *frame;
-	int err = 0;
-	int signal;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= copy_siginfo_to_user(&frame->info, info);
-
-	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user((void *)current->sas_ss_sp,
-			  &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->gpr[GPR_SP]),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext,
-			        regs, set->sig[0]);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		regs->gpr[GPR_LP] = (unsigned long) ka->sa.sa_restorer;
-	} else {
-		/* Note, these encodings are _little endian_!  */
-
-		/* addi  __NR_sigreturn, r0, r12  */
-		err |= __put_user(0x6600 | (__NR_sigreturn << 16),
-				  frame->tramp + 0);
-		/* trap 0 */
-		err |= __put_user(0x010007e0,
-				  frame->tramp + 1);
-
-		regs->gpr[GPR_LP] = (unsigned long)frame->tramp;
-
-		flush_cache_sigtramp (regs->gpr[GPR_LP]);
-	}
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler.  */
-	regs->pc = (v850_reg_t) ka->sa.sa_handler;
-	regs->gpr[GPR_SP] = (v850_reg_t)frame;
-	/* Signal handler args:  */
-	regs->gpr[GPR_ARG0] = signal; /* arg 0: signum */
-	regs->gpr[GPR_ARG1] = (v850_reg_t)&frame->info; /* arg 1: siginfo */
-	regs->gpr[GPR_ARG2] = (v850_reg_t)&frame->uc; /* arg 2: ucontext */
-
-	set_fs(USER_DS);
-
-#if DEBUG_SIG
-	printk("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
-		current->comm, current->pid, frame, regs->pc, regs->pr);
-#endif
-
-	return;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-}
-
-/*
- * OK, we're invoking a handler
- */	
-
-static void
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-	      sigset_t *oldset,	struct pt_regs * regs)
-{
-	/* Are we from a system call? */
-	if (PT_REGS_SYSCALL (regs)) {
-		/* If so, check system call restarting.. */
-		switch (regs->gpr[GPR_RVAL]) {
-		case -ERESTART_RESTARTBLOCK:
-			current_thread_info()->restart_block.fn =
-				do_no_restart_syscall;
-			/* fall through */
-		case -ERESTARTNOHAND:
-			regs->gpr[GPR_RVAL] = -EINTR;
-			break;
-
-		case -ERESTARTSYS:
-			if (!(ka->sa.sa_flags & SA_RESTART)) {
-				regs->gpr[GPR_RVAL] = -EINTR;
-				break;
-			}
-			/* fallthrough */
-		case -ERESTARTNOINTR:
-			regs->gpr[12] = PT_REGS_SYSCALL (regs);
-			regs->pc -= 4; /* Size of `trap 0' insn.  */
-		}
-
-		PT_REGS_SET_SYSCALL (regs, 0);
-	}
-
-	/* Set up the stack frame */
-	if (ka->sa.sa_flags & SA_SIGINFO)
-		setup_rt_frame(sig, ka, info, oldset, regs);
-	else
-		setup_frame(sig, ka, oldset, regs);
-
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked,sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-}
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- *
- * Note that we go through the signals twice: once to check the signals that
- * the kernel can handle, and then we build all the user-level signal handling
- * stack-frames in one go after that.
- */
-int do_signal(struct pt_regs *regs, sigset_t *oldset)
-{
-	siginfo_t info;
-	int signr;
-	struct k_sigaction ka;
-
-	/*
-	 * We want the common case to go fast, which
-	 * is why we may in certain cases get here from
-	 * kernel mode. Just return without doing anything
-	 * if so.
-	 */
-	if (!user_mode(regs))
-		return 1;
-
-	if (!oldset)
-		oldset = &current->blocked;
-
-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-	if (signr > 0) {
-		/* Whee!  Actually deliver the signal.  */
-		handle_signal(signr, &info, &ka, oldset, regs);
-		return 1;
-	}
-
-	/* Did we come from a system call? */
-	if (PT_REGS_SYSCALL (regs)) {
-		int rval = (int)regs->gpr[GPR_RVAL];
-		/* Restart the system call - no handlers present */
-		if (rval == -ERESTARTNOHAND
-		    || rval == -ERESTARTSYS
-		    || rval == -ERESTARTNOINTR)
-		{
-			regs->gpr[12] = PT_REGS_SYSCALL (regs);
-			regs->pc -= 4; /* Size of `trap 0' insn.  */
-		}
-		else if (rval == -ERESTART_RESTARTBLOCK) {
-			regs->gpr[12] = __NR_restart_syscall;
-			regs->pc -= 4; /* Size of `trap 0' insn.  */
-		}
-	}
-	return 0;
-}
diff --git a/arch/v850/kernel/sim.c b/arch/v850/kernel/sim.c
deleted file mode 100644
index 467b4aa0acd..00000000000
--- a/arch/v850/kernel/sim.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * arch/v850/kernel/sim.c -- Machine-specific stuff for GDB v850e simulator
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-#include <asm/simsyscall.h>
-
-#include "mach.h"
-
-/* The name of a file containing the root filesystem.  */
-#define ROOT_FS "rootfs.image"
-
-extern void simcons_setup (void);
-extern void simcons_poll_ttys (void);
-extern void set_mem_root (void *addr, size_t len, char *cmd_line);
-
-static int read_file (const char *name,
-		      unsigned long *addr, unsigned long *len,
-		      const char **err);
-
-void __init mach_setup (char **cmdline)
-{
-	const char *err;
-	unsigned long root_dev_addr, root_dev_len;
-
-	simcons_setup ();
-
-	printk (KERN_INFO "Reading root filesystem: %s", ROOT_FS);
-
-	if (read_file (ROOT_FS, &root_dev_addr, &root_dev_len, &err)) {
-		printk (" (size %luK)\n", root_dev_len / 1024);
-		set_mem_root ((void *)root_dev_addr, (size_t)root_dev_len,
-			      *cmdline);
-	} else
-		printk ("...%s failed!\n", err);
-}
-
-void mach_get_physical_ram (unsigned long *ram_start, unsigned long *ram_len)
-{
-	*ram_start = RAM_ADDR;
-	*ram_len = RAM_SIZE;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* ...do magic timer initialization?...  */
-	mach_tick = simcons_poll_ttys;
-	setup_irq (0, timer_action);
-}
-
-
-static void irq_nop (unsigned irq) { }
-static unsigned irq_zero (unsigned irq) { return 0; }
-
-static struct hw_interrupt_type sim_irq_type = {
-	.typename = "IRQ",
-	.startup = irq_zero,		/* startup */
-	.shutdown = irq_nop,		/* shutdown */
-	.enable = irq_nop,		/* enable */
-	.disable = irq_nop,		/* disable */
-	.ack = irq_nop,		/* ack */
-	.end = irq_nop,		/* end */
-};
-
-void __init mach_init_irqs (void)
-{
-	init_irq_handlers (0, NUM_MACH_IRQS, 1, &sim_irq_type);
-}
-
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	long timeval[2], timezone[2];
-	int rval = V850_SIM_SYSCALL (gettimeofday, timeval, timezone);
-	if (rval == 0) {
-		tv->tv_sec = timeval[0];
-		tv->tv_nsec = timeval[1] * 1000;
-	}
-}
-
-void machine_restart (char *__unused)
-{
-	V850_SIM_SYSCALL (write, 1, "RESTART\n", 8);
-	V850_SIM_SYSCALL (exit, 0);
-}
-
-void machine_halt (void)
-{
-	V850_SIM_SYSCALL (write, 1, "HALT\n", 5);
-	V850_SIM_SYSCALL (exit, 0);
-}
-
-void machine_power_off (void)
-{
-	V850_SIM_SYSCALL (write, 1, "POWER OFF\n", 10);
-	V850_SIM_SYSCALL (exit, 0);
-}
-
-
-/* Load data from a file called NAME into ram.  The address and length
-   of the data image are returned in ADDR and LEN.  */
-static int __init
-read_file (const char *name,
-	   unsigned long *addr, unsigned long *len,
-	   const char **err)
-{
-	int rval, fd;
-	unsigned long cur, left;
-	/* Note this is not a normal stat buffer, it's an ad-hoc
-	   structure defined by the simulator.  */
-	unsigned long stat_buf[10];
-
-	/* Stat the file to find out the length.  */
-	rval = V850_SIM_SYSCALL (stat, name, stat_buf);
-	if (rval < 0) {
-		if (err) *err = "stat";
-		return 0;
-	}
-	*len = stat_buf[4];
-
-	/* Open the file; `0' is O_RDONLY.  */
-	fd = V850_SIM_SYSCALL (open, name, 0);
-	if (fd < 0) {
-		if (err) *err = "open";
-		return 0;
-	}
-
-	*addr = (unsigned long)alloc_bootmem(*len);
-	if (! *addr) {
-		V850_SIM_SYSCALL (close, fd);
-		if (err) *err = "alloc_bootmem";
-		return 0;
-	}
-
-	cur = *addr;
-	left = *len;
-	while (left > 0) {
-		int chunk = V850_SIM_SYSCALL (read, fd, cur, left);
-		if (chunk <= 0)
-			break;
-		cur += chunk;
-		left -= chunk;
-	}
-	V850_SIM_SYSCALL (close, fd);
-	if (left > 0) {
-		/* Some read failed.  */
-		free_bootmem (*addr, *len);
-		if (err) *err = "read";
-		return 0;
-	}
-
-	return 1;
-}
diff --git a/arch/v850/kernel/sim.ld b/arch/v850/kernel/sim.ld
deleted file mode 100644
index 101885f3c9f..00000000000
--- a/arch/v850/kernel/sim.ld
+++ /dev/null
@@ -1,13 +0,0 @@
-/* Linker script for the gdb v850e simulator (CONFIG_V850E_SIM).  */
-
-MEMORY {
-	/* Interrupt vectors.  */
-	INTV  : ORIGIN = 0x0, LENGTH = 0xe0
-	/* Main RAM.  */
-	RAM   : ORIGIN = RAM_ADDR, LENGTH = RAM_SIZE
-}
-
-SECTIONS {
-	.intv : { INTV_CONTENTS } > INTV
-	.ram : { RAMK_KRAM_CONTENTS } > RAM
-}
diff --git a/arch/v850/kernel/sim85e2.c b/arch/v850/kernel/sim85e2.c
deleted file mode 100644
index 566dde5e607..00000000000
--- a/arch/v850/kernel/sim85e2.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * arch/v850/kernel/sim85e2.c -- Machine-specific stuff for
- *	V850E2 RTL simulator
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-
-#include "mach.h"
-
-
-/* There are 4 possible areas we can use:
-
-     IRAM (1MB) is fast for instruction fetches, but slow for data
-     DRAM (1020KB) is fast for data, but slow for instructions
-     ERAM is cached, so should be fast for both insns and data
-     SDRAM is external DRAM, similar to ERAM
-*/
-
-#define INIT_MEMC_FOR_SDRAM
-#define USE_SDRAM_AREA
-#define KERNEL_IN_SDRAM_AREA
-
-#define DCACHE_MODE	V850E2_CACHE_BTSC_DCM_WT
-/*#define DCACHE_MODE	V850E2_CACHE_BTSC_DCM_WB_ALLOC*/
-
-#ifdef USE_SDRAM_AREA
-#define RAM_START 	SDRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-#else
-/* When we use DRAM, we need to account for the fact that the end of it is
-   used for R0_RAM.  */
-#define RAM_START	DRAM_ADDR
-#define RAM_END		R0_RAM_ADDR
-#endif
-
-
-extern void memcons_setup (void);
-
-
-#ifdef KERNEL_IN_SDRAM_AREA
-#define EARLY_INIT_SECTION_ATTR __attribute__ ((section (".early.text")))
-#else
-#define EARLY_INIT_SECTION_ATTR __init
-#endif
-
-void EARLY_INIT_SECTION_ATTR mach_early_init (void)
-{
-	/* The sim85e2 simulator tracks `undefined' values, so to make
-	   debugging easier, we begin by zeroing out all otherwise
-	   undefined registers.  This is not strictly necessary.
-
-	   The registers we zero are:
-	       Every GPR except:
-	           stack-pointer (r3)
-		   task-pointer (r16)
-		   our return addr (r31)
-	       Every system register (SPR) that we know about except for
-	       the PSW (SPR 5), which we zero except for the
-	       disable-interrupts bit.
-	*/
-
-	/* GPRs */
-	asm volatile ("             mov r0, r1 ; mov r0, r2              ");
-	asm volatile ("mov r0, r4 ; mov r0, r5 ; mov r0, r6 ; mov r0, r7 ");
-	asm volatile ("mov r0, r8 ; mov r0, r9 ; mov r0, r10; mov r0, r11");
-	asm volatile ("mov r0, r12; mov r0, r13; mov r0, r14; mov r0, r15");
-	asm volatile ("             mov r0, r17; mov r0, r18; mov r0, r19");
-	asm volatile ("mov r0, r20; mov r0, r21; mov r0, r22; mov r0, r23");
-	asm volatile ("mov r0, r24; mov r0, r25; mov r0, r26; mov r0, r27");
-	asm volatile ("mov r0, r28; mov r0, r29; mov r0, r30");
-
-	/* SPRs */
-	asm volatile ("ldsr r0, 0;  ldsr r0, 1;  ldsr r0, 2;  ldsr r0, 3");
-	asm volatile ("ldsr r0, 4");
-	asm volatile ("addi 0x20, r0, r1; ldsr r1, 5"); /* PSW */
-	asm volatile ("ldsr r0, 16; ldsr r0, 17; ldsr r0, 18; ldsr r0, 19");
-	asm volatile ("ldsr r0, 20");
-
-
-#ifdef INIT_MEMC_FOR_SDRAM
-	/* Settings for SDRAM controller.  */
-	V850E2_VSWC   = 0x0042;
-	V850E2_BSC    = 0x9286;
-	V850E2_BCT(0) = 0xb000;	/* was: 0 */
-	V850E2_BCT(1) = 0x000b;
-	V850E2_ASC    = 0;
-	V850E2_LBS    = 0xa9aa;	/* was: 0xaaaa */
-	V850E2_LBC(0) = 0;
-	V850E2_LBC(1) = 0;	/* was: 0x3 */
-	V850E2_BCC    = 0;
-	V850E2_RFS(4) = 0x800a;	/* was: 0xf109 */
-	V850E2_SCR(4) = 0x2091;	/* was: 0x20a1 */
-	V850E2_RFS(3) = 0x800c;
-	V850E2_SCR(3) = 0x20a1;
-	V850E2_DWC(0) = 0;
-	V850E2_DWC(1) = 0;
-#endif
-
-#if 0
-#ifdef CONFIG_V850E2_SIM85E2S
-	/* Turn on the caches.  */
-	V850E2_CACHE_BTSC = V850E2_CACHE_BTSC_ICM | DCACHE_MODE;
-	V850E2_BHC  = 0x1010;
-#elif CONFIG_V850E2_SIM85E2C
-	V850E2_CACHE_BTSC |= (V850E2_CACHE_BTSC_ICM | V850E2_CACHE_BTSC_DCM0);
-	V850E2_BUSM_BHC = 0xFFFF;
-#endif
-#else
-	V850E2_BHC  = 0;
-#endif
-
-	/* Don't stop the simulator at `halt' instructions.  */
-	SIM85E2_NOTHAL = 1;
-
-	/* Ensure that the simulator halts on a panic, instead of going
-	   into an infinite loop inside the panic function.  */
-	panic_timeout = -1;
-}
-
-void __init mach_setup (char **cmdline)
-{
-	memcons_setup ();
-}
-
-void mach_get_physical_ram (unsigned long *ram_start, unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* The simulator actually cycles through all interrupts
-	   periodically.  We just pay attention to IRQ0, which gives us
-	   1/64 the rate of the periodic interrupts.  */
-	setup_irq (0, timer_action);
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-/* Interrupts */
-
-struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, NUM_MACH_IRQS, 1, 7 },
-	{ 0 }
-};
-struct hw_interrupt_type hw_itypes[1];
-
-/* Initialize interrupts.  */
-void __init mach_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-
-void machine_halt (void) __attribute__ ((noreturn));
-void machine_halt (void)
-{
-	SIM85E2_SIMFIN = 0;	/* Halt immediately.  */
-	for (;;) {}
-}
-
-void machine_restart (char *__unused)
-{
-	machine_halt ();
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
diff --git a/arch/v850/kernel/sim85e2.ld b/arch/v850/kernel/sim85e2.ld
deleted file mode 100644
index 7470fd2ffb5..00000000000
--- a/arch/v850/kernel/sim85e2.ld
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Linker script for the sim85e2c simulator, which is a verilog simulation of
-   the V850E2 NA85E2C cpu core (CONFIG_V850E2_SIM85E2C).  */
-
-MEMORY {
-	/* 1MB of `instruction RAM', starting at 0.
-	   Instruction fetches are much faster from IRAM than from DRAM.  */
-	IRAM : ORIGIN = IRAM_ADDR, LENGTH = IRAM_SIZE
-
-	/* 1MB of `data RAM', below and contiguous with the I/O space.
-	   Data fetches are much faster from DRAM than from IRAM.  */
-	DRAM : ORIGIN = DRAM_ADDR, LENGTH = DRAM_SIZE
-
-	/* `external ram' (CS1 area), comes after IRAM.  */
-	ERAM : ORIGIN = ERAM_ADDR, LENGTH = ERAM_SIZE
-
-	/* Dynamic RAM; uses memory controller.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	.iram : {
-		INTV_CONTENTS
-		*arch/v850/kernel/head.o
-		*(.early.text)
-	} > IRAM
-	.dram : {
-		_memcons_output = . ;
-		. = . + 0x8000 ;
-		_memcons_output_end = . ;
-	} > DRAM
-	.sdram : {
-		/* We stick console output into a buffer here.  */
-		RAMK_KRAM_CONTENTS
-		ROOT_FS_CONTENTS
-	} > SDRAM
-}
diff --git a/arch/v850/kernel/simcons.c b/arch/v850/kernel/simcons.c
deleted file mode 100644
index 9973596ae30..00000000000
--- a/arch/v850/kernel/simcons.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * arch/v850/kernel/simcons.c -- Console I/O for GDB v850e simulator
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/console.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/tty_driver.h>
-#include <linux/init.h>
-
-#include <asm/poll.h>
-#include <asm/string.h>
-#include <asm/simsyscall.h>
-
-
-/*  Low-level console. */
-
-static void simcons_write (struct console *co, const char *buf, unsigned len)
-{
-	V850_SIM_SYSCALL (write, 1, buf, len);
-}
-
-static int simcons_read (struct console *co, char *buf, unsigned len)
-{
-	return V850_SIM_SYSCALL (read, 0, buf, len);
-}
-
-static struct tty_driver *tty_driver;
-static struct tty_driver *simcons_device (struct console *c, int *index)
-{
-	*index = c->index;
-	return tty_driver;
-}
-
-static struct console simcons =
-{
-    .name	= "simcons",
-    .write	= simcons_write,
-    .read	= simcons_read,
-    .device	= simcons_device,
-    .flags	= CON_PRINTBUFFER,
-    .index	= -1,
-};
-
-/* Higher level TTY interface.  */
-
-int simcons_tty_open (struct tty_struct *tty, struct file *filp)
-{
-	return 0;
-}
-
-int simcons_tty_write (struct tty_struct *tty,
-		       const unsigned char *buf, int count)
-{
-	return V850_SIM_SYSCALL (write, 1, buf, count);
-}
-
-int simcons_tty_write_room (struct tty_struct *tty)
-{
-	/* Completely arbitrary.  */
-	return 0x100000;
-}
-
-int simcons_tty_chars_in_buffer (struct tty_struct *tty)
-{
-	/* We have no buffer.  */
-	return 0;
-}
-
-static const struct tty_operations ops = {
-	.open = simcons_tty_open,
-	.write = simcons_tty_write,
-	.write_room = simcons_tty_write_room,
-	.chars_in_buffer = simcons_tty_chars_in_buffer,
-};
-
-int __init simcons_tty_init (void)
-{
-	struct tty_driver *driver = alloc_tty_driver(1);
-	int err;
-	if (!driver)
-		return -ENOMEM;
-	driver->name = "simcons";
-	driver->major = TTY_MAJOR;
-	driver->minor_start = 64;
-	driver->type = TTY_DRIVER_TYPE_SYSCONS;
-	driver->init_termios = tty_std_termios;
-	tty_set_operations(driver, &ops);
-	err = tty_register_driver(driver);
-	if (err) {
-		put_tty_driver(driver);
-		return err;
-	}
-	tty_driver = driver;
-	return 0;
-}
-/* We use `late_initcall' instead of just `__initcall' as a workaround for
-   the fact that (1) simcons_tty_init can't be called before tty_init,
-   (2) tty_init is called via `module_init', (3) if statically linked,
-   module_init == device_init, and (4) there's no ordering of init lists.
-   We can do this easily because simcons is always statically linked, but
-   other tty drivers that depend on tty_init and which must use
-   `module_init' to declare their init routines are likely to be broken.  */
-late_initcall(simcons_tty_init);
-
-/* Poll for input on the console, and if there's any, deliver it to the
-   tty driver.  */
-void simcons_poll_tty (struct tty_struct *tty)
-{
-	char buf[32];	/* Not the nicest way to do it but I need it correct first */
-	int flip = 0, send_break = 0;
-	struct pollfd pfd;
-	pfd.fd = 0;
-	pfd.events = POLLIN;
-
-	if (V850_SIM_SYSCALL (poll, &pfd, 1, 0) > 0) {
-		if (pfd.revents & POLLIN) {
-			/* Real block hardware knows the transfer size before
-			   transfer so the new tty buffering doesn't try to handle
-			   this rather weird simulator specific case well */
-			int rd = V850_SIM_SYSCALL (read, 0, buf, 32);
-			if (rd > 0) {
-				tty_insert_flip_string(tty, buf, rd);
-				flip = 1;
-			} else
-				send_break = 1;
-		} else if (pfd.revents & POLLERR)
-			send_break = 1;
-	}
-
-	if (send_break) {
-		tty_insert_flip_char (tty, 0, TTY_BREAK);		
-		flip = 1;
-	}
-
-	if (flip)
-		tty_schedule_flip (tty);
-}
-
-void simcons_poll_ttys (void)
-{
-	if (tty_driver && tty_driver->ttys[0])
-		simcons_poll_tty (tty_driver->ttys[0]);
-}
-
-void simcons_setup (void)
-{
-	V850_SIM_SYSCALL (make_raw, 0);
-	register_console (&simcons);
-	printk (KERN_INFO "Console: GDB V850E simulator stdio\n");
-}
diff --git a/arch/v850/kernel/syscalls.c b/arch/v850/kernel/syscalls.c
deleted file mode 100644
index 1a83daf8e24..00000000000
--- a/arch/v850/kernel/syscalls.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * arch/v850/kernel/syscalls.c -- Various system-call definitions not
- * 	defined in machine-independent code
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file was derived the ppc version, arch/ppc/kernel/syscalls.c
- * ... which was derived from "arch/i386/kernel/sys_i386.c" by Gary Thomas;
- *     modified by Cort Dougan (cort@cs.nmt.edu)
- *     and Paul Mackerras (paulus@cs.anu.edu.au).
- */
-
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/syscalls.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/mman.h>
-#include <linux/sys.h>
-#include <linux/ipc.h>
-#include <linux/utsname.h>
-#include <linux/file.h>
-
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-int
-sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	ret = -EINVAL;
-	switch (call) {
-	case SEMOP:
-		ret = sys_semop (first, (struct sembuf *)ptr, second);
-		break;
-	case SEMGET:
-		ret = sys_semget (first, second, third);
-		break;
-	case SEMCTL:
-	{
-		union semun fourth;
-
-		if (!ptr)
-			break;
-		if ((ret = access_ok(VERIFY_READ, ptr, sizeof(long)) ? 0 : -EFAULT)
-		    || (ret = get_user(fourth.__pad, (void **)ptr)))
-			break;
-		ret = sys_semctl (first, second, third, fourth);
-		break;
-	}
-	case MSGSND:
-		ret = sys_msgsnd (first, (struct msgbuf *) ptr, second, third);
-		break;
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-
-			if (!ptr)
-				break;
-			if ((ret = access_ok(VERIFY_READ, ptr, sizeof(tmp)) ? 0 : -EFAULT)
-			    || (ret = copy_from_user(&tmp,
-						(struct ipc_kludge *) ptr,
-						sizeof (tmp))))
-				break;
-			ret = sys_msgrcv (first, tmp.msgp, second, tmp.msgtyp,
-					  third);
-			break;
-			}
-		default:
-			ret = sys_msgrcv (first, (struct msgbuf *) ptr,
-					  second, fifth, third);
-			break;
-		}
-		break;
-	case MSGGET:
-		ret = sys_msgget ((key_t) first, second);
-		break;
-	case MSGCTL:
-		ret = sys_msgctl (first, second, (struct msqid_ds *) ptr);
-		break;
-	case SHMAT:
-		switch (version) {
-		default: {
-			ulong raddr;
-
-			if ((ret = access_ok(VERIFY_WRITE, (ulong*) third,
-					       sizeof(ulong)) ? 0 : -EFAULT))
-				break;
-			ret = do_shmat (first, (char *) ptr, second, &raddr);
-			if (ret)
-				break;
-			ret = put_user (raddr, (ulong *) third);
-			break;
-			}
-		case 1:	/* iBCS2 emulator entry point */
-			if (!segment_eq(get_fs(), get_ds()))
-				break;
-			ret = do_shmat (first, (char *) ptr, second,
-					 (ulong *) third);
-			break;
-		}
-		break;
-	case SHMDT: 
-		ret = sys_shmdt ((char *)ptr);
-		break;
-	case SHMGET:
-		ret = sys_shmget (first, second, third);
-		break;
-	case SHMCTL:
-		ret = sys_shmctl (first, second, (struct shmid_ds *) ptr);
-		break;
-	}
-
-	return ret;
-}
-
-static inline unsigned long
-do_mmap2 (unsigned long addr, size_t len,
-	 unsigned long prot, unsigned long flags,
-	 unsigned long fd, unsigned long pgoff)
-{
-	struct file * file = NULL;
-	int ret = -EBADF;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (! (flags & MAP_ANONYMOUS)) {
-		if (!(file = fget (fd)))
-			goto out;
-	}
-	
-	down_write (&current->mm->mmap_sem);
-	ret = do_mmap_pgoff (file, addr, len, prot, flags, pgoff);
-	up_write (&current->mm->mmap_sem);
-	if (file)
-		fput (file);
-out:
-	return ret;
-}
-
-unsigned long sys_mmap2 (unsigned long addr, size_t len,
-			unsigned long prot, unsigned long flags,
-			unsigned long fd, unsigned long pgoff)
-{
-	return do_mmap2 (addr, len, prot, flags, fd, pgoff);
-}
-
-unsigned long sys_mmap (unsigned long addr, size_t len,
-		       unsigned long prot, unsigned long flags,
-		       unsigned long fd, off_t offset)
-{
-	int err = -EINVAL;
-
-	if (offset & ~PAGE_MASK)
-		goto out;
-
-	err = do_mmap2 (addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
-out:
-	return err;
-}
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
-{
-	register char *__a __asm__ ("r6") = filename;
-	register void *__b __asm__ ("r7") = argv;
-	register void *__c __asm__ ("r8") = envp;
-	register unsigned long __syscall __asm__ ("r12") = __NR_execve;
-	register unsigned long __ret __asm__ ("r10");
-	__asm__ __volatile__ ("trap 0"
-			: "=r" (__ret), "=r" (__syscall)
-			: "1" (__syscall), "r" (__a), "r" (__b), "r" (__c)
-			: "r1", "r5", "r11", "r13", "r14",
-			  "r15", "r16", "r17", "r18", "r19");
-	return __ret;
-}
diff --git a/arch/v850/kernel/teg.c b/arch/v850/kernel/teg.c
deleted file mode 100644
index 699248f92aa..00000000000
--- a/arch/v850/kernel/teg.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * arch/v850/kernel/teg.c -- NB85E-TEG cpu chip
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-#include <asm/v850e_timer_d.h>
-
-#include "mach.h"
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Select timer interrupt instead of external pin.  */
-	TEG_ISS |= 0x1;
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0,		NUM_CPU_IRQS,	1, 7 },
-	{ "CMD", IRQ_INTCMD(0),	IRQ_INTCMD_NUM,	1, 5 },
-	{ "SER", IRQ_INTSER(0),	IRQ_INTSER_NUM,	1, 3 },
-	{ "SR",	 IRQ_INTSR(0),	IRQ_INTSR_NUM,	1, 4 },
-	{ "ST",	 IRQ_INTST(0),	IRQ_INTST_NUM,	1, 5 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-/* Initialize MA chip interrupts.  */
-void __init teg_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-/* Called before configuring an on-chip UART.  */
-void teg_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	/* Enable UART I/O pins instead of external interrupt pins, and
-	   UART interrupts instead of external pin interrupts.  */
-	TEG_ISS |= 0x4E;
-}
diff --git a/arch/v850/kernel/time.c b/arch/v850/kernel/time.c
deleted file mode 100644
index d810c93fe66..00000000000
--- a/arch/v850/kernel/time.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * linux/arch/v850/kernel/time.c -- Arch-dependent timer functions
- *
- *  Copyright (C) 1991, 1992, 1995, 2001, 2002  Linus Torvalds
- *
- * This file contains the v850-specific time handling details.
- * Most of the stuff is located in the machine specific files.
- *
- * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
- *		"A Kernel Model for Precision Timekeeping" by Dave Mills
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/time.h>
-#include <linux/timex.h>
-#include <linux/profile.h>
-
-#include <asm/io.h>
-
-#include "mach.h"
-
-#define TICK_SIZE	(tick_nsec / 1000)
-
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static irqreturn_t timer_interrupt (int irq, void *dummy, struct pt_regs *regs)
-{
-#if 0
-	/* last time the cmos clock got updated */
-	static long last_rtc_update=0;
-#endif
-
-	/* may need to kick the hardware timer */
-	if (mach_tick)
-	  mach_tick ();
-
-	do_timer (1);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(regs));
-#endif
-	profile_tick(CPU_PROFILING, regs);
-#if 0
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 */
-	if (ntp_synced() &&
-	    xtime.tv_sec > last_rtc_update + 660 &&
-	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
-	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-	  if (set_rtc_mmss (xtime.tv_sec) == 0)
-	    last_rtc_update = xtime.tv_sec;
-	  else
-	    last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
-	}
-#ifdef CONFIG_HEARTBEAT
-	/* use power LED as a heartbeat instead -- much more useful
-	   for debugging -- based on the version for PReP by Cort */
-	/* acts like an actual heart beat -- ie thump-thump-pause... */
-	if (mach_heartbeat) {
-	    static unsigned cnt = 0, period = 0, dist = 0;
-
-	    if (cnt == 0 || cnt == dist)
-		mach_heartbeat ( 1 );
-	    else if (cnt == 7 || cnt == dist+7)
-		mach_heartbeat ( 0 );
-
-	    if (++cnt > period) {
-		cnt = 0;
-		/* The hyperbolic function below modifies the heartbeat period
-		 * length in dependency of the current (5min) load. It goes
-		 * through the points f(0)=126, f(1)=86, f(5)=51,
-		 * f(inf)->30. */
-		period = ((672<<FSHIFT)/(5*avenrun[0]+(7<<FSHIFT))) + 30;
-		dist = period / 4;
-	    }
-	}
-#endif /* CONFIG_HEARTBEAT */
-#endif /* 0 */
-
-	return IRQ_HANDLED;
-}
-
-static int timer_dev_id;
-static struct irqaction timer_irqaction = {
-	.handler = timer_interrupt,
-	.flags = IRQF_DISABLED,
-	.mask = CPU_MASK_NONE,
-	.name = "timer",
-	.dev_id = &timer_dev_id,
-};
-
-void time_init (void)
-{
-	mach_gettimeofday (&xtime);
-	mach_sched_init (&timer_irqaction);
-}
diff --git a/arch/v850/kernel/v850_ksyms.c b/arch/v850/kernel/v850_ksyms.c
deleted file mode 100644
index 8d386a5dbc4..00000000000
--- a/arch/v850/kernel/v850_ksyms.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <linux/module.h>
-#include <linux/linkage.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/user.h>
-#include <linux/elfcore.h>
-#include <linux/in6.h>
-#include <linux/interrupt.h>
-
-#include <asm/pgalloc.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/checksum.h>
-#include <asm/current.h>
-
-
-extern void *trap_table;
-EXPORT_SYMBOL (trap_table);
-
-/* platform dependent support */
-EXPORT_SYMBOL (kernel_thread);
-EXPORT_SYMBOL (__bug);
-
-/* Networking helper routines. */
-EXPORT_SYMBOL (csum_partial_copy_nocheck);
-EXPORT_SYMBOL (csum_partial_copy_from_user);
-EXPORT_SYMBOL (ip_compute_csum);
-EXPORT_SYMBOL (ip_fast_csum);
-
-/* string / mem functions */
-EXPORT_SYMBOL (memset);
-EXPORT_SYMBOL (memcpy);
-EXPORT_SYMBOL (memmove);
-
-/*
- * libgcc functions - functions that are used internally by the
- * compiler...  (prototypes are not correct though, but that
- * doesn't really matter since they're not versioned).
- */
-extern void __ashldi3 (void);
-extern void __ashrdi3 (void);
-extern void __lshrdi3 (void);
-extern void __muldi3 (void);
-extern void __negdi2 (void);
-
-EXPORT_SYMBOL (__ashldi3);
-EXPORT_SYMBOL (__ashrdi3);
-EXPORT_SYMBOL (__lshrdi3);
-EXPORT_SYMBOL (__muldi3);
-EXPORT_SYMBOL (__negdi2);
diff --git a/arch/v850/kernel/v850e2_cache.c b/arch/v850/kernel/v850e2_cache.c
deleted file mode 100644
index 4570312c689..00000000000
--- a/arch/v850/kernel/v850e2_cache.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * arch/v850/kernel/v850e2_cache.c -- Cache control for V850E2 cache
- * 	memories
- *
- *  Copyright (C) 2003  NEC Electronics Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/mm.h>
-
-#include <asm/v850e2_cache.h>
-
-/* Cache operations we can do.  The encoding corresponds directly to the
-   value we need to write into the COPR register.  */
-enum cache_op {
-	OP_SYNC_IF_DIRTY 	   = V850E2_CACHE_COPR_CFC(0), /* 000 */
-	OP_SYNC_IF_VALID 	   = V850E2_CACHE_COPR_CFC(1), /* 001 */
-	OP_SYNC_IF_VALID_AND_CLEAR = V850E2_CACHE_COPR_CFC(3), /* 011 */
-	OP_WAY_CLEAR 		   = V850E2_CACHE_COPR_CFC(4), /* 100 */
-	OP_FILL 		   = V850E2_CACHE_COPR_CFC(5), /* 101 */
-	OP_CLEAR 		   = V850E2_CACHE_COPR_CFC(6), /* 110 */
-	OP_CREATE_DIRTY 	   = V850E2_CACHE_COPR_CFC(7)  /* 111 */
-};
-
-/* Which cache to use.  This encoding also corresponds directly to the
-   value we need to write into the COPR register. */
-enum cache {
-	ICACHE = 0,
-	DCACHE = V850E2_CACHE_COPR_LBSL
-};
-
-/* Returns ADDR rounded down to the beginning of its cache-line.  */
-#define CACHE_LINE_ADDR(addr)  \
-   ((addr) & ~(V850E2_CACHE_LINE_SIZE - 1))
-/* Returns END_ADDR rounded up to the `limit' of its cache-line.  */
-#define CACHE_LINE_END_ADDR(end_addr)  \
-   CACHE_LINE_ADDR(end_addr + (V850E2_CACHE_LINE_SIZE - 1))
-
-
-/* Low-level cache ops.  */
-
-/* Apply cache-op OP to all entries in CACHE.  */
-static inline void cache_op_all (enum cache_op op, enum cache cache)
-{
-	int cmd = op | cache | V850E2_CACHE_COPR_WSLE | V850E2_CACHE_COPR_STRT;
-
-	if (op != OP_WAY_CLEAR) {
-		/* The WAY_CLEAR operation does the whole way, but other
-		   ops take begin-index and count params; we just indicate
-		   the entire cache.  */
-		V850E2_CACHE_CADL = 0;
-		V850E2_CACHE_CADH = 0;
-		V850E2_CACHE_CCNT = V850E2_CACHE_WAY_SIZE - 1;
-	}
-
-	V850E2_CACHE_COPR = cmd | V850E2_CACHE_COPR_WSL(0); /* way 0 */
-	V850E2_CACHE_COPR = cmd | V850E2_CACHE_COPR_WSL(1); /* way 1 */
-	V850E2_CACHE_COPR = cmd | V850E2_CACHE_COPR_WSL(2); /* way 2 */
-	V850E2_CACHE_COPR = cmd | V850E2_CACHE_COPR_WSL(3); /* way 3 */
-}
-
-/* Apply cache-op OP to all entries in CACHE covering addresses ADDR
-   through ADDR+LEN.  */
-static inline void cache_op_range (enum cache_op op, u32 addr, u32 len,
-				   enum cache cache)
-{
-	u32 start = CACHE_LINE_ADDR (addr);
-	u32 end = CACHE_LINE_END_ADDR (addr + len);
-	u32 num_lines = (end - start) >> V850E2_CACHE_LINE_SIZE_BITS;
-
-	V850E2_CACHE_CADL = start & 0xFFFF;
-	V850E2_CACHE_CADH = start >> 16;
-	V850E2_CACHE_CCNT = num_lines - 1;
-
-	V850E2_CACHE_COPR = op | cache | V850E2_CACHE_COPR_STRT;
-}
-
-
-/* High-level ops.  */
-
-static void cache_exec_after_store_all (void)
-{
-	cache_op_all (OP_SYNC_IF_DIRTY, DCACHE);
-	cache_op_all (OP_WAY_CLEAR, ICACHE);
-}
-
-static void cache_exec_after_store_range (u32 start, u32 len)
-{
-	cache_op_range (OP_SYNC_IF_DIRTY, start, len, DCACHE);
-	cache_op_range (OP_CLEAR, start, len, ICACHE);
-}
-
-
-/* Exported functions.  */
-
-void flush_icache (void)
-{
-	cache_exec_after_store_all ();
-}
-
-void flush_icache_range (unsigned long start, unsigned long end)
-{
-	cache_exec_after_store_range (start, end - start);
-}
-
-void flush_icache_page (struct vm_area_struct *vma, struct page *page)
-{
-	cache_exec_after_store_range (page_to_virt (page), PAGE_SIZE);
-}
-
-void flush_icache_user_range (struct vm_area_struct *vma, struct page *page,
-			      unsigned long addr, int len)
-{
-	cache_exec_after_store_range (addr, len);
-}
-
-void flush_cache_sigtramp (unsigned long addr)
-{
-	/* For the exact size, see signal.c, but 16 bytes should be enough.  */
-	cache_exec_after_store_range (addr, 16);
-}
diff --git a/arch/v850/kernel/v850e_cache.c b/arch/v850/kernel/v850e_cache.c
deleted file mode 100644
index ea3e51cfb25..00000000000
--- a/arch/v850/kernel/v850e_cache.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * arch/v850/kernel/v850e_cache.c -- Cache control for V850E cache memories
- *
- *  Copyright (C) 2003  NEC Electronics Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* This file implements cache control for the rather simple cache used on
-   some V850E CPUs, specifically the NB85E/TEG CPU-core and the V850E/ME2
-   CPU.  V850E2 processors have their own (better) cache
-   implementation.  */
-
-#include <asm/entry.h>
-#include <asm/cacheflush.h>
-#include <asm/v850e_cache.h>
-
-#define WAIT_UNTIL_CLEAR(value) while (value) {}
-
-/* Set caching params via the BHC and DCC registers.  */
-void v850e_cache_enable (u16 bhc, u16 icc, u16 dcc)
-{
-	unsigned long *r0_ram = (unsigned long *)R0_RAM_ADDR;
-	register u16 bhc_val asm ("r6") = bhc;
-
-	/* Read the instruction cache control register (ICC) and confirm
-	   that bits 0 and 1 (TCLR0, TCLR1) are all cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x3);
-	V850E_CACHE_ICC = icc;
-
-#ifdef V850E_CACHE_DCC
-	/* Configure data-cache.  */
-	V850E_CACHE_DCC = dcc;
-#endif /* V850E_CACHE_DCC */
-
-	/* Configure caching for various memory regions by writing the BHC
-	   register.  The documentation says that an instruction _cannot_
-	   enable/disable caching for the memory region in which the
-	   instruction itself exists; to work around this, we store
-	   appropriate instructions into the on-chip RAM area (which is never
-	   cached), and briefly jump there to do the work.  */
-#ifdef V850E_CACHE_WRITE_IBS
-	*r0_ram++ 	= 0xf0720760;	/* st.h r0, 0xfffff072[r0] */
-#endif
-	*r0_ram++ 	= 0xf06a3760;	/* st.h r6, 0xfffff06a[r0] */
-	*r0_ram 	= 0x5640006b;	/* jmp [r11] */
-
-	asm ("mov hilo(1f), r11; jmp [%1]; 1:;"
-	     :: "r" (bhc_val), "r" (R0_RAM_ADDR) : "r11");
-}
-
-static void clear_icache (void)
-{
-	/* 1. Read the instruction cache control register (ICC) and confirm
-	      that bits 0 and 1 (TCLR0, TCLR1) are all cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x3);
-
-	/* 2. Read the ICC register and confirm that bit 12 (LOCK0) is
-  	      cleared.  Bit 13 of the ICC register is always cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x1000);
-
-	/* 3. Set the TCLR0 and TCLR1 bits of the ICC register as follows,
-	      when clearing way 0 and way 1 at the same time:
-	        (a) Set the TCLR0 and TCLR1 bits.
-		(b) Read the TCLR0 and TCLR1 bits to confirm that these bits
-		    are cleared.
-		(c) Perform (a) and (b) above again.  */
-	V850E_CACHE_ICC |= 0x3;
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x3);
-
-#ifdef V850E_CACHE_REPEAT_ICC_WRITE
-	/* Do it again.  */
-	V850E_CACHE_ICC |= 0x3;
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x3);
-#endif
-}
-
-#ifdef V850E_CACHE_DCC
-/* Flush or clear (or both) the data cache, depending on the value of FLAGS;
-   the procedure is the same for both, just the control bits used differ (and
-   both may be performed simultaneously).  */
-static void dcache_op (unsigned short flags)
-{
-	/* 1. Read the data cache control register (DCC) and confirm that bits
-	      0, 1, 4, and 5 (DC00, DC01, DC04, DC05) are all cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_DCC & 0x33);
-
-	/* 2. Clear DCC register bit 12 (DC12), bit 13 (DC13), or both
-	      depending on the way for which tags are to be cleared.  */
-	V850E_CACHE_DCC &= ~0xC000;
-
-	/* 3. Set DCC register bit 0 (DC00), bit 1 (DC01) or both depending on
-	      the way for which tags are to be cleared.
-	      ...
-	      Set DCC register bit 4 (DC04), bit 5 (DC05), or both depending
-	      on the way to be data flushed.  */
-	V850E_CACHE_DCC |= flags;
-
-	/* 4. Read DCC register bit DC00, DC01 [DC04, DC05], or both depending
-	      on the way for which tags were cleared [flushed] and confirm
-	      that that bit is cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_DCC & flags);
-}
-#endif /* V850E_CACHE_DCC */
-
-/* Flushes the contents of the dcache to memory.  */
-static inline void flush_dcache (void)
-{
-#ifdef V850E_CACHE_DCC
-	/* We only need to do something if in write-back mode.  */
-	if (V850E_CACHE_DCC & 0x0400)
-		dcache_op (0x30);
-#endif /* V850E_CACHE_DCC */
-}
-
-/* Flushes the contents of the dcache to memory, and then clears it.  */
-static inline void clear_dcache (void)
-{
-#ifdef V850E_CACHE_DCC
-	/* We only need to do something if the dcache is enabled.  */
-	if (V850E_CACHE_DCC & 0x0C00)
-		dcache_op (0x33);
-#endif /* V850E_CACHE_DCC */
-}
-
-/* Clears the dcache without flushing to memory first.  */
-static inline void clear_dcache_no_flush (void)
-{
-#ifdef V850E_CACHE_DCC
-	/* We only need to do something if the dcache is enabled.  */
-	if (V850E_CACHE_DCC & 0x0C00)
-		dcache_op (0x3);
-#endif /* V850E_CACHE_DCC */
-}
-
-static inline void cache_exec_after_store (void)
-{
-	flush_dcache ();
-	clear_icache ();
-}
-
-
-/* Exported functions.  */
-
-void flush_icache (void)
-{
-	cache_exec_after_store ();
-}
-
-void flush_icache_range (unsigned long start, unsigned long end)
-{
-	cache_exec_after_store ();
-}
-
-void flush_icache_page (struct vm_area_struct *vma, struct page *page)
-{
-	cache_exec_after_store ();
-}
-
-void flush_icache_user_range (struct vm_area_struct *vma, struct page *page,
-			      unsigned long adr, int len)
-{
-	cache_exec_after_store ();
-}
-
-void flush_cache_sigtramp (unsigned long addr)
-{
-	cache_exec_after_store ();
-}
diff --git a/arch/v850/kernel/v850e_intc.c b/arch/v850/kernel/v850e_intc.c
deleted file mode 100644
index 8d39a52ee6d..00000000000
--- a/arch/v850/kernel/v850e_intc.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * arch/v850/kernel/v850e_intc.c -- V850E interrupt controller (INTC)
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-
-#include <asm/v850e_intc.h>
-
-static void irq_nop (unsigned irq) { }
-
-static unsigned v850e_intc_irq_startup (unsigned irq)
-{
-	v850e_intc_clear_pending_irq (irq);
-	v850e_intc_enable_irq (irq);
-	return 0;
-}
-
-static void v850e_intc_end_irq (unsigned irq)
-{
-	unsigned long psw, temp;
-
-	/* Clear the highest-level bit in the In-service priority register
-	   (ISPR), to allow this interrupt (or another of the same or
-	   lesser priority) to happen again.
-
-	   The `reti' instruction normally does this automatically when the
-	   PSW bits EP and NP are zero, but we can't always rely on reti
-	   being used consistently to return after an interrupt (another
-	   process can be scheduled, for instance, which can delay the
-	   associated reti for a long time, or this process may be being
-	   single-stepped, which uses the `dbret' instruction to return
-	   from the kernel).
-
-	   We also set the PSW EP bit, which prevents reti from also
-	   trying to modify the ISPR itself.  */
-
-	/* Get PSW and disable interrupts.  */
-	asm volatile ("stsr psw, %0; di" : "=r" (psw));
-	/* We don't want to do anything for NMIs (they don't use the ISPR).  */
-	if (! (psw & 0xC0)) {
-		/* Transition to `trap' state, so that an eventual real
-		   reti instruction won't modify the ISPR.  */
-		psw |= 0x40;
-		/* Fake an interrupt return, which automatically clears the
-		   appropriate bit in the ISPR.  */
-		asm volatile ("mov hilo(1f), %0;"
-			      "ldsr %0, eipc; ldsr %1, eipsw;"
-			      "reti;"
-			      "1:"
-			      : "=&r" (temp) : "r" (psw));
-	}
-}
-
-/* Initialize HW_IRQ_TYPES for INTC-controlled irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-void __init v850e_intc_init_irq_types (struct v850e_intc_irq_init *inits,
-				       struct hw_interrupt_type *hw_irq_types)
-{
-	struct v850e_intc_irq_init *init;
-	for (init = inits; init->name; init++) {
-		unsigned i;
-		struct hw_interrupt_type *hwit = hw_irq_types++;
-
-		hwit->typename = init->name;
-
-		hwit->startup  = v850e_intc_irq_startup;
-		hwit->shutdown = v850e_intc_disable_irq;
-		hwit->enable   = v850e_intc_enable_irq;
-		hwit->disable  = v850e_intc_disable_irq;
-		hwit->ack      = irq_nop;
-		hwit->end      = v850e_intc_end_irq;
-		
-		/* Initialize kernel IRQ infrastructure for this interrupt.  */
-		init_irq_handlers(init->base, init->num, init->interval, hwit);
-
-		/* Set the interrupt priorities.  */
-		for (i = 0; i < init->num; i++) {
-			unsigned irq = init->base + i * init->interval;
-
-			/* If the interrupt is currently enabled (all
-			   interrupts are initially disabled), then
-			   assume whoever enabled it has set things up
-			   properly, and avoid messing with it.  */
-			if (! v850e_intc_irq_enabled (irq))
-				/* This write also (1) disables the
-				   interrupt, and (2) clears any pending
-				   interrupts.  */
-				V850E_INTC_IC (irq)
-					= (V850E_INTC_IC_PR (init->priority)
-					   | V850E_INTC_IC_MK);
-		}
-	}
-}
diff --git a/arch/v850/kernel/v850e_timer_d.c b/arch/v850/kernel/v850e_timer_d.c
deleted file mode 100644
index d2a4ece2574..00000000000
--- a/arch/v850/kernel/v850e_timer_d.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * include/asm-v850/v850e_timer_d.c -- `Timer D' component often used
- *	with V850E CPUs
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-
-#include <asm/v850e_utils.h>
-#include <asm/v850e_timer_d.h>
-
-/* Start interval timer TIMER (0-3).  The timer will issue the
-   corresponding INTCMD interrupt RATE times per second.
-   This function does not enable the interrupt.  */
-void v850e_timer_d_configure (unsigned timer, unsigned rate)
-{
-	unsigned divlog2, count;
-
-	/* Calculate params for timer.  */
-	if (! calc_counter_params (
-		    V850E_TIMER_D_BASE_FREQ, rate,
-		    V850E_TIMER_D_TMCD_CS_MIN, V850E_TIMER_D_TMCD_CS_MAX, 16,
-		    &divlog2, &count))
-		printk (KERN_WARNING
-			"Cannot find interval timer %d setting suitable"
-			" for rate of %dHz.\n"
-			"Using rate of %dHz instead.\n",
-			timer, rate,
-			(V850E_TIMER_D_BASE_FREQ >> divlog2) >> 16);
-
-	/* Do the actual hardware timer initialization:  */
-
-	/* Enable timer.  */
-	V850E_TIMER_D_TMCD(timer) = V850E_TIMER_D_TMCD_CAE;
-	/* Set clock divider.  */
-	V850E_TIMER_D_TMCD(timer)
-		= V850E_TIMER_D_TMCD_CAE
-		| V850E_TIMER_D_TMCD_CS(divlog2);
-	/* Set timer compare register.  */
-	V850E_TIMER_D_CMD(timer) = count;
-	/* Start counting.  */
-	V850E_TIMER_D_TMCD(timer)
-		= V850E_TIMER_D_TMCD_CAE
-		| V850E_TIMER_D_TMCD_CS(divlog2)
-		| V850E_TIMER_D_TMCD_CE;
-}
diff --git a/arch/v850/kernel/v850e_utils.c b/arch/v850/kernel/v850e_utils.c
deleted file mode 100644
index e6807ef8dee..00000000000
--- a/arch/v850/kernel/v850e_utils.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * include/asm-v850/v850e_utils.h -- Utility functions associated with
- *	V850E CPUs
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/v850e_utils.h>
-
-/* Calculate counter clock-divider and count values to attain the
-   desired frequency RATE from the base frequency BASE_FREQ.  The
-   counter is expected to have a clock-divider, which can divide the
-   system cpu clock by a power of two value from MIN_DIVLOG2 to
-   MAX_DIV_LOG2, and a word-size of COUNTER_SIZE bits (the counter
-   counts up and resets whenever it's equal to the compare register,
-   generating an interrupt or whatever when it does so).  The returned
-   values are: *DIVLOG2 -- log2 of the desired clock divider and *COUNT
-   -- the counter compare value to use.  Returns true if it was possible
-   to find a reasonable value, otherwise false (and the other return
-   values will be set to be as good as possible).  */
-int calc_counter_params (unsigned long base_freq,
-			 unsigned long rate,
-			 unsigned min_divlog2, unsigned max_divlog2,
-			 unsigned counter_size,
-			 unsigned *divlog2, unsigned *count)
-{
-	unsigned _divlog2;
-	int ok = 0;
-
-	/* Find the lowest clock divider setting that can represent RATE.  */
-	for (_divlog2 = min_divlog2; _divlog2 <= max_divlog2; _divlog2++) {
-		/* Minimum interrupt rate possible using this divider.  */
-		unsigned min_int_rate
-			= (base_freq >> _divlog2) >> counter_size;
-
-		if (min_int_rate <= rate) {
-			/* This setting is the highest resolution
-			   setting that's slow enough enough to attain
-			   RATE interrupts per second, so use it.  */
-			ok = 1;
-			break;
-		}
-	}
-
-	if (_divlog2 > max_divlog2)
-		/* Can't find correct setting.  */
-		_divlog2 = max_divlog2;
-
-	if (divlog2)
-		*divlog2 = _divlog2;
-	if (count)
-		*count = ((base_freq >> _divlog2) + rate/2) / rate;
-
-	return ok;
-}
diff --git a/arch/v850/kernel/vmlinux.lds.S b/arch/v850/kernel/vmlinux.lds.S
deleted file mode 100644
index d08cd1d27f2..00000000000
--- a/arch/v850/kernel/vmlinux.lds.S
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * arch/v850/vmlinux.lds.S -- kernel linker script for v850 platforms
- *
- *  Copyright (C) 2002,03,04,05  NEC Electronics Corporation
- *  Copyright (C) 2002,03,04,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-
-#define VMLINUX_SYMBOL(_sym_) _##_sym_
-#include <asm-generic/vmlinux.lds.h>
-
-/* For most platforms, this will define useful things like RAM addr/size.  */
-#include <asm/machdep.h>
-
-
-/* The following macros contain the usual definitions for various data areas.
-   The prefix `RAMK_' is used to indicate macros suitable for kernels loaded
-   into RAM, and similarly `ROMK_' for ROM-resident kernels.  Note that all
-   symbols are prefixed with an extra `_' for compatibility with the v850
-   toolchain.  */
-
-	
-/* Interrupt vectors.  */
-#define INTV_CONTENTS							      \
-		. = ALIGN (0x10) ;					      \
-		__intv_start = . ;					      \
-			*(.intv.reset)	/* Reset vector */		      \
-		. = __intv_start + 0x10 ;				      \
-			*(.intv.common)	/* Vectors common to all v850e proc */\
-		. = __intv_start + 0x80 ;				      \
-			*(.intv.mach)	/* Machine-specific int. vectors.  */ \
-		__intv_end = . ;
-
-#define RODATA_CONTENTS							      \
-		. = ALIGN (16) ;					      \
-			*(.rodata) *(.rodata.*)				      \
-			*(__vermagic)		/* Kernel version magic */    \
-			*(.rodata1)					      \
-		/* PCI quirks */					      \
-		___start_pci_fixups_early = . ;				      \
-			*(.pci_fixup_early)				      \
-		___end_pci_fixups_early = . ;				      \
-		___start_pci_fixups_header = . ;			      \
-			*(.pci_fixup_header)				      \
-		___end_pci_fixups_header = . ;				      \
-		___start_pci_fixups_final = . ;				      \
-			*(.pci_fixup_final)				      \
-		___end_pci_fixups_final = . ;				      \
-		___start_pci_fixups_enable = . ;			      \
-			*(.pci_fixup_enable)				      \
-		___end_pci_fixups_enable = . ;				      \
-		/* Kernel symbol table: Normal symbols */		      \
-		___start___ksymtab = .;					      \
-			*(__ksymtab)					      \
-		___stop___ksymtab = .;					      \
-		/* Kernel symbol table: GPL-only symbols */		      \
-		___start___ksymtab_gpl = .;				      \
-			*(__ksymtab_gpl)				      \
-		___stop___ksymtab_gpl = .;				      \
-		/* Kernel symbol table: GPL-future symbols */		      \
-		___start___ksymtab_gpl_future = .;			      \
-			*(__ksymtab_gpl_future)				      \
-		___stop___ksymtab_gpl_future = .;			      \
-		/* Kernel symbol table: strings */			      \
-			*(__ksymtab_strings)				      \
-		/* Kernel symbol table: Normal symbols */		      \
-		___start___kcrctab = .;					      \
-			*(__kcrctab)					      \
-		___stop___kcrctab = .;					      \
-		/* Kernel symbol table: GPL-only symbols */		      \
-		___start___kcrctab_gpl = .;				      \
-			*(__kcrctab_gpl)				      \
-		___stop___kcrctab_gpl = .;				      \
-		/* Kernel symbol table: GPL-future symbols */		      \
-		___start___kcrctab_gpl_future = .;			      \
-			*(__kcrctab_gpl_future)				      \
-		___stop___kcrctab_gpl_future = .;			      \
-		/* Built-in module parameters */			      \
-		. = ALIGN (4) ;						      \
-		___start___param = .;					      \
-		*(__param)						      \
-		___stop___param = .;
-
-
-/* Kernel text segment, and some constant data areas.  */
-#define TEXT_CONTENTS							      \
-		_text = .;						      \
-		__stext = . ;						      \
-		TEXT_TEXT						      \
-		SCHED_TEXT						      \
-			*(.exit.text)	/* 2.5 convention */		      \
-			*(.text.exit)	/* 2.4 convention */		      \
-			*(.text.lock)					      \
-			*(.exitcall.exit)				      \
-		__real_etext = . ;	/* There may be data after here.  */  \
-		RODATA_CONTENTS						      \
-		. = ALIGN (4) ;						      \
-		    	*(.call_table_data)				      \
-			*(.call_table_text)				      \
-		. = ALIGN (16) ;	/* Exception table.  */		      \
-		___start___ex_table = . ;				      \
-			*(__ex_table)					      \
-		___stop___ex_table = . ;				      \
-		. = ALIGN (4) ;						      \
-		__etext = . ;
-
-/* Kernel data segment.  */
-#define DATA_CONTENTS							      \
-		__sdata = . ;						      \
-		DATA_DATA						      \
-			EXIT_DATA	/* 2.5 convention */		      \
-			*(.data.exit)	/* 2.4 convention */		      \
-		. = ALIGN (16) ;					      \
-		*(.data.cacheline_aligned)				      \
-		. = ALIGN (0x2000) ;					      \
-        	*(.data.init_task)					      \
-		. = ALIGN (0x2000) ;					      \
-		__edata = . ;
-
-/* Kernel BSS segment.  */
-#define BSS_CONTENTS							      \
-		__sbss = . ;						      \
-			*(.bss)						      \
-			*(COMMON)					      \
-		. = ALIGN (4) ;						      \
-		__init_stack_end = . ;					      \
-		__ebss = . ;
-
-/* `initcall' tables.  */
-#define INITCALL_CONTENTS						      \
-		. = ALIGN (16) ;					      \
-		___setup_start = . ;					      \
-			*(.init.setup)	/* 2.5 convention */		      \
-			*(.setup.init)	/* 2.4 convention */		      \
-		___setup_end = . ;					      \
-		___initcall_start = . ;					      \
-			*(.initcall.init)				      \
-			INITCALLS					      \
-		. = ALIGN (4) ;						      \
-		___initcall_end = . ;					      \
-		___con_initcall_start = .;				      \
-			*(.con_initcall.init)				      \
-		___con_initcall_end = .;
-
-/* Contents of `init' section for a kernel that's loaded into RAM.  */
-#define RAMK_INIT_CONTENTS						      \
-		RAMK_INIT_CONTENTS_NO_END				      \
-		__init_end = . ;
-/* Same as RAMK_INIT_CONTENTS, but doesn't define the `__init_end' symbol.  */
-#define RAMK_INIT_CONTENTS_NO_END					      \
-		. = ALIGN (4096) ;					      \
-		__init_start = . ;					      \
-			__sinittext = .;				      \
-			INIT_TEXT	/* 2.5 convention */		      \
-			__einittext = .;				      \
-			INIT_DATA					      \
-			*(.text.init)	/* 2.4 convention */		      \
-			*(.data.init)					      \
-		INITCALL_CONTENTS					      \
-		INITRAMFS_CONTENTS
-
-/* The contents of `init' section for a ROM-resident kernel which
-   should go into RAM.  */	
-#define ROMK_INIT_RAM_CONTENTS						      \
-		. = ALIGN (4096) ;					      \
-		__init_start = . ;					      \
-			INIT_DATA	/* 2.5 convention */		      \
-			*(.data.init)	/* 2.4 convention */		      \
-		__init_end = . ;					      \
-		. = ALIGN (4096) ;
-
-/* The contents of `init' section for a ROM-resident kernel which
-   should go into ROM.  */	
-#define ROMK_INIT_ROM_CONTENTS						      \
-			_sinittext = .;					      \
-			INIT_TEXT	/* 2.5 convention */		      \
-			_einittext = .;					      \
-			*(.text.init)	/* 2.4 convention */		      \
-		INITCALL_CONTENTS					      \
-		INITRAMFS_CONTENTS
-
-/* A root filesystem image, for kernels with an embedded root filesystem.  */
-#define ROOT_FS_CONTENTS						      \
-		__root_fs_image_start = . ;				      \
-		*(.root)						      \
-		__root_fs_image_end = . ;
-
-#ifdef CONFIG_BLK_DEV_INITRD
-/* The initramfs archive.  */
-#define INITRAMFS_CONTENTS						      \
-		. = ALIGN (4) ;						      \
-		___initramfs_start = . ;				      \
-			*(.init.ramfs)					      \
-		___initramfs_end = . ;
-#endif
-
-/* Where the initial bootmap (bitmap for the boot-time memory allocator) 
-   should be place.  */
-#define BOOTMAP_CONTENTS						      \
-		. = ALIGN (4096) ;					      \
-		__bootmap = . ;						      \
-		. = . + 4096 ;		/* enough for 128MB.   */
-
-/* The contents of a `typical' kram area for a kernel in RAM.  */
-#define RAMK_KRAM_CONTENTS						      \
-		__kram_start = . ;					      \
-		TEXT_CONTENTS						      \
-		DATA_CONTENTS						      \
-		BSS_CONTENTS						      \
-		RAMK_INIT_CONTENTS					      \
-		__kram_end = . ;					      \
-		BOOTMAP_CONTENTS
-
-
-/* Define output sections normally used for a ROM-resident kernel.  
-   ROM and RAM should be appropriate memory areas to use for kernel
-   ROM and RAM data.  This assumes that ROM starts at 0 (and thus can
-   hold the interrupt vectors).  */
-#define ROMK_SECTIONS(ROM, RAM)						      \
-	.rom : {							      \
-		INTV_CONTENTS						      \
-		TEXT_CONTENTS						      \
-		ROMK_INIT_ROM_CONTENTS					      \
-		ROOT_FS_CONTENTS					      \
-	} > ROM								      \
-									      \
-	__rom_copy_src_start = . ;					      \
-									      \
-	.data : {							      \
-		__kram_start = . ;					      \
-		__rom_copy_dst_start = . ;				      \
-		DATA_CONTENTS						      \
-		ROMK_INIT_RAM_CONTENTS					      \
-		__rom_copy_dst_end = . ;				      \
-	} > RAM  AT> ROM						      \
-									      \
-	.bss ALIGN (4) : {						      \
-		BSS_CONTENTS						      \
-		__kram_end = . ;					      \
-		BOOTMAP_CONTENTS					      \
-	} > RAM
-
-
-/* The 32-bit variable `jiffies' is just the lower 32-bits of `jiffies_64'.  */
-_jiffies = _jiffies_64 ;
-
-
-/* Include an appropriate platform-dependent linker-script (which
-   usually should use the above macros to do most of the work).  */
-
-#ifdef CONFIG_V850E_SIM
-# include "sim.ld"
-#endif
-
-#ifdef CONFIG_V850E2_SIM85E2
-# include "sim85e2.ld"
-#endif
-
-#ifdef CONFIG_V850E2_FPGA85E2C
-# include "fpga85e2c.ld"
-#endif
-
-#ifdef CONFIG_V850E2_ANNA
-# ifdef CONFIG_ROM_KERNEL
-#  include "anna-rom.ld"
-# else
-#  include "anna.ld"
-# endif
-#endif
-
-#ifdef CONFIG_V850E_AS85EP1
-# ifdef CONFIG_ROM_KERNEL
-#  include "as85ep1-rom.ld"
-# else
-#  include "as85ep1.ld"
-# endif
-#endif
-
-#ifdef CONFIG_RTE_CB_MA1
-# ifdef CONFIG_ROM_KERNEL
-#  include "rte_ma1_cb-rom.ld"
-# else
-#  include "rte_ma1_cb.ld"
-# endif
-#endif
-
-#ifdef CONFIG_RTE_CB_NB85E
-# ifdef CONFIG_ROM_KERNEL
-#  include "rte_nb85e_cb-rom.ld"
-# elif defined(CONFIG_RTE_CB_MULTI)
-#  include "rte_nb85e_cb-multi.ld"
-# else
-#  include "rte_nb85e_cb.ld"
-# endif
-#endif
-
-#ifdef CONFIG_RTE_CB_ME2
-#  include "rte_me2_cb.ld"
-#endif
-
diff --git a/arch/v850/lib/Makefile b/arch/v850/lib/Makefile
deleted file mode 100644
index 1c78b728a11..00000000000
--- a/arch/v850/lib/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-#
-# arch/v850/lib/Makefile
-#
-
-lib-y  = ashrdi3.o ashldi3.o lshrdi3.o muldi3.o negdi2.o \
-	 checksum.o memcpy.o memset.o
diff --git a/arch/v850/lib/ashldi3.c b/arch/v850/lib/ashldi3.c
deleted file mode 100644
index 9e792d53f0e..00000000000
--- a/arch/v850/lib/ashldi3.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* ashldi3.c extracted from gcc-2.95.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__ashldi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      w.s.low = 0;
-      w.s.high = (USItype)uu.s.low << -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.low >> bm;
-      w.s.low = (USItype)uu.s.low << b;
-      w.s.high = ((USItype)uu.s.high << b) | carries;
-    }
-
-  return w.ll;
-}
diff --git a/arch/v850/lib/ashrdi3.c b/arch/v850/lib/ashrdi3.c
deleted file mode 100644
index 78efb65e315..00000000000
--- a/arch/v850/lib/ashrdi3.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* ashrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__ashrdi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      /* w.s.high = 1..1 or 0..0 */
-      w.s.high = uu.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1);
-      w.s.low = uu.s.high >> -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.high << bm;
-      w.s.high = uu.s.high >> b;
-      w.s.low = ((USItype)uu.s.low >> b) | carries;
-    }
-
-  return w.ll;
-}
diff --git a/arch/v850/lib/checksum.c b/arch/v850/lib/checksum.c
deleted file mode 100644
index 042158dfe17..00000000000
--- a/arch/v850/lib/checksum.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system.  INET is implemented using the  BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		MIPS specific IP/TCP/UDP checksumming routines
- *
- * Authors:	Ralf Baechle, <ralf@waldorf-gmbh.de>
- *		Lots of code moved from tcp.c and ip.c; see those files
- *		for more names.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- * $Id: checksum.c,v 1.1 2002/09/28 14:58:40 gerg Exp $
- */
-#include <net/checksum.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
-#include <asm/string.h>
-#include <asm/uaccess.h>
-
-static inline unsigned short from32to16 (unsigned long sum)
-{
-	unsigned int result;
-	/*
-			        %0		%1
-	      hsw %1, %0	H     L		L     H
-	      add %1, %0	H     L		H+L+C H+L
-	*/
-	asm ("hsw %1, %0; add %1, %0" : "=&r" (result) : "r" (sum));
-	return result >> 16;
-}
-
-static inline unsigned int do_csum(const unsigned char * buff, int len)
-{
-	int odd, count;
-	unsigned int result = 0;
-
-	if (len <= 0)
-		goto out;
-	odd = 1 & (unsigned long) buff;
-	if (odd) {
-		result = be16_to_cpu(*buff);
-		len--;
-		buff++;
-	}
-	count = len >> 1;		/* nr of 16-bit words.. */
-	if (count) {
-		if (2 & (unsigned long) buff) {
-			result += *(unsigned short *) buff;
-			count--;
-			len -= 2;
-			buff += 2;
-		}
-		count >>= 1;		/* nr of 32-bit words.. */
-		if (count) {
-			unsigned int carry = 0;
-			do {
-				unsigned int w = *(unsigned int *) buff;
-				count--;
-				buff += 4;
-				result += carry;
-				result += w;
-				carry = (w > result);
-			} while (count);
-			result += carry;
-			result = (result & 0xffff) + (result >> 16);
-		}
-		if (len & 2) {
-			result += *(unsigned short *) buff;
-			buff += 2;
-		}
-	}
-	if (len & 1)
-		result += le16_to_cpu(*buff);
-	result = from32to16(result);
-	if (odd)
-		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
-	return result;
-}
-
-/*
- *	This is a version of ip_compute_csum() optimized for IP headers,
- *	which always checksum on 4 octet boundaries.
- */
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	return (__force __sum16)~do_csum(iph,ihl*4);
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-__sum16 ip_compute_csum(const void *buff, int len)
-{
-	return (__force __sum16)~do_csum(buff,len);
-}
-
-/*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
-{
-	unsigned int result = do_csum(buff, len);
-
-	/* add in old sum, and carry.. */
-	result += (__force u32)sum;
-	if ((__force u32)sum > result)
-		result += 1;
-	return (__force __wsum)result;
-}
-
-EXPORT_SYMBOL(csum_partial);
-
-/*
- * copy while checksumming, otherwise like csum_partial
- */
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-                               int len, __wsum sum)
-{
-	/*
-	 * It's 2:30 am and I don't feel like doing it real ...
-	 * This is lots slower than the real thing (tm)
-	 */
-	sum = csum_partial(src, len, sum);
-	memcpy(dst, src, len);
-
-	return sum;
-}
-
-/*
- * Copy from userspace and compute checksum.  If we catch an exception
- * then zero the rest of the buffer.
- */
-__wsum csum_partial_copy_from_user (const void *src,
-					  void *dst,
-                                          int len, __wsum sum,
-                                          int *err_ptr)
-{
-	int missing;
-
-	missing = copy_from_user(dst, src, len);
-	if (missing) {
-		memset(dst + len - missing, 0, missing);
-		*err_ptr = -EFAULT;
-	}
-		
-	return csum_partial(dst, len, sum);
-}
diff --git a/arch/v850/lib/lshrdi3.c b/arch/v850/lib/lshrdi3.c
deleted file mode 100644
index 93b1cb6fdee..00000000000
--- a/arch/v850/lib/lshrdi3.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* lshrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__lshrdi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      w.s.high = 0;
-      w.s.low = (USItype)uu.s.high >> -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.high << bm;
-      w.s.high = (USItype)uu.s.high >> b;
-      w.s.low = ((USItype)uu.s.low >> b) | carries;
-    }
-
-  return w.ll;
-}
diff --git a/arch/v850/lib/memcpy.c b/arch/v850/lib/memcpy.c
deleted file mode 100644
index 492847b3e61..00000000000
--- a/arch/v850/lib/memcpy.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * arch/v850/lib/memcpy.c -- Memory copying
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/types.h>
-#include <asm/string.h>
-
-#define CHUNK_SIZE		32 /* bytes */
-#define CHUNK_ALIGNED(addr)	(((unsigned long)addr & 0x3) == 0)
-
-/* Note that this macro uses 8 call-clobbered registers (not including
-   R1), which are few enough so that the following functions don't need
-   to spill anything to memory.  It also uses R1, which is nominally
-   reserved for the assembler, but here it should be OK.  */
-#define COPY_CHUNK(src, dst)			\
-   asm ("mov %0, ep;"				\
-	"sld.w 0[ep], r1; sld.w 4[ep], r12;"	\
-	"sld.w 8[ep], r13; sld.w 12[ep], r14;"	\
-	"sld.w 16[ep], r15; sld.w 20[ep], r17;"	\
-	"sld.w 24[ep], r18; sld.w 28[ep], r19;"	\
-	"mov %1, ep;"				\
-	"sst.w r1, 0[ep]; sst.w r12, 4[ep];"	\
-	"sst.w r13, 8[ep]; sst.w r14, 12[ep];"	\
-	"sst.w r15, 16[ep]; sst.w r17, 20[ep];"	\
-	"sst.w r18, 24[ep]; sst.w r19, 28[ep]"	\
-	:: "r" (src), "r" (dst)			\
-	: "r1", "r12", "r13", "r14", "r15",	\
-	  "r17", "r18", "r19", "ep", "memory");
-
-void *memcpy (void *dst, const void *src, __kernel_size_t size)
-{
-	char *_dst = dst;
-	const char *_src = src;
-
-	if (size >= CHUNK_SIZE && CHUNK_ALIGNED(_src) && CHUNK_ALIGNED(_dst)) {
-		/* Copy large blocks efficiently.  */
-		unsigned count;
-		for (count = size / CHUNK_SIZE; count; count--) {
-			COPY_CHUNK (_src, _dst);
-			_src += CHUNK_SIZE;
-			_dst += CHUNK_SIZE;
-		}
-		size %= CHUNK_SIZE;
-	}
-
-	if (size > 0)
-		do
-			*_dst++ = *_src++;
-		while (--size);
-
-	return dst;
-}
-
-void *memmove (void *dst, const void *src, __kernel_size_t size)
-{
-	if ((unsigned long)dst < (unsigned long)src
-	    || (unsigned long)src + size < (unsigned long)dst)
-		return memcpy (dst, src, size);
-	else {
-		char *_dst = dst + size;
-		const char *_src = src + size;
-
-		if (size >= CHUNK_SIZE
-		    && CHUNK_ALIGNED (_src) && CHUNK_ALIGNED (_dst))
-		{
-			/* Copy large blocks efficiently.  */
-			unsigned count;
-			for (count = size / CHUNK_SIZE; count; count--) {
-				_src -= CHUNK_SIZE;
-				_dst -= CHUNK_SIZE;
-				COPY_CHUNK (_src, _dst);
-			}
-			size %= CHUNK_SIZE;
-		}
-
-		if (size > 0)
-			do
-				*--_dst = *--_src;
-			while (--size);
-
-		return _dst;
-	}
-}
diff --git a/arch/v850/lib/memset.c b/arch/v850/lib/memset.c
deleted file mode 100644
index d1b2ad821b1..00000000000
--- a/arch/v850/lib/memset.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * arch/v850/lib/memset.c -- Memory initialization
- *
- *  Copyright (C) 2001,02,04  NEC Corporation
- *  Copyright (C) 2001,02,04  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/types.h>
-
-void *memset (void *dst, int val, __kernel_size_t count)
-{
-	if (count) {
-		register unsigned loop;
-		register void *ptr asm ("ep") = dst;
-
-		/* replicate VAL into a long.  */
-		val &= 0xff;
-		val |= val << 8;
-		val |= val << 16;
-
-		/* copy initial unaligned bytes.  */
-		if ((long)ptr & 1) {
-			*(char *)ptr = val;
-			ptr = (void *)((char *)ptr + 1);
-			count--;
-		}
-		if (count > 2 && ((long)ptr & 2)) {
-			*(short *)ptr = val;
-			ptr = (void *)((short *)ptr + 1);
-			count -= 2;
-		}
-
-		/* 32-byte copying loop.  */
-		for (loop = count / 32; loop; loop--) {
-			asm ("sst.w %0, 0[ep]; sst.w %0, 4[ep];"
-			     "sst.w %0, 8[ep]; sst.w %0, 12[ep];"
-			     "sst.w %0, 16[ep]; sst.w %0, 20[ep];"
-			     "sst.w %0, 24[ep]; sst.w %0, 28[ep]"
-			     :: "r" (val) : "memory");
-			ptr += 32;
-		}
-		count %= 32;
-
-		/* long copying loop.  */
-		for (loop = count / 4; loop; loop--) {
-			*(long *)ptr = val;
-			ptr = (void *)((long *)ptr + 1);
-		}
-		count %= 4;
-
-		/* finish up with any trailing bytes.  */
-		if (count & 2) {
-			*(short *)ptr = val;
-			ptr = (void *)((short *)ptr + 1);
-		}
-		if (count & 1) {
-			*(char *)ptr = val;
-		}
-	}
-
-	return dst;
-}
diff --git a/arch/v850/lib/muldi3.c b/arch/v850/lib/muldi3.c
deleted file mode 100644
index 277ca25c82c..00000000000
--- a/arch/v850/lib/muldi3.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* muldi3.c extracted from gcc-2.7.2.3/libgcc2.c and 
-			   gcc-2.7.2.3/longlong.h which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulu %3, %0, %1"						\
-           : "=r" ((USItype)(w0)),					\
-             "=r" ((USItype)(w1))					\
-           : "%0" ((USItype)(u)),					\
-             "r" ((USItype)(v)))
-
-#define __umulsidi3(u, v) \
-  ({DIunion __w;							\
-    umul_ppmm (__w.s.high, __w.s.low, u, v);				\
-    __w.ll; })
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__muldi3 (DItype u, DItype v)
-{
-  DIunion w;
-  DIunion uu, vv;
-
-  uu.ll = u,
-  vv.ll = v;
-
-  w.ll = __umulsidi3 (uu.s.low, vv.s.low);
-  w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high
-	       + (USItype) uu.s.high * (USItype) vv.s.low);
-
-  return w.ll;
-}
diff --git a/arch/v850/lib/negdi2.c b/arch/v850/lib/negdi2.c
deleted file mode 100644
index 571e04fc619..00000000000
--- a/arch/v850/lib/negdi2.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * arch/v850/lib/negdi2.c -- 64-bit negation
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-typedef		 int DItype	__attribute__ ((mode (DI)));
-
-DItype __negdi2 (DItype x)
-{
-	__asm__ __volatile__
-		("not	r6, r10;"
-		 "add	1, r10;"
-		 "setf	c, r6;"
-		 "not	r7, r11;"
-		 "add	r6, r11"
-		 ::: "r6", "r7", "r10", "r11");
-}
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 8fc7451c004..3b4a14e355c 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -942,22 +942,6 @@ config SERIAL_IP22_ZILOG_CONSOLE
 	depends on SERIAL_IP22_ZILOG=y
 	select SERIAL_CORE_CONSOLE
 
-config V850E_UART
-	bool "NEC V850E on-chip UART support"
-	depends on V850E_MA1 || V850E_ME2 || V850E_TEG || V850E2_ANNA || V850E_AS85EP1
-	select SERIAL_CORE
-	default y
-
-config V850E_UARTB
-        bool
-	depends on V850E_UART && V850E_ME2
-	default y
-
-config V850E_UART_CONSOLE
-	bool "Use NEC V850E on-chip UART for console"
-	depends on V850E_UART
-	select SERIAL_CORE_CONSOLE
-
 config SERIAL_SH_SCI
 	tristate "SuperH SCI(F) serial port support"
 	depends on SUPERH || H8300
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index ccb78f66c2b..48399e134c0 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -788,8 +788,6 @@ config WATCHDOG_RIO
 	  machines.  The watchdog timeout period is normally one minute but
 	  can be changed with a boot-time parameter.
 
-# V850 Architecture
-
 # XTENSA Architecture
 
 #
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 25b352b664d..edd305a64e6 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -119,8 +119,6 @@ obj-$(CONFIG_SH_WDT) += shwdt.o
 
 # SPARC64 Architecture
 
-# V850 Architecture
-
 # XTENSA Architecture
 
 # Architecture Independant
diff --git a/include/asm-v850/Kbuild b/include/asm-v850/Kbuild
deleted file mode 100644
index c68e1680da0..00000000000
--- a/include/asm-v850/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-include include/asm-generic/Kbuild.asm
diff --git a/include/asm-v850/a.out.h b/include/asm-v850/a.out.h
deleted file mode 100644
index e9439a0708f..00000000000
--- a/include/asm-v850/a.out.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __V850_A_OUT_H__
-#define __V850_A_OUT_H__
-
-struct exec
-{
-  unsigned long a_info;		/* Use macros N_MAGIC, etc for access */
-  unsigned a_text;		/* length of text, in bytes */
-  unsigned a_data;		/* length of data, in bytes */
-  unsigned a_bss;		/* length of uninitialized data area for file, in bytes */
-  unsigned a_syms;		/* length of symbol table data in file, in bytes */
-  unsigned a_entry;		/* start address */
-  unsigned a_trsize;		/* length of relocation info for text, in bytes */
-  unsigned a_drsize;		/* length of relocation info for data, in bytes */
-};
-
-#define N_TRSIZE(a)	((a).a_trsize)
-#define N_DRSIZE(a)	((a).a_drsize)
-#define N_SYMSIZE(a)	((a).a_syms)
-
-
-#endif /* __V850_A_OUT_H__ */
diff --git a/include/asm-v850/anna.h b/include/asm-v850/anna.h
deleted file mode 100644
index cd5eaee103b..00000000000
--- a/include/asm-v850/anna.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * include/asm-v850/anna.h -- Anna V850E2 evaluation cpu chip/board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_ANNA_H__
-#define __V850_ANNA_H__
-
-#include <asm/v850e2.h>		/* Based on V850E2 core.  */
-
-
-#define CPU_MODEL	"v850e2/anna"
-#define CPU_MODEL_LONG	"NEC V850E2/Anna"
-#define PLATFORM	"anna"
-#define PLATFORM_LONG	"NEC/Midas lab V850E2/Anna evaluation board"
-
-#define CPU_CLOCK_FREQ	200000000 /*  200MHz */
-#define SYS_CLOCK_FREQ	 33300000 /* 33.3MHz */
-
-
-/* 1MB of static RAM.  This memory is mirrored 64 times.  */
-#define SRAM_ADDR	0x04000000
-#define SRAM_SIZE	0x00100000 /* 1MB */
-/* 64MB of DRAM.  */
-#define SDRAM_ADDR	0x08000000	
-#define SDRAM_SIZE	0x04000000 /* 64MB */
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 	SRAM_ADDR
-
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  The Anna chip has
-   128K of `dLB' ram nominally located at 0xFFF00000, but it's mirrored
-   every 128K, so we can use the `last mirror' (except for the portion at
-   the top which is overridden by I/O space).  In addition, the early
-   sample chip we're using has lots of memory errors in the dLB ram, so we
-   use a specially chosen location that has at least 20 bytes of contiguous
-   valid memory (xxxF0020 - xxxF003F).  */
-#define R0_RAM_ADDR			0xFFFF8020
-
-
-/* Anna specific control registers.  */
-#define ANNA_ILBEN_ADDR			0xFFFFF7F2
-#define ANNA_ILBEN			(*(volatile u16 *)ANNA_ILBEN_ADDR)
-
-
-/* I/O port P0-P3. */
-/* Direct I/O.  Bits 0-7 are pins Pn0-Pn7.  */
-#define ANNA_PORT_IO_ADDR(n)		(0xFFFFF400 + (n) * 2)
-#define ANNA_PORT_IO(n)			(*(volatile u8 *)ANNA_PORT_IO_ADDR(n))
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ANNA_PORT_PM_ADDR(n)		(0xFFFFF410 + (n) * 2)
-#define ANNA_PORT_PM(n)			(*(volatile u8 *)ANNA_PORT_PM_ADDR(n))
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).  */
-#define IRQ_INTP(n)	(n)	/* Pnnn (pin) interrupts 0-15 */
-#define IRQ_INTP_NUM	16
-#define IRQ_INTOV(n)	(0x10 + (n)) /* 0-2 */
-#define IRQ_INTOV_NUM	2
-#define IRQ_INTCCC(n)	(0x12 + (n))
-#define IRQ_INTCCC_NUM	4
-#define IRQ_INTCMD(n)	(0x16 + (n)) /* interval timer interrupts 0-5 */
-#define IRQ_INTCMD_NUM	6
-#define IRQ_INTDMA(n)	(0x1C + (n)) /* DMA interrupts 0-3 */
-#define IRQ_INTDMA_NUM	4
-#define IRQ_INTDMXER	0x20
-#define IRQ_INTSRE(n)	(0x21 + (n)*3) /* UART 0-1 reception error */
-#define IRQ_INTSRE_NUM	2
-#define IRQ_INTSR(n)	(0x22 + (n)*3) /* UART 0-1 reception completion */
-#define IRQ_INTSR_NUM	2
-#define IRQ_INTST(n)	(0x23 + (n)*3) /* UART 0-1 transmission completion */
-#define IRQ_INTST_NUM	2
-
-#define NUM_CPU_IRQS	64
-
-#ifndef __ASSEMBLY__
-/* Initialize chip interrupts.  */
-extern void anna_init_irqs (void);
-#endif
-
-
-/* Anna UART details (basically the same as the V850E/MA1, but 2 channels).  */
-#define V850E_UART_NUM_CHANNELS		2
-#define V850E_UART_BASE_FREQ		(SYS_CLOCK_FREQ / 2)
-#define V850E_UART_CHIP_NAME 		"V850E2/NA85E2A"
-
-/* This is the UART channel that's actually connected on the board.  */
-#define V850E_UART_CONSOLE_CHANNEL	1
-
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	anna_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void anna_uart_pre_configure (unsigned chan,
-				     unsigned cflags, unsigned baud);
-#endif
-
-/* This board supports RTS/CTS for the on-chip UART, but only for channel 1. */
-
-/* CTS for UART channel 1 is pin P37 (bit 7 of port 3).  */
-#define V850E_UART_CTS(chan)	((chan) == 1 ? !(ANNA_PORT_IO(3) & 0x80) : 1)
-/* RTS for UART channel 1 is pin P07 (bit 7 of port 0).  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   if (chan == 1) {						      \
-		   unsigned old = ANNA_PORT_IO(0); 			      \
-		   if (val)						      \
-			   ANNA_PORT_IO(0) = old & ~0x80;		      \
-		   else							      \
-			   ANNA_PORT_IO(0) = old | 0x80;		      \
-	   }								      \
-   } while (0)
-
-
-/* Timer C details.  */
-#define V850E_TIMER_C_BASE_ADDR		0xFFFFF600
-
-/* Timer D details (the Anna actually has 5 of these; should change later). */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF540
-#define V850E_TIMER_D_TMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x2)
-#define V850E_TIMER_D_TMCD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x4)
-
-#define V850E_TIMER_D_BASE_FREQ		SYS_CLOCK_FREQ
-#define V850E_TIMER_D_TMCD_CS_MIN	1 /* min 2^1 divider */
-
-
-#endif /* __V850_ANNA_H__ */
diff --git a/include/asm-v850/as85ep1.h b/include/asm-v850/as85ep1.h
deleted file mode 100644
index 5a5ca9073d0..00000000000
--- a/include/asm-v850/as85ep1.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * include/asm-v850/as85ep1.h -- AS85EP1 evaluation CPU chip/board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_AS85EP1_H__
-#define __V850_AS85EP1_H__
-
-#include <asm/v850e.h>
-
-
-#define CPU_MODEL	"as85ep1"
-#define CPU_MODEL_LONG	"NEC V850E/AS85EP1"
-#define PLATFORM	"AS85EP1"
-#define PLATFORM_LONG	"NEC V850E/AS85EP1 evaluation board"
-
-#define CPU_CLOCK_FREQ	96000000 /*  96MHz */
-#define SYS_CLOCK_FREQ	CPU_CLOCK_FREQ
-
-
-/* 1MB of static RAM.  */
-#define SRAM_ADDR	0x00400000
-#define SRAM_SIZE	0x00100000 /* 1MB */
-/* About 58MB of DRAM.  This can actually be at one of two positions,
-   determined by jump JP3; we have to use the first position because the
-   second is partially out of processor instruction addressing range
-   (though in the second position there's actually 64MB available).  */
-#define SDRAM_ADDR	0x00600000
-#define SDRAM_SIZE	0x039F8000 /* approx 58MB */
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 	SRAM_ADDR
-
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  The AS85EP1 chip
-   16K of internal RAM located slightly before I/O space.  */
-#define R0_RAM_ADDR	0xFFFF8000
-
-
-/* AS85EP1 specific control registers.  */
-#define AS85EP1_CSC_ADDR(n)	(0xFFFFF060 + (n) * 2)
-#define AS85EP1_CSC(n)		(*(volatile u16 *)AS85EP1_CSC_ADDR(n))
-#define AS85EP1_BSC_ADDR	0xFFFFF066
-#define AS85EP1_BSC		(*(volatile u16 *)AS85EP1_BSC_ADDR)
-#define AS85EP1_BCT_ADDR(n)	(0xFFFFF480 + (n) * 2)
-#define AS85EP1_BCT(n)		(*(volatile u16 *)AS85EP1_BCT_ADDR(n))
-#define AS85EP1_DWC_ADDR(n)	(0xFFFFF484 + (n) * 2)
-#define AS85EP1_DWC(n)		(*(volatile u16 *)AS85EP1_DWC_ADDR(n))
-#define AS85EP1_BCC_ADDR	0xFFFFF488
-#define AS85EP1_BCC		(*(volatile u16 *)AS85EP1_BCC_ADDR)
-#define AS85EP1_ASC_ADDR	0xFFFFF48A
-#define AS85EP1_ASC		(*(volatile u16 *)AS85EP1_ASC_ADDR)
-#define AS85EP1_BCP_ADDR	0xFFFFF48C
-#define AS85EP1_BCP		(*(volatile u16 *)AS85EP1_BCP_ADDR)
-#define AS85EP1_LBS_ADDR	0xFFFFF48E
-#define AS85EP1_LBS		(*(volatile u16 *)AS85EP1_LBS_ADDR)
-#define AS85EP1_BMC_ADDR	0xFFFFF498
-#define AS85EP1_BMC		(*(volatile u16 *)AS85EP1_BMC_ADDR)
-#define AS85EP1_PRC_ADDR	0xFFFFF49A
-#define AS85EP1_PRC		(*(volatile u16 *)AS85EP1_PRC_ADDR)
-#define AS85EP1_SCR_ADDR(n)	(0xFFFFF4A0 + (n) * 4)
-#define AS85EP1_SCR(n)		(*(volatile u16 *)AS85EP1_SCR_ADDR(n))
-#define AS85EP1_RFS_ADDR(n)	(0xFFFFF4A2 + (n) * 4)
-#define AS85EP1_RFS(n)		(*(volatile u16 *)AS85EP1_RFS_ADDR(n))
-#define AS85EP1_IRAMM_ADDR	0xFFFFF80A
-#define AS85EP1_IRAMM		(*(volatile u8 *)AS85EP1_IRAMM_ADDR)
-
-
-
-/* I/O port P0-P13. */
-/* Direct I/O.  Bits 0-7 are pins Pn0-Pn7.  */
-#define AS85EP1_PORT_IO_ADDR(n)	(0xFFFFF400 + (n) * 2)
-#define AS85EP1_PORT_IO(n)	(*(volatile u8 *)AS85EP1_PORT_IO_ADDR(n))
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define AS85EP1_PORT_PM_ADDR(n)	(0xFFFFF420 + (n) * 2)
-#define AS85EP1_PORT_PM(n)	(*(volatile u8 *)AS85EP1_PORT_PM_ADDR(n))
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define AS85EP1_PORT_PMC_ADDR(n) (0xFFFFF440 + (n) * 2)
-#define AS85EP1_PORT_PMC(n)	(*(volatile u8 *)AS85EP1_PORT_PMC_ADDR(n))
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).  */
-#define IRQ_INTCCC(n)	(0x0C + (n))
-#define IRQ_INTCCC_NUM	8
-#define IRQ_INTCMD(n)	(0x14 + (n)) /* interval timer interrupts 0-5 */
-#define IRQ_INTCMD_NUM	6
-#define IRQ_INTSRE(n)	(0x1E + (n)*3) /* UART 0-1 reception error */
-#define IRQ_INTSRE_NUM	2
-#define IRQ_INTSR(n)	(0x1F + (n)*3) /* UART 0-1 reception completion */
-#define IRQ_INTSR_NUM	2
-#define IRQ_INTST(n)	(0x20 + (n)*3) /* UART 0-1 transmission completion */
-#define IRQ_INTST_NUM	2
-
-#define NUM_CPU_IRQS	64
-
-#ifndef __ASSEMBLY__
-/* Initialize chip interrupts.  */
-extern void as85ep1_init_irqs (void);
-#endif
-
-
-/* AS85EP1 UART details (basically the same as the V850E/MA1, but 2 channels).  */
-#define V850E_UART_NUM_CHANNELS		2
-#define V850E_UART_BASE_FREQ		(SYS_CLOCK_FREQ / 4)
-#define V850E_UART_CHIP_NAME 		"V850E/NA85E"
-
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	as85ep1_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void as85ep1_uart_pre_configure (unsigned chan,
-					unsigned cflags, unsigned baud);
-#endif
-
-/* This board supports RTS/CTS for the on-chip UART, but only for channel 1. */
-
-/* CTS for UART channel 1 is pin P54 (bit 4 of port 5).  */
-#define V850E_UART_CTS(chan)   ((chan) == 1 ? !(AS85EP1_PORT_IO(5) & 0x10) : 1)
-/* RTS for UART channel 1 is pin P53 (bit 3 of port 5).  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   if (chan == 1) {						      \
-		   unsigned old = AS85EP1_PORT_IO(5); 			      \
-		   if (val)						      \
-			   AS85EP1_PORT_IO(5) = old & ~0x8;		      \
-		   else							      \
-			   AS85EP1_PORT_IO(5) = old | 0x8;		      \
-	   }								      \
-   } while (0)
-
-
-/* Timer C details.  */
-#define V850E_TIMER_C_BASE_ADDR		0xFFFFF600
-
-/* Timer D details (the AS85EP1 actually has 5 of these; should change later). */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF540
-#define V850E_TIMER_D_TMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x2)
-#define V850E_TIMER_D_TMCD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x4)
-
-#define V850E_TIMER_D_BASE_FREQ		SYS_CLOCK_FREQ
-#define V850E_TIMER_D_TMCD_CS_MIN	2 /* min 2^2 divider */
-
-
-#endif /* __V850_AS85EP1_H__ */
diff --git a/include/asm-v850/asm.h b/include/asm-v850/asm.h
deleted file mode 100644
index bf1e785a5dd..00000000000
--- a/include/asm-v850/asm.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * include/asm-v850/asm.h -- Macros for writing assembly code
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#define G_ENTRY(name)							      \
-   .balign 4;								      \
-   .globl name;								      \
-   .type  name,@function;						      \
-   name
-#define G_DATA(name)							      \
-   .globl name;								      \
-   .type  name,@object;							      \
-   name
-#define END(name)							      \
-   .size  name,.-name
-
-#define L_ENTRY(name)							      \
-   .balign 4;								      \
-   .type  name,@function;						      \
-   name
-#define L_DATA(name)							      \
-   .type  name,@object;							      \
-   name
diff --git a/include/asm-v850/atomic.h b/include/asm-v850/atomic.h
deleted file mode 100644
index e4e57de08f7..00000000000
--- a/include/asm-v850/atomic.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * include/asm-v850/atomic.h -- Atomic operations
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_ATOMIC_H__
-#define __V850_ATOMIC_H__
-
-
-#include <asm/system.h>
-
-#ifdef CONFIG_SMP
-#error SMP not supported
-#endif
-
-typedef struct { int counter; } atomic_t;
-
-#define ATOMIC_INIT(i)	{ (i) }
-
-#ifdef __KERNEL__
-
-#define atomic_read(v)		((v)->counter)
-#define atomic_set(v,i)		(((v)->counter) = (i))
-
-static inline int atomic_add_return (int i, volatile atomic_t *v)
-{
-	unsigned long flags;
-	int res;
-
-	local_irq_save (flags);
-	res = v->counter + i;
-	v->counter = res;
-	local_irq_restore (flags);
-
-	return res;
-}
-
-static __inline__ int atomic_sub_return (int i, volatile atomic_t *v)
-{
-	unsigned long flags;
-	int res;
-
-	local_irq_save (flags);
-	res = v->counter - i;
-	v->counter = res;
-	local_irq_restore (flags);
-
-	return res;
-}
-
-static __inline__ void atomic_clear_mask (unsigned long mask, unsigned long *addr)
-{
-	unsigned long flags;
-
-	local_irq_save (flags);
-	*addr &= ~mask;
-	local_irq_restore (flags);
-}
-
-#endif
-
-#define atomic_add(i, v)	atomic_add_return ((i), (v))
-#define atomic_sub(i, v)	atomic_sub_return ((i), (v))
-
-#define atomic_dec_return(v)	atomic_sub_return (1, (v))
-#define atomic_inc_return(v)	atomic_add_return (1, (v))
-#define atomic_inc(v) 		atomic_inc_return (v)
-#define atomic_dec(v) 		atomic_dec_return (v)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-#define atomic_sub_and_test(i,v)	(atomic_sub_return ((i), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_sub_return (1, (v)) == 0)
-#define atomic_add_negative(i,v)	(atomic_add_return ((i), (v)) < 0)
-
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-{
-	int ret;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ret = v->counter;
-	if (likely(ret == old))
-		v->counter = new;
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-static inline int atomic_add_unless(atomic_t *v, int a, int u)
-{
-	int ret;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ret = v->counter;
-	if (ret != u)
-		v->counter += a;
-	local_irq_restore(flags);
-
-	return ret != u;
-}
-
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-/* Atomic operations are already serializing on ARM */
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
-
-#include <asm-generic/atomic.h>
-#endif /* __V850_ATOMIC_H__ */
diff --git a/include/asm-v850/auxvec.h b/include/asm-v850/auxvec.h
deleted file mode 100644
index f493232d022..00000000000
--- a/include/asm-v850/auxvec.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __V850_AUXVEC_H__
-#define __V850_AUXVEC_H__
-
-#endif /* __V850_AUXVEC_H__ */
diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h
deleted file mode 100644
index f82f5b4a56e..00000000000
--- a/include/asm-v850/bitops.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * include/asm-v850/bitops.h -- Bit operations
- *
- *  Copyright (C) 2001,02,03,04,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,04,05  Miles Bader <miles@gnu.org>
- *  Copyright (C) 1992  Linus Torvalds.
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#ifndef __V850_BITOPS_H__
-#define __V850_BITOPS_H__
-
-#ifndef _LINUX_BITOPS_H
-#error only <linux/bitops.h> can be included directly
-#endif
-
-#include <linux/compiler.h>	/* unlikely  */
-#include <asm/byteorder.h>	/* swab32 */
-#include <asm/system.h>		/* interrupt enable/disable */
-
-
-#ifdef __KERNEL__
-
-#include <asm-generic/bitops/ffz.h>
-
-/*
- * The __ functions are not atomic
- */
-
-/* In the following constant-bit-op macros, a "g" constraint is used when
-   we really need an integer ("i" constraint).  This is to avoid
-   warnings/errors from the compiler in the case where the associated
-   operand _isn't_ an integer, and shouldn't produce bogus assembly because
-   use of that form is protected by a guard statement that checks for
-   constants, and should otherwise be removed by the optimizer.  This
-   _usually_ works -- however, __builtin_constant_p returns true for a
-   variable with a known constant value too, and unfortunately gcc will
-   happily put the variable in a register and use the register for the "g"
-   constraint'd asm operand.  To avoid the latter problem, we add a
-   constant offset to the operand and subtract it back in the asm code;
-   forcing gcc to do arithmetic on the value is usually enough to get it
-   to use a real constant value.  This is horrible, and ultimately
-   unreliable too, but it seems to work for now (hopefully gcc will offer
-   us more control in the future, so we can do a better job).  */
-
-#define __const_bit_op(op, nr, addr)					\
-  ({ __asm__ (op " (%0 - 0x123), %1"					\
-	      :: "g" (((nr) & 0x7) + 0x123),				\
-		 "m" (*((char *)(addr) + ((nr) >> 3)))			\
-	      : "memory"); })
-#define __var_bit_op(op, nr, addr)					\
-  ({ int __nr = (nr);							\
-     __asm__ (op " %0, [%1]"						\
-	      :: "r" (__nr & 0x7),					\
-		 "r" ((char *)(addr) + (__nr >> 3))			\
-	      : "memory"); })
-#define __bit_op(op, nr, addr)						\
-  ((__builtin_constant_p (nr) && (unsigned)(nr) <= 0x7FFFF)		\
-   ? __const_bit_op (op, nr, addr)					\
-   : __var_bit_op (op, nr, addr))
-
-#define __set_bit(nr, addr)		__bit_op ("set1", nr, addr)
-#define __clear_bit(nr, addr)		__bit_op ("clr1", nr, addr)
-#define __change_bit(nr, addr)		__bit_op ("not1", nr, addr)
-
-/* The bit instructions used by `non-atomic' variants are actually atomic.  */
-#define set_bit __set_bit
-#define clear_bit __clear_bit
-#define change_bit __change_bit
-
-
-#define __const_tns_bit_op(op, nr, addr)				      \
-  ({ int __tns_res;							      \
-     __asm__ __volatile__ (						      \
-	     "tst1 (%1 - 0x123), %2; setf nz, %0; " op " (%1 - 0x123), %2"    \
-	     : "=&r" (__tns_res)					      \
-	     : "g" (((nr) & 0x7) + 0x123),				      \
-	       "m" (*((char *)(addr) + ((nr) >> 3)))			      \
-	     : "memory");						      \
-     __tns_res;								      \
-  })
-#define __var_tns_bit_op(op, nr, addr)					      \
-  ({ int __nr = (nr);							      \
-     int __tns_res;							      \
-     __asm__ __volatile__ (						      \
-	     "tst1 %1, [%2]; setf nz, %0; " op " %1, [%2]"		      \
-	      : "=&r" (__tns_res)					      \
-	      : "r" (__nr & 0x7),					      \
-		"r" ((char *)(addr) + (__nr >> 3))			      \
-	      : "memory");						      \
-     __tns_res;								      \
-  })
-#define __tns_bit_op(op, nr, addr)					\
-  ((__builtin_constant_p (nr) && (unsigned)(nr) <= 0x7FFFF)		\
-   ? __const_tns_bit_op (op, nr, addr)					\
-   : __var_tns_bit_op (op, nr, addr))
-#define __tns_atomic_bit_op(op, nr, addr)				\
-  ({ int __tns_atomic_res, __tns_atomic_flags;				\
-     local_irq_save (__tns_atomic_flags);				\
-     __tns_atomic_res = __tns_bit_op (op, nr, addr);			\
-     local_irq_restore (__tns_atomic_flags);				\
-     __tns_atomic_res;							\
-  })
-
-#define __test_and_set_bit(nr, addr)	__tns_bit_op ("set1", nr, addr)
-#define test_and_set_bit(nr, addr)	__tns_atomic_bit_op ("set1", nr, addr)
-
-#define __test_and_clear_bit(nr, addr)	__tns_bit_op ("clr1", nr, addr)
-#define test_and_clear_bit(nr, addr)	__tns_atomic_bit_op ("clr1", nr, addr)
-
-#define __test_and_change_bit(nr, addr)	__tns_bit_op ("not1", nr, addr)
-#define test_and_change_bit(nr, addr)	__tns_atomic_bit_op ("not1", nr, addr)
-
-
-#define __const_test_bit(nr, addr)					      \
-  ({ int __test_bit_res;						      \
-     __asm__ __volatile__ ("tst1 (%1 - 0x123), %2; setf nz, %0"		      \
-			   : "=r" (__test_bit_res)			      \
-			   : "g" (((nr) & 0x7) + 0x123),		      \
-			     "m" (*((const char *)(addr) + ((nr) >> 3))));    \
-     __test_bit_res;							      \
-  })
-static inline int __test_bit (int nr, const void *addr)
-{
-	int res;
-	__asm__ __volatile__ ("tst1 %1, [%2]; setf nz, %0"
-			      : "=r" (res)
-			      : "r" (nr & 0x7), "r" (addr + (nr >> 3)));
-	return res;
-}
-#define test_bit(nr,addr)						\
-  ((__builtin_constant_p (nr) && (unsigned)(nr) <= 0x7FFFF)		\
-   ? __const_test_bit ((nr), (addr))					\
-   : __test_bit ((nr), (addr)))
-
-
-/* clear_bit doesn't provide any barrier for the compiler.  */
-#define smp_mb__before_clear_bit()	barrier ()
-#define smp_mb__after_clear_bit()	barrier ()
-
-#include <asm-generic/bitops/ffs.h>
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/find.h>
-#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
-
-#include <asm-generic/bitops/ext2-non-atomic.h>
-#define ext2_set_bit_atomic(l,n,a)      test_and_set_bit(n,a)
-#define ext2_clear_bit_atomic(l,n,a)    test_and_clear_bit(n,a)
-
-#include <asm-generic/bitops/minix.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __V850_BITOPS_H__ */
diff --git a/include/asm-v850/bug.h b/include/asm-v850/bug.h
deleted file mode 100644
index b0ed2d35f3e..00000000000
--- a/include/asm-v850/bug.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * include/asm-v850/bug.h -- Bug reporting
- *
- *  Copyright (C) 2003  NEC Electronics Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_BUG_H__
-#define __V850_BUG_H__
-
-#ifdef CONFIG_BUG
-extern void __bug (void) __attribute__ ((noreturn));
-#define BUG()		__bug()
-#define HAVE_ARCH_BUG
-#endif
-
-#include <asm-generic/bug.h>
-
-#endif /* __V850_BUG_H__ */
diff --git a/include/asm-v850/bugs.h b/include/asm-v850/bugs.h
deleted file mode 100644
index 71110a65c1d..00000000000
--- a/include/asm-v850/bugs.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *  include/asm-v850e/bugs.h
- *
- *  Copyright (C) 1994  Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- *	void check_bugs(void);
- */
-
-static void check_bugs(void)
-{
-}
diff --git a/include/asm-v850/byteorder.h b/include/asm-v850/byteorder.h
deleted file mode 100644
index a6f07530050..00000000000
--- a/include/asm-v850/byteorder.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * include/asm-v850/byteorder.h -- Endian id and conversion ops
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_BYTEORDER_H__
-#define __V850_BYTEORDER_H__
-
-#include <asm/types.h>
-#include <linux/compiler.h>
-
-#ifdef __GNUC__
-
-static __inline__ __attribute_const__ __u32 ___arch__swab32 (__u32 word)
-{
-	__u32 res;
-	__asm__ ("bsw %1, %0" : "=r" (res) : "r" (word));
-	return res;
-}
-
-static __inline__ __attribute_const__ __u16 ___arch__swab16 (__u16 half_word)
-{
-	__u16 res;
-	__asm__ ("bsh %1, %0" : "=r" (res) : "r" (half_word));
-	return res;
-}
-
-#define __arch__swab32(x) ___arch__swab32(x)
-#define __arch__swab16(x) ___arch__swab16(x)
-
-#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
-#  define __BYTEORDER_HAS_U64__
-#  define __SWAB_64_THRU_32__
-#endif
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/little_endian.h>
-
-#endif /* __V850_BYTEORDER_H__ */
diff --git a/include/asm-v850/cache.h b/include/asm-v850/cache.h
deleted file mode 100644
index 8832c7ea324..00000000000
--- a/include/asm-v850/cache.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * include/asm-v850/cache.h -- Cache operations
- *
- *  Copyright (C) 2001,05  NEC Corporation
- *  Copyright (C) 2001,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CACHE_H__
-#define __V850_CACHE_H__
-
-/* All cache operations are machine-dependent.  */
-#include <asm/machdep.h>
-
-#ifndef L1_CACHE_BYTES
-/* This processor has no cache, so just choose an arbitrary value.  */
-#define L1_CACHE_BYTES		16
-#define L1_CACHE_SHIFT		4
-#endif
-
-#endif /* __V850_CACHE_H__ */
diff --git a/include/asm-v850/cacheflush.h b/include/asm-v850/cacheflush.h
deleted file mode 100644
index 9ece05a202e..00000000000
--- a/include/asm-v850/cacheflush.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * include/asm-v850/cacheflush.h
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CACHEFLUSH_H__
-#define __V850_CACHEFLUSH_H__
-
-/* Somebody depends on this; sigh...  */
-#include <linux/mm.h>
-
-#include <asm/machdep.h>
-
-
-/* The following are all used by the kernel in ways that only affect
-   systems with MMUs, so we don't need them.  */
-#define flush_cache_all()			((void)0)
-#define flush_cache_mm(mm)			((void)0)
-#define flush_cache_dup_mm(mm)			((void)0)
-#define flush_cache_range(vma, start, end)	((void)0)
-#define flush_cache_page(vma, vmaddr, pfn)	((void)0)
-#define flush_dcache_page(page)			((void)0)
-#define flush_dcache_mmap_lock(mapping)		((void)0)
-#define flush_dcache_mmap_unlock(mapping)	((void)0)
-#define flush_cache_vmap(start, end)		((void)0)
-#define flush_cache_vunmap(start, end)		((void)0)
-
-#ifdef CONFIG_NO_CACHE
-
-/* Some systems have no cache at all, in which case we don't need these
-   either.  */
-#define flush_icache()				((void)0)
-#define flush_icache_range(start, end)		((void)0)
-#define flush_icache_page(vma,pg)		((void)0)
-#define flush_icache_user_range(vma,pg,adr,len)	((void)0)
-#define flush_cache_sigtramp(vaddr)		((void)0)
-
-#else /* !CONFIG_NO_CACHE */
-
-struct page;
-struct mm_struct;
-struct vm_area_struct;
-
-/* Otherwise, somebody had better define them.  */
-extern void flush_icache (void);
-extern void flush_icache_range (unsigned long start, unsigned long end);
-extern void flush_icache_page (struct vm_area_struct *vma, struct page *page);
-extern void flush_icache_user_range (struct vm_area_struct *vma,
-				     struct page *page,
-				     unsigned long adr, int len);
-extern void flush_cache_sigtramp (unsigned long addr);
-
-#endif /* CONFIG_NO_CACHE */
-
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { memcpy(dst, src, len); \
-     flush_icache_user_range(vma, page, vaddr, len); \
-} while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-	memcpy(dst, src, len)
-
-#endif /* __V850_CACHEFLUSH_H__ */
diff --git a/include/asm-v850/checksum.h b/include/asm-v850/checksum.h
deleted file mode 100644
index d1dddd93826..00000000000
--- a/include/asm-v850/checksum.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * include/asm-v850/checksum.h -- Checksum ops
- *
- *  Copyright (C) 2001,2005  NEC Corporation
- *  Copyright (C) 2001,2005  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CHECKSUM_H__
-#define __V850_CHECKSUM_H__
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- * the same as csum_partial, but copies from src while it
- * checksums
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-extern __wsum csum_partial_copy_nocheck(const void *src,
-				   void *dst, int len, __wsum sum);
-
-
-/*
- * the same as csum_partial_copy, but copies from user space.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-extern __wsum csum_partial_copy_from_user (const void *src,
-					     void *dst,
-					     int len, __wsum sum,
-					     int *csum_err);
-
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
-/*
- *	Fold a partial checksum
- */
-static inline __sum16 csum_fold (__wsum sum)
-{
-	unsigned int result;
-	/*
-			        %0		%1
-	      hsw %1, %0	H     L		L     H
-	      add %1, %0	H     L		H+L+C H+L
-	*/
-	asm ("hsw %1, %0; add %1, %0" : "=&r" (result) : "r" (sum));
-	return (__force __sum16)(~result >> 16);
-}
-
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __wsum
-csum_tcpudp_nofold (__be32 saddr, __be32 daddr,
-		    unsigned short len,
-		    unsigned short proto, __wsum sum)
-{
-	int __carry;
-	__asm__ ("add %2, %0;"
-		 "setf c, %1;"
-		 "add %1, %0;"
-		 "add %3, %0;"
-		 "setf c, %1;"
-		 "add %1, %0;"
-		 "add %4, %0;"
-		 "setf c, %1;"
-		 "add %1, %0"
-		 : "=&r" (sum), "=&r" (__carry)
-		 : "r" (daddr), "r" (saddr),
-		 "r" ((len + proto) << 8),
-		 "0" (sum));
-	return sum;
-}
-
-static inline __sum16
-csum_tcpudp_magic (__be32 saddr, __be32 daddr,
-		   unsigned short len,
-		   unsigned short proto, __wsum sum)
-{
-	return csum_fold (csum_tcpudp_nofold (saddr, daddr, len, proto, sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-extern __sum16 ip_compute_csum(const void *buff, int len);
-
-
-#endif /* __V850_CHECKSUM_H__ */
diff --git a/include/asm-v850/clinkage.h b/include/asm-v850/clinkage.h
deleted file mode 100644
index c389691d6f8..00000000000
--- a/include/asm-v850/clinkage.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * include/asm-v850/clinkage.h -- Macros to reflect C symbol-naming conventions
- *
- *  Copyright (C) 2001,02  NEC Corporatione
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CLINKAGE_H__
-#define __V850_CLINKAGE_H__
-
-#include <asm/macrology.h>
-#include <asm/asm.h>
-
-#define C_SYMBOL_NAME(name) 	macrology_paste(_, name)
-#define C_SYMBOL_STRING(name)	macrology_stringify(C_SYMBOL_NAME(name))
-#define C_ENTRY(name)		G_ENTRY(C_SYMBOL_NAME(name))
-#define C_DATA(name)		G_DATA(C_SYMBOL_NAME(name))
-#define C_END(name)		END(C_SYMBOL_NAME(name))
-
-#endif /* __V850_CLINKAGE_H__ */
diff --git a/include/asm-v850/cputime.h b/include/asm-v850/cputime.h
deleted file mode 100644
index 7c799c33b8a..00000000000
--- a/include/asm-v850/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_CPUTIME_H
-#define __V850_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __V850_CPUTIME_H */
diff --git a/include/asm-v850/current.h b/include/asm-v850/current.h
deleted file mode 100644
index 30aae567377..00000000000
--- a/include/asm-v850/current.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * include/asm-v850/current.h -- Current task
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CURRENT_H__
-#define __V850_CURRENT_H__
-
-#ifndef __ASSEMBLY__ /* <linux/thread_info.h> is not asm-safe.  */
-#include <linux/thread_info.h>
-#endif
-
-#include <asm/macrology.h>
-
-
-/* Register used to hold the current task pointer while in the kernel.
-   Any `call clobbered' register without a special meaning should be OK,
-   but check asm/v850/kernel/entry.S to be sure.  */
-#define CURRENT_TASK_REGNUM	16
-#define CURRENT_TASK 		macrology_paste (r, CURRENT_TASK_REGNUM)
-
-
-#ifdef __ASSEMBLY__
-
-/* Put a pointer to the current task structure into REG.  */
-#define GET_CURRENT_TASK(reg)						\
-	GET_CURRENT_THREAD(reg);					\
-	ld.w	TI_TASK[reg], reg
-
-#else /* !__ASSEMBLY__ */
-
-/* A pointer to the current task.  */
-register struct task_struct *current					\
-   __asm__ (macrology_stringify (CURRENT_TASK));
-
-#endif /* __ASSEMBLY__ */
-
-
-#endif /* _V850_CURRENT_H */
diff --git a/include/asm-v850/delay.h b/include/asm-v850/delay.h
deleted file mode 100644
index 6d028e6b235..00000000000
--- a/include/asm-v850/delay.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * include/asm-v850/delay.h -- Delay routines, using a pre-computed
- * 	"loops_per_second" value
- *
- *  Copyright (C) 2001,03  NEC Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *  Copyright (C) 1994 Hamish Macdonald
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#ifndef __V850_DELAY_H__
-#define __V850_DELAY_H__
-
-#include <asm/param.h>
-
-static inline void __delay(unsigned long loops)
-{
-	if (loops)
-		__asm__ __volatile__ ("1: add -1, %0; bnz 1b"
-				      : "=r" (loops) : "0" (loops));
-}
-
-/*
- * Use only for very small delays ( < 1 msec).  Should probably use a
- * lookup table, really, as the multiplications take much too long with
- * short delays.  This is a "reasonable" implementation, though (and the
- * first constant multiplications gets optimized away if the delay is
- * a constant)  
- */
-
-extern unsigned long loops_per_jiffy;
-
-static inline void udelay(unsigned long usecs)
-{
-	register unsigned long full_loops, part_loops;
-
-	full_loops = ((usecs * HZ) / 1000000) * loops_per_jiffy;
-	usecs %= (1000000 / HZ);
-	part_loops = (usecs * HZ * loops_per_jiffy) / 1000000;
-
-	__delay(full_loops + part_loops);
-}
-
-#endif /* __V850_DELAY_H__ */
diff --git a/include/asm-v850/device.h b/include/asm-v850/device.h
deleted file mode 100644
index d8f9872b0e2..00000000000
--- a/include/asm-v850/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/include/asm-v850/div64.h b/include/asm-v850/div64.h
deleted file mode 100644
index 6cd978cefb2..00000000000
--- a/include/asm-v850/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/include/asm-v850/dma-mapping.h b/include/asm-v850/dma-mapping.h
deleted file mode 100644
index 1cc42c603a1..00000000000
--- a/include/asm-v850/dma-mapping.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __V850_DMA_MAPPING_H__
-#define __V850_DMA_MAPPING_H__
-
-
-#ifdef CONFIG_PCI
-#include <asm-generic/dma-mapping.h>
-#else
-#include <asm-generic/dma-mapping-broken.h>
-#endif
-
-#endif /* __V850_DMA_MAPPING_H__ */
diff --git a/include/asm-v850/dma.h b/include/asm-v850/dma.h
deleted file mode 100644
index 2369849e2d0..00000000000
--- a/include/asm-v850/dma.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef __V850_DMA_H__
-#define __V850_DMA_H__
-
-/* What should this be?  */
-#define MAX_DMA_ADDRESS	0xFFFFFFFF
-
-/* reserve a DMA channel */
-extern int request_dma (unsigned int dmanr, const char * device_id);
-/* release it again */
-extern void free_dma (unsigned int dmanr);
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy    (0)
-#endif
-
-#endif /* __V850_DMA_H__ */
diff --git a/include/asm-v850/elf.h b/include/asm-v850/elf.h
deleted file mode 100644
index 28f5b176ff1..00000000000
--- a/include/asm-v850/elf.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifndef __V850_ELF_H__
-#define __V850_ELF_H__
-
-/*
- * ELF register definitions..
- */
-
-#include <asm/ptrace.h>
-#include <asm/user.h>
-#include <asm/byteorder.h>
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_fpu_struct elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x)  \
-  ((x)->e_machine == EM_V850 || (x)->e_machine == EM_CYGNUS_V850)
-
-
-/* v850 relocation types.  */
-#define R_V850_NONE		0
-#define R_V850_9_PCREL		1
-#define R_V850_22_PCREL		2
-#define R_V850_HI16_S		3
-#define R_V850_HI16		4
-#define R_V850_LO16		5
-#define R_V850_32		6
-#define R_V850_16		7
-#define R_V850_8		8
-#define R_V850_SDA_16_16_OFFSET	9	/* For ld.b, st.b, set1, clr1,
-					   not1, tst1, movea, movhi */
-#define R_V850_SDA_15_16_OFFSET	10	/* For ld.w, ld.h, ld.hu, st.w, st.h */
-#define R_V850_ZDA_16_16_OFFSET	11	/* For ld.b, st.b, set1, clr1,
-					   not1, tst1, movea, movhi */
-#define R_V850_ZDA_15_16_OFFSET	12	/* For ld.w, ld.h, ld.hu, st.w, st.h */
-#define R_V850_TDA_6_8_OFFSET	13	/* For sst.w, sld.w */
-#define R_V850_TDA_7_8_OFFSET	14	/* For sst.h, sld.h */
-#define R_V850_TDA_7_7_OFFSET	15	/* For sst.b, sld.b */
-#define R_V850_TDA_16_16_OFFSET	16	/* For set1, clr1, not1, tst1,
-					   movea, movhi */
-#define R_V850_NUM		17
-
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS	ELFCLASS32
-#ifdef __LITTLE_ENDIAN__
-#define ELF_DATA	ELFDATA2LSB
-#else
-#define ELF_DATA	ELFDATA2MSB
-#endif
-#define ELF_ARCH	EM_V850
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE	4096
-
-
-#define ELF_CORE_COPY_REGS(_dest,_regs)				\
-	memcpy((char *) &_dest, (char *) _regs,			\
-	       sizeof(struct pt_regs));
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this CPU supports.  This could be done in user space,
-   but it's not easy, and we've already done it here.  */
-
-#define ELF_HWCAP	(0)
-
-/* This yields a string that ld.so will use to load implementation
-   specific libraries for optimization.  This is more specific in
-   intent than poking at uname or /proc/cpuinfo.
-
-   For the moment, we have only optimizations for the Intel generations,
-   but that could change... */
-
-#define ELF_PLATFORM  (NULL)
-
-#define ELF_PLAT_INIT(_r, load_addr)					      \
-  do {									      \
-	 _r->gpr[0] =  _r->gpr[1] =  _r->gpr[2] =  _r->gpr[3] =		      \
-	 _r->gpr[4] =  _r->gpr[5] =  _r->gpr[6] =  _r->gpr[7] =		      \
-	 _r->gpr[8] =  _r->gpr[9] = _r->gpr[10] = _r->gpr[11] =		      \
-	_r->gpr[12] = _r->gpr[13] = _r->gpr[14] = _r->gpr[15] =		      \
-	_r->gpr[16] = _r->gpr[17] = _r->gpr[18] = _r->gpr[19] =		      \
-	_r->gpr[20] = _r->gpr[21] = _r->gpr[22] = _r->gpr[23] =		      \
-	_r->gpr[24] = _r->gpr[25] = _r->gpr[26] = _r->gpr[27] =		      \
-	_r->gpr[28] = _r->gpr[29] = _r->gpr[30] = _r->gpr[31] =		      \
-	0;								      \
-  } while (0)
-
-#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT)
-
-#endif /* __V850_ELF_H__ */
diff --git a/include/asm-v850/emergency-restart.h b/include/asm-v850/emergency-restart.h
deleted file mode 100644
index 108d8c48e42..00000000000
--- a/include/asm-v850/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/include/asm-v850/entry.h b/include/asm-v850/entry.h
deleted file mode 100644
index d9df8ac4858..00000000000
--- a/include/asm-v850/entry.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * include/asm-v850/entry.h -- Definitions used by low-level trap handlers
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_ENTRY_H__
-#define __V850_ENTRY_H__
-
-
-#include <asm/ptrace.h>
-#include <asm/machdep.h>
-
-
-/* These are special variables using by the kernel trap/interrupt code
-   to save registers in, at a time when there are no spare registers we
-   can use to do so, and we can't depend on the value of the stack
-   pointer.  This means that they must be within a signed 16-bit
-   displacement of 0x00000000.  */
-
-#define KERNEL_VAR_SPACE_ADDR	R0_RAM_ADDR
-
-#ifdef __ASSEMBLY__
-#define KERNEL_VAR(addr)	addr[r0]
-#else
-#define KERNEL_VAR(addr)	(*(volatile unsigned long *)(addr))
-#endif
-
-/* Kernel stack pointer, 4 bytes.  */
-#define KSP_ADDR		(KERNEL_VAR_SPACE_ADDR +  0)
-#define KSP			KERNEL_VAR (KSP_ADDR)
-/* 1 if in kernel-mode, 0 if in user mode, 1 byte.  */
-#define KM_ADDR 		(KERNEL_VAR_SPACE_ADDR +  4)
-#define KM			KERNEL_VAR (KM_ADDR)
-/* Temporary storage for interrupt handlers, 4 bytes.  */
-#define INT_SCRATCH_ADDR	(KERNEL_VAR_SPACE_ADDR +  8)
-#define INT_SCRATCH		KERNEL_VAR (INT_SCRATCH_ADDR)
-/* Where the stack-pointer is saved when jumping to various sorts of
-   interrupt handlers.  ENTRY_SP is used by everything except NMIs,
-   which have their own location.  Higher-priority NMIs can clobber the
-   value written by a lower priority NMI, since they can't be disabled,
-   but that's OK, because only NMI0 (the lowest-priority one) is allowed
-   to return.  */
-#define ENTRY_SP_ADDR		(KERNEL_VAR_SPACE_ADDR + 12)
-#define ENTRY_SP		KERNEL_VAR (ENTRY_SP_ADDR)
-#define NMI_ENTRY_SP_ADDR	(KERNEL_VAR_SPACE_ADDR + 16)
-#define NMI_ENTRY_SP		KERNEL_VAR (NMI_ENTRY_SP_ADDR)
-
-#ifdef CONFIG_RESET_GUARD
-/* Used to detect unexpected resets (since the v850 has no MMU, any call
-   through a null pointer will jump to the reset vector).  We detect
-   such resets by checking for a magic value, RESET_GUARD_ACTIVE, in
-   this location.  Properly resetting the machine stores zero there, so
-   it shouldn't trigger the guard; the power-on value is uncertain, but
-   it's unlikely to be RESET_GUARD_ACTIVE.  */
-#define RESET_GUARD_ADDR	(KERNEL_VAR_SPACE_ADDR + 28)
-#define RESET_GUARD		KERNEL_VAR (RESET_GUARD_ADDR)
-#define RESET_GUARD_ACTIVE	0xFAB4BEEF
-#endif /* CONFIG_RESET_GUARD */
-
-#ifdef CONFIG_V850E_HIGHRES_TIMER
-#define HIGHRES_TIMER_SLOW_TICKS_ADDR (KERNEL_VAR_SPACE_ADDR + 32)
-#define HIGHRES_TIMER_SLOW_TICKS     KERNEL_VAR (HIGHRES_TIMER_SLOW_TICKS_ADDR)
-#endif /* CONFIG_V850E_HIGHRES_TIMER */
-
-#ifndef __ASSEMBLY__
-
-#ifdef CONFIG_RESET_GUARD
-/* Turn off reset guard, so that resetting the machine works normally.
-   This should be called in the various machine_halt, etc., functions.  */
-static inline void disable_reset_guard (void)
-{
-	RESET_GUARD = 0;
-}
-#endif /* CONFIG_RESET_GUARD */
-
-#endif /* !__ASSEMBLY__ */
-
-
-/* A `state save frame' is a struct pt_regs preceded by some extra space
-   suitable for a function call stack frame.  */
-
-/* Amount of room on the stack reserved for arguments and to satisfy the
-   C calling conventions, in addition to the space used by the struct
-   pt_regs that actually holds saved values.  */
-#define STATE_SAVE_ARG_SPACE	(6*4) /* Up to six arguments.  */
-
-
-#ifdef __ASSEMBLY__
-
-/* The size of a state save frame.  */
-#define STATE_SAVE_SIZE		(PT_SIZE + STATE_SAVE_ARG_SPACE)
-
-#else /* !__ASSEMBLY__ */
-
-/* The size of a state save frame.  */
-#define STATE_SAVE_SIZE	       (sizeof (struct pt_regs) + STATE_SAVE_ARG_SPACE)
-
-#endif /* __ASSEMBLY__ */
-
-
-/* Offset of the struct pt_regs in a state save frame.  */
-#define STATE_SAVE_PT_OFFSET	STATE_SAVE_ARG_SPACE
-
-
-#endif /* __V850_ENTRY_H__ */
diff --git a/include/asm-v850/errno.h b/include/asm-v850/errno.h
deleted file mode 100644
index 31c91df0120..00000000000
--- a/include/asm-v850/errno.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_ERRNO_H__
-#define __V850_ERRNO_H__
-
-#include <asm-generic/errno.h>
-
-#endif /* __V850_ERRNO_H__ */
diff --git a/include/asm-v850/fb.h b/include/asm-v850/fb.h
deleted file mode 100644
index c7df3803099..00000000000
--- a/include/asm-v850/fb.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/fb.h>
-
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/include/asm-v850/fcntl.h b/include/asm-v850/fcntl.h
deleted file mode 100644
index 3af4d56776d..00000000000
--- a/include/asm-v850/fcntl.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __V850_FCNTL_H__
-#define __V850_FCNTL_H__
-
-#define O_DIRECTORY	040000	/* must be a directory */
-#define O_NOFOLLOW     0100000	/* don't follow links */
-#define O_DIRECT       0200000	/* direct disk access hint - currently ignored */
-#define O_LARGEFILE    0400000
-
-#include <asm-generic/fcntl.h>
-
-#endif /* __V850_FCNTL_H__ */
diff --git a/include/asm-v850/flat.h b/include/asm-v850/flat.h
deleted file mode 100644
index 17f0ea56661..00000000000
--- a/include/asm-v850/flat.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * include/asm-v850/flat.h -- uClinux flat-format executables
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_FLAT_H__
-#define __V850_FLAT_H__
-
-/* The amount by which a relocation can exceed the program image limits
-   without being regarded as an error.  On the v850, the relocations of
-   some base-pointers can be offset by 0x8000 (to allow better usage of the
-   space offered by 16-bit signed offsets -- in most cases the offsets used
-   with such a base-pointer will be negative).  */
-
-#define	flat_reloc_valid(reloc, size)	((reloc) <= (size + 0x8000))
-
-#define	flat_stack_align(sp)		/* nothing needed */
-#define	flat_argvp_envp_on_stack()	0
-#define	flat_old_ram_flag(flags)	(flags)
-#define	flat_set_persistent(relval, p)	0
-
-/* We store the type of relocation in the top 4 bits of the `relval.' */
-
-/* Convert a relocation entry into an address.  */
-static inline unsigned long
-flat_get_relocate_addr (unsigned long relval)
-{
-	return relval & 0x0fffffff; /* Mask out top 4-bits */
-}
-
-#define flat_v850_get_reloc_type(relval) ((relval) >> 28)
-
-#define FLAT_V850_R_32		0 /* Normal 32-bit reloc */
-#define FLAT_V850_R_HI16S_LO15	1 /* High 16-bits + signed 15-bit low field */
-#define FLAT_V850_R_HI16S_LO16	2 /* High 16-bits + signed 16-bit low field */
-
-/* Extract the address to be relocated from the symbol reference at RP;
-   RELVAL is the raw relocation-table entry from which RP is derived.
-   For the v850, RP should always be half-word aligned.  */
-static inline unsigned long flat_get_addr_from_rp (unsigned long *rp,
-						   unsigned long relval,
-						   unsigned long flags,
-						   unsigned long *persistent)
-{
-	short *srp = (short *)rp;
-
-	switch (flat_v850_get_reloc_type (relval))
-	{
-	case FLAT_V850_R_32:
-		/* Simple 32-bit address.  */
-		return srp[0] | (srp[1] << 16);
-
-	case FLAT_V850_R_HI16S_LO16:
-		/* The high and low halves of the address are in the 16
-		   bits at RP, and the 2nd word of the 32-bit instruction
-		   following that, respectively.  The low half is _signed_
-		   so we have to sign-extend it and add it to the upper
-		   half instead of simply or-ing them together.
-
-		   Unlike most relocated address, this one is stored in
-		   native (little-endian) byte-order to avoid problems with
-		   trashing the low-order bit, so we have to convert to
-		   network-byte-order before returning, as that's what the
-		   caller expects.  */
-		return htonl ((srp[0] << 16) + srp[2]);
-
-	case FLAT_V850_R_HI16S_LO15:
-		/* The high and low halves of the address are in the 16
-		   bits at RP, and the upper 15 bits of the 2nd word of the
-		   32-bit instruction following that, respectively.  The
-		   low half is _signed_ so we have to sign-extend it and
-		   add it to the upper half instead of simply or-ing them
-		   together.  The lowest bit is always zero.
-
-		   Unlike most relocated address, this one is stored in
-		   native (little-endian) byte-order to avoid problems with
-		   trashing the low-order bit, so we have to convert to
-		   network-byte-order before returning, as that's what the
-		   caller expects.  */
-		return htonl ((srp[0] << 16) + (srp[2] & ~0x1));
-
-	default:
-		return ~0;	/* bogus value */
-	}
-}
-
-/* Insert the address ADDR into the symbol reference at RP;
-   RELVAL is the raw relocation-table entry from which RP is derived.
-   For the v850, RP should always be half-word aligned.  */
-static inline void flat_put_addr_at_rp (unsigned long *rp, unsigned long addr,
-					unsigned long relval)
-{
-	short *srp = (short *)rp;
-
-	switch (flat_v850_get_reloc_type (relval)) {
-	case FLAT_V850_R_32:
-		/* Simple 32-bit address.  */
-		srp[0] = addr & 0xFFFF;
-		srp[1] = (addr >> 16);
-		break;
-
-	case FLAT_V850_R_HI16S_LO16:
-		/* The high and low halves of the address are in the 16
-		   bits at RP, and the 2nd word of the 32-bit instruction
-		   following that, respectively.  The low half is _signed_
-		   so we must carry its sign bit to the upper half before
-		   writing the upper half.  */
-		srp[0] = (addr >> 16) + ((addr >> 15) & 0x1);
-		srp[2] = addr & 0xFFFF;
-		break;
-
-	case FLAT_V850_R_HI16S_LO15:
-		/* The high and low halves of the address are in the 16
-		   bits at RP, and the upper 15 bits of the 2nd word of the
-		   32-bit instruction following that, respectively.  The
-		   low half is _signed_ so we must carry its sign bit to
-		   the upper half before writing the upper half.  The
-		   lowest bit we preserve from the existing instruction.  */
-		srp[0] = (addr >> 16) + ((addr >> 15) & 0x1);
-		srp[2] = (addr & 0xFFFE) | (srp[2] & 0x1);
-		break;
-	}
-}
-
-#endif /* __V850_FLAT_H__ */
diff --git a/include/asm-v850/fpga85e2c.h b/include/asm-v850/fpga85e2c.h
deleted file mode 100644
index 23aae666c71..00000000000
--- a/include/asm-v850/fpga85e2c.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * include/asm-v850/fpga85e2c.h -- Machine-dependent defs for
- *	FPGA implementation of V850E2/NA85E2C
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_FPGA85E2C_H__
-#define __V850_FPGA85E2C_H__
-
-#include <asm/v850e2.h>
-#include <asm/clinkage.h>
-
-
-#define CPU_MODEL	"v850e2/fpga85e2c"
-#define CPU_MODEL_LONG	"NEC V850E2/NA85E2C"
-#define PLATFORM	"fpga85e2c"
-#define PLATFORM_LONG	"NA85E2C FPGA implementation"
-
-
-/* `external ram'.  */
-#define ERAM_ADDR		0
-#define ERAM_SIZE		0x00100000 /* 1MB */
-
-
-/* FPGA specific control registers.  */
-
-/* Writing a non-zero value to FLGREG(0) will signal the controlling CPU
-   to stop execution.  */
-#define FLGREG_ADDR(n)		(0xFFE80100 + 2*(n))
-#define FLGREG(n)		(*(volatile unsigned char *)FLGREG_ADDR (n))
-#define FLGREG_NUM		2
-
-#define CSDEV_ADDR(n)		(0xFFE80110 + 2*(n))
-#define CSDEV(n)		(*(volatile unsigned char *)CSDEV_ADDR (n))
-
-
-/* Timer interrupts 0-3, interrupt at intervals from CLK/4096 to CLK/16384.  */
-#define IRQ_RPU(n)		(60 + (n))
-#define IRQ_RPU_NUM		4
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS		64
-
-
-/* General-purpose timer.  */
-/* control/status register (can only be read/written via bit insns) */
-#define RPU_GTMC_ADDR		0xFFFFFB00
-#define RPU_GTMC		(*(volatile unsigned char *)RPU_GTMC_ADDR)
-#define RPU_GTMC_CE_BIT		7 /* clock enable (control) */
-#define RPU_GTMC_OV_BIT		6 /* overflow (status) */
-#define RPU_GTMC_CLK_BIT	1 /* 0 = .5 MHz CLK, 1 = 1 Mhz (control) */
-/* 32-bit count (8 least-significant bits are always zero).  */
-#define RPU_GTM_ADDR		0xFFFFFB28
-#define RPU_GTM			(*(volatile unsigned long *)RPU_GTMC_ADDR)
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET		ERAM_ADDR /* minimum allocatable address */
-
-
-/* For <asm/entry.h> */
-/* `R0 RAM', used for a few miscellaneous variables that must be accessible
-   using a load instruction relative to R0.  The FPGA implementation
-   actually has no on-chip RAM, so we use part of main ram just after the
-   interrupt vectors.  */
-#ifdef __ASSEMBLY__
-#define R0_RAM_ADDR		lo(C_SYMBOL_NAME(_r0_ram))
-#else
-extern char _r0_ram;
-#define R0_RAM_ADDR		((unsigned long)&_r0_ram);
-#endif
-
-
-#endif /* __V850_FPGA85E2C_H__ */
diff --git a/include/asm-v850/futex.h b/include/asm-v850/futex.h
deleted file mode 100644
index 6a332a9f099..00000000000
--- a/include/asm-v850/futex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif
diff --git a/include/asm-v850/gbus_int.h b/include/asm-v850/gbus_int.h
deleted file mode 100644
index 0c4bce753c7..00000000000
--- a/include/asm-v850/gbus_int.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * include/asm-v850/gbus_int.h -- Midas labs GBUS interrupt support
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_GBUS_INT_H__
-#define __V850_GBUS_INT_H__
-
-
-/* The GBUS interrupt interface has 32 interrupts shared among 4
-   processor interrupts.  The 32 GBUS interrupts are divided into two
-   sets of 16 each, for allocating among control registers, etc (there
-   are two of each control register, with bits 0-15 controlling an
-   interrupt each).  */
-
-/* The GBUS interrupts themselves.  */
-#define IRQ_GBUS_INT(n)		(GBUS_INT_BASE_IRQ + (n))
-#define IRQ_GBUS_INT_NUM	32
-
-/* Control registers.  */
-#define GBUS_INT_STATUS_ADDR(w)	(GBUS_INT_BASE_ADDR + (w)*0x40)
-#define GBUS_INT_STATUS(w)	(*(volatile u16 *)GBUS_INT_STATUS_ADDR(w))
-#define GBUS_INT_CLEAR_ADDR(w)	(GBUS_INT_BASE_ADDR + 0x10 + (w)*0x40)
-#define GBUS_INT_CLEAR(w)	(*(volatile u16 *)GBUS_INT_CLEAR_ADDR(w))
-#define GBUS_INT_EDGE_ADDR(w)	(GBUS_INT_BASE_ADDR + 0x20 + (w)*0x40)
-#define GBUS_INT_EDGE(w)	(*(volatile u16 *)GBUS_INT_EDGE_ADDR(w))
-#define GBUS_INT_POLARITY_ADDR(w)	(GBUS_INT_BASE_ADDR + 0x30 + (w)*0x40)
-#define GBUS_INT_POLARITY(w)	(*(volatile u16 *)GBUS_INT_POLARITY_ADDR(w))
-/* This allows enabling interrupt bits in word W for interrupt GINTn.  */
-#define GBUS_INT_ENABLE_ADDR(w, n) \
-   (GBUS_INT_BASE_ADDR + 0x100 + (w)*0x10 + (n)*0x20)
-#define GBUS_INT_ENABLE(w, n)	(*(volatile u16 *)GBUS_INT_ENABLE_ADDR(w, n))
-
-/* Mapping between kernel interrupt numbers and hardware control regs/bits.  */
-#define GBUS_INT_BITS_PER_WORD	16
-#define GBUS_INT_NUM_WORDS	(IRQ_GBUS_INT_NUM / GBUS_INT_BITS_PER_WORD)
-#define GBUS_INT_IRQ_WORD(irq)	(((irq) - GBUS_INT_BASE_IRQ) >> 4)
-#define GBUS_INT_IRQ_BIT(irq)	(((irq) - GBUS_INT_BASE_IRQ) & 0xF)
-#define GBUS_INT_IRQ_MASK(irq)	(1 << GBUS_INT_IRQ_BIT(irq))
-
-
-/* Possible priorities for GBUS interrupts.  */
-#define GBUS_INT_PRIORITY_HIGH		2
-#define GBUS_INT_PRIORITY_MEDIUM	4
-#define GBUS_INT_PRIORITY_LOW		6
-
-
-#ifndef __ASSEMBLY__
-
-/* Enable interrupt handling for interrupt IRQ.  */
-extern void gbus_int_enable_irq (unsigned irq);
-/* Disable interrupt handling for interrupt IRQ.  Note that any
-   interrupts received while disabled will be delivered once the
-   interrupt is enabled again, unless they are explicitly cleared using
-   `gbus_int_clear_pending_irq'.  */
-extern void gbus_int_disable_irq (unsigned irq);
-/* Return true if interrupt handling for interrupt IRQ is enabled.  */
-extern int gbus_int_irq_enabled (unsigned irq);
-/* Disable all GBUS irqs.  */
-extern void gbus_int_disable_irqs (void);
-/* Clear any pending interrupts for IRQ.  */
-extern void gbus_int_clear_pending_irq (unsigned irq);
-/* Return true if interrupt IRQ is pending (but disabled).  */
-extern int gbus_int_irq_pending (unsigned irq);
-
-
-struct gbus_int_irq_init {
-	const char *name;	/* name of interrupt type */
-
-	/* Range of kernel irq numbers for this type:
-	   BASE, BASE+INTERVAL, ..., BASE+INTERVAL*NUM  */
-	unsigned base, num, interval;
-
-	unsigned priority;	/* interrupt priority to assign */
-};
-struct hw_interrupt_type;	/* fwd decl */
-
-/* Initialize HW_IRQ_TYPES for GBUS irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-extern void gbus_int_init_irq_types (struct gbus_int_irq_init *inits,
-				     struct hw_interrupt_type *hw_irq_types);
-
-/* Initialize GBUS interrupts.  */
-extern void gbus_int_init_irqs (void);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_GBUS_INT_H__ */
diff --git a/include/asm-v850/hardirq.h b/include/asm-v850/hardirq.h
deleted file mode 100644
index 04e20127c5a..00000000000
--- a/include/asm-v850/hardirq.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef __V850_HARDIRQ_H__
-#define __V850_HARDIRQ_H__
-
-#include <linux/threads.h>
-#include <linux/cache.h>
-
-#include <asm/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
-void ack_bad_irq(unsigned int irq);
-
-#endif /* __V850_HARDIRQ_H__ */
diff --git a/include/asm-v850/highres_timer.h b/include/asm-v850/highres_timer.h
deleted file mode 100644
index 486fb49ceab..00000000000
--- a/include/asm-v850/highres_timer.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * include/asm-v850/highres_timer.h -- High resolution timing routines
- *
- *  Copyright (C) 2001,03  NEC Electronics Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_HIGHRES_TIMER_H__
-#define __V850_HIGHRES_TIMER_H__
-
-#ifndef __ASSEMBLY__
-#include <linux/time.h>
-#endif
-
-#include <asm/entry.h>
-
-
-/* Frequency of the `slow ticks' (one tick each time the fast-tick
-   counter overflows).  */
-#define HIGHRES_TIMER_SLOW_TICK_RATE	25
-
-/* Which timer in the V850E `Timer D' we use.  */
-#define HIGHRES_TIMER_TIMER_D_UNIT	3
-
-
-#ifndef __ASSEMBLY__
-
-extern void highres_timer_start (void), highres_timer_stop (void);
-extern void highres_timer_reset (void);
-extern void highres_timer_read_ticks (u32 *slow_ticks, u32 *fast_ticks);
-extern void highres_timer_ticks_to_timeval (u32 slow_ticks, u32 fast_ticks,
-					    struct timeval *tv);
-extern void highres_timer_read (struct timeval *tv);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_HIGHRES_TIMER_H__ */
diff --git a/include/asm-v850/hw_irq.h b/include/asm-v850/hw_irq.h
deleted file mode 100644
index 043e94bb6bd..00000000000
--- a/include/asm-v850/hw_irq.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __V850_HW_IRQ_H__
-#define __V850_HW_IRQ_H__
-
-#endif /* __V850_HW_IRQ_H__ */
diff --git a/include/asm-v850/io.h b/include/asm-v850/io.h
deleted file mode 100644
index cdad251fba9..00000000000
--- a/include/asm-v850/io.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * include/asm-v850/io.h -- Misc I/O operations
- *
- *  Copyright (C) 2001,02,03,04,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,04,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_IO_H__
-#define __V850_IO_H__
-
-#define IO_SPACE_LIMIT 0xFFFFFFFF
-
-#define readb(addr) \
-  ({ unsigned char __v = (*(volatile unsigned char *) (addr)); __v; })
-#define readw(addr) \
-  ({ unsigned short __v = (*(volatile unsigned short *) (addr)); __v; })
-#define readl(addr) \
-  ({ unsigned long __v = (*(volatile unsigned long *) (addr)); __v; })
-
-#define readb_relaxed(a) readb(a)
-#define readw_relaxed(a) readw(a)
-#define readl_relaxed(a) readl(a)
-
-#define writeb(val, addr) \
-  (void)((*(volatile unsigned char *) (addr)) = (val))
-#define writew(val, addr) \
-  (void)((*(volatile unsigned short *) (addr)) = (val))
-#define writel(val, addr) \
-  (void)((*(volatile unsigned int *) (addr)) = (val))
-
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-
-#define inb(addr)	readb (addr)
-#define inw(addr)	readw (addr)
-#define inl(addr)	readl (addr)
-#define outb(x, addr)	((void) writeb (x, addr))
-#define outw(x, addr)	((void) writew (x, addr))
-#define outl(x, addr)	((void) writel (x, addr))
-
-#define inb_p(port)		inb((port))
-#define outb_p(val, port)	outb((val), (port))
-#define inw_p(port)		inw((port))
-#define outw_p(val, port)	outw((val), (port))
-#define inl_p(port)		inl((port))
-#define outl_p(val, port)	outl((val), (port))
-
-static inline void insb (unsigned long port, void *dst, unsigned long count)
-{
-	unsigned char *p = dst;
-	while (count--)
-		*p++ = inb (port);
-}
-static inline void insw (unsigned long port, void *dst, unsigned long count)
-{
-	unsigned short *p = dst;
-	while (count--)
-		*p++ = inw (port);
-}
-static inline void insl (unsigned long port, void *dst, unsigned long count)
-{
-	unsigned long *p = dst;
-	while (count--)
-		*p++ = inl (port);
-}
-
-static inline void
-outsb (unsigned long port, const void *src, unsigned long count)
-{
-	const unsigned char *p = src;
-	while (count--)
-		outb (*p++, port);
-}
-static inline void
-outsw (unsigned long port, const void *src, unsigned long count)
-{
-	const unsigned short *p = src;
-	while (count--)
-		outw (*p++, port);
-}
-static inline void
-outsl (unsigned long port, const void *src, unsigned long count)
-{
-	const unsigned long *p = src;
-	while (count--)
-		outl (*p++, port);
-}
-
-
-/* Some places try to pass in an loff_t for PHYSADDR (?!), so we cast it to
-   long before casting it to a pointer to avoid compiler warnings.  */
-#define ioremap(physaddr, size)	((void __iomem *)(unsigned long)(physaddr))
-#define iounmap(addr)		((void)0)
-
-#define ioremap_nocache(physaddr, size)		ioremap (physaddr, size)
-#define ioremap_writethrough(physaddr, size)	ioremap (physaddr, size)
-#define ioremap_fullcache(physaddr, size)	ioremap (physaddr, size)
-
-#define ioread8(addr)		readb (addr)
-#define ioread16(addr)		readw (addr)
-#define ioread32(addr)		readl (addr)
-#define iowrite8(val, addr)	writeb (val, addr)
-#define iowrite16(val, addr)	writew (val, addr)
-#define iowrite32(val, addr)	writel (val, addr)
-
-#define mmiowb()
-
-#define page_to_phys(page)      ((page - mem_map) << PAGE_SHIFT)
-#if 0
-/* This is really stupid; don't define it.  */
-#define page_to_bus(page)       page_to_phys (page)
-#endif
-
-/* Conversion between virtual and physical mappings.  */
-#define phys_to_virt(addr)	((void *)__phys_to_virt (addr))
-#define virt_to_phys(addr)	((unsigned long)__virt_to_phys (addr))
-
-#define memcpy_fromio(dst, src, len) memcpy (dst, (void *)src, len)
-#define memcpy_toio(dst, src, len) memcpy ((void *)dst, src, len)
-
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)	__va(p)
-
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p)	p
-
-#endif /* __V850_IO_H__ */
diff --git a/include/asm-v850/ioctl.h b/include/asm-v850/ioctl.h
deleted file mode 100644
index b279fe06dfe..00000000000
--- a/include/asm-v850/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/include/asm-v850/ioctls.h b/include/asm-v850/ioctls.h
deleted file mode 100644
index 5313abd5f38..00000000000
--- a/include/asm-v850/ioctls.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef __V850_IOCTLS_H__
-#define __V850_IOCTLS_H__
-
-#include <asm/ioctl.h>
-
-/* 0x54 is just a magic number to make these relatively unique ('T') */
-
-#define TCGETS		0x5401
-#define TCSETS		0x5402
-#define TCSETSW		0x5403
-#define TCSETSF		0x5404
-#define TCGETA		0x5405
-#define TCSETA		0x5406
-#define TCSETAW		0x5407
-#define TCSETAF		0x5408
-#define TCSBRK		0x5409
-#define TCXONC		0x540A
-#define TCFLSH		0x540B
-#define TIOCEXCL	0x540C
-#define TIOCNXCL	0x540D
-#define TIOCSCTTY	0x540E
-#define TIOCGPGRP	0x540F
-#define TIOCSPGRP	0x5410
-#define TIOCOUTQ	0x5411
-#define TIOCSTI		0x5412
-#define TIOCGWINSZ	0x5413
-#define TIOCSWINSZ	0x5414
-#define TIOCMGET	0x5415
-#define TIOCMBIS	0x5416
-#define TIOCMBIC	0x5417
-#define TIOCMSET	0x5418
-#define TIOCGSOFTCAR	0x5419
-#define TIOCSSOFTCAR	0x541A
-#define FIONREAD	0x541B
-#define TIOCINQ		FIONREAD
-#define TIOCLINUX	0x541C
-#define TIOCCONS	0x541D
-#define TIOCGSERIAL	0x541E
-#define TIOCSSERIAL	0x541F
-#define TIOCPKT		0x5420
-#define FIONBIO		0x5421
-#define TIOCNOTTY	0x5422
-#define TIOCSETD	0x5423
-#define TIOCGETD	0x5424
-#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
-#define TIOCSBRK	0x5427  /* BSD compatibility */
-#define TIOCCBRK	0x5428  /* BSD compatibility */
-#define TIOCGSID	0x5429  /* Return the session ID of FD */
-#define TCGETS2		_IOR('T',0x2A, struct termios2)
-#define TCSETS2		_IOW('T',0x2B, struct termios2)
-#define TCSETSW2	_IOW('T',0x2C, struct termios2)
-#define TCSETSF2	_IOW('T',0x2D, struct termios2)
-#define TIOCGPTN	_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
-#define TIOCSPTLCK	_IOW('T',0x31, int)  /* Lock/unlock Pty */
-
-#define FIONCLEX	0x5450  /* these numbers need to be adjusted. */
-#define FIOCLEX		0x5451
-#define FIOASYNC	0x5452
-#define TIOCSERCONFIG	0x5453
-#define TIOCSERGWILD	0x5454
-#define TIOCSERSWILD	0x5455
-#define TIOCGLCKTRMIOS	0x5456
-#define TIOCSLCKTRMIOS	0x5457
-#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
-#define TIOCSERGETLSR   0x5459 /* Get line status register */
-#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
-#define TIOCSERSETMULTI 0x545B /* Set multiport config */
-
-#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
-#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
-#define FIOQSIZE	0x545E
-
-/* Used for packet mode */
-#define TIOCPKT_DATA		 0
-#define TIOCPKT_FLUSHREAD	 1
-#define TIOCPKT_FLUSHWRITE	 2
-#define TIOCPKT_STOP		 4
-#define TIOCPKT_START		 8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-
-#define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
-
-#endif /* __V850_IOCTLS_H__ */
diff --git a/include/asm-v850/ipcbuf.h b/include/asm-v850/ipcbuf.h
deleted file mode 100644
index d8cbe9886d9..00000000000
--- a/include/asm-v850/ipcbuf.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef __V850E_IPCBUF_H__
-#define __V850E_IPCBUF_H__
-
-/*
- * The user_ipc_perm structure for v850e architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 32-bit mode_t and seq
- * - 2 miscellaneous 32-bit values
- */
-
-struct ipc64_perm
-{
-	__kernel_key_t		key;
-	__kernel_uid32_t	uid;
-	__kernel_gid32_t	gid;
-	__kernel_uid32_t	cuid;
-	__kernel_gid32_t	cgid;
-	__kernel_mode_t		mode;
-	unsigned short		__pad1;
-	unsigned short		seq;
-	unsigned short		__pad2;
-	unsigned long		__unused1;
-	unsigned long		__unused2;
-};
-
-#endif /* __V850E_IPCBUF_H__ */
diff --git a/include/asm-v850/irq.h b/include/asm-v850/irq.h
deleted file mode 100644
index 7d0d4cd1ce5..00000000000
--- a/include/asm-v850/irq.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * include/asm-v850/irq.h -- Machine interrupt handling
- *
- *  Copyright (C) 2001,02,04  NEC Electronics Corporation
- *  Copyright (C) 2001,02,04  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_IRQ_H__
-#define __V850_IRQ_H__
-
-#include <asm/machdep.h>
-
-/* Default NUM_MACH_IRQS.  */
-#ifndef NUM_MACH_IRQS
-#define NUM_MACH_IRQS	NUM_CPU_IRQS
-#endif
-
-/* NMIs have IRQ numbers from FIRST_NMI to FIRST_NMI+NUM_NMIS-1.  */
-#define FIRST_NMI	NUM_MACH_IRQS
-#define IRQ_NMI(n)	(FIRST_NMI + (n))
-/* v850 processors have 3 non-maskable interrupts.  */
-#define NUM_NMIS	3
-
-/* Includes both maskable and non-maskable irqs.  */
-#define NR_IRQS		(NUM_MACH_IRQS + NUM_NMIS)
-
-
-#ifndef __ASSEMBLY__
-
-struct pt_regs;
-struct hw_interrupt_type;
-struct irqaction;
-
-#define irq_canonicalize(irq)	(irq)
-
-/* Initialize irq handling for IRQs.
-   BASE_IRQ, BASE_IRQ+INTERVAL, ..., BASE_IRQ+NUM*INTERVAL
-   to IRQ_TYPE.  An IRQ_TYPE of 0 means to use a generic interrupt type.  */
-extern void
-init_irq_handlers (int base_irq, int num, int interval,
-		   struct hw_interrupt_type *irq_type);
-
-/* Handle interrupt IRQ.  REGS are the registers at the time of ther
-   interrupt.  */
-extern unsigned int handle_irq (int irq, struct pt_regs *regs);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __V850_IRQ_H__ */
diff --git a/include/asm-v850/irq_regs.h b/include/asm-v850/irq_regs.h
deleted file mode 100644
index 3dd9c0b7027..00000000000
--- a/include/asm-v850/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/include/asm-v850/kdebug.h b/include/asm-v850/kdebug.h
deleted file mode 100644
index 6ece1b03766..00000000000
--- a/include/asm-v850/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/include/asm-v850/kmap_types.h b/include/asm-v850/kmap_types.h
deleted file mode 100644
index 3288976b161..00000000000
--- a/include/asm-v850/kmap_types.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __V850_KMAP_TYPES_H__
-#define __V850_KMAP_TYPES_H__
-
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_TYPE_NR
-};
-
-#endif /* __V850_KMAP_TYPES_H__ */
diff --git a/include/asm-v850/kvm.h b/include/asm-v850/kvm.h
deleted file mode 100644
index 3f729b79feb..00000000000
--- a/include/asm-v850/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_V850_H
-#define __LINUX_KVM_V850_H
-
-/* v850 does not support KVM */
-
-#endif
diff --git a/include/asm-v850/linkage.h b/include/asm-v850/linkage.h
deleted file mode 100644
index b6185d3cfe6..00000000000
--- a/include/asm-v850/linkage.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-#ifdef __ASSEMBLY__
-#include <asm/asm.h>
-#endif
-
-#endif
diff --git a/include/asm-v850/local.h b/include/asm-v850/local.h
deleted file mode 100644
index 705148abe27..00000000000
--- a/include/asm-v850/local.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_LOCAL_H__
-#define __V850_LOCAL_H__
-
-#include <asm-generic/local.h>
-
-#endif /* __V850_LOCAL_H__ */
diff --git a/include/asm-v850/ma.h b/include/asm-v850/ma.h
deleted file mode 100644
index 89e66473a17..00000000000
--- a/include/asm-v850/ma.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * include/asm-v850/ma.h -- V850E/MA series of cpu chips
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MA_H__
-#define __V850_MA_H__
-
-/* The MA series uses the V850E cpu core.  */
-#include <asm/v850e.h>
-
-
-/* For <asm/entry.h> */
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  The amount
-   varies between chip models, but there's always at least 4K, and it
-   should always start at FFFFC000.  */
-#define R0_RAM_ADDR			0xFFFFC000
-
-
-/* MA series UART details.  */
-#define V850E_UART_BASE_FREQ		CPU_CLOCK_FREQ
-
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	ma_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void ma_uart_pre_configure (unsigned chan,
-				   unsigned cflags, unsigned baud);
-#endif
-
-
-/* MA series timer C details.  */
-#define V850E_TIMER_C_BASE_ADDR		0xFFFFF600
-
-
-/* MA series timer D details.  */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF540
-#define V850E_TIMER_D_TMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x2)
-#define V850E_TIMER_D_TMCD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x4)
-
-#define V850E_TIMER_D_BASE_FREQ		CPU_CLOCK_FREQ
-
-
-/* Port 0 */
-/* Direct I/O.  Bits 0-7 are pins P00-P07.  */
-#define MA_PORT0_IO_ADDR		0xFFFFF400
-#define MA_PORT0_IO			(*(volatile u8 *)MA_PORT0_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define MA_PORT0_PM_ADDR		0xFFFFF420
-#define MA_PORT0_PM			(*(volatile u8 *)MA_PORT0_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define MA_PORT0_PMC_ADDR		0xFFFFF440
-#define MA_PORT0_PMC			(*(volatile u8 *)MA_PORT0_PMC_ADDR)
-/* Port function control (for P04-P07, 0 = IRQ, 1 = DMARQ).  */
-#define MA_PORT0_PFC_ADDR		0xFFFFF460
-#define MA_PORT0_PFC			(*(volatile u8 *)MA_PORT0_PFC_ADDR)
-
-/* Port 1 */
-/* Direct I/O.  Bits 0-3 are pins P10-P13.  */
-#define MA_PORT1_IO_ADDR		0xFFFFF402
-#define MA_PORT1_IO			(*(volatile u8 *)MA_PORT1_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define MA_PORT1_PM_ADDR		0xFFFFF420
-#define MA_PORT1_PM			(*(volatile u8 *)MA_PORT1_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define MA_PORT1_PMC_ADDR		0xFFFFF442
-#define MA_PORT1_PMC			(*(volatile u8 *)MA_PORT1_PMC_ADDR)
-
-/* Port 4 */
-/* Direct I/O.  Bits 0-5 are pins P40-P45.  */
-#define MA_PORT4_IO_ADDR		0xFFFFF408
-#define MA_PORT4_IO			(*(volatile u8 *)MA_PORT4_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define MA_PORT4_PM_ADDR		0xFFFFF428
-#define MA_PORT4_PM			(*(volatile u8 *)MA_PORT4_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define MA_PORT4_PMC_ADDR		0xFFFFF448
-#define MA_PORT4_PMC			(*(volatile u8 *)MA_PORT4_PMC_ADDR)
-/* Port function control (for serial interfaces, 0 = CSI, 1 = UART).  */
-#define MA_PORT4_PFC_ADDR		0xFFFFF468
-#define MA_PORT4_PFC			(*(volatile u8 *)MA_PORT4_PFC_ADDR)
-
-
-#ifndef __ASSEMBLY__
-
-/* Initialize MA chip interrupts.  */
-extern void ma_init_irqs (void);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_MA_H__ */
diff --git a/include/asm-v850/ma1.h b/include/asm-v850/ma1.h
deleted file mode 100644
index ede1f1de2b7..00000000000
--- a/include/asm-v850/ma1.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * include/asm-v850/ma1.h -- V850E/MA1 cpu chip
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MA1_H__
-#define __V850_MA1_H__
-
-/* Inherit more generic details from MA series.  */
-#include <asm/ma.h>
-
-
-#define CPU_MODEL	"v850e/ma1"
-#define CPU_MODEL_LONG	"NEC V850E/MA1"
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).  */
-#define IRQ_INTOV(n)	(n)	/* 0-3 */
-#define IRQ_INTOV_NUM	4
-#define IRQ_INTP(n)	(0x4  + (n)) /* Pnnn (pin) interrupts */
-#define IRQ_INTP_NUM	24
-#define IRQ_INTCMD(n)	(0x1c + (n)) /* interval timer interrupts 0-3 */
-#define IRQ_INTCMD_NUM	4
-#define IRQ_INTDMA(n)	(0x20 + (n)) /* DMA interrupts 0-3 */
-#define IRQ_INTDMA_NUM	4
-#define IRQ_INTCSI(n)	(0x24 + (n)*4)/* CSI 0-2 transmit/receive completion */
-#define IRQ_INTCSI_NUM	3
-#define IRQ_INTSER(n)	(0x25 + (n)*4) /* UART 0-2 reception error */
-#define IRQ_INTSER_NUM	3
-#define IRQ_INTSR(n)	(0x26 + (n)*4) /* UART 0-2 reception completion */
-#define IRQ_INTSR_NUM	3
-#define IRQ_INTST(n)	(0x27 + (n)*4) /* UART 0-2 transmission completion */
-#define IRQ_INTST_NUM	3
-
-#define NUM_CPU_IRQS	0x30
-
-
-/* The MA1 has a UART with 3 channels.  */
-#define V850E_UART_NUM_CHANNELS	3
-
-
-#endif /* __V850_MA1_H__ */
diff --git a/include/asm-v850/machdep.h b/include/asm-v850/machdep.h
deleted file mode 100644
index f1e3b8b9150..00000000000
--- a/include/asm-v850/machdep.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * include/asm-v850/machdep.h -- Machine-dependent definitions
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MACHDEP_H__
-#define __V850_MACHDEP_H__
-
-
-/* chips */
-#ifdef CONFIG_V850E_MA1
-#include <asm/ma1.h>
-#endif
-#ifdef CONFIG_V850E_ME2
-#include <asm/me2.h>
-#endif
-#ifdef CONFIG_V850E_TEG
-#include <asm/teg.h>
-#endif
-
-/* These are both chips _and_ platforms, so put them in the middle... */
-#ifdef CONFIG_V850E2_ANNA
-#include <asm/anna.h>
-#endif
-#ifdef CONFIG_V850E_AS85EP1
-#include <asm/as85ep1.h>
-#endif
-
-/* platforms */
-#ifdef CONFIG_RTE_CB_MA1
-#include <asm/rte_ma1_cb.h>
-#endif
-#ifdef CONFIG_RTE_CB_ME2
-#include <asm/rte_me2_cb.h>
-#endif
-#ifdef CONFIG_RTE_CB_NB85E
-#include <asm/rte_nb85e_cb.h>
-#endif
-#ifdef CONFIG_V850E_SIM
-#include <asm/sim.h>
-#endif
-#ifdef CONFIG_V850E2_SIM85E2C
-#include <asm/sim85e2c.h>
-#endif
-#ifdef CONFIG_V850E2_SIM85E2S
-#include <asm/sim85e2s.h>
-#endif
-#ifdef CONFIG_V850E2_FPGA85E2C
-#include <asm/fpga85e2c.h>
-#endif
-
-#endif /* __V850_MACHDEP_H__ */
diff --git a/include/asm-v850/macrology.h b/include/asm-v850/macrology.h
deleted file mode 100644
index 37abf874832..00000000000
--- a/include/asm-v850/macrology.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * include/asm-v850/macrology.h -- Various useful CPP macros
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#define macrology_paste(arg1, arg2)	macrology_paste_1(arg1, arg2)
-#define macrology_paste_1(arg1, arg2)	arg1 ## arg2
-#define macrology_stringify(sym)	macrology_stringify_1(sym)
-#define macrology_stringify_1(sym)	#sym
diff --git a/include/asm-v850/me2.h b/include/asm-v850/me2.h
deleted file mode 100644
index ac7c9ce0bdc..00000000000
--- a/include/asm-v850/me2.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * include/asm-v850/me2.h -- V850E/ME2 cpu chip
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_ME2_H__
-#define __V850_ME2_H__
-
-#include <asm/v850e.h>
-#include <asm/v850e_cache.h>
-
-
-#define CPU_MODEL	"v850e/me2"
-#define CPU_MODEL_LONG	"NEC V850E/ME2"
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).  */
-#define IRQ_INTP(n)       (n) /* Pnnn (pin) interrupts */
-#define IRQ_INTP_NUM      31
-#define IRQ_INTCMD(n)     (0x31 + (n)) /* interval timer interrupts 0-3 */
-#define IRQ_INTCMD_NUM    4
-#define IRQ_INTDMA(n)     (0x41 + (n)) /* DMA interrupts 0-3 */
-#define IRQ_INTDMA_NUM    4
-#define IRQ_INTUBTIRE(n)  (0x49 + (n)*5)/* UARTB 0-1 reception error */
-#define IRQ_INTUBTIRE_NUM 2
-#define IRQ_INTUBTIR(n)   (0x4a + (n)*5) /* UARTB 0-1 reception complete */
-#define IRQ_INTUBTIR_NUM  2
-#define IRQ_INTUBTIT(n)   (0x4b + (n)*5) /* UARTB 0-1 transmission complete */
-#define IRQ_INTUBTIT_NUM  2
-#define IRQ_INTUBTIF(n)   (0x4c + (n)*5) /* UARTB 0-1 FIFO trans. complete */
-#define IRQ_INTUBTIF_NUM  2
-#define IRQ_INTUBTITO(n)  (0x4d + (n)*5) /* UARTB 0-1 reception timeout */
-#define IRQ_INTUBTITO_NUM 2
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS		0x59 /* V850E/ME2 */
-
-
-/* For <asm/entry.h> */
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  */
-#define R0_RAM_ADDR			0xFFFFB000 /* V850E/ME2 */
-
-
-/* V850E/ME2 UARTB details.*/
-#define V850E_UART_NUM_CHANNELS		2
-#define V850E_UARTB_BASE_FREQ		(CPU_CLOCK_FREQ / 4)
-
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	me2_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void me2_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud);
-#endif /* __ASSEMBLY__ */
-
-
-/* V850E/ME2 timer C details.  */
-#define V850E_TIMER_C_BASE_ADDR		0xFFFFF600
-
-
-/* V850E/ME2 timer D details.  */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF540
-#define V850E_TIMER_D_TMD_BASE_ADDR	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x2)
-#define V850E_TIMER_D_TMCD_BASE_ADDR	(V850E_TIMER_D_BASE_ADDR + 0x4)
-
-#define V850E_TIMER_D_BASE_FREQ		(CPU_CLOCK_FREQ / 2)
-
-
-/* Select iRAM mode.  */
-#define ME2_IRAMM_ADDR			0xFFFFF80A
-#define ME2_IRAMM			(*(volatile u8*)ME2_IRAMM_ADDR)
-
-
-/* Interrupt edge-detection configuration.  INTF(n) and INTR(n) are only
-   valid for n == 1, 2, or 5.  */
-#define ME2_INTF_ADDR(n)		(0xFFFFFC00 + (n) * 0x2)
-#define ME2_INTF(n)			(*(volatile u8*)ME2_INTF_ADDR(n))
-#define ME2_INTR_ADDR(n)		(0xFFFFFC20 + (n) * 0x2)
-#define ME2_INTR(n)			(*(volatile u8*)ME2_INTR_ADDR(n))
-#define ME2_INTFAL_ADDR			0xFFFFFC10
-#define ME2_INTFAL			(*(volatile u8*)ME2_INTFAL_ADDR)
-#define ME2_INTRAL_ADDR			0xFFFFFC30
-#define ME2_INTRAL			(*(volatile u8*)ME2_INTRAL_ADDR)
-#define ME2_INTFDH_ADDR			0xFFFFFC16
-#define ME2_INTFDH			(*(volatile u16*)ME2_INTFDH_ADDR)
-#define ME2_INTRDH_ADDR			0xFFFFFC36
-#define ME2_INTRDH			(*(volatile u16*)ME2_INTRDH_ADDR)
-#define ME2_SESC_ADDR(n)		(0xFFFFF609 + (n) * 0x10)
-#define ME2_SESC(n)			(*(volatile u8*)ME2_SESC_ADDR(n))
-#define ME2_SESA10_ADDR			0xFFFFF5AD
-#define ME2_SESA10			(*(volatile u8*)ME2_SESA10_ADDR)
-#define ME2_SESA11_ADDR			0xFFFFF5DD
-#define ME2_SESA11			(*(volatile u8*)ME2_SESA11_ADDR)
-
-
-/* Port 1 */
-/* Direct I/O.  Bits 0-3 are pins P10-P13.  */
-#define ME2_PORT1_IO_ADDR		0xFFFFF402
-#define ME2_PORT1_IO			(*(volatile u8 *)ME2_PORT1_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT1_PM_ADDR		0xFFFFF422
-#define ME2_PORT1_PM			(*(volatile u8 *)ME2_PORT1_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT1_PMC_ADDR		0xFFFFF442
-#define ME2_PORT1_PMC			(*(volatile u8 *)ME2_PORT1_PMC_ADDR)
-/* Port function control (for serial interfaces, 0 = CSI30, 1 = UARTB0 ).  */
-#define ME2_PORT1_PFC_ADDR		0xFFFFF462
-#define ME2_PORT1_PFC			(*(volatile u8 *)ME2_PORT1_PFC_ADDR)
-
-/* Port 2 */
-/* Direct I/O.  Bits 0-3 are pins P20-P25.  */
-#define ME2_PORT2_IO_ADDR		0xFFFFF404
-#define ME2_PORT2_IO			(*(volatile u8 *)ME2_PORT2_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT2_PM_ADDR		0xFFFFF424
-#define ME2_PORT2_PM			(*(volatile u8 *)ME2_PORT2_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT2_PMC_ADDR		0xFFFFF444
-#define ME2_PORT2_PMC			(*(volatile u8 *)ME2_PORT2_PMC_ADDR)
-/* Port function control (for serial interfaces, 0 = INTP2x, 1 = UARTB1 ).  */
-#define ME2_PORT2_PFC_ADDR		0xFFFFF464
-#define ME2_PORT2_PFC			(*(volatile u8 *)ME2_PORT2_PFC_ADDR)
-
-/* Port 5 */
-/* Direct I/O.  Bits 0-5 are pins P50-P55.  */
-#define ME2_PORT5_IO_ADDR		0xFFFFF40A
-#define ME2_PORT5_IO			(*(volatile u8 *)ME2_PORT5_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT5_PM_ADDR		0xFFFFF42A
-#define ME2_PORT5_PM			(*(volatile u8 *)ME2_PORT5_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT5_PMC_ADDR		0xFFFFF44A
-#define ME2_PORT5_PMC			(*(volatile u8 *)ME2_PORT5_PMC_ADDR)
-/* Port function control ().  */
-#define ME2_PORT5_PFC_ADDR		0xFFFFF46A
-#define ME2_PORT5_PFC			(*(volatile u8 *)ME2_PORT5_PFC_ADDR)
-
-/* Port 6 */
-/* Direct I/O.  Bits 5-7 are pins P65-P67.  */
-#define ME2_PORT6_IO_ADDR		0xFFFFF40C
-#define ME2_PORT6_IO			(*(volatile u8 *)ME2_PORT6_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT6_PM_ADDR		0xFFFFF42C
-#define ME2_PORT6_PM			(*(volatile u8 *)ME2_PORT6_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT6_PMC_ADDR		0xFFFFF44C
-#define ME2_PORT6_PMC			(*(volatile u8 *)ME2_PORT6_PMC_ADDR)
-/* Port function control ().  */
-#define ME2_PORT6_PFC_ADDR		0xFFFFF46C
-#define ME2_PORT6_PFC			(*(volatile u8 *)ME2_PORT6_PFC_ADDR)
-
-/* Port 7 */
-/* Direct I/O.  Bits 2-7 are pins P72-P77.  */
-#define ME2_PORT7_IO_ADDR		0xFFFFF40E
-#define ME2_PORT7_IO			(*(volatile u8 *)ME2_PORT7_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT7_PM_ADDR		0xFFFFF42E
-#define ME2_PORT7_PM			(*(volatile u8 *)ME2_PORT7_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT7_PMC_ADDR		0xFFFFF44E
-#define ME2_PORT7_PMC			(*(volatile u8 *)ME2_PORT7_PMC_ADDR)
-/* Port function control ().  */
-#define ME2_PORT7_PFC_ADDR		0xFFFFF46E
-#define ME2_PORT7_PFC			(*(volatile u8 *)ME2_PORT7_PFC_ADDR)
-
-
-#ifndef __ASSEMBLY__
-/* Initialize V850E/ME2 chip interrupts.  */
-extern void me2_init_irqs (void);
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_ME2_H__ */
diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h
deleted file mode 100644
index edbf6edbfb3..00000000000
--- a/include/asm-v850/mman.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __V850_MMAN_H__
-#define __V850_MMAN_H__
-
-#include <asm-generic/mman.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __V850_MMAN_H__ */
diff --git a/include/asm-v850/mmu.h b/include/asm-v850/mmu.h
deleted file mode 100644
index 267768c66ef..00000000000
--- a/include/asm-v850/mmu.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* Copyright (C) 2002, 2005, David McCullough <davidm@snapgear.com> */
-
-#ifndef __V850_MMU_H__
-#define __V850_MMU_H__
-
-typedef struct {
-	struct vm_list_struct	*vmlist;
-	unsigned long		end_brk;
-} mm_context_t;
-
-#endif /* __V850_MMU_H__ */
diff --git a/include/asm-v850/mmu_context.h b/include/asm-v850/mmu_context.h
deleted file mode 100644
index 01daacd5474..00000000000
--- a/include/asm-v850/mmu_context.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __V850_MMU_CONTEXT_H__
-#define __V850_MMU_CONTEXT_H__
-
-#include <asm-generic/mm_hooks.h>
-
-#define destroy_context(mm)		((void)0)
-#define init_new_context(tsk,mm)	0
-#define switch_mm(prev,next,tsk)	((void)0)
-#define deactivate_mm(tsk,mm)		do { } while (0)
-#define activate_mm(prev,next)		((void)0)
-#define enter_lazy_tlb(mm,tsk)		((void)0)
-
-#endif /* __V850_MMU_CONTEXT_H__ */
diff --git a/include/asm-v850/module.h b/include/asm-v850/module.h
deleted file mode 100644
index 2c2f4944f09..00000000000
--- a/include/asm-v850/module.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * include/asm-v850/module.h -- Architecture-specific module hooks
- *
- *  Copyright (C) 2001,02,03,04  NEC Corporation
- *  Copyright (C) 2001,02,03,04  Miles Bader <miles@gnu.org>
- *  Copyright (C) 2001,03  Rusty Russell
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- *
- * Derived in part from include/asm-ppc/module.h
- */
-
-#ifndef __V850_MODULE_H__
-#define __V850_MODULE_H__
-
-#define MODULE_SYMBOL_PREFIX "_"
-
-struct v850_plt_entry
-{
-	/* Indirect jump instruction sequence (6-byte mov + 2-byte jr).  */
-	unsigned long tramp[2];
-};
-
-struct mod_arch_specific
-{
-	/* Indices of PLT sections within module. */
-	unsigned int core_plt_section, init_plt_section;
-};
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-/* Make empty sections for module_frob_arch_sections to expand. */
-#ifdef MODULE
-asm(".section .plt,\"ax\",@nobits; .align 3; .previous");
-asm(".section .init.plt,\"ax\",@nobits; .align 3; .previous");
-#endif
-
-/* We don't do exception tables.  */
-struct exception_table_entry;
-static inline const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
-	       unsigned long value)
-{
-	return 0;
-}
-#define ARCH_HAS_SEARCH_EXTABLE
-static inline void
-sort_extable(struct exception_table_entry *start,
-	     struct exception_table_entry *finish)
-{
-	/* nada */
-}
-#define ARCH_HAS_SORT_EXTABLE
-
-#endif /* __V850_MODULE_H__ */
diff --git a/include/asm-v850/msgbuf.h b/include/asm-v850/msgbuf.h
deleted file mode 100644
index ed07dbd0163..00000000000
--- a/include/asm-v850/msgbuf.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef __V850_MSGBUF_H__
-#define __V850_MSGBUF_H__
-
-/* 
- * The msqid64_ds structure for v850 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct msqid64_ds {
-	struct ipc64_perm msg_perm;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	unsigned long	__unused1;
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	unsigned long	__unused2;
-	__kernel_time_t msg_ctime;	/* last change time */
-	unsigned long	__unused3;
-	unsigned long  msg_cbytes;	/* current number of bytes on queue */
-	unsigned long  msg_qnum;	/* number of messages in queue */
-	unsigned long  msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	unsigned long  __unused4;
-	unsigned long  __unused5;
-};
-
-#endif /* __V850_MSGBUF_H__ */
diff --git a/include/asm-v850/mutex.h b/include/asm-v850/mutex.h
deleted file mode 100644
index 458c1f7fbc1..00000000000
--- a/include/asm-v850/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
diff --git a/include/asm-v850/page.h b/include/asm-v850/page.h
deleted file mode 100644
index f9de35d873f..00000000000
--- a/include/asm-v850/page.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * include/asm-v850/page.h -- VM ops
- *
- *  Copyright (C) 2001,02,03,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PAGE_H__
-#define __V850_PAGE_H__
-
-#include <asm/machdep.h>
-
-
-#define PAGE_SHIFT	12
-#define PAGE_SIZE       (1UL << PAGE_SHIFT)
-#define PAGE_MASK       (~(PAGE_SIZE-1))
-
-
-/*
- * PAGE_OFFSET -- the first address of the first page of memory. For archs with
- * no MMU this corresponds to the first free page in physical memory (aligned
- * on a page boundary).
- */
-#ifndef PAGE_OFFSET
-#define PAGE_OFFSET  0x0000000
-#endif
-
-
-#ifndef __ASSEMBLY__
-
-#define STRICT_MM_TYPECHECKS
-
-#define clear_page(page)	memset ((void *)(page), 0, PAGE_SIZE)
-#define copy_page(to, from)	memcpy ((void *)(to), (void *)from, PAGE_SIZE)
-
-#define clear_user_page(addr, vaddr, page)	\
-	do { 	clear_page(addr);		\
-		flush_dcache_page(page);	\
-	} while (0)
-#define copy_user_page(to, from, vaddr, page)	\
-	do {	copy_page(to, from);		\
-		flush_dcache_page(page);	\
-	} while (0)
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-
-typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pmd; } pmd_t;
-typedef struct { unsigned long pgd; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-typedef struct page *pgtable_t;
-
-#define pte_val(x)      ((x).pte)
-#define pmd_val(x)      ((x).pmd)
-#define pgd_val(x)      ((x).pgd)
-#define pgprot_val(x)   ((x).pgprot)
-
-#define __pte(x)        ((pte_t) { (x) } )
-#define __pmd(x)        ((pmd_t) { (x) } )
-#define __pgd(x)        ((pgd_t) { (x) } )
-#define __pgprot(x)     ((pgprot_t) { (x) } )
-
-#else /* !STRICT_MM_TYPECHECKS */
-/*
- * .. while these make it easier on the compiler
- */
-
-typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
-
-#define pte_val(x)      (x)
-#define pmd_val(x)      (x)
-#define pgd_val(x)      (x)
-#define pgprot_val(x)   (x)
-
-#define __pte(x)        (x)
-#define __pmd(x)        (x)
-#define __pgd(x)        (x)
-#define __pgprot(x)     (x)
-
-#endif /* STRICT_MM_TYPECHECKS */
-
-#endif /* !__ASSEMBLY__ */
-
-
-/* No current v850 processor has virtual memory.  */
-#define __virt_to_phys(addr)	(addr)
-#define __phys_to_virt(addr)	(addr)
-
-#define virt_to_pfn(kaddr)	(__virt_to_phys (kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)	__phys_to_virt ((pfn) << PAGE_SHIFT)
-
-#define MAP_NR(kaddr) \
-  (((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT)
-#define virt_to_page(kaddr)	(mem_map + MAP_NR (kaddr))
-#define page_to_virt(page) \
-  ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
-
-#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
-#define pfn_valid(pfn)	        ((pfn) < max_mapnr)
-
-#define	virt_addr_valid(kaddr)						\
-  (((void *)(kaddr) >= (void *)PAGE_OFFSET) && MAP_NR (kaddr) < max_mapnr)
-
-
-#define __pa(x)		     __virt_to_phys ((unsigned long)(x))
-#define __va(x)		     ((void *)__phys_to_virt ((unsigned long)(x)))
-
-
-#include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
-
-#endif /* __V850_PAGE_H__ */
diff --git a/include/asm-v850/param.h b/include/asm-v850/param.h
deleted file mode 100644
index 4391f5fe020..00000000000
--- a/include/asm-v850/param.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * include/asm-v850/param.h -- Varions kernel parameters
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PARAM_H__
-#define __V850_PARAM_H__
-
-#define EXEC_PAGESIZE	4096
-
-#ifndef NOGROUP
-#define NOGROUP		(-1)
-#endif
-
-#define MAXHOSTNAMELEN	64	/* max length of hostname */
-
-#ifdef __KERNEL__
-# define HZ		CONFIG_HZ
-# define USER_HZ	100
-# define CLOCKS_PER_SEC	USER_HZ
-#else
-# define HZ		100
-#endif
-
-#endif /* __V850_PARAM_H__ */
diff --git a/include/asm-v850/pci.h b/include/asm-v850/pci.h
deleted file mode 100644
index de2a7d0a81c..00000000000
--- a/include/asm-v850/pci.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * include/asm-v850/pci.h -- PCI support
- *
- *  Copyright (C) 2001,02,05  NEC Corporation
- *  Copyright (C) 2001,02,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PCI_H__
-#define __V850_PCI_H__
-
-/* Get any platform-dependent definitions.  */
-#include <asm/machdep.h>
-
-#define pcibios_scan_all_fns(a, b)	0
-
-/* Generic declarations.  */
-
-struct scatterlist;
-
-extern void pcibios_set_master (struct pci_dev *dev);
-
-/* `Grant' to PDEV the memory block at CPU_ADDR, for doing DMA.  The
-   32-bit PCI bus mastering address to use is returned.  the device owns
-   this memory until either pci_unmap_single or pci_dma_sync_single_for_cpu is
-   performed.  */
-extern dma_addr_t
-pci_map_single (struct pci_dev *pdev, void *cpu_addr, size_t size, int dir);
-
-/* Return to the CPU the PCI DMA memory block previously `granted' to
-   PDEV, at DMA_ADDR.  */
-extern void
-pci_unmap_single (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
-		  int dir);
-
-/* Make physical memory consistent for a single streaming mode DMA
-   translation after a transfer.
-
-   If you perform a pci_map_single() but wish to interrogate the
-   buffer using the cpu, yet do not wish to teardown the PCI dma
-   mapping, you must call this function before doing so.  At the next
-   point you give the PCI dma address back to the card, you must first
-   perform a pci_dma_sync_for_device, and then the device again owns
-   the buffer.  */
-extern void
-pci_dma_sync_single_for_cpu (struct pci_dev *dev, dma_addr_t dma_addr,
-			     size_t size, int dir);
-
-extern void
-pci_dma_sync_single_for_device (struct pci_dev *dev, dma_addr_t dma_addr,
-				size_t size, int dir);
-
-
-/* Do multiple DMA mappings at once.  */
-extern int
-pci_map_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len, int dir);
-
-/* Unmap multiple DMA mappings at once.  */
-extern void
-pci_unmap_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len,
-	      int dir);
-
-/* SG-list versions of pci_dma_sync functions.  */
-extern void
-pci_dma_sync_sg_for_cpu (struct pci_dev *dev,
-			 struct scatterlist *sg, int sg_len,
-			 int dir);
-extern void
-pci_dma_sync_sg_for_device (struct pci_dev *dev,
-			    struct scatterlist *sg, int sg_len,
-			    int dir);
-
-#define pci_map_page(dev, page, offs, size, dir) \
-  pci_map_single(dev, (page_address(page) + (offs)), size, dir)
-#define pci_unmap_page(dev,addr,sz,dir) \
-  pci_unmap_single(dev, addr, sz, dir)
-
-/* Test for pci_map_single or pci_map_page having generated an error.  */
-static inline int
-pci_dma_mapping_error (dma_addr_t dma_addr)
-{
-	return dma_addr == 0;
-}
-
-/* Allocate and map kernel buffer using consistent mode DMA for PCI
-   device.  Returns non-NULL cpu-view pointer to the buffer if
-   successful and sets *DMA_ADDR to the pci side dma address as well,
-   else DMA_ADDR is undefined.  */
-extern void *
-pci_alloc_consistent (struct pci_dev *pdev, size_t size, dma_addr_t *dma_addr);
-
-/* Free and unmap a consistent DMA buffer.  CPU_ADDR and DMA_ADDR must
-   be values that were returned from pci_alloc_consistent.  SIZE must be
-   the same as what as passed into pci_alloc_consistent.  References to
-   the memory and mappings assosciated with CPU_ADDR or DMA_ADDR past
-   this call are illegal.  */
-extern void
-pci_free_consistent (struct pci_dev *pdev, size_t size, void *cpu_addr,
-		     dma_addr_t dma_addr);
-
-#ifdef CONFIG_PCI
-static inline void pci_dma_burst_advice(struct pci_dev *pdev,
-					enum pci_dma_burst_strategy *strat,
-					unsigned long *strategy_parameter)
-{
-	*strat = PCI_DMA_BURST_INFINITY;
-	*strategy_parameter = ~0UL;
-}
-#endif
-
-extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
-extern void pci_iounmap (struct pci_dev *dev, void __iomem *addr);
-
-#endif /* __V850_PCI_H__ */
diff --git a/include/asm-v850/percpu.h b/include/asm-v850/percpu.h
deleted file mode 100644
index 755ac6522b6..00000000000
--- a/include/asm-v850/percpu.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __V850_PERCPU_H__
-#define __V850_PERCPU_H__
-
-#include <asm-generic/percpu.h>
-
-/* This is a stupid hack to satisfy some grotty implicit include-file
-   dependency; basically, <linux/smp.h> uses BUG_ON, which calls BUG, but
-   doesn't include the necessary headers to define it.  In the twisted
-   festering mess of includes this must all be resolved somehow on other
-   platforms, but I haven't the faintest idea how, and don't care; here will
-   do, even though doesn't actually make any sense.  */
-#include <asm/page.h>
-
-#endif /* __V850_PERCPU_H__ */
diff --git a/include/asm-v850/pgalloc.h b/include/asm-v850/pgalloc.h
deleted file mode 100644
index b91eb2d02bf..00000000000
--- a/include/asm-v850/pgalloc.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * include/asm-v850/pgalloc.h
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PGALLOC_H__
-#define __V850_PGALLOC_H__
-
-#include <linux/mm.h>  /* some crap code expects this */
-
-/* ... and then, there was one.  */
-#define check_pgt_cache()	((void)0)
-
-#endif /* __V850_PGALLOC_H__ */
diff --git a/include/asm-v850/pgtable.h b/include/asm-v850/pgtable.h
deleted file mode 100644
index 1ea2a900f0f..00000000000
--- a/include/asm-v850/pgtable.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef __V850_PGTABLE_H__
-#define __V850_PGTABLE_H__
-
-#include <asm-generic/4level-fixup.h>
-
-#include <asm/page.h>
-
-
-#define pgd_present(pgd)	(1) /* pages are always present on NO_MM */
-#define pgd_none(pgd)		(0)
-#define pgd_bad(pgd)		(0)
-#define pgd_clear(pgdp)		((void)0)
-
-#define	pmd_offset(a, b)	((void *)0)
-
-#define kern_addr_valid(addr)	(1)
-
-
-#define __swp_type(x)		(0)
-#define __swp_offset(x)		(0)
-#define __swp_entry(typ,off)	((swp_entry_t) { ((typ) | ((off) << 7)) })
-#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
-
-static inline int pte_file (pte_t pte) { return 0; }
-
-
-/* These mean nothing to !CONFIG_MMU.  */
-#define PAGE_NONE		__pgprot(0)
-#define PAGE_SHARED		__pgprot(0)
-#define PAGE_COPY		__pgprot(0)
-#define PAGE_READONLY		__pgprot(0)
-#define PAGE_KERNEL		__pgprot(0)
-
-
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc.  When CONFIG_MMU is not defined, this
- * should never actually be used, so just define it to something that's
- * will hopefully cause a bus error if it is.
- */
-#define ZERO_PAGE(vaddr)	((void *)0x87654321)
-
-
-/* Some bogus code in procfs uses these; whatever.  */
-#define VMALLOC_START	0
-#define VMALLOC_END	(~0)
-
-
-extern void paging_init (void);
-#define swapper_pg_dir ((pgd_t *) 0)
-
-#define pgtable_cache_init()   ((void)0)
-
-
-extern unsigned int kobjsize(const void *objp);
-
-
-#endif /* __V850_PGTABLE_H__ */
diff --git a/include/asm-v850/poll.h b/include/asm-v850/poll.h
deleted file mode 100644
index 803cad0b9b5..00000000000
--- a/include/asm-v850/poll.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __V850_POLL_H__
-#define __V850_POLL_H__
-
-#define POLLWRNORM	POLLOUT
-#define POLLWRBAND	0x0100
-
-#include <asm-generic/poll.h>
-
-#endif /* __V850_POLL_H__ */
diff --git a/include/asm-v850/posix_types.h b/include/asm-v850/posix_types.h
deleted file mode 100644
index 7f403b76539..00000000000
--- a/include/asm-v850/posix_types.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * include/asm-v850/posix_types.h -- Kernel versions of standard types
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_POSIX_TYPES_H__
-#define __V850_POSIX_TYPES_H__
-
-typedef unsigned long	__kernel_ino_t;
-typedef unsigned long long __kernel_ino64_t;
-typedef unsigned int	__kernel_mode_t;
-typedef unsigned int	__kernel_nlink_t;
-typedef long		__kernel_off_t;
-typedef long long	__kernel_loff_t;
-typedef int		__kernel_pid_t;
-typedef unsigned short	__kernel_ipc_pid_t;
-typedef unsigned int	__kernel_uid_t;
-typedef unsigned int	__kernel_gid_t;
-typedef unsigned int	__kernel_size_t;
-typedef int		__kernel_ssize_t;
-typedef int		__kernel_ptrdiff_t;
-typedef long		__kernel_time_t;
-typedef long		__kernel_suseconds_t;
-typedef long		__kernel_clock_t;
-typedef int		__kernel_timer_t;
-typedef int		__kernel_clockid_t;
-typedef int		__kernel_daddr_t;
-typedef char *		__kernel_caddr_t;
-typedef unsigned short	__kernel_uid16_t;
-typedef unsigned short	__kernel_gid16_t;
-typedef unsigned int	__kernel_uid32_t;
-typedef unsigned int	__kernel_gid32_t;
-
-/* Some bogus code depends on this; we don't care.  */
-typedef __kernel_uid_t __kernel_old_uid_t;
-typedef unsigned int	__kernel_old_dev_t;
-
-typedef struct {
-	int	val[2];
-} __kernel_fsid_t;
-
-
-#if defined(__KERNEL__)
-
-/* We used to include <asm/bitops.h> here, which seems the right thing, but
-   it caused nasty include-file definition order problems.  Removing the
-   include seems to work, so fingers crossed...  */
-
-#undef	__FD_SET
-#define __FD_SET(fd, fd_set) \
-  __set_bit (fd, (void *)&((__kernel_fd_set *)fd_set)->fds_bits)
-#undef __FD_CLR
-#define __FD_CLR(fd, fd_set) \
-  __clear_bit (fd, (void *)&((__kernel_fd_set *)fd_set)->fds_bits)
-#undef	__FD_ISSET
-#define __FD_ISSET(fd, fd_set) \
-  __test_bit (fd, (void *)&((__kernel_fd_set *)fd_set)->fds_bits)
-#undef	__FD_ZERO
-#define __FD_ZERO(fd_set) \
-  memset (fd_set, 0, sizeof (*(fd_set *)fd_set))
-
-#endif /* defined(__KERNEL__) */
-
-#endif /* __V850_POSIX_TYPES_H__ */
diff --git a/include/asm-v850/processor.h b/include/asm-v850/processor.h
deleted file mode 100644
index 979e3467f9a..00000000000
--- a/include/asm-v850/processor.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * include/asm-v850/processor.h
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PROCESSOR_H__
-#define __V850_PROCESSOR_H__
-
-#ifndef __ASSEMBLY__ /* <linux/thread_info.h> is not asm-safe.  */
-#include <linux/thread_info.h>
-#endif
-
-#include <linux/compiler.h>
-#include <asm/ptrace.h>
-#include <asm/entry.h>
-
-/* Some code expects `segment' stuff to be defined here.  */
-#include <asm/segment.h>
-
-
-/*
- * The only places this is used seem to be horrible bletcherous kludges,
- * so we just define it to be as large as possible.
- */
-#define TASK_SIZE	(0xFFFFFFFF)
-
-/*
- * This decides where the kernel will search for a free chunk of vm
- * space during mmap's.  We won't be using it.
- */
-#define TASK_UNMAPPED_BASE	0
-
-
-#ifndef __ASSEMBLY__
-
-
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr()	({ __label__ _l; _l: &&_l;})
-
-/* If you change this, you must change the associated assembly-languages
-   constants defined below, THREAD_*.  */
-struct thread_struct {
-	/* kernel stack pointer (must be first field in structure) */
-	unsigned long  ksp;
-};
-
-#define INIT_THREAD { sizeof init_stack + (unsigned long)init_stack }
-
-
-/* Do necessary setup to start up a newly executed thread.  */
-static inline void start_thread (struct pt_regs *regs,
-				 unsigned long pc, unsigned long usp)
-{
-	regs->pc = pc;
-	regs->gpr[GPR_SP] = usp;
-	regs->kernel_mode = 0;
-}
-
-/* Free all resources held by a thread. */
-static inline void release_thread (struct task_struct *dead_task)
-{
-}
-
-/* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk)	do { } while (0)
-
-extern int kernel_thread (int (*fn)(void *), void * arg, unsigned long flags);
-
-/* Free current thread data structures etc.  */
-static inline void exit_thread (void)
-{
-}
-
-
-/* Return the registers saved during context-switch by the currently
-   not-running thread T.  Note that this only includes some registers!
-   See entry.S for details.  */
-#define thread_saved_regs(t) \
-   ((struct pt_regs*)((t)->thread.ksp + STATE_SAVE_PT_OFFSET))
-/* Return saved (kernel) PC of a blocked thread.  Actually, we return the
-   LP register, because the thread is actually blocked in switch_thread,
-   and we're interested in the PC it will _return_ to.  */
-#define thread_saved_pc(t)   (thread_saved_regs(t)->gpr[GPR_LP])
-
-
-unsigned long get_wchan (struct task_struct *p);
-
-
-/* Return some info about the user process TASK.  */
-#define task_tos(task)	((unsigned long)task_stack_page(task) + THREAD_SIZE)
-#define task_pt_regs(task) ((struct pt_regs *)task_tos (task) - 1)
-#define task_sp(task)	(task_pt_regs (task)->gpr[GPR_SP])
-#define task_pc(task)	(task_pt_regs (task)->pc)
-/* Grotty old names for some.  */
-#define KSTK_EIP(task)	task_pc (task)
-#define KSTK_ESP(task)	task_sp (task)
-
-
-#define cpu_relax()    barrier()
-
-
-#else /* __ASSEMBLY__ */
-
-#define THREAD_KSP	0
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_PROCESSOR_H__ */
diff --git a/include/asm-v850/ptrace.h b/include/asm-v850/ptrace.h
deleted file mode 100644
index 4f35cf2cd64..00000000000
--- a/include/asm-v850/ptrace.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * include/asm-v850/ptrace.h -- Access to CPU registers
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PTRACE_H__
-#define __V850_PTRACE_H__
-
-
-/* v850 general purpose registers with special meanings.  */
-#define GPR_ZERO	0	/* constant zero */
-#define GPR_ASM		1	/* reserved for assembler */
-#define GPR_SP		3	/* stack pointer */
-#define GPR_GP		4	/* global data pointer */
-#define GPR_TP		5	/* `text pointer' */
-#define GPR_EP		30	/* `element pointer' */
-#define GPR_LP		31	/* link pointer (current return address) */
-
-/* These aren't official names, but they make some code more descriptive.  */
-#define GPR_ARG0	6
-#define GPR_ARG1	7
-#define GPR_ARG2	8
-#define GPR_ARG3	9
-#define GPR_RVAL0	10
-#define GPR_RVAL1	11
-#define GPR_RVAL	GPR_RVAL0
-
-#define NUM_GPRS	32
-
-/* v850 `system' registers.  */
-#define SR_EIPC		0
-#define SR_EIPSW	1
-#define SR_FEPC		2
-#define SR_FEPSW	3
-#define SR_ECR		4
-#define SR_PSW		5
-#define SR_CTPC		16
-#define SR_CTPSW	17
-#define SR_DBPC		18
-#define SR_DBPSW	19
-#define SR_CTBP		20
-#define SR_DIR		21
-#define SR_ASID		23
-
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long v850_reg_t;
-
-/* How processor state is stored on the stack during a syscall/signal.
-   If you change this structure, change the associated assembly-language
-   macros below too (PT_*)!  */
-struct pt_regs
-{
-	/* General purpose registers.  */
-	v850_reg_t gpr[NUM_GPRS];
-
-	v850_reg_t pc;		/* program counter */
-	v850_reg_t psw;		/* program status word */
-
-	/* Registers used by `callt' instruction:  */
-	v850_reg_t ctpc;	/* saved program counter */
-	v850_reg_t ctpsw;	/* saved psw */
-	v850_reg_t ctbp;	/* base pointer for callt table */
-
-	char kernel_mode;	/* 1 if in `kernel mode', 0 if user mode */
-};
-
-
-#define instruction_pointer(regs)	((regs)->pc)
-#define profile_pc(regs) instruction_pointer(regs)
-#define user_mode(regs)			(!(regs)->kernel_mode)
-
-/* When a struct pt_regs is used to save user state for a system call in
-   the kernel, the system call is stored in the space for R0 (since it's
-   never used otherwise, R0 being a constant 0).  Non-system-calls
-   simply store 0 there.  */
-#define PT_REGS_SYSCALL(regs)		(regs)->gpr[0]
-#define PT_REGS_SET_SYSCALL(regs, val)	((regs)->gpr[0] = (val))
-
-#endif /* !__ASSEMBLY__ */
-
-
-/* The number of bytes used to store each register.  */
-#define _PT_REG_SIZE	4
-
-/* Offset of a general purpose register in a struct pt_regs.  */
-#define PT_GPR(num)	((num) * _PT_REG_SIZE)
-
-/* Offsets of various special registers & fields in a struct pt_regs.  */
-#define PT_PC		((NUM_GPRS + 0) * _PT_REG_SIZE)
-#define PT_PSW		((NUM_GPRS + 1) * _PT_REG_SIZE)
-#define PT_CTPC		((NUM_GPRS + 2) * _PT_REG_SIZE)
-#define PT_CTPSW	((NUM_GPRS + 3) * _PT_REG_SIZE)
-#define PT_CTBP		((NUM_GPRS + 4) * _PT_REG_SIZE)
-#define PT_KERNEL_MODE	((NUM_GPRS + 5) * _PT_REG_SIZE)
-
-/* Where the current syscall number is stashed; obviously only valid in
-   the kernel!  */
-#define PT_CUR_SYSCALL	PT_GPR(0)
-
-/* Size of struct pt_regs, including alignment.  */
-#define PT_SIZE		((NUM_GPRS + 6) * _PT_REG_SIZE)
-
-
-/* These are `magic' values for PTRACE_PEEKUSR that return info about where
-   a process is located in memory.  */
-#define PT_TEXT_ADDR	(PT_SIZE + 1)
-#define PT_TEXT_LEN	(PT_SIZE + 2)
-#define PT_DATA_ADDR	(PT_SIZE + 3)
-
-
-#endif /* __V850_PTRACE_H__ */
diff --git a/include/asm-v850/resource.h b/include/asm-v850/resource.h
deleted file mode 100644
index 4b9dcd44f8d..00000000000
--- a/include/asm-v850/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_RESOURCE_H__
-#define __V850_RESOURCE_H__
-
-#include <asm-generic/resource.h>
-
-#endif /* __V850_RESOURCE_H__ */
diff --git a/include/asm-v850/rte_cb.h b/include/asm-v850/rte_cb.h
deleted file mode 100644
index db9879f00aa..00000000000
--- a/include/asm-v850/rte_cb.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * include/asm-v850/rte_cb.h -- Midas labs RTE-CB series of evaluation boards
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_RTE_CB_H__
-#define __V850_RTE_CB_H__
-
-
-/* The SRAM on the Mother-A motherboard.  */
-#define MB_A_SRAM_ADDR		GCS0_ADDR
-#define MB_A_SRAM_SIZE		0x00200000 /* 2MB */
-
-
-#ifdef CONFIG_RTE_GBUS_INT
-/* GBUS interrupt support.  */
-
-# include <asm/gbus_int.h>
-
-# define GBUS_INT_BASE_IRQ	NUM_RTE_CB_IRQS
-# define GBUS_INT_BASE_ADDR	(GCS2_ADDR + 0x00006000)
-
-/* Some specific interrupts.  */
-# define IRQ_MB_A_LAN		IRQ_GBUS_INT(10)
-# define IRQ_MB_A_PCI1(n)	(IRQ_GBUS_INT(16) + (n))
-# define IRQ_MB_A_PCI1_NUM	4
-# define IRQ_MB_A_PCI2(n)	(IRQ_GBUS_INT(20) + (n))
-# define IRQ_MB_A_PCI2_NUM	4
-# define IRQ_MB_A_EXT(n)	(IRQ_GBUS_INT(24) + (n))
-# define IRQ_MB_A_EXT_NUM	4
-# define IRQ_MB_A_USB_OC(n)	(IRQ_GBUS_INT(28) + (n))
-# define IRQ_MB_A_USB_OC_NUM	2
-# define IRQ_MB_A_PCMCIA_OC	IRQ_GBUS_INT(30)
-
-/* We define NUM_MACH_IRQS to include extra interrupts from the GBUS.  */
-# define NUM_MACH_IRQS		(NUM_RTE_CB_IRQS + IRQ_GBUS_INT_NUM)
-
-#else /* !CONFIG_RTE_GBUS_INT */
-
-# define NUM_MACH_IRQS		NUM_RTE_CB_IRQS
-
-#endif /* CONFIG_RTE_GBUS_INT */
-
-
-#ifdef CONFIG_RTE_MB_A_PCI
-/* Mother-A PCI bus support.  */
-
-# include <asm/rte_mb_a_pci.h>
-
-/* These are the base addresses used for allocating device address
-   space.  512K of the motherboard SRAM is in the same space, so we have
-   to be careful not to let it be allocated.  */
-# define PCIBIOS_MIN_MEM	(MB_A_PCI_MEM_ADDR + 0x80000)
-# define PCIBIOS_MIN_IO		MB_A_PCI_IO_ADDR
-
-/* As we don't really support PCI DMA to cpu memory, and use bounce-buffers
-   instead, perversely enough, this becomes always true! */
-# define pci_dma_supported(dev, mask)		1
-# define pcibios_assign_all_busses()		1
-
-#endif /* CONFIG_RTE_MB_A_PCI */
-
-
-#ifndef __ASSEMBLY__
-extern void rte_cb_early_init (void);
-extern void rte_cb_init_irqs (void);
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_RTE_CB_H__ */
diff --git a/include/asm-v850/rte_ma1_cb.h b/include/asm-v850/rte_ma1_cb.h
deleted file mode 100644
index bd3162ab984..00000000000
--- a/include/asm-v850/rte_ma1_cb.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * include/asm-v850/rte_ma1_cb.h -- Midas labs RTE-V850/MA1-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_RTE_MA1_CB_H__
-#define __V850_RTE_MA1_CB_H__
-
-#include <asm/rte_cb.h>		/* Common defs for Midas RTE-CB boards.  */
-
-
-#define PLATFORM		"rte-v850e/ma1-cb"
-#define PLATFORM_LONG		"Midas lab RTE-V850E/MA1-CB"
-
-#define CPU_CLOCK_FREQ		50000000 /* 50MHz */
-
-/* 1MB of onboard SRAM.  Note that the monitor ROM uses parts of this
-   for its own purposes, so care must be taken.  Some address lines are
-   not decoded, so the SRAM area is mirrored every 1MB from 0x400000 to
-   0x800000 (exclusive).  */
-#define SRAM_ADDR		0x00400000
-#define SRAM_SIZE		0x00100000 /* 1MB */
-
-/* 32MB of onbard SDRAM.  */
-#define SDRAM_ADDR		0x00800000
-#define SDRAM_SIZE		0x02000000 /* 32MB */
-
-
-/* CPU addresses of GBUS memory spaces.  */
-#define GCS0_ADDR		0x05000000 /* GCS0 - Common SRAM (2MB) */
-#define GCS0_SIZE		0x00200000 /*   2MB */
-#define GCS1_ADDR		0x06000000 /* GCS1 - Flash ROM (8MB) */
-#define GCS1_SIZE		0x00800000 /*   8MB */
-#define GCS2_ADDR		0x07900000 /* GCS2 - I/O registers */
-#define GCS2_SIZE		0x00400000 /*   4MB */
-#define GCS5_ADDR		0x04000000 /* GCS5 - PCI bus space */
-#define GCS5_SIZE		0x01000000 /*   16MB */
-#define GCS6_ADDR		0x07980000 /* GCS6 - PCI control registers */
-#define GCS6_SIZE		0x00000200 /*   512B */
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 		SRAM_ADDR
-
-
-/* The GBUS GINT0 - GINT3 interrupts are connected to the INTP000 - INTP011
-   pins on the CPU.  These are shared among the GBUS interrupts.  */
-#define IRQ_GINT(n)		IRQ_INTP(n)
-#define IRQ_GINT_NUM		4
-
-/* Used by <asm/rte_cb.h> to derive NUM_MACH_IRQS.  */
-#define NUM_RTE_CB_IRQS		NUM_CPU_IRQS
-
-
-#ifdef CONFIG_ROM_KERNEL
-/* Kernel is in ROM, starting at address 0.  */
-
-#define INTV_BASE		0
-
-#else /* !CONFIG_ROM_KERNEL */
-
-#ifdef CONFIG_RTE_CB_MULTI
-/* Using RAM kernel with ROM monitor for Multi debugger.  */
-
-/* The chip's real interrupt vectors are in ROM, but they jump to a
-   secondary interrupt vector table in RAM.  */
-#define INTV_BASE		0x004F8000
-
-/* Scratch memory used by the ROM monitor, which shouldn't be used by
-   linux (except for the alternate interrupt vector area, defined
-   above).  */
-#define MON_SCRATCH_ADDR	0x004F8000
-#define MON_SCRATCH_SIZE	0x00008000 /* 32KB */
-
-#else /* !CONFIG_RTE_CB_MULTI */
-/* Using RAM-kernel.  Assume some sort of boot-loader got us loaded at
-   address 0.  */
-
-#define INTV_BASE		0
-
-#endif /* CONFIG_RTE_CB_MULTI */
-
-#endif /* CONFIG_ROM_KERNEL */
-
-
-/* Some misc. on-board devices.  */
-
-/* Seven-segment LED display (two digits).  Write-only.  */
-#define LED_ADDR(n)		(0x07802000 + (n))
-#define LED(n)			(*(volatile unsigned char *)LED_ADDR(n))
-#define LED_NUM_DIGITS		2
-
-
-/* Override the basic MA uart pre-initialization so that we can
-   initialize extra stuff.  */
-#undef V850E_UART_PRE_CONFIGURE	/* should be defined by <asm/ma.h> */
-#define V850E_UART_PRE_CONFIGURE	rte_ma1_cb_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void rte_ma1_cb_uart_pre_configure (unsigned chan,
-					   unsigned cflags, unsigned baud);
-#endif
-
-/* This board supports RTS/CTS for the on-chip UART, but only for channel 0. */
-
-/* CTS for UART channel 0 is pin P43 (bit 3 of port 4).  */
-#define V850E_UART_CTS(chan)	((chan) == 0 ? !(MA_PORT4_IO & 0x8) : 1)
-/* RTS for UART channel 0 is pin P42 (bit 2 of port 4).  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   if (chan == 0) {						      \
-		   unsigned old = MA_PORT4_IO; 				      \
-		   if (val)						      \
-			   MA_PORT4_IO = old & ~0x4;			      \
-		   else							      \
-			   MA_PORT4_IO = old | 0x4;			      \
-	   }								      \
-   } while (0)
-
-
-#endif /* __V850_RTE_MA1_CB_H__ */
diff --git a/include/asm-v850/rte_mb_a_pci.h b/include/asm-v850/rte_mb_a_pci.h
deleted file mode 100644
index 41ac185ca9c..00000000000
--- a/include/asm-v850/rte_mb_a_pci.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * include/asm-v850/mb_a_pci.h -- PCI support for Midas lab RTE-MOTHER-A board
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MB_A_PCI_H__
-#define __V850_MB_A_PCI_H__
-
-
-#define MB_A_PCI_MEM_ADDR	GCS5_ADDR
-#define MB_A_PCI_MEM_SIZE	(GCS5_SIZE / 2)
-#define MB_A_PCI_IO_ADDR	(GCS5_ADDR + MB_A_PCI_MEM_SIZE)
-#define MB_A_PCI_IO_SIZE	(GCS5_SIZE / 2)
-#define MB_A_PCI_REG_BASE_ADDR	GCS6_ADDR
-
-#define MB_A_PCI_PCICR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x4)
-#define MB_A_PCI_PCICR		(*(volatile u16 *)MB_A_PCI_PCICR_ADDR)
-#define MB_A_PCI_PCISR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x6)
-#define MB_A_PCI_PCISR		(*(volatile u16 *)MB_A_PCI_PCISR_ADDR)
-#define MB_A_PCI_PCILTR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xD)
-#define MB_A_PCI_PCILTR		(*(volatile u8 *)MB_A_PCI_PCILTR_ADDR)
-#define MB_A_PCI_PCIBAR0_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x10)
-#define MB_A_PCI_PCIBAR0	(*(volatile u32 *)MB_A_PCI_PCIBAR0_ADDR)
-#define MB_A_PCI_PCIBAR1_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x14)
-#define MB_A_PCI_PCIBAR1	(*(volatile u32 *)MB_A_PCI_PCIBAR1_ADDR)
-#define MB_A_PCI_PCIBAR2_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x18)
-#define MB_A_PCI_PCIBAR2	(*(volatile u32 *)MB_A_PCI_PCIBAR2_ADDR)
-#define MB_A_PCI_VENDOR_ID_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x2C)
-#define MB_A_PCI_VENDOR_ID	(*(volatile u16 *)MB_A_PCI_VENDOR_ID_ADDR)
-#define MB_A_PCI_DEVICE_ID_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x2E)
-#define MB_A_PCI_DEVICE_ID	(*(volatile u16 *)MB_A_PCI_DEVICE_ID_ADDR)
-#define MB_A_PCI_DMRR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x9C)
-#define MB_A_PCI_DMRR		(*(volatile u32 *)MB_A_PCI_DMRR_ADDR)
-#define MB_A_PCI_DMLBAM_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xA0)
-#define MB_A_PCI_DMLBAM		(*(volatile u32 *)MB_A_PCI_DMLBAM_ADDR)
-#define MB_A_PCI_DMLBAI_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xA4)
-#define MB_A_PCI_DMLBAI		(*(volatile u32 *)MB_A_PCI_DMLBAI_ADDR)
-#define MB_A_PCI_PCIPBAM_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xA8)
-#define MB_A_PCI_PCIPBAM	(*(volatile u32 *)MB_A_PCI_PCIPBAM_ADDR)
-/* `PCI Configuration Address Register for Direct Master to PCI IO/CFG'  */
-#define MB_A_PCI_DMCFGA_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xAC)
-#define MB_A_PCI_DMCFGA		(*(volatile u32 *)MB_A_PCI_DMCFGA_ADDR)
-/* `PCI Permanent Configuration ID Register'  */
-#define MB_A_PCI_PCIHIDR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xF0)
-#define MB_A_PCI_PCIHIDR	(*(volatile u32 *)MB_A_PCI_PCIHIDR_ADDR)
-
-
-#endif /* __V850_MB_A_PCI_H__ */
diff --git a/include/asm-v850/rte_me2_cb.h b/include/asm-v850/rte_me2_cb.h
deleted file mode 100644
index 9922c85c85a..00000000000
--- a/include/asm-v850/rte_me2_cb.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * include/asm-v850/rte_me2_cb.h -- Midas labs RTE-V850E/ME2-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_RTE_ME2_CB_H__
-#define __V850_RTE_ME2_CB_H__
-
-#include <asm/rte_cb.h>		/* Common defs for Midas RTE-CB boards.  */
-
-
-#define PLATFORM		"rte-v850e/me2-cb"
-#define PLATFORM_LONG		"Midas lab RTE-V850E/ME2-CB"
-
-#define CPU_CLOCK_FREQ		150000000 /* 150MHz */
-#define FIXED_BOGOMIPS		50
-
-/* 32MB of onbard SDRAM.  */
-#define SDRAM_ADDR		0x00800000
-#define SDRAM_SIZE		0x02000000 /* 32MB */
-
-
-/* CPU addresses of GBUS memory spaces.  */
-#define GCS0_ADDR		0x04000000 /* GCS0 - Common SRAM (2MB) */
-#define GCS0_SIZE		0x00800000 /*   8MB */
-#define GCS1_ADDR		0x04800000 /* GCS1 - Flash ROM (8MB) */
-#define GCS1_SIZE		0x00800000 /*   8MB */
-#define GCS2_ADDR		0x07000000 /* GCS2 - I/O registers */
-#define GCS2_SIZE		0x00800000 /*   8MB */
-#define GCS5_ADDR		0x08000000 /* GCS5 - PCI bus space */
-#define GCS5_SIZE		0x02000000 /*   32MB */
-#define GCS6_ADDR		0x07800000 /* GCS6 - PCI control registers */
-#define GCS6_SIZE		0x00800000 /*   8MB */
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 		SDRAM_ADDR
-
-
-#ifdef CONFIG_ROM_KERNEL
-/* Kernel is in ROM, starting at address 0.  */
-
-#define INTV_BASE		0
-#define ROOT_FS_IMAGE_RW	0
-
-#else /* !CONFIG_ROM_KERNEL */
-/* Using RAM-kernel.  Assume some sort of boot-loader got us loaded at
-   address 0.  */
-
-#define INTV_BASE		0
-#define ROOT_FS_IMAGE_RW	1
-
-#endif /* CONFIG_ROM_KERNEL */
-
-
-/* Some misc. on-board devices.  */
-
-/* Seven-segment LED display (four digits).  */
-#define LED_ADDR(n)		(0x0FE02000 + (n))
-#define LED(n)			(*(volatile unsigned char *)LED_ADDR(n))
-#define LED_NUM_DIGITS		4
-
-
-/* On-board PIC.  */
-
-#define CB_PIC_BASE_ADDR 	0x0FE04000
-
-#define CB_PIC_INT0M_ADDR 	(CB_PIC_BASE_ADDR + 0x00)
-#define CB_PIC_INT0M      	(*(volatile u16 *)CB_PIC_INT0M_ADDR)
-#define CB_PIC_INT1M_ADDR 	(CB_PIC_BASE_ADDR + 0x10)
-#define CB_PIC_INT1M      	(*(volatile u16 *)CB_PIC_INT1M_ADDR)
-#define CB_PIC_INTR_ADDR  	(CB_PIC_BASE_ADDR + 0x20)
-#define CB_PIC_INTR       	(*(volatile u16 *)CB_PIC_INTR_ADDR)
-#define CB_PIC_INTEN_ADDR 	(CB_PIC_BASE_ADDR + 0x30)
-#define CB_PIC_INTEN      	(*(volatile u16 *)CB_PIC_INTEN_ADDR)
-
-#define CB_PIC_INT0EN        	0x0001
-#define CB_PIC_INT1EN        	0x0002
-#define CB_PIC_INT0SEL       	0x0080
-
-/* The PIC interrupts themselves.  */
-#define CB_PIC_BASE_IRQ		NUM_CPU_IRQS
-#define IRQ_CB_PIC_NUM		10
-
-/* Some specific CB_PIC interrupts. */
-#define IRQ_CB_EXTTM0		(CB_PIC_BASE_IRQ + 0)
-#define IRQ_CB_EXTSIO		(CB_PIC_BASE_IRQ + 1)
-#define IRQ_CB_TOVER		(CB_PIC_BASE_IRQ + 2)
-#define IRQ_CB_GINT0		(CB_PIC_BASE_IRQ + 3)
-#define IRQ_CB_USB		(CB_PIC_BASE_IRQ + 4)
-#define IRQ_CB_LANC		(CB_PIC_BASE_IRQ + 5)
-#define IRQ_CB_USB_VBUS_ON	(CB_PIC_BASE_IRQ + 6)
-#define IRQ_CB_USB_VBUS_OFF	(CB_PIC_BASE_IRQ + 7)
-#define IRQ_CB_EXTTM1		(CB_PIC_BASE_IRQ + 8)
-#define IRQ_CB_EXTTM2		(CB_PIC_BASE_IRQ + 9)
-
-/* The GBUS GINT1 - GINT3 (note, not GINT0!) interrupts are connected to
-   the INTP65 - INTP67 pins on the CPU.  These are shared among the GBUS
-   interrupts.  */
-#define IRQ_GINT(n)		IRQ_INTP((n) + 9)  /* 0 is unused! */
-#define IRQ_GINT_NUM		4		   /* 0 is unused! */
-
-/* The shared interrupt line from the PIC is connected to CPU pin INTP23.  */
-#define IRQ_CB_PIC		IRQ_INTP(4) /* P23 */
-
-/* Used by <asm/rte_cb.h> to derive NUM_MACH_IRQS.  */
-#define NUM_RTE_CB_IRQS		(NUM_CPU_IRQS + IRQ_CB_PIC_NUM)
-
-
-#ifndef __ASSEMBLY__
-struct cb_pic_irq_init {
-	const char *name;	/* name of interrupt type */
-
-	/* Range of kernel irq numbers for this type:
-	   BASE, BASE+INTERVAL, ..., BASE+INTERVAL*NUM  */
-	unsigned base, num, interval;
-
-	unsigned priority;	/* interrupt priority to assign */
-};
-struct hw_interrupt_type;	/* fwd decl */
-
-/* Enable interrupt handling for interrupt IRQ.  */
-extern void cb_pic_enable_irq (unsigned irq);
-/* Disable interrupt handling for interrupt IRQ.  Note that any interrupts
-   received while disabled will be delivered once the interrupt is enabled
-   again, unless they are explicitly cleared using `cb_pic_clear_pending_irq'.  */
-extern void cb_pic_disable_irq (unsigned irq);
-/* Initialize HW_IRQ_TYPES for PIC irqs described in array INITS (which is
-   terminated by an entry with the name field == 0).  */
-extern void cb_pic_init_irq_types (struct cb_pic_irq_init *inits,
-				   struct hw_interrupt_type *hw_irq_types);
-/* Initialize PIC interrupts.  */
-extern void cb_pic_init_irqs (void);
-#endif /* __ASSEMBLY__ */
-
-
-/* TL16C550C on board UART see also asm/serial.h */
-#define CB_UART_BASE    	0x0FE08000
-#define CB_UART_REG_GAP 	0x10
-#define CB_UART_CLOCK   	0x16000000
-
-/* CompactFlash setting */
-#define CB_CF_BASE     		0x0FE0C000
-#define CB_CF_CCR_ADDR 		(CB_CF_BASE+0x200)
-#define CB_CF_CCR      		(*(volatile u8 *)CB_CF_CCR_ADDR)
-#define CB_CF_REG0_ADDR		(CB_CF_BASE+0x1000)
-#define CB_CF_REG0     		(*(volatile u16 *)CB_CF_REG0_ADDR)
-#define CB_CF_STS0_ADDR		(CB_CF_BASE+0x1004)
-#define CB_CF_STS0     		(*(volatile u16 *)CB_CF_STS0_ADDR)
-#define CB_PCATA_BASE  		(CB_CF_BASE+0x800)
-#define CB_IDE_BASE    		(CB_CF_BASE+0x9F0)
-#define CB_IDE_CTRL    		(CB_CF_BASE+0xBF6)
-#define CB_IDE_REG_OFFS		0x1
-
-
-/* SMSC LAN91C111 setting */
-#if defined(CONFIG_SMC91111)
-#define CB_LANC_BASE 		0x0FE10300
-#define CONFIG_SMC16BITONLY
-#define ETH0_ADDR 		CB_LANC_BASE
-#define ETH0_IRQ 		IRQ_CB_LANC
-#endif /* CONFIG_SMC16BITONLY */
-
-
-#undef V850E_UART_PRE_CONFIGURE
-#define V850E_UART_PRE_CONFIGURE	rte_me2_cb_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void rte_me2_cb_uart_pre_configure (unsigned chan,
-					   unsigned cflags, unsigned baud);
-#endif /* __ASSEMBLY__ */
-
-/* This board supports RTS/CTS for the on-chip UART, but only for channel 0. */
-
-/* CTS for UART channel 0 is pin P22 (bit 2 of port 2).  */
-#define V850E_UART_CTS(chan)	((chan) == 0 ? !(ME2_PORT2_IO & 0x4) : 1)
-/* RTS for UART channel 0 is pin P21 (bit 1 of port 2).  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   if (chan == 0) {						      \
-		   unsigned old = ME2_PORT2_IO; 			      \
-		   if (val)						      \
-			   ME2_PORT2_IO = old & ~0x2;			      \
-		   else							      \
-			   ME2_PORT2_IO = old | 0x2;			      \
-	   }								      \
-   } while (0)
-
-
-#ifndef __ASSEMBLY__
-extern void rte_me2_cb_init_irqs (void);
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_RTE_ME2_CB_H__ */
diff --git a/include/asm-v850/rte_nb85e_cb.h b/include/asm-v850/rte_nb85e_cb.h
deleted file mode 100644
index f56591cad90..00000000000
--- a/include/asm-v850/rte_nb85e_cb.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * include/asm-v850/rte_nb85e_cb.h -- Midas labs RTE-V850/NB85E-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_RTE_NB85E_CB_H__
-#define __V850_RTE_NB85E_CB_H__
-
-#include <asm/rte_cb.h>		/* Common defs for Midas RTE-CB boards.  */
-
-
-#define PLATFORM		"rte-v850e/nb85e-cb"
-#define PLATFORM_LONG		"Midas lab RTE-V850E/NB85E-CB"
-
-#define CPU_CLOCK_FREQ		50000000 /* 50MHz */
-
-/* 1MB of onboard SRAM.  Note that the monitor ROM uses parts of this
-   for its own purposes, so care must be taken.  */
-#define SRAM_ADDR		0x03C00000
-#define SRAM_SIZE		0x00100000 /* 1MB */
-
-/* 16MB of onbard SDRAM.  */
-#define SDRAM_ADDR		0x01000000
-#define SDRAM_SIZE		0x01000000 /* 16MB */
-
-
-/* CPU addresses of GBUS memory spaces.  */
-#define GCS0_ADDR		0x00400000 /* GCS0 - Common SRAM (2MB) */
-#define GCS0_SIZE		0x00400000 /*   4MB */
-#define GCS1_ADDR		0x02000000 /* GCS1 - Flash ROM (8MB) */
-#define GCS1_SIZE		0x00800000 /*   8MB */
-#define GCS2_ADDR		0x03900000 /* GCS2 - I/O registers */
-#define GCS2_SIZE		0x00080000 /*   512KB */
-#define GCS3_ADDR		0x02800000 /* GCS3 - EXT-bus: memory space */
-#define GCS3_SIZE		0x00800000 /*   8MB */
-#define GCS4_ADDR		0x03A00000 /* GCS4 - EXT-bus: I/O space */
-#define GCS4_SIZE		0x00200000 /*   2MB */
-#define GCS5_ADDR		0x00800000 /* GCS5 - PCI bus space */
-#define GCS5_SIZE		0x00800000 /*   8MB */
-#define GCS6_ADDR		0x03980000 /* GCS6 - PCI control registers */
-#define GCS6_SIZE		0x00010000 /*   64KB */
-
-
-/* The GBUS GINT0 - GINT3 interrupts are connected to CPU interrupts 10-12.
-   These are shared among the GBUS interrupts.  */
-#define IRQ_GINT(n)		(10 + (n))
-#define IRQ_GINT_NUM		3
-
-/* Used by <asm/rte_cb.h> to derive NUM_MACH_IRQS.  */
-#define NUM_RTE_CB_IRQS		NUM_CPU_IRQS
-
-
-#ifdef CONFIG_ROM_KERNEL
-/* Kernel is in ROM, starting at address 0.  */
-
-#define INTV_BASE	0
-
-#else /* !CONFIG_ROM_KERNEL */
-/* We're using the ROM monitor.  */
-
-/* The chip's real interrupt vectors are in ROM, but they jump to a
-   secondary interrupt vector table in RAM.  */
-#define INTV_BASE		0x03CF8000
-
-/* Scratch memory used by the ROM monitor, which shouldn't be used by
-   linux (except for the alternate interrupt vector area, defined
-   above).  */
-#define MON_SCRATCH_ADDR	0x03CE8000
-#define MON_SCRATCH_SIZE	0x00018000 /* 96KB */
-
-#endif /* CONFIG_ROM_KERNEL */
-
-
-/* Some misc. on-board devices.  */
-
-/* Seven-segment LED display (two digits).  Write-only.  */
-#define LED_ADDR(n)	(0x03802000 + (n))
-#define LED(n)		(*(volatile unsigned char *)LED_ADDR(n))
-#define LED_NUM_DIGITS	4
-
-
-/* Override the basic TEG UART pre-initialization so that we can
-   initialize extra stuff.  */
-#undef V850E_UART_PRE_CONFIGURE	/* should be defined by <asm/teg.h> */
-#define V850E_UART_PRE_CONFIGURE	rte_nb85e_cb_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void rte_nb85e_cb_uart_pre_configure (unsigned chan,
-					     unsigned cflags, unsigned baud);
-#endif
-
-/* This board supports RTS/CTS for the on-chip UART. */
-
-/* CTS is pin P00.  */
-#define V850E_UART_CTS(chan)	(! (TEG_PORT0_IO & 0x1))
-/* RTS is pin P02.  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   unsigned old = TEG_PORT0_IO;					      \
-	   TEG_PORT0_IO = val ? (old & ~0x4) : (old | 0x4);		      \
-   } while (0)
-
-
-#endif /* __V850_RTE_NB85E_CB_H__ */
diff --git a/include/asm-v850/scatterlist.h b/include/asm-v850/scatterlist.h
deleted file mode 100644
index 02d27b3fb06..00000000000
--- a/include/asm-v850/scatterlist.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * include/asm-v850/scatterlist.h
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SCATTERLIST_H__
-#define __V850_SCATTERLIST_H__
-
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-	unsigned long	sg_magic;
-#endif
-	unsigned long	page_link;
-	unsigned	offset;
-	dma_addr_t	dma_address;
-	unsigned	length;
-};
-
-#define ISA_DMA_THRESHOLD	(~0UL)
-
-#endif /* __V850_SCATTERLIST_H__ */
diff --git a/include/asm-v850/sections.h b/include/asm-v850/sections.h
deleted file mode 100644
index e0238253a0d..00000000000
--- a/include/asm-v850/sections.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_SECTIONS_H__
-#define __V850_SECTIONS_H__
-
-#include <asm-generic/sections.h>
-
-#endif /* __V850_SECTIONS_H__ */
diff --git a/include/asm-v850/segment.h b/include/asm-v850/segment.h
deleted file mode 100644
index 5e2b15dcf3d..00000000000
--- a/include/asm-v850/segment.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __V850_SEGMENT_H__
-#define __V850_SEGMENT_H__
-
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long mm_segment_t;	/* domain register */
-
-#endif /* !__ASSEMBLY__ */
-
-
-#define __KERNEL_CS	0x0
-#define __KERNEL_DS	0x0
-
-#define __USER_CS	0x1
-#define __USER_DS	0x1
-
-#define KERNEL_DS	__KERNEL_DS
-#define KERNEL_CS	__KERNEL_CS
-#define USER_DS		__USER_DS
-#define USER_CS		__USER_CS
-
-#define segment_eq(a,b)	((a) == (b))
-
-#define get_ds()	(KERNEL_DS)
-#define get_fs()	(USER_DS)
-
-#define set_fs(seg)	((void)(seg))
-
-
-#define copy_segments(task, mm)	((void)((void)(task), (mm)))
-#define release_segments(mm)	((void)(mm))
-#define forget_segments()	((void)0)
-
-
-#endif /* __V850_SEGMENT_H__ */
diff --git a/include/asm-v850/semaphore.h b/include/asm-v850/semaphore.h
deleted file mode 100644
index d9b2034ed1d..00000000000
--- a/include/asm-v850/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-v850/sembuf.h b/include/asm-v850/sembuf.h
deleted file mode 100644
index 1622231a8b8..00000000000
--- a/include/asm-v850/sembuf.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __V850_SEMBUF_H__
-#define __V850_SEMBUF_H__
-
-/* 
- * The semid64_ds structure for v850 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct semid64_ds {
-	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	__kernel_time_t	sem_otime;		/* last semop time */
-	unsigned long	__unused1;
-	__kernel_time_t	sem_ctime;		/* last change time */
-	unsigned long	__unused2;
-	unsigned long	sem_nsems;		/* no. of semaphores in array */
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* __V850_SEMBUF_H__ */
diff --git a/include/asm-v850/serial.h b/include/asm-v850/serial.h
deleted file mode 100644
index 36d8f4cbbf3..00000000000
--- a/include/asm-v850/serial.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1999 by Ralf Baechle
- * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
- */ 
-
-#ifdef CONFIG_RTE_CB_ME2
-
-#include <asm/rte_me2_cb.h>
-
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
-
-#define irq_cannonicalize(x) (x)
-#define BASE_BAUD	250000	/* (16MHz / (16 * 38400)) * 9600 */
-#define SERIAL_PORT_DFNS \
-   { 0, BASE_BAUD, CB_UART_BASE, IRQ_CB_EXTSIO, STD_COM_FLAGS },
-
-/* Redefine UART register offsets.  */
-#undef UART_RX
-#undef UART_TX
-#undef UART_DLL
-#undef UART_TRG
-#undef UART_DLM
-#undef UART_IER
-#undef UART_FCTR
-#undef UART_IIR
-#undef UART_FCR
-#undef UART_EFR
-#undef UART_LCR
-#undef UART_MCR
-#undef UART_LSR
-#undef UART_MSR
-#undef UART_SCR
-#undef UART_EMSR
-
-#define UART_RX		(0 * CB_UART_REG_GAP)
-#define UART_TX		(0 * CB_UART_REG_GAP)
-#define UART_DLL	(0 * CB_UART_REG_GAP)
-#define UART_TRG	(0 * CB_UART_REG_GAP)
-#define UART_DLM	(1 * CB_UART_REG_GAP)
-#define UART_IER	(1 * CB_UART_REG_GAP)
-#define UART_FCTR	(1 * CB_UART_REG_GAP)
-#define UART_IIR	(2 * CB_UART_REG_GAP)
-#define UART_FCR	(2 * CB_UART_REG_GAP)
-#define UART_EFR	(2 * CB_UART_REG_GAP)
-#define UART_LCR	(3 * CB_UART_REG_GAP)
-#define UART_MCR	(4 * CB_UART_REG_GAP)
-#define UART_LSR	(5 * CB_UART_REG_GAP)
-#define UART_MSR	(6 * CB_UART_REG_GAP)
-#define UART_SCR	(7 * CB_UART_REG_GAP)
-#define UART_EMSR	(7 * CB_UART_REG_GAP)
-
-#endif /* CONFIG_RTE_CB_ME2 */
diff --git a/include/asm-v850/setup.h b/include/asm-v850/setup.h
deleted file mode 100644
index c48a9b97d05..00000000000
--- a/include/asm-v850/setup.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _V850_SETUP_H
-#define _V850_SETUP_H
-
-#define COMMAND_LINE_SIZE	512
-
-#endif /* __SETUP_H */
diff --git a/include/asm-v850/shmbuf.h b/include/asm-v850/shmbuf.h
deleted file mode 100644
index 3d085c9c418..00000000000
--- a/include/asm-v850/shmbuf.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef __V850_SHMBUF_H__
-#define __V850_SHMBUF_H__
-
-/* 
- * The shmid64_ds structure for v850 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	unsigned long		__unused1;
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	unsigned long		__unused2;
-	__kernel_time_t		shm_ctime;	/* last change time */
-	unsigned long		__unused3;
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned long		shm_nattch;	/* no. of current attaches */
-	unsigned long		__unused4;
-	unsigned long		__unused5;
-};
-
-struct shminfo64 {
-	unsigned long	shmmax;
-	unsigned long	shmmin;
-	unsigned long	shmmni;
-	unsigned long	shmseg;
-	unsigned long	shmall;
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* __V850_SHMBUF_H__ */
diff --git a/include/asm-v850/shmparam.h b/include/asm-v850/shmparam.h
deleted file mode 100644
index 7dcb6739073..00000000000
--- a/include/asm-v850/shmparam.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_SHMPARAM_H__
-#define __V850_SHMPARAM_H__
-
-#define	SHMLBA		PAGE_SIZE	/* attach addr a multiple of this */
-
-#endif /* __V850_SHMPARAM_H__ */
diff --git a/include/asm-v850/sigcontext.h b/include/asm-v850/sigcontext.h
deleted file mode 100644
index e0890f6f4bc..00000000000
--- a/include/asm-v850/sigcontext.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * include/asm-v850/sigcontext.h -- Signal contexts
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIGCONTEXT_H__
-#define __V850_SIGCONTEXT_H__
-
-#include <asm/ptrace.h>
-
-struct sigcontext
-{
-	struct pt_regs 	regs;
-	unsigned long	oldmask;
-};
-
-#endif /* __V850_SIGCONTEXT_H__ */
diff --git a/include/asm-v850/siginfo.h b/include/asm-v850/siginfo.h
deleted file mode 100644
index 7eb94703dce..00000000000
--- a/include/asm-v850/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_SIGINFO_H__
-#define __V850_SIGINFO_H__
-
-#include <asm-generic/siginfo.h>
-
-#endif /* __V850_SIGINFO_H__ */
diff --git a/include/asm-v850/signal.h b/include/asm-v850/signal.h
deleted file mode 100644
index a38df0834bb..00000000000
--- a/include/asm-v850/signal.h
+++ /dev/null
@@ -1,168 +0,0 @@
-#ifndef __V850_SIGNAL_H__
-#define __V850_SIGNAL_H__
-
-#include <linux/types.h>
-
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-
-#ifdef __KERNEL__
-
-/* Most things should be clean enough to redefine this at will, if care
-   is taken to make libc match.  */
-#define _NSIG		64
-#define _NSIG_BPW	32
-#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
-
-typedef unsigned long old_sigset_t;		/* at least 32 bits */
-
-typedef struct {
-	unsigned long sig[_NSIG_WORDS];
-} sigset_t;
-
-#else /* !__KERNEL__ */
-
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-#define NSIG		32
-typedef unsigned long sigset_t;
-
-#endif /* __KERNEL__ */
-
-
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-#define SIGBUS		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGUSR1		10
-#define SIGSEGV		11
-#define SIGUSR2		12
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGSTKFLT	16
-#define SIGCHLD		17
-#define SIGCONT		18
-#define SIGSTOP		19
-#define SIGTSTP		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGURG		23
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGIO		29
-#define SIGPOLL		SIGIO
-/*
-#define SIGLOST		29
-*/
-#define SIGPWR		30
-#define SIGSYS		31
-#define	SIGUNUSED	31
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN	32
-#define SIGRTMAX	_NSIG
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-#define SA_RESTORER	0x04000000
-
-/* 
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	2048
-#define SIGSTKSZ	8192
-
-#include <asm-generic/signal.h>
-
-#ifdef __KERNEL__
-
-struct old_sigaction {
-	__sighandler_t sa_handler;
-	old_sigset_t sa_mask;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-};
-
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
-
-struct k_sigaction {
-	struct sigaction sa;
-};
-
-#else /* !__KERNEL__ */
-
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-struct sigaction {
-	union {
-	  __sighandler_t _sa_handler;
-	  void (*_sa_sigaction)(int, struct siginfo *, void *);
-	} _u;
-	sigset_t sa_mask;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-};
-
-#define sa_handler	_u._sa_handler
-#define sa_sigaction	_u._sa_sigaction
-
-#endif /* __KERNEL__ */
-
-
-typedef struct sigaltstack {
-	void *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
-
-#include <asm/sigcontext.h>
-#undef __HAVE_ARCH_SIG_BITOPS
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#endif /* __KERNEL__ */
-
-#endif /* __V850_SIGNAL_H__ */
diff --git a/include/asm-v850/sim.h b/include/asm-v850/sim.h
deleted file mode 100644
index 026932d476c..00000000000
--- a/include/asm-v850/sim.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * include/asm-v850/sim.h -- Machine-dependent defs for GDB v850e simulator
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIM_H__
-#define __V850_SIM_H__
-
-
-#define CPU_ARCH		"v850e"
-#define CPU_MODEL		"v850e"
-#define CPU_MODEL_LONG		"NEC V850E"
-#define PLATFORM		"gdb/v850e"
-#define PLATFORM_LONG		"GDB V850E simulator"
-
-
-/* We use a weird value for RAM, not just 0, for testing purposes.
-   These must match the values used in the linker script.  */
-#define RAM_ADDR		0x8F000000
-#define RAM_SIZE		0x03000000
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 		RAM_ADDR
-
-
-/* For <asm/entry.h> */
-/* `R0 RAM', used for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  On real
-   processors, this usually is on-chip RAM, but here we just
-   choose an arbitrary address that meets the above constraint.  */
-#define R0_RAM_ADDR		0xFFFFF000
-
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS		6
-
-
-#endif /* __V850_SIM_H__ */
diff --git a/include/asm-v850/sim85e2.h b/include/asm-v850/sim85e2.h
deleted file mode 100644
index 8b4d6974066..00000000000
--- a/include/asm-v850/sim85e2.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * include/asm-v850/sim85e2.h -- Machine-dependent defs for
- *	V850E2 RTL simulator
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIM85E2_H__
-#define __V850_SIM85E2_H__
-
-
-#include <asm/v850e2.h>		/* Based on V850E2 core.  */
-
-
-/* Various memory areas supported by the simulator.
-   These should match the corresponding definitions in the linker script.  */
-
-/* `instruction RAM'; instruction fetches are much faster from IRAM than
-   from DRAM.  */
-#define IRAM_ADDR		0
-#define IRAM_SIZE		0x00100000 /* 1MB */
-/* `data RAM', below and contiguous with the I/O space.
-   Data fetches are much faster from DRAM than from IRAM.  */
-#define DRAM_ADDR		0xfff00000
-#define DRAM_SIZE		0x000ff000 /* 1020KB */
-/* `external ram'.  Unlike the above RAM areas, this memory is cached,
-   so both instruction and data fetches should be (mostly) fast --
-   however, currently only write-through caching is supported, so writes
-   to ERAM will be slow.  */
-#define ERAM_ADDR		0x00100000
-#define ERAM_SIZE		0x07f00000 /* 127MB (max) */
-/* Dynamic RAM; uses memory controller.  */
-#define SDRAM_ADDR		0x10000000
-#define SDRAM_SIZE		0x01000000 /* 16MB */
-
-
-/* Simulator specific control registers.  */
-/* NOTHAL controls whether the simulator will stop at a `halt' insn.  */
-#define SIM85E2_NOTHAL_ADDR	0xffffff22
-#define SIM85E2_NOTHAL		(*(volatile u8 *)SIM85E2_NOTHAL_ADDR)
-/* The simulator will stop N cycles after N is written to SIMFIN.  */
-#define SIM85E2_SIMFIN_ADDR	0xffffff24
-#define SIM85E2_SIMFIN		(*(volatile u16 *)SIM85E2_SIMFIN_ADDR)
-
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS		64
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET		SDRAM_ADDR
-
-
-/* For <asm/entry.h> */
-/* `R0 RAM', used for a few miscellaneous variables that must be accessible
-   using a load instruction relative to R0.  The sim85e2 simulator
-   actually puts 1020K of RAM from FFF00000 to FFFFF000, so we arbitarily
-   choose a small portion at the end of that.  */
-#define R0_RAM_ADDR		0xFFFFE000
-
-
-#endif /* __V850_SIM85E2_H__ */
diff --git a/include/asm-v850/sim85e2c.h b/include/asm-v850/sim85e2c.h
deleted file mode 100644
index eee543ff3af..00000000000
--- a/include/asm-v850/sim85e2c.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * include/asm-v850/sim85e2c.h -- Machine-dependent defs for
- *	V850E2 RTL simulator
- *
- *  Copyright (C) 2002  NEC Corporation
- *  Copyright (C) 2002  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIM85E2C_H__
-#define __V850_SIM85E2C_H__
-
-/* Use generic sim85e2 settings, other than the various names.  */
-#include <asm/sim85e2.h>
-
-#define CPU_MODEL	"v850e2"
-#define CPU_MODEL_LONG	"NEC V850E2"
-#define PLATFORM	"sim85e2c"
-#define PLATFORM_LONG	"SIM85E2C V850E2 simulator"
-
-#endif /* __V850_SIM85E2C_H__ */
diff --git a/include/asm-v850/sim85e2s.h b/include/asm-v850/sim85e2s.h
deleted file mode 100644
index ee066d5d3c5..00000000000
--- a/include/asm-v850/sim85e2s.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * include/asm-v850/sim85e2s.h -- Machine-dependent defs for
- *	V850E2 RTL simulator
- *
- *  Copyright (C) 2003  NEC Electronics Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIM85E2S_H__
-#define __V850_SIM85E2S_H__
-
-#include <asm/sim85e2.h>	/* Use generic sim85e2 settings.  */
-#if 0
-#include <asm/v850e2_cache.h>	/* + cache */
-#endif
-
-#define CPU_MODEL	"v850e2"
-#define CPU_MODEL_LONG	"NEC V850E2"
-#define PLATFORM	"sim85e2s"
-#define PLATFORM_LONG	"SIM85E2S V850E2 simulator"
-
-#endif /* __V850_SIM85E2S_H__ */
diff --git a/include/asm-v850/simsyscall.h b/include/asm-v850/simsyscall.h
deleted file mode 100644
index 4a19d5ae9d1..00000000000
--- a/include/asm-v850/simsyscall.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * include/asm-v850/simsyscall.h -- `System calls' under the v850e emulator
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIMSYSCALL_H__
-#define __V850_SIMSYSCALL_H__
-
-#define V850_SIM_SYS_exit(a...)		V850_SIM_SYSCALL_1 (1	, ##a)
-#define V850_SIM_SYS_fork(a...)		V850_SIM_SYSCALL_0 (2	, ##a)
-#define V850_SIM_SYS_read(a...)		V850_SIM_SYSCALL_3 (3	, ##a)
-#define V850_SIM_SYS_write(a...)	V850_SIM_SYSCALL_3 (4	, ##a)
-#define V850_SIM_SYS_open(a...)		V850_SIM_SYSCALL_2 (5	, ##a)
-#define V850_SIM_SYS_close(a...)	V850_SIM_SYSCALL_1 (6	, ##a)
-#define V850_SIM_SYS_wait4(a...)	V850_SIM_SYSCALL_4 (7	, ##a)
-/* #define V850_SIM_SYS_creat(a...)	V850_SIM_SYSCALL_1 (8	, ##a) */
-/* #define V850_SIM_SYS_link(a...)	V850_SIM_SYSCALL_1 (9	, ##a) */
-/* #define V850_SIM_SYS_unlink(a...)	V850_SIM_SYSCALL_1 (10	, ##a) */
-#define V850_SIM_SYS_execv(a...)	V850_SIM_SYSCALL_2 (11	, ##a)
-/* #define V850_SIM_SYS_chdir(a...)	V850_SIM_SYSCALL_1 (12	, ##a) */
-/* #define V850_SIM_SYS_mknod(a...)	V850_SIM_SYSCALL_1 (14	, ##a) */
-#define V850_SIM_SYS_chmod(a...)	V850_SIM_SYSCALL_2 (15	, ##a)
-#define V850_SIM_SYS_chown(a...)	V850_SIM_SYSCALL_2 (16	, ##a)
-#define V850_SIM_SYS_lseek(a...)	V850_SIM_SYSCALL_3 (19	, ##a)
-/* #define V850_SIM_SYS_getpid(a...)	V850_SIM_SYSCALL_1 (20	, ##a) */
-/* #define V850_SIM_SYS_isatty(a...)	V850_SIM_SYSCALL_1 (21	, ##a) */
-/* #define V850_SIM_SYS_fstat(a...)	V850_SIM_SYSCALL_1 (22	, ##a) */
-#define V850_SIM_SYS_time(a...)		V850_SIM_SYSCALL_1 (23	, ##a)
-#define V850_SIM_SYS_poll(a...)		V850_SIM_SYSCALL_3 (24	, ##a)
-#define V850_SIM_SYS_stat(a...)		V850_SIM_SYSCALL_2 (38	, ##a)
-#define V850_SIM_SYS_pipe(a...)		V850_SIM_SYSCALL_1 (42	, ##a)
-#define V850_SIM_SYS_times(a...)	V850_SIM_SYSCALL_1 (43	, ##a)
-#define V850_SIM_SYS_execve(a...)	V850_SIM_SYSCALL_3 (59	, ##a)
-#define V850_SIM_SYS_gettimeofday(a...)	V850_SIM_SYSCALL_2 (116	, ##a)
-/* #define V850_SIM_SYS_utime(a...)	V850_SIM_SYSCALL_2 (201	, ##a) */
-/* #define V850_SIM_SYS_wait(a...)	V850_SIM_SYSCALL_1 (202	, ##a) */
-
-#define V850_SIM_SYS_make_raw(a...)	V850_SIM_SYSCALL_1 (1024 , ##a)
-
-
-#define V850_SIM_SYSCALL_0(_call)					      \
-({									      \
-	register int call __asm__ ("r6") = _call;			      \
-	register int rval __asm__ ("r10");				      \
-	__asm__ __volatile__ ("trap 31"					      \
-			      : "=r" (rval)				      \
-			      : "r" (call)				      \
-			      : "r11", "memory");			      \
-	rval;								      \
-})
-#define V850_SIM_SYSCALL_1(_call, _arg0)				      \
-({									      \
-	register int call __asm__ ("r6") = _call;			      \
-	register long arg0 __asm__ ("r7") = (long)_arg0;		      \
-	register int rval __asm__ ("r10");				      \
-	__asm__ __volatile__ ("trap 31"					      \
-			      : "=r" (rval)				      \
-			      : "r" (call), "r" (arg0)			      \
-			      : "r11", "memory");			      \
-	rval;								      \
-})
-#define V850_SIM_SYSCALL_2(_call, _arg0, _arg1)				      \
-({									      \
-	register int call __asm__ ("r6") = _call;			      \
-	register long arg0 __asm__ ("r7") = (long)_arg0;		      \
-	register long arg1 __asm__ ("r8") = (long)_arg1;		      \
-	register int rval __asm__ ("r10");				      \
-	__asm__ __volatile__ ("trap 31"					      \
-			      : "=r" (rval)				      \
-			      : "r" (call), "r" (arg0), "r" (arg1)	      \
-			      : "r11", "memory");			      \
-	rval;								      \
-})
-#define V850_SIM_SYSCALL_3(_call, _arg0, _arg1, _arg2)			      \
-({									      \
-	register int call __asm__ ("r6") = _call;			      \
-	register long arg0 __asm__ ("r7") = (long)_arg0;		      \
-	register long arg1 __asm__ ("r8") = (long)_arg1;		      \
-	register long arg2 __asm__ ("r9") = (long)_arg2;		      \
-	register int rval __asm__ ("r10");				      \
-	__asm__ __volatile__ ("trap 31"					      \
-			      : "=r" (rval)				      \
-			      : "r" (call), "r" (arg0), "r" (arg1), "r" (arg2)\
-			      : "r11", "memory");			      \
-	rval;								      \
-})
-
-#define V850_SIM_SYSCALL(call, args...) \
-   V850_SIM_SYS_##call (args)
-
-#endif /* __V850_SIMSYSCALL_H__ */
diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h
deleted file mode 100644
index e199a2bf12a..00000000000
--- a/include/asm-v850/socket.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef __V850_SOCKET_H__
-#define __V850_SOCKET_H__
-
-#include <asm/sockios.h>
-
-/* For setsockoptions(2) */
-#define SOL_SOCKET	1
-
-#define SO_DEBUG	1
-#define SO_REUSEADDR	2
-#define SO_TYPE		3
-#define SO_ERROR	4
-#define SO_DONTROUTE	5
-#define SO_BROADCAST	6
-#define SO_SNDBUF	7
-#define SO_RCVBUF	8
-#define SO_SNDBUFFORCE	32
-#define SO_RCVBUFFORCE	33
-#define SO_KEEPALIVE	9
-#define SO_OOBINLINE	10
-#define SO_NO_CHECK	11
-#define SO_PRIORITY	12
-#define SO_LINGER	13
-#define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
-#define SO_PASSCRED	16
-#define SO_PEERCRED	17
-#define SO_RCVLOWAT	18
-#define SO_SNDLOWAT	19
-#define SO_RCVTIMEO	20
-#define SO_SNDTIMEO	21
-
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION		22
-#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
-#define SO_SECURITY_ENCRYPTION_NETWORK		24
-
-#define SO_BINDTODEVICE	25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER        26
-#define SO_DETACH_FILTER        27
-
-#define SO_PEERNAME             28
-#define SO_TIMESTAMP		29
-#define SCM_TIMESTAMP		SO_TIMESTAMP
-
-#define SO_ACCEPTCONN		30
-
-#define SO_PEERSEC		31
-#define SO_PASSSEC		34
-#define SO_TIMESTAMPNS		35
-#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
-
-#define SO_MARK			36
-
-#endif /* __V850_SOCKET_H__ */
diff --git a/include/asm-v850/sockios.h b/include/asm-v850/sockios.h
deleted file mode 100644
index 823e106e6cd..00000000000
--- a/include/asm-v850/sockios.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __V850_SOCKIOS_H__
-#define __V850_SOCKIOS_H__
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 	0x8901
-#define SIOCSPGRP	0x8902
-#define FIOGETOWN	0x8903
-#define SIOCGPGRP	0x8904
-#define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
-#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
-
-#endif /* __V850_SOCKIOS_H__ */
diff --git a/include/asm-v850/stat.h b/include/asm-v850/stat.h
deleted file mode 100644
index c68c60d06e2..00000000000
--- a/include/asm-v850/stat.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * include/asm-v850/stat.h -- v850 stat structure
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_STAT_H__
-#define __V850_STAT_H__
-
-#include <asm/posix_types.h>
-
-struct stat {
-	unsigned int	st_dev;
-	unsigned long	st_ino;
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-	unsigned int 	st_uid;
-	unsigned int 	st_gid;
-	unsigned int	st_rdev;
-	long		st_size;
-	unsigned long	st_blksize;
-	unsigned long	st_blocks;
-	unsigned long	st_atime;
-	unsigned long	__unused1;
-	unsigned long	st_mtime;
-	unsigned long	__unused2;
-	unsigned long	st_ctime;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-	unsigned long	__unused5;
-};
-
-struct stat64 {
-	unsigned long long	st_dev;
-	unsigned long	__unused1;
-
-	unsigned long long	st_ino;
-
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-
-	unsigned int	st_uid;
-	unsigned int	st_gid;
-
-	unsigned long long	st_rdev;
-	unsigned long	__unused3;
-
-	long long	st_size;
-	unsigned long	st_blksize;
-
-	unsigned long	st_blocks; /* No. of 512-byte blocks allocated */
-	unsigned long	__unused4; /* future possible st_blocks high bits */
-
-	unsigned long	st_atime;
-	unsigned long	st_atime_nsec;
-
-	unsigned long	st_mtime;
-	unsigned long	st_mtime_nsec;
-
-	unsigned long	st_ctime;
-	unsigned long	st_ctime_nsec;
-
-	unsigned long	__unused8;
-};
-
-#endif /* __V850_STAT_H__ */
diff --git a/include/asm-v850/statfs.h b/include/asm-v850/statfs.h
deleted file mode 100644
index ea1596607f2..00000000000
--- a/include/asm-v850/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_STATFS_H__
-#define __V850_STATFS_H__
-
-#include <asm-generic/statfs.h>
-
-#endif /* __V850_STATFS_H__ */
diff --git a/include/asm-v850/string.h b/include/asm-v850/string.h
deleted file mode 100644
index 478e234789d..00000000000
--- a/include/asm-v850/string.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * include/asm-v850/string.h -- Architecture specific string routines
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_STRING_H__
-#define __V850_STRING_H__
-
-#define __HAVE_ARCH_MEMCPY
-#define __HAVE_ARCH_MEMSET
-#define __HAVE_ARCH_MEMMOVE
-
-extern void *memcpy (void *, const void *, __kernel_size_t);
-extern void *memset (void *, int, __kernel_size_t);
-extern void *memmove (void *, const void *, __kernel_size_t);
-
-#endif /* __V850_STRING_H__ */
diff --git a/include/asm-v850/system.h b/include/asm-v850/system.h
deleted file mode 100644
index 7daf1fdee11..00000000000
--- a/include/asm-v850/system.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * include/asm-v850/system.h -- Low-level interrupt/thread ops
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SYSTEM_H__
-#define __V850_SYSTEM_H__
-
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-
-/*
- * switch_to(n) should switch tasks to task ptr, first checking that
- * ptr isn't the current task, in which case it does nothing.
- */
-struct thread_struct;
-extern void *switch_thread (struct thread_struct *last,
-			    struct thread_struct *next);
-#define switch_to(prev,next,last)					      \
-  do {									      \
-        if (prev != next) {						      \
- 		(last) = switch_thread (&prev->thread, &next->thread);	      \
-	}								      \
-  } while (0)
-
-
-/* Enable/disable interrupts.  */
-#define local_irq_enable()	__asm__ __volatile__ ("ei")
-#define local_irq_disable()	__asm__ __volatile__ ("di")
-
-#define local_save_flags(flags) \
-  __asm__ __volatile__ ("stsr %1, %0" : "=r" (flags) : "i" (SR_PSW))
-#define local_restore_flags(flags) \
-  __asm__ __volatile__ ("ldsr %0, %1" :: "r" (flags), "i" (SR_PSW))
-
-/* For spinlocks etc */
-#define	local_irq_save(flags) \
-  do { local_save_flags (flags); local_irq_disable (); } while (0) 
-#define local_irq_restore(flags) \
-  local_restore_flags (flags);
-
-
-static inline int irqs_disabled (void)
-{
-	unsigned flags;
-	local_save_flags (flags);
-	return !!(flags & 0x20);
-}
-
-
-/*
- * Force strict CPU ordering.
- * Not really required on v850...
- */
-#define nop()			__asm__ __volatile__ ("nop")
-#define mb()			__asm__ __volatile__ ("" ::: "memory")
-#define rmb()			mb ()
-#define wmb()			mb ()
-#define read_barrier_depends()	((void)0)
-#define set_mb(var, value)	do { xchg (&var, value); } while (0)
-
-#define smp_mb()	mb ()
-#define smp_rmb()	rmb ()
-#define smp_wmb()	wmb ()
-#define smp_read_barrier_depends()	read_barrier_depends()
-
-#define xchg(ptr, with) \
-  ((__typeof__ (*(ptr)))__xchg ((unsigned long)(with), (ptr), sizeof (*(ptr))))
-
-static inline unsigned long __xchg (unsigned long with,
-				    __volatile__ void *ptr, int size)
-{
-	unsigned long tmp, flags;
-
-	local_irq_save (flags);
-
-	switch (size) {
-	case 1:
-		tmp = *(unsigned char *)ptr;
-		*(unsigned char *)ptr = with;
-		break;
-	case 2:
-		tmp = *(unsigned short *)ptr;
-		*(unsigned short *)ptr = with;
-		break;
-	case 4:
-		tmp = *(unsigned long *)ptr;
-		*(unsigned long *)ptr = with;
-		break;
-	}
-
-	local_irq_restore (flags);
-
-	return tmp;
-}
-
-#include <asm-generic/cmpxchg-local.h>
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)				  	       \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
-			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-
-#ifndef CONFIG_SMP
-#include <asm-generic/cmpxchg.h>
-#endif
-
-#define arch_align_stack(x) (x)
-
-#endif /* __V850_SYSTEM_H__ */
diff --git a/include/asm-v850/teg.h b/include/asm-v850/teg.h
deleted file mode 100644
index acc8c7d9532..00000000000
--- a/include/asm-v850/teg.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * include/asm-v850/teg.h -- NB85E-TEG cpu chip
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_TEG_H__
-#define __V850_TEG_H__
-
-
-/* The TEG uses the V850E cpu core.  */
-#include <asm/v850e.h>
-#include <asm/v850e_cache.h>
-
-
-#define CPU_MODEL	"v850e/nb85e-teg"
-#define CPU_MODEL_LONG	"NEC V850E/NB85E TEG"
-
-
-/* For <asm/entry.h> */
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  On the NB85E/TEG,
-   There's 60KB of iRAM starting at 0xFFFF0000, however we need the base
-   address to be addressable by a 16-bit signed offset, so we only use the
-   second half of it starting from 0xFFFF8000.  */
-#define R0_RAM_ADDR			0xFFFF8000
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).
-   Some of these are parameterized even though there's only a single
-   interrupt, for compatibility with some generic code that works on other
-   processor models.  */
-#define IRQ_INTCMD(n)	6	/* interval timer interrupt */
-#define IRQ_INTCMD_NUM	1
-#define IRQ_INTSER(n)	16	/* UART reception error */
-#define IRQ_INTSER_NUM	1
-#define IRQ_INTSR(n)	17	/* UART reception completion */
-#define IRQ_INTSR_NUM	1
-#define IRQ_INTST(n)	18	/* UART transmission completion */
-#define IRQ_INTST_NUM	1
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS	64
-
-
-/* TEG UART details.  */
-#define V850E_UART_BASE_ADDR(n)		(0xFFFFF600 + 0x10 * (n))
-#define V850E_UART_ASIM_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x0)
-#define V850E_UART_ASIS_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x2)
-#define V850E_UART_ASIF_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x4)
-#define V850E_UART_CKSR_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x6)
-#define V850E_UART_BRGC_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x8)
-#define V850E_UART_TXB_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0xA)
-#define V850E_UART_RXB_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0xC)
-#define V850E_UART_NUM_CHANNELS		1
-#define V850E_UART_BASE_FREQ		CPU_CLOCK_FREQ
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	teg_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void teg_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud);
-#endif
-
-
-/* The TEG RTPU.  */
-#define V850E_RTPU_BASE_ADDR		0xFFFFF210
-
-
-/* TEG series timer D details.  */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF210
-#define V850E_TIMER_D_TMCD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_TMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x4)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x8)
-#define V850E_TIMER_D_BASE_FREQ		CPU_CLOCK_FREQ
-
-
-/* `Interrupt Source Select' control register.  */
-#define TEG_ISS_ADDR			0xFFFFF7FA
-#define TEG_ISS				(*(volatile u8 *)TEG_ISS_ADDR)
-
-/* Port 0 I/O register (bits 0-3 used).  */
-#define TEG_PORT0_IO_ADDR		0xFFFFF7F2
-#define TEG_PORT0_IO			(*(volatile u8 *)TEG_PORT0_IO_ADDR)
-/* Port 0 control register (bits 0-3 control mode, 0 = output, 1 = input).  */
-#define TEG_PORT0_PM_ADDR		0xFFFFF7F4
-#define TEG_PORT0_PM			(*(volatile u8 *)TEG_PORT0_PM_ADDR)
-
-
-#ifndef __ASSEMBLY__
-extern void teg_init_irqs (void);
-#endif
-
-
-#endif /* __V850_TEG_H__ */
diff --git a/include/asm-v850/termbits.h b/include/asm-v850/termbits.h
deleted file mode 100644
index 295d7bf6945..00000000000
--- a/include/asm-v850/termbits.h
+++ /dev/null
@@ -1,200 +0,0 @@
-#ifndef __V850_TERMBITS_H__
-#define __V850_TERMBITS_H__
-
-#include <linux/posix_types.h>
-
-typedef unsigned char	cc_t;
-typedef unsigned int	speed_t;
-typedef unsigned int	tcflag_t;
-
-#define NCCS 19
-struct termios {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-};
-
-struct termios2 {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-	speed_t c_ispeed;		/* input speed */
-	speed_t c_ospeed;		/* output speed */
-};
-
-struct ktermios {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-	speed_t c_ispeed;		/* input speed */
-	speed_t c_ospeed;		/* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-
-/* c_iflag bits */
-#define IGNBRK	0000001
-#define BRKINT	0000002
-#define IGNPAR	0000004
-#define PARMRK	0000010
-#define INPCK	0000020
-#define ISTRIP	0000040
-#define INLCR	0000100
-#define IGNCR	0000200
-#define ICRNL	0000400
-#define IUCLC	0001000
-#define IXON	0002000
-#define IXANY	0004000
-#define IXOFF	0010000
-#define IMAXBEL	0020000
-#define IUTF8	0040000
-
-/* c_oflag bits */
-#define OPOST	0000001
-#define OLCUC	0000002
-#define ONLCR	0000004
-#define OCRNL	0000010
-#define ONOCR	0000020
-#define ONLRET	0000040
-#define OFILL	0000100
-#define OFDEL	0000200
-#define NLDLY	0000400
-#define   NL0	0000000
-#define   NL1	0000400
-#define CRDLY	0003000
-#define   CR0	0000000
-#define   CR1	0001000
-#define   CR2	0002000
-#define   CR3	0003000
-#define TABDLY	0014000
-#define   TAB0	0000000
-#define   TAB1	0004000
-#define   TAB2	0010000
-#define   TAB3	0014000
-#define   XTABS	0014000
-#define BSDLY	0020000
-#define   BS0	0000000
-#define   BS1	0020000
-#define VTDLY	0040000
-#define   VT0	0000000
-#define   VT1	0040000
-#define FFDLY	0100000
-#define   FF0	0000000
-#define   FF1	0100000
-
-/* c_cflag bit meaning */
-#define CBAUD	0010017
-#define  B0	0000000		/* hang up */
-#define  B50	0000001
-#define  B75	0000002
-#define  B110	0000003
-#define  B134	0000004
-#define  B150	0000005
-#define  B200	0000006
-#define  B300	0000007
-#define  B600	0000010
-#define  B1200	0000011
-#define  B1800	0000012
-#define  B2400	0000013
-#define  B4800	0000014
-#define  B9600	0000015
-#define  B19200	0000016
-#define  B38400	0000017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE	0000060
-#define   CS5	0000000
-#define   CS6	0000020
-#define   CS7	0000040
-#define   CS8	0000060
-#define CSTOPB	0000100
-#define CREAD	0000200
-#define PARENB	0000400
-#define PARODD	0001000
-#define HUPCL	0002000
-#define CLOCAL	0004000
-#define CBAUDEX 0010000
-#define    BOTHER 0010000
-#define    B57600 0010001
-#define   B115200 0010002
-#define   B230400 0010003
-#define   B460800 0010004
-#define   B500000 0010005
-#define   B576000 0010006
-#define   B921600 0010007
-#define  B1000000 0010010
-#define  B1152000 0010011
-#define  B1500000 0010012
-#define  B2000000 0010013
-#define  B2500000 0010014
-#define  B3000000 0010015
-#define  B3500000 0010016
-#define  B4000000 0010017
-#define CIBAUD	  002003600000		/* input baud rate */
-#define CMSPAR	  010000000000		/* mark or space (stick) parity */
-#define CRTSCTS	  020000000000		/* flow control */
-
-#define	IBSHIFT	16		/* Shifr from CBAUD to CIBAUD */
-
-/* c_lflag bits */
-#define ISIG	0000001
-#define ICANON	0000002
-#define XCASE	0000004
-#define ECHO	0000010
-#define ECHOE	0000020
-#define ECHOK	0000040
-#define ECHONL	0000100
-#define NOFLSH	0000200
-#define TOSTOP	0000400
-#define ECHOCTL	0001000
-#define ECHOPRT	0002000
-#define ECHOKE	0004000
-#define FLUSHO	0010000
-#define PENDIN	0040000
-#define IEXTEN	0100000
-
-
-/* tcflow() and TCXONC use these */
-#define	TCOOFF		0
-#define	TCOON		1
-#define	TCIOFF		2
-#define	TCION		3
-
-/* tcflush() and TCFLSH use these */
-#define	TCIFLUSH	0
-#define	TCOFLUSH	1
-#define	TCIOFLUSH	2
-
-/* tcsetattr uses these */
-#define	TCSANOW		0
-#define	TCSADRAIN	1
-#define	TCSAFLUSH	2
-
-#endif /* __V850_TERMBITS_H__ */
diff --git a/include/asm-v850/termios.h b/include/asm-v850/termios.h
deleted file mode 100644
index fcd171838d9..00000000000
--- a/include/asm-v850/termios.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef __V850_TERMIOS_H__
-#define __V850_TERMIOS_H__
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE	0x001
-#define TIOCM_DTR	0x002
-#define TIOCM_RTS	0x004
-#define TIOCM_ST	0x008
-#define TIOCM_SR	0x010
-#define TIOCM_CTS	0x020
-#define TIOCM_CAR	0x040
-#define TIOCM_RNG	0x080
-#define TIOCM_DSR	0x100
-#define TIOCM_CD	TIOCM_CAR
-#define TIOCM_RI	TIOCM_RNG
-#define TIOCM_OUT1	0x2000
-#define TIOCM_OUT2	0x4000
-#define TIOCM_LOOP	0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-#ifdef __KERNEL__
-
-/*	intr=^C		quit=^\		erase=del	kill=^U
-	eof=^D		vtime=\0	vmin=\1		sxtc=\0
-	start=^Q	stop=^S		susp=^Z		eol=\0
-	reprint=^R	discard=^U	werase=^W	lnext=^V
-	eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-/*
- * Translate a "termio" structure into a "termios". Ugh.
- */
-#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
-	unsigned short __tmp; \
-	get_user(__tmp,&(termio)->x); \
-	*(unsigned short *) &(termios)->x = __tmp; \
-}
-
-#define user_termio_to_kernel_termios(termios, termio) \
-({ \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \
-	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
-})
-
-/*
- * Translate a "termios" structure into a "termio". Ugh.
- */
-#define kernel_termios_to_user_termio(termio, termios) \
-({ \
-	put_user((termios)->c_iflag, &(termio)->c_iflag); \
-	put_user((termios)->c_oflag, &(termio)->c_oflag); \
-	put_user((termios)->c_cflag, &(termio)->c_cflag); \
-	put_user((termios)->c_lflag, &(termio)->c_lflag); \
-	put_user((termios)->c_line,  &(termio)->c_line); \
-	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
-})
-
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
-#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
-#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
-
-#endif	/* __KERNEL__ */
-
-#endif	/* __V850_TERMIOS_H__ */
diff --git a/include/asm-v850/thread_info.h b/include/asm-v850/thread_info.h
deleted file mode 100644
index 1a9e6ae0c5f..00000000000
--- a/include/asm-v850/thread_info.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * include/asm-v850/thread_info.h -- v850 low-level thread information
- *
- *  Copyright (C) 2002  NEC Corporation
- *  Copyright (C) 2002  Miles Bader <miles@gnu.org>
- *  Copyright (C) 2002  David Howells (dhowells@redhat.com)
- *    - Incorporating suggestions made by Linus Torvalds and Dave Miller
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file was derived from the PPC version, include/asm-ppc/thread_info.h
- * which was adapted from the i386 version by Paul Mackerras
- */
-
-#ifndef __V850_THREAD_INFO_H__
-#define __V850_THREAD_INFO_H__
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-/*
- * low level task data.
- * If you change this, change the TI_* offsets below to match.
- */
-struct thread_info {
-	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
-	unsigned long		flags;		/* low level flags */
-	int			cpu;		/* cpu we're on */
-	int			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
-	struct restart_block	restart_block;
-};
-
-#define INIT_THREAD_INFO(tsk)						      \
-{									      \
-	.task =		&tsk,						      \
-	.exec_domain =	&default_exec_domain,				      \
-	.flags =	0,						      \
-	.cpu =		0,						      \
-	.preempt_count = 1,						      \
-	.restart_block = {						      \
-		.fn = do_no_restart_syscall,				      \
-	},								      \
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
-#define init_stack		(init_thread_union.stack)
-
-/*
- * macros/functions for gaining access to the thread information structure
- */
-
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-				__get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti)	free_pages((unsigned long) (ti), 1)
-
-#endif /* __ASSEMBLY__ */
-
-
-/*
- * Offsets in thread_info structure, used in assembly code
- */
-#define TI_TASK		0
-#define TI_EXECDOMAIN	4
-#define TI_FLAGS	8
-#define TI_CPU		12
-#define TI_PREEMPT	16
-
-#define PREEMPT_ACTIVE		0x4000000
-
-/*
- * thread information flag bit numbers
- */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_POLLING_NRFLAG	3	/* true if poll_idle() is polling
-					   TIF_NEED_RESCHED */
-#define TIF_MEMDIE		4
-
-/* as above, but as bit values */
-#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
-
-
-/* Size of kernel stack for each process.  */
-#define THREAD_SIZE		0x2000
-
-/* The alignment of kernel threads, with thread_info structures at their
-   base.  Thus, a pointer for a task's task structure can be derived from
-   its kernel stack pointer.  */
-#define THREAD_ALIGNMENT	THREAD_SIZE
-#define THREAD_MASK		(-THREAD_ALIGNMENT)
-
-
-#ifdef __ASSEMBLY__
-
-/* Put a pointer to the current thread_info structure into REG.  Note that
-   this definition requires THREAD_MASK to be representable as a signed
-   16-bit value.  */
-#define GET_CURRENT_THREAD(reg)						\
-        /* Use `addi' and then `and' instead of just `andi', because	\
-	   `addi' sign-extends the immediate value, whereas `andi'	\
-	   zero-extends it.  */						\
-	addi	THREAD_MASK, r0, reg;					\
-	and	sp, reg
-
-#else
-
-/* Return a pointer to the current thread_info structure.  */
-static inline struct thread_info *current_thread_info (void)
-{
-	register unsigned long sp __asm__ ("sp");
-	return (struct thread_info *)(sp & THREAD_MASK);
-}
-
-#endif /* __ASSEMBLY__ */
-
-
-#endif /* __KERNEL__ */
-
-#endif /* __V850_THREAD_INFO_H__ */
diff --git a/include/asm-v850/timex.h b/include/asm-v850/timex.h
deleted file mode 100644
index 6279e5a0ee8..00000000000
--- a/include/asm-v850/timex.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * linux/include/asm-v850/timex.h
- *
- * v850 architecture timex specifications
- */
-#ifndef __V850_TIMEX_H__
-#define __V850_TIMEX_H__
-
-#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
-
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles(void)
-{
-	return 0;
-}
-
-#endif /* __V850_TIMEX_H__ */
diff --git a/include/asm-v850/tlb.h b/include/asm-v850/tlb.h
deleted file mode 100644
index 73bc9ead40d..00000000000
--- a/include/asm-v850/tlb.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * include/asm-v850/tlb.h
- *
- *  Copyright (C) 2002  NEC Corporation
- *  Copyright (C) 2002  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_TLB_H__
-#define __V850_TLB_H__
-
-#define tlb_flush(tlb)	((void)0)
-
-#include <asm-generic/tlb.h>
-
-#endif /* __V850_TLB_H__ */
diff --git a/include/asm-v850/tlbflush.h b/include/asm-v850/tlbflush.h
deleted file mode 100644
index c44aa64449c..00000000000
--- a/include/asm-v850/tlbflush.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * include/asm-v850/tlbflush.h
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_TLBFLUSH_H__
-#define __V850_TLBFLUSH_H__
-
-#include <asm/machdep.h>
-
-
-/*
- * flush all user-space atc entries.
- */
-static inline void __flush_tlb(void)
-{
-	BUG ();
-}
-
-static inline void __flush_tlb_one(unsigned long addr)
-{
-	BUG ();
-}
-
-#define flush_tlb() __flush_tlb()
-
-/*
- * flush all atc entries (both kernel and user-space entries).
- */
-static inline void flush_tlb_all(void)
-{
-	BUG ();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	BUG ();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	BUG ();
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	BUG ();
-}
-
-static inline void flush_tlb_kernel_page(unsigned long addr)
-{
-	BUG ();
-}
-
-#endif /* __V850_TLBFLUSH_H__ */
diff --git a/include/asm-v850/topology.h b/include/asm-v850/topology.h
deleted file mode 100644
index 6040e41d794..00000000000
--- a/include/asm-v850/topology.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_TOPOLOGY_H__
-#define __V850_TOPOLOGY_H__
-
-#include <asm-generic/topology.h>
-
-#endif /* __V850_TOPOLOGY_H__ */
diff --git a/include/asm-v850/types.h b/include/asm-v850/types.h
deleted file mode 100644
index 89f735ee41d..00000000000
--- a/include/asm-v850/types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __V850_TYPES_H__
-#define __V850_TYPES_H__
-
-#ifndef __ASSEMBLY__
-
-/*
- * This file is never included by application software unless
- * explicitly requested (e.g., via linux/types.h) in which case the
- * application is Linux specific so (user-) name space pollution is
- * not a major issue.  However, for interoperability, libraries still
- * need to be careful to avoid a name clashes.
- */
-#include <asm-generic/int-ll64.h>
-
-typedef unsigned short umode_t;
-
-#endif /* !__ASSEMBLY__ */
-
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
-#ifdef __KERNEL__
-
-#define BITS_PER_LONG 32
-
-#ifndef __ASSEMBLY__
-
-/* Dma addresses are 32-bits wide.  */
-
-typedef u32 dma_addr_t;
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif /* __V850_TYPES_H__ */
diff --git a/include/asm-v850/uaccess.h b/include/asm-v850/uaccess.h
deleted file mode 100644
index 64563c409bb..00000000000
--- a/include/asm-v850/uaccess.h
+++ /dev/null
@@ -1,159 +0,0 @@
-#ifndef __V850_UACCESS_H__
-#define __V850_UACCESS_H__
-
-/*
- * User space memory access functions
- */
-
-#include <linux/errno.h>
-#include <linux/string.h>
-
-#include <asm/segment.h>
-#include <asm/machdep.h>
-
-#define VERIFY_READ	0
-#define VERIFY_WRITE	1
-
-static inline int access_ok (int type, const void *addr, unsigned long size)
-{
-	/* XXX I guess we should check against real ram bounds at least, and
-	   possibly make sure ADDR is not within the kernel.
-	   For now we just check to make sure it's not a small positive
-	   or negative value, as that will at least catch some kinds of
-	   error.  In particular, we make sure that ADDR's not within the
-	   interrupt vector area, which we know starts at zero, or within the
-	   peripheral-I/O area, which is located just _before_ zero.  */
-	unsigned long val = (unsigned long)addr;
-	return val >= (0x80 + NUM_CPU_IRQS*16) && val < 0xFFFFF000;
-}
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry
-{
-	unsigned long insn, fixup;
-};
-
-/* Returns 0 if exception not found and fixup otherwise.  */
-extern unsigned long search_exception_table (unsigned long);
-
-
-/*
- * These are the main single-value transfer routines.  They automatically
- * use the right size if we just have the right pointer type.
- */
-
-extern int bad_user_access_length (void);
-
-#define __get_user(var, ptr)						      \
-  ({									      \
-	  int __gu_err = 0;						      \
-	  typeof(*(ptr)) __gu_val = 0;					      \
-	  switch (sizeof (*(ptr))) {					      \
-	  case 1:							      \
-	  case 2:							      \
-	  case 4:							      \
-		  __gu_val = *(ptr);					      \
-		  break;						      \
-	  case 8:							      \
-		  memcpy(&__gu_val, ptr, sizeof(__gu_val));		      \
-		  break;						      \
-	  default:							      \
-		  __gu_val = 0;						      \
-		  __gu_err = __get_user_bad ();				      \
-		  break;						      \
-	  }								      \
-	  (var) = __gu_val;						      \
-	  __gu_err;							      \
-  })
-#define __get_user_bad()	(bad_user_access_length (), (-EFAULT))
-
-#define __put_user(var, ptr)						      \
-  ({									      \
-	  int __pu_err = 0;						      \
-	  switch (sizeof (*(ptr))) {					      \
-	  case 1:							      \
-	  case 2:							      \
-	  case 4:							      \
-		  *(ptr) = (var);					      \
-		  break;						      \
-	  case 8: {							      \
-	  	  typeof(*(ptr)) __pu_val = 0;				      \
-		  memcpy(ptr, &__pu_val, sizeof(__pu_val));		      \
-		  }							      \
-		  break;						      \
-	  default:							      \
-		  __pu_err = __put_user_bad ();				      \
-		  break;						      \
-	  }								      \
-	  __pu_err;							      \
-  })
-#define __put_user_bad()	(bad_user_access_length (), (-EFAULT))
-
-#define put_user(x, ptr)	__put_user(x, ptr)
-#define get_user(x, ptr)	__get_user(x, ptr)
-
-#define __copy_from_user(to, from, n)	(memcpy (to, from, n), 0)
-#define __copy_to_user(to, from, n)	(memcpy(to, from, n), 0)
-
-#define __copy_to_user_inatomic __copy_to_user
-#define __copy_from_user_inatomic __copy_from_user
-
-#define copy_from_user(to, from, n)	__copy_from_user (to, from, n)
-#define copy_to_user(to, from, n) 	__copy_to_user(to, from, n)
-
-#define copy_to_user_ret(to,from,n,retval) \
-  ({ if (copy_to_user (to,from,n)) return retval; })
-
-#define copy_from_user_ret(to,from,n,retval) \
-  ({ if (copy_from_user (to,from,n)) return retval; })
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-static inline long
-strncpy_from_user (char *dst, const char *src, long count)
-{
-	char *tmp;
-	strncpy (dst, src, count);
-	for (tmp = dst; *tmp && count > 0; tmp++, count--)
-		;
-	return tmp - dst;
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-static inline long strnlen_user (const char *src, long n)
-{
-	return strlen (src) + 1;
-}
-
-#define strlen_user(str)	strnlen_user (str, 32767)
-
-/*
- * Zero Userspace
- */
-
-static inline unsigned long
-clear_user (void *to, unsigned long n)
-{
-	memset (to, 0, n);
-	return 0;
-}
-
-#endif /* __V850_UACCESS_H__ */
diff --git a/include/asm-v850/ucontext.h b/include/asm-v850/ucontext.h
deleted file mode 100644
index 303c21590cf..00000000000
--- a/include/asm-v850/ucontext.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __V850_UCONTEXT_H__
-#define __V850_UCONTEXT_H__
-
-#include <asm/sigcontext.h>
-
-struct ucontext {
-	unsigned long	  uc_flags;
-	struct ucontext  *uc_link;
-	stack_t		  uc_stack;
-	struct sigcontext uc_mcontext;
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
-};
-
-#endif /* __V850_UCONTEXT_H__ */
diff --git a/include/asm-v850/unaligned.h b/include/asm-v850/unaligned.h
deleted file mode 100644
index 53122b28491..00000000000
--- a/include/asm-v850/unaligned.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Note that some v850 chips support unaligned access, but it seems too
- * annoying to use.
- */
-#ifndef _ASM_V850_UNALIGNED_H
-#define _ASM_V850_UNALIGNED_H
-
-#include <linux/unaligned/be_byteshift.h>
-#include <linux/unaligned/le_byteshift.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned	__get_unaligned_le
-#define put_unaligned	__put_unaligned_le
-
-#endif /* _ASM_V850_UNALIGNED_H */
diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h
deleted file mode 100644
index 2241ed45ecf..00000000000
--- a/include/asm-v850/unistd.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * include/asm-v850/unistd.h -- System call numbers and invocation mechanism
- *
- *  Copyright (C) 2001,02,03,04  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,04  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_UNISTD_H__
-#define __V850_UNISTD_H__
-
-#define __NR_restart_syscall	  0
-#define __NR_exit		  1
-#define __NR_fork		  2
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_open		  5
-#define __NR_close		  6
-#define __NR_waitpid		  7
-#define __NR_creat		  8
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_execve		 11
-#define __NR_chdir		 12
-#define __NR_time		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-#define __NR_chown		 16
-#define __NR_break		 17
-#define __NR_lseek		 19
-#define __NR_getpid		 20
-#define __NR_mount		 21
-#define __NR_umount		 22
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_ptrace		 26
-#define __NR_alarm		 27
-#define __NR_pause		 29
-#define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
-#define __NR_access		 33
-#define __NR_nice		 34
-#define __NR_ftime		 35
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_rename		 38
-#define __NR_mkdir		 39
-#define __NR_rmdir		 40
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_times		 43
-#define __NR_prof		 44
-#define __NR_brk		 45
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-#define __NR_signal		 48
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_acct		 51
-#define __NR_umount2		 52
-#define __NR_lock		 53
-#define __NR_ioctl		 54
-#define __NR_fcntl		 55
-#define __NR_setpgid		 57
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_ustat		 62
-#define __NR_dup2		 63
-#define __NR_getppid		 64
-#define __NR_getpgrp		 65
-#define __NR_setsid		 66
-#define __NR_sigaction		 67
-#define __NR_sgetmask		 68
-#define __NR_ssetmask		 69
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-#define __NR_sigsuspend		 72
-#define __NR_sigpending		 73
-#define __NR_sethostname	 74
-#define __NR_setrlimit		 75
-#define __NR_ugetrlimit	 	 76
-#define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-#define __NR_select		 82
-#define __NR_symlink		 83
-#define __NR_readlink		 85
-#define __NR_uselib		 86
-#define __NR_swapon		 87
-#define __NR_reboot		 88
-#define __NR_readdir		 89
-#define __NR_mmap		 90
-#define __NR_munmap		 91
-#define __NR_truncate		 92
-#define __NR_ftruncate		 93
-#define __NR_fchmod		 94
-#define __NR_fchown		 95
-#define __NR_getpriority	 96
-#define __NR_setpriority	 97
-#define __NR_profil		 98
-#define __NR_statfs		 99
-#define __NR_fstatfs		100
-#define __NR_socketcall		102
-#define __NR_syslog		103
-#define __NR_setitimer		104
-#define __NR_getitimer		105
-#define __NR_stat		106
-#define __NR_lstat		107
-#define __NR_fstat		108
-#define __NR_vhangup		111
-#define __NR_wait4		114
-#define __NR_swapoff		115
-#define __NR_sysinfo		116
-#define __NR_ipc		117
-#define __NR_fsync		118
-#define __NR_sigreturn		119
-#define __NR_clone		120
-#define __NR_setdomainname	121
-#define __NR_uname		122
-#define __NR_cacheflush		123
-#define __NR_adjtimex		124
-#define __NR_mprotect		125
-#define __NR_sigprocmask	126
-#define __NR_create_module	127
-#define __NR_init_module	128
-#define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
-#define __NR_quotactl		131
-#define __NR_getpgid		132
-#define __NR_fchdir		133
-#define __NR_bdflush		134
-#define __NR_sysfs		135
-#define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR_getdents		141
-#define __NR_flock		143
-#define __NR_msync		144
-#define __NR_readv		145
-#define __NR_writev		146
-#define __NR_getsid		147
-#define __NR_fdatasync		148
-#define __NR__sysctl		149
-#define __NR_mlock		150
-#define __NR_munlock		151
-#define __NR_mlockall		152
-#define __NR_munlockall		153
-#define __NR_sched_setparam		154
-#define __NR_sched_getparam		155
-#define __NR_sched_setscheduler		156
-#define __NR_sched_getscheduler		157
-#define __NR_sched_yield		158
-#define __NR_sched_get_priority_max	159
-#define __NR_sched_get_priority_min	160
-#define __NR_sched_rr_get_interval	161
-#define __NR_nanosleep		162
-#define __NR_mremap		163
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-#define __NR_query_module	167
-#define __NR_poll		168
-#define __NR_nfsservctl		169
-#define __NR_setresgid		170
-#define __NR_getresgid		171
-#define __NR_prctl		172
-#define __NR_rt_sigreturn	173
-#define __NR_rt_sigaction	174
-#define __NR_rt_sigprocmask	175
-#define __NR_rt_sigpending	176
-#define __NR_rt_sigtimedwait	177
-#define __NR_rt_sigqueueinfo	178
-#define __NR_rt_sigsuspend	179
-#define __NR_pread		180
-#define __NR_pwrite		181
-#define __NR_lchown		182
-#define __NR_getcwd		183
-#define __NR_capget		184
-#define __NR_capset		185
-#define __NR_sigaltstack	186
-#define __NR_sendfile		187
-#define __NR_getpmsg		188	/* some people actually want streams */
-#define __NR_putpmsg		189	/* some people actually want streams */
-#define __NR_vfork		190
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_fcntl64		198
-#define __NR_getdents64		199
-#define __NR_pivot_root		200
-#define __NR_gettid		201
-#define __NR_tkill		202
-
-#ifdef __KERNEL__
-
-#define __ARCH_WANT_IPC_PARSE_VERSION
-#define __ARCH_WANT_OLD_READDIR
-#define __ARCH_WANT_STAT64
-#define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_GETHOSTNAME
-#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
-#define __ARCH_WANT_SYS_SIGNAL
-#define __ARCH_WANT_SYS_TIME
-#define __ARCH_WANT_SYS_UTIME
-#define __ARCH_WANT_SYS_WAITPID
-#define __ARCH_WANT_SYS_SOCKETCALL
-#define __ARCH_WANT_SYS_FADVISE64
-#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
-#define __ARCH_WANT_SYS_NICE
-#define __ARCH_WANT_SYS_OLDUMOUNT
-#define __ARCH_WANT_SYS_SIGPENDING
-#define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-
-/*
- * "Conditional" syscalls
- */
-#define cond_syscall(name)						      \
-  asm (".weak\t" C_SYMBOL_STRING(name) ";"				      \
-       ".set\t" C_SYMBOL_STRING(name) "," C_SYMBOL_STRING(sys_ni_syscall))
-#if 0
-/* This doesn't work if there's a function prototype for NAME visible,
-   because the argument types probably won't match.  */
-#define cond_syscall(name)  \
-  void name (void) __attribute__ ((weak, alias ("sys_ni_syscall")));
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* __V850_UNISTD_H__ */
diff --git a/include/asm-v850/user.h b/include/asm-v850/user.h
deleted file mode 100644
index 63cdc567d27..00000000000
--- a/include/asm-v850/user.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef __V850_USER_H__
-#define __V850_USER_H__
-
-/* Adapted from <asm-ppc/user.h>.  */
-
-#include <linux/ptrace.h>
-#include <asm/page.h>
-
-/*
- * Core file format: The core file is written in such a way that gdb
- * can understand it and provide useful information to the user (under
- * linux we use the `trad-core' bfd, NOT the osf-core).  The file contents
- * are as follows:
- *
- *  upage: 1 page consisting of a user struct that tells gdb
- *	what is present in the file.  Directly after this is a
- *	copy of the task_struct, which is currently not used by gdb,
- *	but it may come in handy at some point.  All of the registers
- *	are stored as part of the upage.  The upage should always be
- *	only one page long.
- *  data: The data segment follows next.  We use current->end_text to
- *	current->brk to pick up all of the user variables, plus any memory
- *	that may have been sbrk'ed.  No attempt is made to determine if a
- *	page is demand-zero or if a page is totally unused, we just cover
- *	the entire range.  All of the addresses are rounded in such a way
- *	that an integral number of pages is written.
- *  stack: We need the stack information in order to get a meaningful
- *	backtrace.  We need to write the data from usp to
- *	current->start_stack, so we round each of these in order to be able
- *	to write an integer number of pages.
- */
-struct user {
-	struct pt_regs	regs;			/* entire machine state */
-	size_t		u_tsize;		/* text size (pages) */
-	size_t		u_dsize;		/* data size (pages) */
-	size_t		u_ssize;		/* stack size (pages) */
-	unsigned long	start_code;		/* text starting address */
-	unsigned long	start_data;		/* data starting address */
-	unsigned long	start_stack;		/* stack starting address */
-	long int	signal;			/* signal causing core dump */
-	unsigned long	u_ar0;			/* help gdb find registers */
-	unsigned long	magic;			/* identifies a core file */
-	char		u_comm[32];		/* user command name */
-};
-
-#define NBPG			PAGE_SIZE
-#define UPAGES			1
-#define HOST_TEXT_START_ADDR	(u.start_code)
-#define HOST_DATA_START_ADDR	(u.start_data)
-#define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
-
-#endif /* __V850_USER_H__ */
diff --git a/include/asm-v850/v850e.h b/include/asm-v850/v850e.h
deleted file mode 100644
index 5a222eb5117..00000000000
--- a/include/asm-v850/v850e.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * include/asm-v850/v850e.h -- V850E CPU
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E_H__
-#define __V850_V850E_H__
-
-#include <asm/v850e_intc.h>
-
-#define CPU_ARCH "v850e"
-
-#endif /* __V850_V850E_H__ */
diff --git a/include/asm-v850/v850e2.h b/include/asm-v850/v850e2.h
deleted file mode 100644
index 48680408ab7..00000000000
--- a/include/asm-v850/v850e2.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * include/asm-v850/v850e2.h -- Machine-dependent defs for V850E2 CPUs
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E2_H__
-#define __V850_V850E2_H__
-
-#include <asm/v850e_intc.h>	/* v850e-style interrupt system.  */
-
-
-#define CPU_ARCH "v850e2"
-
-
-/* Control registers.  */
-
-/* Chip area select control */ 
-#define V850E2_CSC_ADDR(n)	(0xFFFFF060 + (n) * 2)
-#define V850E2_CSC(n)		(*(volatile u16 *)V850E2_CSC_ADDR(n))
-/* I/O area select control */
-#define V850E2_BPC_ADDR		0xFFFFF064
-#define V850E2_BPC		(*(volatile u16 *)V850E2_BPC_ADDR)
-/* Bus size configuration */
-#define V850E2_BSC_ADDR		0xFFFFF066
-#define V850E2_BSC		(*(volatile u16 *)V850E2_BSC_ADDR)
-/* Endian configuration */
-#define V850E2_BEC_ADDR		0xFFFFF068
-#define V850E2_BEC		(*(volatile u16 *)V850E2_BEC_ADDR)
-/* Cache configuration */
-#define V850E2_BHC_ADDR		0xFFFFF06A
-#define V850E2_BHC		(*(volatile u16 *)V850E2_BHC_ADDR)
-/* NPB strobe-wait configuration */
-#define V850E2_VSWC_ADDR	0xFFFFF06E
-#define V850E2_VSWC		(*(volatile u16 *)V850E2_VSWC_ADDR)
-/* Bus cycle type */
-#define V850E2_BCT_ADDR(n)	(0xFFFFF480 + (n) * 2)
-#define V850E2_BCT(n)		(*(volatile u16 *)V850E2_BCT_ADDR(n))
-/* Data wait control */
-#define V850E2_DWC_ADDR(n)	(0xFFFFF484 + (n) * 2)
-#define V850E2_DWC(n)		(*(volatile u16 *)V850E2_DWC_ADDR(n))
-/* Bus cycle control */
-#define V850E2_BCC_ADDR		0xFFFFF488
-#define V850E2_BCC		(*(volatile u16 *)V850E2_BCC_ADDR)
-/* Address wait control */
-#define V850E2_ASC_ADDR		0xFFFFF48A
-#define V850E2_ASC		(*(volatile u16 *)V850E2_ASC_ADDR)
-/* Local bus sizing control */
-#define V850E2_LBS_ADDR		0xFFFFF48E
-#define V850E2_LBS		(*(volatile u16 *)V850E2_LBS_ADDR)
-/* Line buffer control */
-#define V850E2_LBC_ADDR(n)	(0xFFFFF490 + (n) * 2)
-#define V850E2_LBC(n)		(*(volatile u16 *)V850E2_LBC_ADDR(n))
-/* SDRAM configuration */
-#define V850E2_SCR_ADDR(n)	(0xFFFFF4A0 + (n) * 4)
-#define V850E2_SCR(n)		(*(volatile u16 *)V850E2_SCR_ADDR(n))
-/* SDRAM refresh cycle control */
-#define V850E2_RFS_ADDR(n)	(0xFFFFF4A2 + (n) * 4)
-#define V850E2_RFS(n)		(*(volatile u16 *)V850E2_RFS_ADDR(n))
-
-
-#endif /* __V850_V850E2_H__ */
diff --git a/include/asm-v850/v850e2_cache.h b/include/asm-v850/v850e2_cache.h
deleted file mode 100644
index 87edf0d311d..00000000000
--- a/include/asm-v850/v850e2_cache.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * include/asm-v850/v850e2_cache_cache.h -- Cache control for V850E2
- * 	cache memories
- *
- *  Copyright (C) 2003,05  NEC Electronics Corporation
- *  Copyright (C) 2003,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E2_CACHE_H__
-#define __V850_V850E2_CACHE_H__
-
-#include <asm/types.h>
-
-
-/* Cache control registers.  */
-
-/* Bus Transaction Control */
-#define V850E2_CACHE_BTSC_ADDR	0xFFFFF070
-#define V850E2_CACHE_BTSC 	(*(volatile u16 *)V850E2_CACHE_BTSC_ADDR)
-#define V850E2_CACHE_BTSC_ICM	0x0001 /* icache enable */
-#define V850E2_CACHE_BTSC_DCM0	0x0004 /* dcache enable, bit 0 */
-#define V850E2_CACHE_BTSC_DCM1	0x0008 /* dcache enable, bit 1 */
-#define V850E2_CACHE_BTSC_DCM_WT		      /* write-through */ \
-			V850E2_CACHE_BTSC_DCM0
-#ifdef CONFIG_V850E2_V850E2S
-# define V850E2_CACHE_BTSC_DCM_WB_NO_ALLOC    /* write-back, non-alloc */ \
-			V850E2_CACHE_BTSC_DCM1	
-# define V850E2_CACHE_BTSC_DCM_WB_ALLOC	      /* write-back, non-alloc */ \
-			(V850E2_CACHE_BTSC_DCM1 | V850E2_CACHE_BTSC_DCM0)
-# define V850E2_CACHE_BTSC_ISEQ	0x0010 /* icache `address sequence mode' */
-# define V850E2_CACHE_BTSC_DSEQ	0x0020 /* dcache `address sequence mode' */
-# define V850E2_CACHE_BTSC_IRFC	0x0030
-# define V850E2_CACHE_BTSC_ILCD	0x4000
-# define V850E2_CACHE_BTSC_VABE	0x8000
-#endif /* CONFIG_V850E2_V850E2S */
-
-/* Cache operation start address register (low-bits).  */
-#define V850E2_CACHE_CADL_ADDR	0xFFFFF074
-#define V850E2_CACHE_CADL 	(*(volatile u16 *)V850E2_CACHE_CADL_ADDR)
-/* Cache operation start address register (high-bits).  */
-#define V850E2_CACHE_CADH_ADDR	0xFFFFF076
-#define V850E2_CACHE_CADH 	(*(volatile u16 *)V850E2_CACHE_CADH_ADDR)
-/* Cache operation count register.  */
-#define V850E2_CACHE_CCNT_ADDR	0xFFFFF078
-#define V850E2_CACHE_CCNT 	(*(volatile u16 *)V850E2_CACHE_CCNT_ADDR)
-/* Cache operation specification register.  */
-#define V850E2_CACHE_COPR_ADDR	0xFFFFF07A
-#define V850E2_CACHE_COPR 	(*(volatile u16 *)V850E2_CACHE_COPR_ADDR)
-#define V850E2_CACHE_COPR_STRT	0x0001 /* start cache operation */
-#define V850E2_CACHE_COPR_LBSL	0x0100 /* 0 = icache, 1 = dcache */
-#define V850E2_CACHE_COPR_WSLE	0x0200 /* operate on cache way */
-#define V850E2_CACHE_COPR_WSL(way) ((way) * 0x0400) /* way select */
-#define V850E2_CACHE_COPR_CFC(op)  ((op)  * 0x1000) /* cache function code */
-
-
-/* Size of a cache line in bytes.  */
-#define V850E2_CACHE_LINE_SIZE_BITS	4
-#define V850E2_CACHE_LINE_SIZE		(1 << V850E2_CACHE_LINE_SIZE_BITS)
-
-/* The size of each cache `way' in lines.  */
-#define V850E2_CACHE_WAY_SIZE		256
-
-
-/* For <asm/cache.h> */
-#define L1_CACHE_BYTES			V850E2_CACHE_LINE_SIZE
-#define L1_CACHE_SHIFT			V850E2_CACHE_LINE_SIZE_BITS
-
-
-#endif /* __V850_V850E2_CACHE_H__ */
diff --git a/include/asm-v850/v850e_cache.h b/include/asm-v850/v850e_cache.h
deleted file mode 100644
index aa7d7eb9da5..00000000000
--- a/include/asm-v850/v850e_cache.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * include/asm-v850/v850e_cache.h -- Cache control for V850E cache memories
- *
- *  Copyright (C) 2001,03  NEC Electronics Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* This file implements cache control for the rather simple cache used on
-   some V850E CPUs, specifically the NB85E/TEG CPU-core and the V850E/ME2
-   CPU.  V850E2 processors have their own (better) cache
-   implementation.  */
-
-#ifndef __V850_V850E_CACHE_H__
-#define __V850_V850E_CACHE_H__
-
-#include <asm/types.h>
-
-
-/* Cache control registers.  */
-#define V850E_CACHE_BHC_ADDR	0xFFFFF06A
-#define V850E_CACHE_BHC		(*(volatile u16 *)V850E_CACHE_BHC_ADDR)
-#define V850E_CACHE_ICC_ADDR	0xFFFFF070
-#define V850E_CACHE_ICC		(*(volatile u16 *)V850E_CACHE_ICC_ADDR)
-#define V850E_CACHE_ISI_ADDR	0xFFFFF072
-#define V850E_CACHE_ISI		(*(volatile u16 *)V850E_CACHE_ISI_ADDR)
-#define V850E_CACHE_DCC_ADDR	0xFFFFF078
-#define V850E_CACHE_DCC		(*(volatile u16 *)V850E_CACHE_DCC_ADDR)
-
-/* Size of a cache line in bytes.  */
-#define V850E_CACHE_LINE_SIZE	16
-
-/* For <asm/cache.h> */
-#define L1_CACHE_BYTES		V850E_CACHE_LINE_SIZE
-
-
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
-/* Set caching params via the BHC, ICC, and DCC registers.  */
-void v850e_cache_enable (u16 bhc, u16 icc, u16 dcc);
-#endif /* __KERNEL__ && !__ASSEMBLY__ */
-
-
-#endif /* __V850_V850E_CACHE_H__ */
diff --git a/include/asm-v850/v850e_intc.h b/include/asm-v850/v850e_intc.h
deleted file mode 100644
index 6fdf9570831..00000000000
--- a/include/asm-v850/v850e_intc.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * include/asm-v850/v850e_intc.h -- V850E CPU interrupt controller (INTC)
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E_INTC_H__
-#define __V850_V850E_INTC_H__
-
-
-/* There are 4 16-bit `Interrupt Mask Registers' located contiguously
-   starting from this base.  Each interrupt uses a single bit to
-   indicated enabled/disabled status.  */
-#define V850E_INTC_IMR_BASE_ADDR  0xFFFFF100
-#define V850E_INTC_IMR_ADDR(irq)  (V850E_INTC_IMR_BASE_ADDR + ((irq) >> 3))
-#define V850E_INTC_IMR_BIT(irq)	  ((irq) & 0x7)
-
-/* Each maskable interrupt has a single-byte control register at this
-   address.  */
-#define V850E_INTC_IC_BASE_ADDR	  0xFFFFF110
-#define V850E_INTC_IC_ADDR(irq)	  (V850E_INTC_IC_BASE_ADDR + ((irq) << 1))
-#define V850E_INTC_IC(irq)	  (*(volatile u8 *)V850E_INTC_IC_ADDR(irq))
-/* Encode priority PR for storing in an interrupt control register.  */
-#define V850E_INTC_IC_PR(pr)	  (pr)
-/* Interrupt disable bit in an interrupt control register.  */
-#define V850E_INTC_IC_MK_BIT	  6
-#define V850E_INTC_IC_MK	  (1 << V850E_INTC_IC_MK_BIT)
-/* Interrupt pending flag in an interrupt control register.  */
-#define V850E_INTC_IC_IF_BIT	  7
-#define V850E_INTC_IC_IF	  (1 << V850E_INTC_IC_IF_BIT)
-
-/* The ISPR (In-service priority register) contains one bit for each interrupt
-   priority level, which is set to one when that level is currently being
-   serviced (and thus blocking any interrupts of equal or lesser level).  */
-#define V850E_INTC_ISPR_ADDR	  0xFFFFF1FA
-#define V850E_INTC_ISPR		  (*(volatile u8 *)V850E_INTC_ISPR_ADDR)
-
-
-#ifndef __ASSEMBLY__
-
-/* Enable interrupt handling for interrupt IRQ.  */
-static inline void v850e_intc_enable_irq (unsigned irq)
-{
-	__asm__ __volatile__ ("clr1 %0, [%1]"
-			      :: "r" (V850E_INTC_IMR_BIT (irq)),
-			         "r" (V850E_INTC_IMR_ADDR (irq))
-			      : "memory");
-}
-
-/* Disable interrupt handling for interrupt IRQ.  Note that any
-   interrupts received while disabled will be delivered once the
-   interrupt is enabled again, unless they are explicitly cleared using
-   `v850e_intc_clear_pending_irq'.  */
-static inline void v850e_intc_disable_irq (unsigned irq)
-{
-	__asm__ __volatile__ ("set1 %0, [%1]"
-			      :: "r" (V850E_INTC_IMR_BIT (irq)),
-			         "r" (V850E_INTC_IMR_ADDR (irq))
-			      : "memory");
-}
-
-/* Return true if interrupt handling for interrupt IRQ is enabled.  */
-static inline int v850e_intc_irq_enabled (unsigned irq)
-{
-	int rval;
-	__asm__ __volatile__ ("tst1 %1, [%2]; setf z, %0"
-			      : "=r" (rval)
-			      : "r" (V850E_INTC_IMR_BIT (irq)),
-			        "r" (V850E_INTC_IMR_ADDR (irq)));
-	return rval;
-}
-
-/* Disable irqs from 0 until LIMIT.  LIMIT must be a multiple of 8.  */
-static inline void _v850e_intc_disable_irqs (unsigned limit)
-{
-	unsigned long addr;
-	for (addr = V850E_INTC_IMR_BASE_ADDR; limit >= 8; addr++, limit -= 8)
-		*(char *)addr = 0xFF;
-}
-
-/* Disable all irqs.  This is purposely a macro, because NUM_MACH_IRQS
-   will be only be defined later.  */
-#define v850e_intc_disable_irqs()   _v850e_intc_disable_irqs (NUM_MACH_IRQS)
-
-/* Clear any pending interrupts for IRQ.  */
-static inline void v850e_intc_clear_pending_irq (unsigned irq)
-{
-	__asm__ __volatile__ ("clr1 %0, 0[%1]"
-			      :: "i" (V850E_INTC_IC_IF_BIT),
-			         "r" (V850E_INTC_IC_ADDR (irq))
-			      : "memory");
-}
-
-/* Return true if interrupt IRQ is pending (but disabled).  */
-static inline int v850e_intc_irq_pending (unsigned irq)
-{
-	int rval;
-	__asm__ __volatile__ ("tst1 %1, 0[%2]; setf nz, %0"
-			      : "=r" (rval)
-			      : "i" (V850E_INTC_IC_IF_BIT),
-			        "r" (V850E_INTC_IC_ADDR (irq)));
-	return rval;
-}
-
-
-struct v850e_intc_irq_init {
-	const char *name;	/* name of interrupt type */
-
-	/* Range of kernel irq numbers for this type:
-	   BASE, BASE+INTERVAL, ..., BASE+INTERVAL*NUM  */
-	unsigned base, num, interval;
-
-	unsigned priority;	/* interrupt priority to assign */
-};
-struct hw_interrupt_type;	/* fwd decl */
-
-/* Initialize HW_IRQ_TYPES for INTC-controlled irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-extern void v850e_intc_init_irq_types (struct v850e_intc_irq_init *inits,
-				       struct hw_interrupt_type *hw_irq_types);
-
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_V850E_INTC_H__ */
diff --git a/include/asm-v850/v850e_timer_c.h b/include/asm-v850/v850e_timer_c.h
deleted file mode 100644
index f70575df6ea..00000000000
--- a/include/asm-v850/v850e_timer_c.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * include/asm-v850/v850e_timer_c.h -- `Timer C' component often used
- *	with the V850E cpu core
- *
- *  Copyright (C) 2001,03  NEC Electronics Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* NOTE: this include file currently contains only enough to allow us to
-   use timer C as an interrupt pass-through.  */
-
-#ifndef __V850_V850E_TIMER_C_H__
-#define __V850_V850E_TIMER_C_H__
-
-#include <asm/types.h>
-#include <asm/machdep.h>	/* Pick up chip-specific defs.  */
-
-
-/* Timer C (16-bit interval timers).  */
-
-/* Control register 0 for timer C.  */
-#define V850E_TIMER_C_TMCC0_ADDR(n) (V850E_TIMER_C_BASE_ADDR + 0x6 + 0x10 *(n))
-#define V850E_TIMER_C_TMCC0(n)	  (*(volatile u8 *)V850E_TIMER_C_TMCC0_ADDR(n))
-#define V850E_TIMER_C_TMCC0_CAE	  0x01 /* clock action enable */
-#define V850E_TIMER_C_TMCC0_CE	  0x02 /* count enable */
-/* ... */
-
-/* Control register 1 for timer C.  */
-#define V850E_TIMER_C_TMCC1_ADDR(n) (V850E_TIMER_C_BASE_ADDR + 0x8 + 0x10 *(n))
-#define V850E_TIMER_C_TMCC1(n)	  (*(volatile u8 *)V850E_TIMER_C_TMCC1_ADDR(n))
-#define V850E_TIMER_C_TMCC1_CMS0  0x01 /* capture/compare mode select (ccc0) */
-#define V850E_TIMER_C_TMCC1_CMS1  0x02 /* capture/compare mode select (ccc1) */
-/* ... */
-
-/* Interrupt edge-sensitivity control for timer C.  */
-#define V850E_TIMER_C_SESC_ADDR(n) (V850E_TIMER_C_BASE_ADDR + 0x9 + 0x10 *(n))
-#define V850E_TIMER_C_SESC(n)	  (*(volatile u8 *)V850E_TIMER_C_SESC_ADDR(n))
-
-/* ...etc... */
-
-
-#endif /* __V850_V850E_TIMER_C_H__  */
diff --git a/include/asm-v850/v850e_timer_d.h b/include/asm-v850/v850e_timer_d.h
deleted file mode 100644
index 417612c5b22..00000000000
--- a/include/asm-v850/v850e_timer_d.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * include/asm-v850/v850e_timer_d.h -- `Timer D' component often used
- *	with the V850E cpu core
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E_TIMER_D_H__
-#define __V850_V850E_TIMER_D_H__
-
-#include <asm/types.h>
-#include <asm/machdep.h>	/* Pick up chip-specific defs.  */
-
-
-/* Timer D (16-bit interval timers).  */
-
-/* Count registers for timer D.  */
-#define V850E_TIMER_D_TMD_ADDR(n) (V850E_TIMER_D_TMD_BASE_ADDR + 0x10 * (n))
-#define V850E_TIMER_D_TMD(n)	  (*(volatile u16 *)V850E_TIMER_D_TMD_ADDR(n))
-
-/* Count compare registers for timer D.  */
-#define V850E_TIMER_D_CMD_ADDR(n) (V850E_TIMER_D_CMD_BASE_ADDR + 0x10 * (n))
-#define V850E_TIMER_D_CMD(n)	  (*(volatile u16 *)V850E_TIMER_D_CMD_ADDR(n))
-
-/* Control registers for timer D.  */
-#define V850E_TIMER_D_TMCD_ADDR(n) (V850E_TIMER_D_TMCD_BASE_ADDR + 0x10 * (n))
-#define V850E_TIMER_D_TMCD(n)	   (*(volatile u8 *)V850E_TIMER_D_TMCD_ADDR(n))
-/* Control bits for timer D.  */
-#define V850E_TIMER_D_TMCD_CE  	   0x2 /* count enable */
-#define V850E_TIMER_D_TMCD_CAE	   0x1 /* clock action enable */
-/* Clock divider setting (log2).  */
-#define V850E_TIMER_D_TMCD_CS(divlog2) (((divlog2) - V850E_TIMER_D_TMCD_CS_MIN) << 4)
-/* Minimum clock divider setting (log2).  */
-#ifndef V850E_TIMER_D_TMCD_CS_MIN /* Can be overridden by mach-specific hdrs */
-#define V850E_TIMER_D_TMCD_CS_MIN  2 /* Default is correct for the v850e/ma1 */
-#endif
-/* Maximum clock divider setting (log2).  */
-#define V850E_TIMER_D_TMCD_CS_MAX  (V850E_TIMER_D_TMCD_CS_MIN + 7)
-
-/* Return the clock-divider (log2) of timer D unit N.  */
-#define V850E_TIMER_D_DIVLOG2(n) \
-  (((V850E_TIMER_D_TMCD(n) >> 4) & 0x7) + V850E_TIMER_D_TMCD_CS_MIN)
-
-
-#ifndef __ASSEMBLY__
-
-/* Start interval timer TIMER (0-3).  The timer will issue the
-   corresponding INTCMD interrupt RATE times per second.  This function
-   does not enable the interrupt.  */
-extern void v850e_timer_d_configure (unsigned timer, unsigned rate);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_V850E_TIMER_D_H__  */
diff --git a/include/asm-v850/v850e_uart.h b/include/asm-v850/v850e_uart.h
deleted file mode 100644
index 5182fb4cc98..00000000000
--- a/include/asm-v850/v850e_uart.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * include/asm-v850/v850e_uart.h -- common V850E on-chip UART driver
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* There's not actually a single UART implementation used by V850E CPUs,
-   but rather a series of implementations that are all `close' to one
-   another.  This file corresponds to the single driver which handles all
-   of them.  */
-
-#ifndef __V850_V850E_UART_H__
-#define __V850_V850E_UART_H__
-
-#include <linux/termios.h>
-
-#include <asm/v850e_utils.h>
-#include <asm/types.h>
-#include <asm/machdep.h>	/* Pick up chip-specific defs.  */
-
-
-/* Include model-specific definitions.  */
-#ifdef CONFIG_V850E_UART
-# ifdef CONFIG_V850E_UARTB
-#  include <asm-v850/v850e_uartb.h>
-# else
-#  include <asm-v850/v850e_uarta.h> /* original V850E UART */
-# endif
-#endif
-
-
-/* Optional capabilities some hardware provides.  */
-
-/* This UART doesn't implement RTS/CTS by default, but some platforms
-   implement them externally, so check to see if <asm/machdep.h> defined
-   anything.  */
-#ifdef V850E_UART_CTS
-#define v850e_uart_cts(n)		V850E_UART_CTS(n)
-#else
-#define v850e_uart_cts(n)		(1)
-#endif
-
-/* Do the same for RTS.  */
-#ifdef V850E_UART_SET_RTS
-#define v850e_uart_set_rts(n,v)		V850E_UART_SET_RTS(n,v)
-#else
-#define v850e_uart_set_rts(n,v)		((void)0)
-#endif
-
-
-/* This is the serial channel to use for the boot console (if desired).  */
-#ifndef V850E_UART_CONSOLE_CHANNEL
-# define V850E_UART_CONSOLE_CHANNEL 0
-#endif
-
-
-#ifndef __ASSEMBLY__
-
-/* Setup a console using channel 0 of the builtin uart.  */
-extern void v850e_uart_cons_init (unsigned chan);
-
-/* Configure and turn on uart channel CHAN, using the termios `control
-   modes' bits in CFLAGS, and a baud-rate of BAUD.  */
-void v850e_uart_configure (unsigned chan, unsigned cflags, unsigned baud);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_V850E_UART_H__ */
diff --git a/include/asm-v850/v850e_uarta.h b/include/asm-v850/v850e_uarta.h
deleted file mode 100644
index e483e095072..00000000000
--- a/include/asm-v850/v850e_uarta.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * include/asm-v850/v850e_uarta.h -- original V850E on-chip UART
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* This is the original V850E UART implementation is called just `UART' in
-   the docs, but we name this header file <asm/v850e_uarta.h> because the
-   name <asm/v850e_uart.h> is used for the common driver that handles both
-   `UART' and `UARTB' implementations.  */
-
-#ifndef __V850_V850E_UARTA_H__
-#define __V850_V850E_UARTA_H__
-
-
-/* Raw hardware interface.  */
-
-/* The base address of the UART control registers for channel N.
-   The default is the address used on the V850E/MA1.  */
-#ifndef V850E_UART_BASE_ADDR
-#define V850E_UART_BASE_ADDR(n)		(0xFFFFFA00 + 0x10 * (n))
-#endif 
-
-/* Addresses of specific UART control registers for channel N.
-   The defaults are the addresses used on the V850E/MA1; if a platform
-   wants to redefine any of these, it must redefine them all.  */
-#ifndef V850E_UART_ASIM_ADDR
-#define V850E_UART_ASIM_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x0)
-#define V850E_UART_RXB_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x2)
-#define V850E_UART_ASIS_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x3)
-#define V850E_UART_TXB_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x4)
-#define V850E_UART_ASIF_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x5)
-#define V850E_UART_CKSR_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x6)
-#define V850E_UART_BRGC_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x7)
-#endif
-
-/* UART config registers.  */
-#define V850E_UART_ASIM(n)	(*(volatile u8 *)V850E_UART_ASIM_ADDR(n))
-/* Control bits for config registers.  */
-#define V850E_UART_ASIM_CAE	0x80 /* clock enable */
-#define V850E_UART_ASIM_TXE	0x40 /* transmit enable */
-#define V850E_UART_ASIM_RXE	0x20 /* receive enable */
-#define V850E_UART_ASIM_PS_MASK	0x18 /* mask covering parity-select bits */
-#define V850E_UART_ASIM_PS_NONE	0x00 /* no parity */
-#define V850E_UART_ASIM_PS_ZERO	0x08 /* zero parity */
-#define V850E_UART_ASIM_PS_ODD	0x10 /* odd parity */
-#define V850E_UART_ASIM_PS_EVEN	0x18 /* even parity */
-#define V850E_UART_ASIM_CL_8	0x04 /* char len is 8 bits (otherwise, 7) */
-#define V850E_UART_ASIM_SL_2	0x02 /* 2 stop bits (otherwise, 1) */
-#define V850E_UART_ASIM_ISRM	0x01 /* generate INTSR interrupt on errors
-					(otherwise, generate INTSER) */
-
-/* UART serial interface status registers.  */
-#define V850E_UART_ASIS(n)	(*(volatile u8 *)V850E_UART_ASIS_ADDR(n))
-/* Control bits for status registers.  */
-#define V850E_UART_ASIS_PE	0x04 /* parity error */
-#define V850E_UART_ASIS_FE	0x02 /* framing error */
-#define V850E_UART_ASIS_OVE	0x01 /* overrun error */
-
-/* UART serial interface transmission status registers.  */
-#define V850E_UART_ASIF(n)	(*(volatile u8 *)V850E_UART_ASIF_ADDR(n))
-#define V850E_UART_ASIF_TXBF	0x02 /* transmit buffer flag (data in TXB) */
-#define V850E_UART_ASIF_TXSF	0x01 /* transmit shift flag (sending data) */
-
-/* UART receive buffer register.  */
-#define V850E_UART_RXB(n)	(*(volatile u8 *)V850E_UART_RXB_ADDR(n))
-
-/* UART transmit buffer register.  */
-#define V850E_UART_TXB(n)	(*(volatile u8 *)V850E_UART_TXB_ADDR(n))
-
-/* UART baud-rate generator control registers.  */
-#define V850E_UART_CKSR(n)	(*(volatile u8 *)V850E_UART_CKSR_ADDR(n))
-#define V850E_UART_CKSR_MAX	11
-#define V850E_UART_BRGC(n)	(*(volatile u8 *)V850E_UART_BRGC_ADDR(n))
-#define V850E_UART_BRGC_MIN	8
-
-
-#ifndef V850E_UART_CKSR_MAX_FREQ
-#define V850E_UART_CKSR_MAX_FREQ (25*1000*1000)
-#endif
-
-/* Calculate the minimum value for CKSR on this processor.  */
-static inline unsigned v850e_uart_cksr_min (void)
-{
-	int min = 0;
-	unsigned freq = V850E_UART_BASE_FREQ;
-	while (freq > V850E_UART_CKSR_MAX_FREQ) {
-		freq >>= 1;
-		min++;
-	}
-	return min;
-}
-
-
-/* Slightly abstract interface used by driver.  */
-
-
-/* Interrupts used by the UART.  */
-
-/* Received when the most recently transmitted character has been sent.  */
-#define V850E_UART_TX_IRQ(chan)		IRQ_INTST (chan)
-/* Received when a new character has been received.  */
-#define V850E_UART_RX_IRQ(chan)		IRQ_INTSR (chan)
-
-
-/* UART clock generator interface.  */
-
-/* This type encapsulates a particular uart frequency.  */
-typedef struct {
-	unsigned clk_divlog2;
-	unsigned brgen_count;
-} v850e_uart_speed_t;
-
-/* Calculate a uart speed from BAUD for this uart.  */
-static inline v850e_uart_speed_t v850e_uart_calc_speed (unsigned baud)
-{
-	v850e_uart_speed_t speed;
-
-	/* Calculate the log2 clock divider and baud-rate counter values
-	   (note that the UART divides the resulting clock by 2, so
-	   multiply BAUD by 2 here to compensate).  */
-	calc_counter_params (V850E_UART_BASE_FREQ, baud * 2,
-			     v850e_uart_cksr_min(),
-			     V850E_UART_CKSR_MAX, 8/*bits*/,
-			     &speed.clk_divlog2, &speed.brgen_count);
-
-	return speed;
-}
-
-/* Return the current speed of uart channel CHAN.  */
-static inline v850e_uart_speed_t v850e_uart_speed (unsigned chan)
-{
-	v850e_uart_speed_t speed;
-	speed.clk_divlog2 = V850E_UART_CKSR (chan);
-	speed.brgen_count = V850E_UART_BRGC (chan);
-	return speed;
-}
-
-/* Set the current speed of uart channel CHAN.  */
-static inline void v850e_uart_set_speed(unsigned chan,v850e_uart_speed_t speed)
-{
-	V850E_UART_CKSR (chan) = speed.clk_divlog2;
-	V850E_UART_BRGC (chan) = speed.brgen_count;
-}
-
-static inline int
-v850e_uart_speed_eq (v850e_uart_speed_t speed1, v850e_uart_speed_t speed2)
-{
-	return speed1.clk_divlog2 == speed2.clk_divlog2
-		&& speed1.brgen_count == speed2.brgen_count;
-}
-
-/* Minimum baud rate possible.  */
-#define v850e_uart_min_baud() \
-   ((V850E_UART_BASE_FREQ >> V850E_UART_CKSR_MAX) / (2 * 255) + 1)
-
-/* Maximum baud rate possible.  The error is quite high at max, though.  */
-#define v850e_uart_max_baud() \
-   ((V850E_UART_BASE_FREQ >> v850e_uart_cksr_min()) / (2 *V850E_UART_BRGC_MIN))
-
-/* The `maximum' clock rate the uart can used, which is wanted (though not
-   really used in any useful way) by the serial framework.  */
-#define v850e_uart_max_clock() \
-   ((V850E_UART_BASE_FREQ >> v850e_uart_cksr_min()) / 2)
-
-
-/* UART configuration interface.  */
-
-/* Type of the uart config register; must be a scalar.  */
-typedef u16 v850e_uart_config_t;
-
-/* The uart hardware config register for channel CHAN.  */
-#define V850E_UART_CONFIG(chan)		V850E_UART_ASIM (chan)
-
-/* This config bit set if the uart is enabled.  */
-#define V850E_UART_CONFIG_ENABLED	V850E_UART_ASIM_CAE
-/* If the uart _isn't_ enabled, store this value to it to do so.  */
-#define V850E_UART_CONFIG_INIT		V850E_UART_ASIM_CAE
-/* Store this config value to disable the uart channel completely.  */
-#define V850E_UART_CONFIG_FINI		0
-
-/* Setting/clearing these bits enable/disable TX/RX, respectively (but
-   otherwise generally leave things running).  */
-#define V850E_UART_CONFIG_RX_ENABLE	V850E_UART_ASIM_RXE
-#define V850E_UART_CONFIG_TX_ENABLE	V850E_UART_ASIM_TXE
-
-/* These masks define which config bits affect TX/RX modes, respectively.  */
-#define V850E_UART_CONFIG_RX_BITS \
-  (V850E_UART_ASIM_PS_MASK | V850E_UART_ASIM_CL_8 | V850E_UART_ASIM_ISRM)
-#define V850E_UART_CONFIG_TX_BITS \
-  (V850E_UART_ASIM_PS_MASK | V850E_UART_ASIM_CL_8 | V850E_UART_ASIM_SL_2)
-
-static inline v850e_uart_config_t v850e_uart_calc_config (unsigned cflags)
-{
-	v850e_uart_config_t config = 0;
-
-	/* Figure out new configuration of control register.  */
-	if (cflags & CSTOPB)
-		/* Number of stop bits, 1 or 2.  */
-		config |= V850E_UART_ASIM_SL_2;
-	if ((cflags & CSIZE) == CS8)
-		/* Number of data bits, 7 or 8.  */
-		config |= V850E_UART_ASIM_CL_8;
-	if (! (cflags & PARENB))
-		/* No parity check/generation.  */
-		config |= V850E_UART_ASIM_PS_NONE;
-	else if (cflags & PARODD)
-		/* Odd parity check/generation.  */
-		config |= V850E_UART_ASIM_PS_ODD;
-	else
-		/* Even parity check/generation.  */
-		config |= V850E_UART_ASIM_PS_EVEN;
-	if (cflags & CREAD)
-		/* Reading enabled.  */
-		config |= V850E_UART_ASIM_RXE;
-
-	config |= V850E_UART_ASIM_CAE;
-	config |= V850E_UART_ASIM_TXE; /* Writing is always enabled.  */
-	config |= V850E_UART_ASIM_ISRM; /* Errors generate a read-irq.  */
-
-	return config;
-}
-
-/* This should delay as long as necessary for a recently written config
-   setting to settle, before we turn the uart back on.  */
-static inline void
-v850e_uart_config_delay (v850e_uart_config_t config, v850e_uart_speed_t speed)
-{
-	/* The UART may not be reset properly unless we wait at least 2
-	   `basic-clocks' until turning on the TXE/RXE bits again.
-	   A `basic clock' is the clock used by the baud-rate generator,
-	   i.e., the cpu clock divided by the 2^new_clk_divlog2.
-	   The loop takes 2 insns, so loop CYCLES / 2 times.  */
-	register unsigned count = 1 << speed.clk_divlog2;
-	while (--count != 0)
-		/* nothing */;
-}
-
-
-/* RX/TX interface.  */
-
-/* Return true if all characters awaiting transmission on uart channel N
-   have been transmitted.  */
-#define v850e_uart_xmit_done(n)						      \
-   (! (V850E_UART_ASIF(n) & V850E_UART_ASIF_TXBF))
-/* Wait for this to be true.  */
-#define v850e_uart_wait_for_xmit_done(n)				      \
-   do { } while (! v850e_uart_xmit_done (n))
-
-/* Return true if uart channel N is ready to transmit a character.  */
-#define v850e_uart_xmit_ok(n)						      \
-   (v850e_uart_xmit_done(n) && v850e_uart_cts(n))
-/* Wait for this to be true.  */
-#define v850e_uart_wait_for_xmit_ok(n)					      \
-   do { } while (! v850e_uart_xmit_ok (n))
-
-/* Write character CH to uart channel CHAN.  */
-#define v850e_uart_putc(chan, ch)	(V850E_UART_TXB(chan) = (ch))
-
-/* Return latest character read on channel CHAN.  */
-#define v850e_uart_getc(chan)		V850E_UART_RXB (chan)
-
-/* Return bit-mask of uart error status.  */
-#define v850e_uart_err(chan)		V850E_UART_ASIS (chan)
-/* Various error bits set in the error result.  */
-#define V850E_UART_ERR_OVERRUN		V850E_UART_ASIS_OVE
-#define V850E_UART_ERR_FRAME		V850E_UART_ASIS_FE
-#define V850E_UART_ERR_PARITY		V850E_UART_ASIS_PE
-
-
-#endif /* __V850_V850E_UARTA_H__ */
diff --git a/include/asm-v850/v850e_uartb.h b/include/asm-v850/v850e_uartb.h
deleted file mode 100644
index 6d4767d5a83..00000000000
--- a/include/asm-v850/v850e_uartb.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * include/asm-v850/v850e_uartb.h -- V850E on-chip `UARTB' UART
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* The V850E UARTB is basically a superset of the original V850E UART, but
-   even where it's the same, the names and details have changed a bit.
-   It's similar enough to use the same driver (v850e_uart.c), but the
-   details have been abstracted slightly to do so.  */
-
-#ifndef __V850_V850E_UARTB_H__
-#define __V850_V850E_UARTB_H__
-
-
-/* Raw hardware interface.  */
-
-#define V850E_UARTB_BASE_ADDR(n)	(0xFFFFFA00 + 0x10 * (n))
-
-/* Addresses of specific UART control registers for channel N.  */
-#define V850E_UARTB_CTL0_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0x0)
-#define V850E_UARTB_CTL2_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0x2)
-#define V850E_UARTB_STR_ADDR(n)		(V850E_UARTB_BASE_ADDR(n) + 0x4)
-#define V850E_UARTB_RX_ADDR(n)		(V850E_UARTB_BASE_ADDR(n) + 0x6)
-#define V850E_UARTB_RXAP_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0x6)
-#define V850E_UARTB_TX_ADDR(n)		(V850E_UARTB_BASE_ADDR(n) + 0x8)
-#define V850E_UARTB_FIC0_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xA)
-#define V850E_UARTB_FIC1_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xB)
-#define V850E_UARTB_FIC2_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xC)
-#define V850E_UARTB_FIS0_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xE)
-#define V850E_UARTB_FIS1_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xF)
-
-/* UARTB control register 0 (general config).  */
-#define V850E_UARTB_CTL0(n)	(*(volatile u8 *)V850E_UARTB_CTL0_ADDR(n))
-/* Control bits for config registers.  */
-#define V850E_UARTB_CTL0_PWR		0x80	/* clock enable */
-#define V850E_UARTB_CTL0_TXE		0x40	/* transmit enable */
-#define V850E_UARTB_CTL0_RXE		0x20	/* receive enable */
-#define V850E_UARTB_CTL0_DIR		0x10	/*  */
-#define V850E_UARTB_CTL0_PS1		0x08	/* parity */
-#define V850E_UARTB_CTL0_PS0		0x04	/* parity */
-#define V850E_UARTB_CTL0_CL		0x02	/* char len 1:8bit, 0:7bit */
-#define V850E_UARTB_CTL0_SL		0x01	/* stop bit 1:2bit, 0:1bit */
-#define V850E_UARTB_CTL0_PS_MASK	0x0C	/* mask covering parity bits */
-#define V850E_UARTB_CTL0_PS_NONE	0x00	/* no parity */
-#define V850E_UARTB_CTL0_PS_ZERO	0x04	/* zero parity */
-#define V850E_UARTB_CTL0_PS_ODD		0x08	/* odd parity */
-#define V850E_UARTB_CTL0_PS_EVEN	0x0C	/* even parity */
-#define V850E_UARTB_CTL0_CL_8		0x02	/* char len 1:8bit, 0:7bit */
-#define V850E_UARTB_CTL0_SL_2		0x01	/* stop bit 1:2bit, 0:1bit */
-
-/* UARTB control register 2 (clock divider).  */
-#define V850E_UARTB_CTL2(n)	(*(volatile u16 *)V850E_UARTB_CTL2_ADDR(n))
-#define V850E_UARTB_CTL2_MIN	4
-#define V850E_UARTB_CTL2_MAX	0xFFFF
-
-/* UARTB serial interface status register.  */
-#define V850E_UARTB_STR(n)	(*(volatile u8 *)V850E_UARTB_STR_ADDR(n))
-/* Control bits for status registers.  */
-#define V850E_UARTB_STR_TSF	0x80	/* UBTX or FIFO exist data  */
-#define V850E_UARTB_STR_OVF	0x08	/* overflow error */
-#define V850E_UARTB_STR_PE	0x04	/* parity error */
-#define V850E_UARTB_STR_FE	0x02	/* framing error */
-#define V850E_UARTB_STR_OVE	0x01	/* overrun error */
-
-/* UARTB receive data register.  */
-#define V850E_UARTB_RX(n)	(*(volatile u8 *)V850E_UARTB_RX_ADDR(n))
-#define V850E_UARTB_RXAP(n)	(*(volatile u16 *)V850E_UARTB_RXAP_ADDR(n))
-/* Control bits for status registers.  */
-#define V850E_UARTB_RXAP_PEF	0x0200 /* parity error */
-#define V850E_UARTB_RXAP_FEF	0x0100 /* framing error */
-
-/* UARTB transmit data register.  */
-#define V850E_UARTB_TX(n)	(*(volatile u8 *)V850E_UARTB_TX_ADDR(n))
-
-/* UARTB FIFO control register 0.  */
-#define V850E_UARTB_FIC0(n)	(*(volatile u8 *)V850E_UARTB_FIC0_ADDR(n))
-
-/* UARTB FIFO control register 1.  */
-#define V850E_UARTB_FIC1(n)	(*(volatile u8 *)V850E_UARTB_FIC1_ADDR(n))
-
-/* UARTB FIFO control register 2.  */
-#define V850E_UARTB_FIC2(n)	(*(volatile u16 *)V850E_UARTB_FIC2_ADDR(n))
-
-/* UARTB FIFO status register 0.  */
-#define V850E_UARTB_FIS0(n)	(*(volatile u8 *)V850E_UARTB_FIS0_ADDR(n))
-
-/* UARTB FIFO status register 1.  */
-#define V850E_UARTB_FIS1(n)	(*(volatile u8 *)V850E_UARTB_FIS1_ADDR(n))
-
-
-/* Slightly abstract interface used by driver.  */
-
-
-/* Interrupts used by the UART.  */
-
-/* Received when the most recently transmitted character has been sent.  */
-#define V850E_UART_TX_IRQ(chan)		IRQ_INTUBTIT (chan)
-/* Received when a new character has been received.  */
-#define V850E_UART_RX_IRQ(chan)		IRQ_INTUBTIR (chan)
-
-/* Use by serial driver for information purposes.  */
-#define V850E_UART_BASE_ADDR(chan)	V850E_UARTB_BASE_ADDR(chan)
-
-
-/* UART clock generator interface.  */
-
-/* This type encapsulates a particular uart frequency.  */
-typedef u16 v850e_uart_speed_t;
-
-/* Calculate a uart speed from BAUD for this uart.  */
-static inline v850e_uart_speed_t v850e_uart_calc_speed (unsigned baud)
-{
-	v850e_uart_speed_t speed;
-
-	/*
-	 * V850E/ME2 UARTB baud rate is determined by the value of UBCTL2
-	 * fx = V850E_UARTB_BASE_FREQ = CPU_CLOCK_FREQ/4
-	 * baud = fx / 2*speed   [ speed >= 4 ]
-	 */
-	speed = V850E_UARTB_CTL2_MIN;
-	while (((V850E_UARTB_BASE_FREQ / 2) / speed ) > baud)
-		speed++;
-
-	return speed;
-}
-
-/* Return the current speed of uart channel CHAN.  */
-#define v850e_uart_speed(chan)		    V850E_UARTB_CTL2 (chan)
-
-/* Set the current speed of uart channel CHAN.  */
-#define v850e_uart_set_speed(chan, speed)   (V850E_UARTB_CTL2 (chan) = (speed))
-
-/* Return true if SPEED1 and SPEED2 are the same.  */
-#define v850e_uart_speed_eq(speed1, speed2) ((speed1) == (speed2))
-
-/* Minimum baud rate possible.  */
-#define v850e_uart_min_baud() \
-   ((V850E_UARTB_BASE_FREQ / 2) / V850E_UARTB_CTL2_MAX)
-
-/* Maximum baud rate possible.  The error is quite high at max, though.  */
-#define v850e_uart_max_baud() \
-   ((V850E_UARTB_BASE_FREQ / 2) / V850E_UARTB_CTL2_MIN)
-
-/* The `maximum' clock rate the uart can used, which is wanted (though not
-   really used in any useful way) by the serial framework.  */
-#define v850e_uart_max_clock() \
-   (V850E_UARTB_BASE_FREQ / 2)
-
-
-/* UART configuration interface.  */
-
-/* Type of the uart config register; must be a scalar.  */
-typedef u16 v850e_uart_config_t;
-
-/* The uart hardware config register for channel CHAN.  */
-#define V850E_UART_CONFIG(chan)		V850E_UARTB_CTL0 (chan)
-
-/* This config bit set if the uart is enabled.  */
-#define V850E_UART_CONFIG_ENABLED	V850E_UARTB_CTL0_PWR
-/* If the uart _isn't_ enabled, store this value to it to do so.  */
-#define V850E_UART_CONFIG_INIT		V850E_UARTB_CTL0_PWR
-/* Store this config value to disable the uart channel completely.  */
-#define V850E_UART_CONFIG_FINI		0
-
-/* Setting/clearing these bits enable/disable TX/RX, respectively (but
-   otherwise generally leave things running).  */
-#define V850E_UART_CONFIG_RX_ENABLE	V850E_UARTB_CTL0_RXE
-#define V850E_UART_CONFIG_TX_ENABLE	V850E_UARTB_CTL0_TXE
-
-/* These masks define which config bits affect TX/RX modes, respectively.  */
-#define V850E_UART_CONFIG_RX_BITS \
-  (V850E_UARTB_CTL0_PS_MASK | V850E_UARTB_CTL0_CL_8)
-#define V850E_UART_CONFIG_TX_BITS \
-  (V850E_UARTB_CTL0_PS_MASK | V850E_UARTB_CTL0_CL_8 | V850E_UARTB_CTL0_SL_2)
-
-static inline v850e_uart_config_t v850e_uart_calc_config (unsigned cflags)
-{
-	v850e_uart_config_t config = 0;
-
-	/* Figure out new configuration of control register.  */
-	if (cflags & CSTOPB)
-		/* Number of stop bits, 1 or 2.  */
-		config |= V850E_UARTB_CTL0_SL_2;
-	if ((cflags & CSIZE) == CS8)
-		/* Number of data bits, 7 or 8.  */
-		config |= V850E_UARTB_CTL0_CL_8;
-	if (! (cflags & PARENB))
-		/* No parity check/generation.  */
-		config |= V850E_UARTB_CTL0_PS_NONE;
-	else if (cflags & PARODD)
-		/* Odd parity check/generation.  */
-		config |= V850E_UARTB_CTL0_PS_ODD;
-	else
-		/* Even parity check/generation.  */
-		config |= V850E_UARTB_CTL0_PS_EVEN;
-	if (cflags & CREAD)
-		/* Reading enabled.  */
-		config |= V850E_UARTB_CTL0_RXE;
-
-	config |= V850E_UARTB_CTL0_PWR;
-	config |= V850E_UARTB_CTL0_TXE; /* Writing is always enabled.  */
-	config |= V850E_UARTB_CTL0_DIR; /* LSB first.  */
-
-	return config;
-}
-
-/* This should delay as long as necessary for a recently written config
-   setting to settle, before we turn the uart back on.  */
-static inline void
-v850e_uart_config_delay (v850e_uart_config_t config, v850e_uart_speed_t speed)
-{
-	/* The UART may not be reset properly unless we wait at least 2
-	   `basic-clocks' until turning on the TXE/RXE bits again.
-	   A `basic clock' is the clock used by the baud-rate generator,
-	   i.e., the cpu clock divided by the 2^new_clk_divlog2.
-	   The loop takes 2 insns, so loop CYCLES / 2 times.  */
-	register unsigned count = 1 << speed;
-	while (--count != 0)
-		/* nothing */;
-}
-
-
-/* RX/TX interface.  */
-
-/* Return true if all characters awaiting transmission on uart channel N
-   have been transmitted.  */
-#define v850e_uart_xmit_done(n)						      \
-   (! (V850E_UARTB_STR(n) & V850E_UARTB_STR_TSF))
-/* Wait for this to be true.  */
-#define v850e_uart_wait_for_xmit_done(n)				      \
-   do { } while (! v850e_uart_xmit_done (n))
-
-/* Return true if uart channel N is ready to transmit a character.  */
-#define v850e_uart_xmit_ok(n)						      \
-   (v850e_uart_xmit_done(n) && v850e_uart_cts(n))
-/* Wait for this to be true.  */
-#define v850e_uart_wait_for_xmit_ok(n)					      \
-   do { } while (! v850e_uart_xmit_ok (n))
-
-/* Write character CH to uart channel CHAN.  */
-#define v850e_uart_putc(chan, ch)	(V850E_UARTB_TX(chan) = (ch))
-
-/* Return latest character read on channel CHAN.  */
-#define v850e_uart_getc(chan)		V850E_UARTB_RX (chan)
-
-/* Return bit-mask of uart error status.  */
-#define v850e_uart_err(chan)		V850E_UARTB_STR (chan)
-/* Various error bits set in the error result.  */
-#define V850E_UART_ERR_OVERRUN		V850E_UARTB_STR_OVE
-#define V850E_UART_ERR_FRAME		V850E_UARTB_STR_FE
-#define V850E_UART_ERR_PARITY		V850E_UARTB_STR_PE
-
-
-#endif /* __V850_V850E_UARTB_H__ */
diff --git a/include/asm-v850/v850e_utils.h b/include/asm-v850/v850e_utils.h
deleted file mode 100644
index 52eb72822d3..00000000000
--- a/include/asm-v850/v850e_utils.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * include/asm-v850/v850e_utils.h -- Utility functions associated with
- *	V850E CPUs
- *
- *  Copyright (C) 2001,03  NEC Electronics Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E_UTILS_H__
-#define __V850_V850E_UTILS_H__
-
-/* Calculate counter clock-divider and count values to attain the
-   desired frequency RATE from the base frequency BASE_FREQ.  The
-   counter is expected to have a clock-divider, which can divide the
-   system cpu clock by a power of two value from MIN_DIVLOG2 to
-   MAX_DIV_LOG2, and a word-size of COUNTER_SIZE bits (the counter
-   counts up and resets whenever it's equal to the compare register,
-   generating an interrupt or whatever when it does so).  The returned
-   values are: *DIVLOG2 -- log2 of the desired clock divider and *COUNT
-   -- the counter compare value to use.  Returns true if it was possible
-   to find a reasonable value, otherwise false (and the other return
-   values will be set to be as good as possible).  */
-extern int calc_counter_params (unsigned long base_freq,
-				unsigned long rate,
-				unsigned min_divlog2, unsigned max_divlog2,
-				unsigned counter_size,
-				unsigned *divlog2, unsigned *count);
-
-#endif /* __V850_V850E_UTILS_H__ */
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8b82974bdc1..6272a395d43 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -286,7 +286,6 @@
 #define AUDIT_ARCH_SHEL64	(EM_SH|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_SPARC	(EM_SPARC)
 #define AUDIT_ARCH_SPARC64	(EM_SPARCV9|__AUDIT_ARCH_64BIT)
-#define AUDIT_ARCH_V850		(EM_V850|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 
 #define AUDIT_PERM_EXEC		1
diff --git a/include/linux/module.h b/include/linux/module.h
index fce15ebd0e1..68e09557c95 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -23,7 +23,7 @@
 /* Not Yet Implemented */
 #define MODULE_SUPPORTED_DEVICE(name)
 
-/* v850 toolchain uses a `_' prefix for all user symbols */
+/* some toolchains uses a `_' prefix for all user symbols */
 #ifndef MODULE_SYMBOL_PREFIX
 #define MODULE_SYMBOL_PREFIX ""
 #endif
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index f3a1c0e4502..3b2f6c04855 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -59,9 +59,6 @@
 #define PORT_SUNZILOG	38
 #define PORT_SUNSAB	39
 
-/* NEC v850.  */
-#define PORT_V850E_UART	40
-
 /* DEC */
 #define PORT_DZ		46
 #define PORT_ZS		47
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0522f368f9d..4394dadff81 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -443,7 +443,7 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
-#if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64) || defined(CONFIG_V850))
+#if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index dca5e0dd09b..4f8a3007e45 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -520,8 +520,7 @@ int main(int argc, char **argv)
 			genksyms_usage();
 			return 1;
 		}
-	if ((strcmp(arch, "v850") == 0) || (strcmp(arch, "h8300") == 0)
-	    || (strcmp(arch, "blackfin") == 0))
+	if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0))
 		mod_prefix = "_";
 	{
 		extern int yydebug;
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 1fcaf3284a6..4fa1f3ad251 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -623,7 +623,7 @@ static int do_i2c_entry(const char *filename, struct i2c_device_id *id,
 	return 1;
 }
 
-/* Ignore any prefix, eg. v850 prepends _ */
+/* Ignore any prefix, eg. some architectures prepend _ */
 static inline int sym_is(const char *symbol, const char *name)
 {
 	const char *match;
diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c
index db3881f14c2..6a96d47bd1e 100644
--- a/scripts/mod/mk_elfconfig.c
+++ b/scripts/mod/mk_elfconfig.c
@@ -55,7 +55,7 @@ main(int argc, char **argv)
 	else
 		exit(1);
 
-	if ((strcmp(argv[1], "v850") == 0) || (strcmp(argv[1], "h8300") == 0)
+	if ((strcmp(argv[1], "h8300") == 0)
 	    || (strcmp(argv[1], "blackfin") == 0))
 		printf("#define MODULE_SYMBOL_PREFIX \"_\"\n");
 	else
-- 
GitLab


From 82736f4d1d2b7063b829cc93171a6e5aea8a9c49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <Uwe.Kleine-Koenig@digi.com>
Date: Wed, 23 Jul 2008 21:28:54 -0700
Subject: [PATCH 241/853] generic irqs: handle failure of irqchip->set_type in
 setup_irq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

set_type returns an int indicating success or failure, but up to now
setup_irq ignores that.

In my case this resulted in a machine hang:

gpio-keys requested IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, but
arm/ns9xxx can only trigger on one direction so set_type didn't touch
the configuration which happens to default to level sensitivity and
returned -EINVAL.  setup_irq ignored that and unmasked the irq.  This
resulted in an endless triggering of the gpio-key interrupt service
routine which effectively killed the machine.

With this patch applied setup_irq propagates the error to the caller.

Note that before in the case

	chip && !chip->set_type && !chip->name

a NULL pointer was fed to printk.  This is fixed, too.

Signed-off-by: Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/irq/manage.c | 64 +++++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 22 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3cfc0fefb5e..5bc6e5ecc49 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -308,6 +308,30 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
 		desc->handle_irq = NULL;
 }
 
+static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
+		unsigned long flags)
+{
+	int ret;
+
+	if (!chip || !chip->set_type) {
+		/*
+		 * IRQF_TRIGGER_* but the PIC does not support multiple
+		 * flow-types?
+		 */
+		pr_warning("No set_type function for IRQ %d (%s)\n", irq,
+				chip ? (chip->name ? : "unknown") : "unknown");
+		return 0;
+	}
+
+	ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
+
+	if (ret)
+		pr_err("setting flow type for irq %u failed (%pF)\n",
+				irq, chip->set_type);
+
+	return ret;
+}
+
 /*
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
@@ -319,6 +343,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	const char *old_name = NULL;
 	unsigned long flags;
 	int shared = 0;
+	int ret;
 
 	if (irq >= NR_IRQS)
 		return -EINVAL;
@@ -376,35 +401,23 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 		shared = 1;
 	}
 
-	*p = new;
-
-	/* Exclude IRQ from balancing */
-	if (new->flags & IRQF_NOBALANCING)
-		desc->status |= IRQ_NO_BALANCING;
-
 	if (!shared) {
 		irq_chip_set_defaults(desc->chip);
 
-#if defined(CONFIG_IRQ_PER_CPU)
-		if (new->flags & IRQF_PERCPU)
-			desc->status |= IRQ_PER_CPU;
-#endif
-
 		/* Setup the type (level, edge polarity) if configured: */
 		if (new->flags & IRQF_TRIGGER_MASK) {
-			if (desc->chip->set_type)
-				desc->chip->set_type(irq,
-						new->flags & IRQF_TRIGGER_MASK);
-			else
-				/*
-				 * IRQF_TRIGGER_* but the PIC does not support
-				 * multiple flow-types?
-				 */
-				printk(KERN_WARNING "No IRQF_TRIGGER set_type "
-				       "function for IRQ %d (%s)\n", irq,
-				       desc->chip->name);
+			ret = __irq_set_trigger(desc->chip, irq, new->flags);
+
+			if (ret) {
+				spin_unlock_irqrestore(&desc->lock, flags);
+				return ret;
+			}
 		} else
 			compat_irq_chip_set_default_handler(desc);
+#if defined(CONFIG_IRQ_PER_CPU)
+		if (new->flags & IRQF_PERCPU)
+			desc->status |= IRQ_PER_CPU;
+#endif
 
 		desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING |
 				  IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED);
@@ -423,6 +436,13 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 		/* Set default affinity mask once everything is setup */
 		irq_select_affinity(irq);
 	}
+
+	*p = new;
+
+	/* Exclude IRQ from balancing */
+	if (new->flags & IRQF_NOBALANCING)
+		desc->status |= IRQ_NO_BALANCING;
+
 	/* Reset broken irq detection when installing new handler */
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
-- 
GitLab


From 5aa0769d089125e63f8dc23e0283e559e1790493 Mon Sep 17 00:00:00 2001
From: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Date: Wed, 23 Jul 2008 21:28:55 -0700
Subject: [PATCH 242/853] atmel_pwm: set up only one PWM clock when allocating
 a clock

This patch will only setup one clock, if free, and return this clock to the
caller.  The previous solution would setup both clocks with the same prescaler
and divider and return PWM_CPR_CLKB, thus taking both clocks in the same call
without the caller knowing.

Signed-off-by: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/atmel_pwm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c
index 5b5a14dab3d..6aa5294dfec 100644
--- a/drivers/misc/atmel_pwm.c
+++ b/drivers/misc/atmel_pwm.c
@@ -211,8 +211,7 @@ int pwm_clk_alloc(unsigned prescale, unsigned div)
 	if ((mr & 0xffff) == 0) {
 		mr |= val;
 		ret = PWM_CPR_CLKA;
-	}
-	if ((mr & (0xffff << 16)) == 0) {
+	} else if ((mr & (0xffff << 16)) == 0) {
 		mr |= val << 16;
 		ret = PWM_CPR_CLKB;
 	}
-- 
GitLab


From 6cbb2e711128b505209f7c910018aac77335c887 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:28:55 -0700
Subject: [PATCH 243/853] checkpatch: Version: 0.20

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 6971bf078d1..66f060ecb81 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -9,7 +9,7 @@ use strict;
 my $P = $0;
 $P =~ s@.*/@@g;
 
-my $V = '0.19';
+my $V = '0.20';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
-- 
GitLab


From fee61c47d15270bdea699a8a3dd867f0825c3541 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:28:56 -0700
Subject: [PATCH 244/853] checkpatch: return is not a function -- parentheses
 for casts are ok too

Casts require parentheses so it is possible to have something like this:

	return (int)(*a);

This mis-trips the complexity function.  Ensure that the two separate
parenthesised sections are not coalesced.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 66f060ecb81..83ae37b3862 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1670,6 +1670,7 @@ sub process {
 			my $value = $2;
 
 			# Flatten any parentheses and braces
+			$value =~ s/\)\(/\) \(/g;
 			while ($value =~ s/\([^\(\)]*\)/1/) {
 			}
 
-- 
GitLab


From c8cb2ca37ed51aa1f3b20e3eff1e72df1c400f70 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:28:57 -0700
Subject: [PATCH 245/853] checkpatch: types: some types may also be identifiers

Some types such as typedefs may overlap real identifiers.  Be more
targeted about when a type can really exist.  Where it cannot, let it be
an identifier.  This prevents false reporting of the minus '-' in unary
context in the following:

	foo[bar->bool - 1];

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 83ae37b3862..5420db6502f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -171,6 +171,7 @@ our @modifierList = (
 sub build_types {
 	my $mods = "(?:  \n" . join("|\n  ", @modifierList) . "\n)";
 	my $all = "(?:  \n" . join("|\n  ", @typeList) . "\n)";
+	$Modifier	= qr{(?:$Attribute|$Sparse|$mods)};
 	$NonptrType	= qr{
 			(?:const\s+)?
 			(?:$mods\s+)?
@@ -178,15 +179,14 @@ sub build_types {
 				(?:typeof|__typeof__)\s*\(\s*\**\s*$Ident\s*\)|
 				(?:${all}\b)
 			)
-			(?:\s+$Sparse|\s+const)*
+			(?:\s+$Modifier|\s+const)*
 		  }x;
 	$Type	= qr{
 			$NonptrType
 			(?:\s*\*+\s*const|\s*\*+|(?:\s*\[\s*\])+)?
-			(?:\s+$Inline|\s+$Sparse|\s+$Attribute|\s+$mods)*
+			(?:\s+$Inline|\s+$Modifier)*
 		  }x;
 	$Declare	= qr{(?:$Storage\s+)?$Type};
-	$Modifier	= qr{(?:$Attribute|$Sparse|$mods)};
 }
 build_types();
 
@@ -715,7 +715,7 @@ sub annotate_values {
 				$av_preprocessor = 0;
 			}
 
-		} elsif ($cur =~ /^($Type)/) {
+		} elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\))/) {
 			print "DECLARE($1)\n" if ($dbg_values > 1);
 			$type = 'T';
 
@@ -800,8 +800,9 @@ sub annotate_values {
 				print "PAREN('$1')\n" if ($dbg_values > 1);
 			}
 
-		} elsif ($cur =~ /^($Ident)\(/o) {
+		} elsif ($cur =~ /^($Ident)\s*\(/o) {
 			print "FUNC($1)\n" if ($dbg_values > 1);
+			$type = 'V';
 			$av_pending = 'V';
 
 		} elsif ($cur =~ /^($Ident|$Constant)/o) {
-- 
GitLab


From f3db6639fee577f6ed92c0a1fc881e748c47ec48 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Wed, 23 Jul 2008 21:28:57 -0700
Subject: [PATCH 246/853] checkpatch: add a checkpatch warning for new uses of
 __initcall().

[apw@shadowen.org: generalise pattern and add tests]
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 5420db6502f..cf70f123f57 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2108,6 +2108,10 @@ sub process {
 		if ($line =~ /\bsimple_(strto.*?)\s*\(/) {
 			WARN("consider using strict_$1 in preference to simple_$1\n" . $herecurr);
 		}
+# check for __initcall(), use device_initcall() explicitly please
+		if ($line =~ /^.\s*__initcall\s*\(/) {
+			WARN("please use device_initcall() instead of __initcall()\n" . $herecurr);
+		}
 
 # use of NR_CPUS is usually wrong
 # ignore definitions of NR_CPUS and usage to define arrays as likely right
-- 
GitLab


From d3ddcf471ea90d7ff711dbaa371ef379ed625ec0 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:28:58 -0700
Subject: [PATCH 247/853] checkpatch: possible types: __asm__ is never a type

We are false matching __asm__ as a type, and then tripping the external
function checks.  Squash.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index cf70f123f57..fd597a4b5da 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -846,7 +846,7 @@ sub possible {
 	if ($possible !~ /^(?:$Storage|$Type|DEFINE_\S+)$/ &&
 	    $possible ne 'goto' && $possible ne 'return' &&
 	    $possible ne 'case' && $possible ne 'else' &&
-	    $possible ne 'asm' &&
+	    $possible ne 'asm' && $possible ne '__asm__' &&
 	    $possible !~ /^(typedef|struct|enum)\b/) {
 		# Check for modifiers.
 		$possible =~ s/\s*$Storage\s*//g;
-- 
GitLab


From beae6332493a40116dba24928154621f2e88b9a9 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:28:59 -0700
Subject: [PATCH 248/853] checkpatch: comment detection: ignore macro
 continuation when detecting associated comments

When looking for an associated comment, it may be suffixed by a macro
continuation.  Ignore this.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index fd597a4b5da..94250d1a3a4 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -631,7 +631,7 @@ sub ctx_locate_comment {
 	my ($first_line, $end_line) = @_;
 
 	# Catch a comment on the end of the line itself.
-	my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*$@);
+	my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@);
 	return $current_comment if (defined $current_comment);
 
 	# Look through the context and try and figure out if there is a
-- 
GitLab


From 6ef9b297f6e8850da3be9c9ff5f00385c0977004 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:28:59 -0700
Subject: [PATCH 249/853] checkpatch: types: unary -- goto introduces unary
 context

When we see a goto we enter unary context.  For example:

	goto *h;

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 94250d1a3a4..b2b0648ee14 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -780,7 +780,7 @@ sub annotate_values {
 			$av_pending = 'N';
 			$type = 'N';
 
-		} elsif ($cur =~/^(return|case|else)/o) {
+		} elsif ($cur =~/^(return|case|else|goto)/o) {
 			print "KEYWORD($1)\n" if ($dbg_values > 1);
 			$type = 'N';
 
-- 
GitLab


From a3bb97a7aba36055d476896ed6393ab35a119d5b Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:00 -0700
Subject: [PATCH 250/853] checkpatch: macros: fix statement counting block end
 detection

We are incorrectly counting the lines in a block while accumulating
the trailing lines in a macro statement, leading to false positives.
Fix end of block handling and general counting for negative context lines.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index b2b0648ee14..add86862325 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -470,7 +470,9 @@ sub ctx_statement_block {
 		}
 		$off++;
 	}
+	# We are truly at the end, so shuffle to the next line.
 	if ($off == $len) {
+		$loff = $len + 1;
 		$line++;
 		$remain--;
 	}
@@ -1793,30 +1795,26 @@ sub process {
 				$lines[$ln - 1] =~ /^(?:-|..*\\$)/)
 			{
 				$ctx .= $rawlines[$ln - 1] . "\n";
+				$cnt-- if ($lines[$ln - 1] !~ /^-/);
 				$ln++;
-				$cnt--;
 			}
 			$ctx .= $rawlines[$ln - 1];
 
 			($dstat, $dcond, $ln, $cnt, $off) =
 				ctx_statement_block($linenr, $ln - $linenr + 1, 0);
 			#print "dstat<$dstat> dcond<$dcond> cnt<$cnt> off<$off>\n";
-			#print "LINE<$lines[$ln]> len<" . length($lines[$ln]) . "\n";
+			#print "LINE<$lines[$ln-1]> len<" . length($lines[$ln-1]) . "\n";
 
 			# Extract the remainder of the define (if any) and
 			# rip off surrounding spaces, and trailing \'s.
 			$rest = '';
-			if (defined $lines[$ln - 1] &&
-			    $off > length($lines[$ln - 1]))
-			{
-				$ln++;
-				$cnt--;
-				$off = 0;
-			}
-			while ($cnt > 0) {
-				$rest .= substr($lines[$ln - 1], $off) . "\n";
+			while ($off != 0 || ($cnt > 0 && $rest =~ /(?:^|\\)\s*$/)) {
+				#print "ADDING $off <" . substr($lines[$ln - 1], $off) . ">\n";
+				if ($off != 0 || $lines[$ln - 1] !~ /^-/) {
+					$rest .= substr($lines[$ln - 1], $off) . "\n";
+					$cnt--;
+				}
 				$ln++;
-				$cnt--;
 				$off = 0;
 			}
 			$rest =~ s/\\\n.//g;
@@ -1847,6 +1845,7 @@ sub process {
 				DEFINE_PER_CPU|
 				__typeof__\(
 			}x;
+			#print "REST<$rest>\n";
 			if ($rest ne '') {
 				if ($rest !~ /while\s*\(/ &&
 				    $dstat !~ /$exceptions/)
-- 
GitLab


From 548596d523d83dff5a670beb84be0daf4c3bcd16 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:01 -0700
Subject: [PATCH 251/853] checkpatch: trailing statement indent: fix end of
 statement location

Fix end of statement location.  Where the last line of the statement is
replaced we are misreporting the newly added replacement as an incorrectly
indented trailing statement for the negative context.  We are also
incorrectly reporting negative statements generally.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index add86862325..89177c349f9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1249,17 +1249,22 @@ sub process {
 			my $pre_ctx = "$1$2";
 
 			my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
-			my $ctx_ln = $linenr + $#ctx + 1;
 			my $ctx_cnt = $realcnt - $#ctx - 1;
 			my $ctx = join("\n", @ctx);
 
-			##warn "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
+			my $ctx_ln = $linenr;
+			my $ctx_skip = $realcnt;
 
-			# Skip over any removed lines in the context following statement.
-			while (defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^-/) {
+			while ($ctx_skip > $ctx_cnt || ($ctx_skip == $ctx_cnt &&
+					defined $lines[$ctx_ln - 1] &&
+					$lines[$ctx_ln - 1] =~ /^-/)) {
+				##print "SKIP<$ctx_skip> CNT<$ctx_cnt>\n";
+				$ctx_skip-- if (!defined $lines[$ctx_ln - 1] || $lines[$ctx_ln - 1] !~ /^-/);
 				$ctx_ln++;
 			}
-			##warn "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
+
+			##print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
+			##print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
 
 			if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
 				ERROR("that open brace { should be on the previous line\n" .
-- 
GitLab


From f4c014c0dede10cc0a8463e748892e738e190699 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:01 -0700
Subject: [PATCH 252/853] checkpatch: allow printk strings to exceed 80
 characters to maintain their searchability

Allow printk strings to break the 80 character width limits, thus keeping
them complete and searchable.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 89177c349f9..614999f29aa 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1138,7 +1138,9 @@ sub process {
 		}
 #80 column limit
 		if ($line =~ /^\+/ && $prevrawline !~ /\/\*\*/ &&
-		    $rawline !~ /^.\s*\*\s*\@$Ident\s/ && $length > 80)
+		    $rawline !~ /^.\s*\*\s*\@$Ident\s/ &&
+		    $line !~ /^\+\s*printk\s*\(\s*(?:KERN_\S+\s*)?"[X\t]*"\s*(?:,|\)\s*;)\s*$/ &&
+		    $length > 80)
 		{
 			WARN("line over 80 characters\n" . $herecurr);
 		}
-- 
GitLab


From e2a763c20b89890d2153551b1af6962b135de4c0 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:02 -0700
Subject: [PATCH 253/853] checkpatch: switch -- report trailing statements on
 case and default

Report trailing statements on case and default lines.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 614999f29aa..5f71b305025 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1244,6 +1244,10 @@ sub process {
 				ERROR("switch and case should be at the same indent\n$hereline$err");
 			}
 		}
+		if ($line =~ /^.\s*(?:case\s*.*|default\s*):/g &&
+		    $line !~ /\G(?:\s*{)?(?:\s*$;*)(?:\s*\\)?\s*$/g) {
+			ERROR("trailing statements should be on next line\n" . $herecurr);
+		}
 
 # if/while/etc brace do not go on next line, unless defining a do while loop,
 # or if that brace on the next line is for something else
-- 
GitLab


From 8d31cfcecf67563d70cd68616cb8fb4384f24b51 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:02 -0700
Subject: [PATCH 254/853] checkpatch: check spacing for square brackets

Check on the spacing before square brackets.  We should only allow spaces
there if this is part of a type definition or an initialiser.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 5f71b305025..e7c8ab1b54b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1435,6 +1435,17 @@ sub process {
 			ERROR("open brace '{' following $1 go on the same line\n" . $hereprev);
 		}
 
+# check for spacing round square brackets; allowed:
+#  1. with a type on the left -- int [] a;
+#  2. at the beginning of a line for slice initialisers -- [0..10] = 5,
+		while ($line =~ /(.*?\s)\[/g) {
+			my ($where, $prefix) = ($-[1], $1);
+			if ($prefix !~ /$Type\s+$/ &&
+			    ($where != 0 || $prefix !~ /^.\s+$/)) {
+				ERROR("space prohibited before open square bracket '['\n" . $herecurr);
+			}
+		}
+
 # check for spaces between functions and their parentheses.
 		while ($line =~ /($Ident)\s+\(/g) {
 			my $name = $1;
-- 
GitLab


From 53210168feeff9a3c780bd42f69936d4c12381d5 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:03 -0700
Subject: [PATCH 255/853] checkpatch: toughen trailing if statement checks and
 extend them to while and for

Extend the trailing statement checks to report a trailing semi-colon ';'
as we really want it on the next line and indented so it is really really
obvious.  Also extend the tests to include while and for.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e7c8ab1b54b..8616baee0ae 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1269,8 +1269,8 @@ sub process {
 				$ctx_ln++;
 			}
 
-			##print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
-			##print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
+			#print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
+			#print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
 
 			if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
 				ERROR("that open brace { should be on the previous line\n" .
@@ -1713,7 +1713,7 @@ sub process {
 		}
 
 # Check for illegal assignment in if conditional.
-		if ($line =~ /\bif\s*\(/) {
+		if ($line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
 			my ($s, $c) = ($stat, $cond);
 
 			if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/) {
@@ -1725,8 +1725,8 @@ sub process {
 			substr($s, 0, length($c), '');
 			$s =~ s/\n.*//g;
 			$s =~ s/$;//g; 	# Remove any comments
-			if (length($c) && $s !~ /^\s*({|;|)\s*\\*\s*$/ &&
-			    $c !~ /^.\s*\#\s*if/)
+			if (length($c) && $s !~ /^\s*{?\s*\\*\s*$/ &&
+			    $c !~ /}\s*while\s*/)
 			{
 				ERROR("trailing statements should be on next line\n" . $herecurr);
 			}
-- 
GitLab


From f5fe35dd95549b1b419cdeb2ec3fe61fda94fa93 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:03 -0700
Subject: [PATCH 256/853] checkpatch: condition/loop indent checks

Check to see if the block/statement which a condition or loop introduces
is indented correctly.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 59 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 8616baee0ae..13d7a330b5d 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1167,10 +1167,10 @@ sub process {
 		}
 
 # Check for potential 'bare' types
-		my ($stat, $cond);
+		my ($stat, $cond, $line_nr_next, $remain_next);
 		if ($realcnt && $line =~ /.\s*\S/) {
-			($stat, $cond) = ctx_statement_block($linenr,
-								$realcnt, 0);
+			($stat, $cond, $line_nr_next, $remain_next) =
+				ctx_statement_block($linenr, $realcnt, 0);
 			$stat =~ s/\n./\n /g;
 			$cond =~ s/\n./\n /g;
 
@@ -1712,7 +1712,8 @@ sub process {
 			ERROR("space required before the open parenthesis '('\n" . $herecurr);
 		}
 
-# Check for illegal assignment in if conditional.
+# Check for illegal assignment in if conditional -- and check for trailing
+# statements after the conditional.
 		if ($line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
 			my ($s, $c) = ($stat, $cond);
 
@@ -1732,6 +1733,56 @@ sub process {
 			}
 		}
 
+# Check relative indent for conditionals and blocks.
+		if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
+			my ($s, $c) = ($stat, $cond);
+
+			substr($s, 0, length($c), '');
+
+			# Make sure we remove the line prefixes as we have
+			# none on the first line, and are going to readd them
+			# where necessary.
+			$s =~ s/\n./\n/gs;
+
+			# We want to check the first line inside the block
+			# starting at the end of the conditional, so remove:
+			#  1) any blank line termination
+			#  2) any opening brace { on end of the line
+			#  3) any do (...) {
+			my $continuation = 0;
+			my $check = 0;
+			$s =~ s/^.*\bdo\b//;
+			$s =~ s/^\s*{//;
+			if ($s =~ s/^\s*\\//) {
+				$continuation = 1;
+			}
+			if ($s =~ s/^\s*\n//) {
+				$check = 1;
+			}
+
+			# Also ignore a loop construct at the end of a
+			# preprocessor statement.
+			if (($prevline =~ /^.\s*#\s*define\s/ ||
+			    $prevline =~ /\\\s*$/) && $continuation == 0) {
+				$check = 0;
+			}
+
+			# Ignore the current line if its is a preprocessor
+			# line.
+			if ($s =~ /^\s*#\s*/) {
+				$check = 0;
+			}
+
+			my (undef, $sindent) = line_stats("+" . $s);
+
+			##print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s>\n";
+
+			if ($check && (($sindent % 8) != 0 ||
+			    ($sindent <= $indent && $s ne ''))) {
+				WARN("suspect code indent for conditional statements\n" . $herecurr);
+			}
+		}
+
 # Check for bitwise tests written as boolean
 		if ($line =~ /
 			(?:
-- 
GitLab


From 4c432a8f0134504814aa8dcce6cc57c89d175604 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 23 Jul 2008 21:29:04 -0700
Subject: [PATCH 257/853] checkpatch: usb_free_urb() can take NULL

usb_free_urb() can take a NULL, so let's check and warn about that.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 13d7a330b5d..a4e8087a0ca 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2078,6 +2078,13 @@ sub process {
 				WARN("kfree(NULL) is safe this check is probabally not required\n" . $hereprev);
 			}
 		}
+# check for needless usb_free_urb() checks
+		if ($prevline =~ /\bif\s*\(([^\)]*)\)/) {
+			my $expr = $1;
+			if ($line =~ /\busb_free_urb\(\Q$expr\E\);/) {
+				WARN("usb_free_urb(NULL) is safe this check is probably not required\n" . $hereprev);
+			}
+		}
 
 # warn about #ifdefs in C files
 #		if ($line =~ /^.\s*\#\s*if(|n)def/ && ($realfile =~ /\.c$/)) {
-- 
GitLab


From 3c232147a7d5b0418b0a0bae0e5b9a62fb81f4f2 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Wed, 23 Jul 2008 21:29:05 -0700
Subject: [PATCH 258/853] checkpatch: correct spelling in kfree checks

Correct spelling in the kfree reports.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a4e8087a0ca..3961e759a25 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2075,7 +2075,7 @@ sub process {
 		if ($prevline =~ /\bif\s*\(([^\)]*)\)/) {
 			my $expr = $1;
 			if ($line =~ /\bkfree\(\Q$expr\E\);/) {
-				WARN("kfree(NULL) is safe this check is probabally not required\n" . $hereprev);
+				WARN("kfree(NULL) is safe this check is probably not required\n" . $hereprev);
 			}
 		}
 # check for needless usb_free_urb() checks
-- 
GitLab


From 389a2fe57ffc59a649bea39db4d7e6d2eff2b562 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:05 -0700
Subject: [PATCH 259/853] checkpatch: allow for type modifiers on multiple
 declarations

Allow for type modifiers mid declaration on multiple declarations:

	struct mxser_mstatus ms, __user *msu = argp;

Reported by Jiri Slaby.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3961e759a25..bcfb8ef00fe 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -721,6 +721,10 @@ sub annotate_values {
 			print "DECLARE($1)\n" if ($dbg_values > 1);
 			$type = 'T';
 
+		} elsif ($cur =~ /^($Modifier)\s*/) {
+			print "MODIFIER($1)\n" if ($dbg_values > 1);
+			$type = 'T';
+
 		} elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) {
 			print "DEFINE($1,$2)\n" if ($dbg_values > 1);
 			$av_preprocessor = 1;
-- 
GitLab


From 7429c6903e3628fc2cfea65ec7e13bac030c7bfe Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:06 -0700
Subject: [PATCH 260/853] checkpatch: improve type matcher debug

Improve type matcher debug so we can see what it does match.  As part
of this move us to using the common debug framework.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index bcfb8ef00fe..077a2ca3304 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -17,7 +17,6 @@ my $quiet = 0;
 my $tree = 1;
 my $chk_signoff = 1;
 my $chk_patch = 1;
-my $tst_type = 0;
 my $tst_only;
 my $emacs = 0;
 my $terse = 0;
@@ -44,7 +43,6 @@ GetOptions(
 	'summary-file!'	=> \$summary_file,
 
 	'debug=s'	=> \%debug,
-	'test-type!'	=> \$tst_type,
 	'test-only=s'	=> \$tst_only,
 ) or exit;
 
@@ -67,6 +65,7 @@ if ($#ARGV < 0) {
 
 my $dbg_values = 0;
 my $dbg_possible = 0;
+my $dbg_type = 0;
 for my $key (keys %debug) {
 	eval "\${dbg_$key} = '$debug{$key}';"
 }
@@ -1307,8 +1306,12 @@ sub process {
 		if ($line=~/^[^\+]/) {next;}
 
 # TEST: allow direct testing of the type matcher.
-		if ($tst_type && $line =~ /^.$Declare$/) {
-			ERROR("TEST: is type $Declare\n" . $herecurr);
+		if ($dbg_type) {
+			if ($line =~ /^.\s*$Declare\s*$/) {
+				ERROR("TEST: is type\n" . $herecurr);
+			} elsif ($dbg_type > 1 && $line =~ /^.+($Declare)/) {
+				ERROR("TEST: is not type ($1 is)\n". $herecurr);
+			}
 			next;
 		}
 
-- 
GitLab


From d2172eb5bd4b7d06577113ec40635083619ca54a Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:07 -0700
Subject: [PATCH 261/853] checkpatch: possible modifiers are not being
 correctly matched

Although we are finding the added modifier in the declaration below
we are not correctly matching it as a type.  Fix the declaration.

    static void __ref *vmem_alloc_pages(unsigned int order)
    {
    }

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 077a2ca3304..53ec3946670 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -168,12 +168,11 @@ our @modifierList = (
 );
 
 sub build_types {
-	my $mods = "(?:  \n" . join("|\n  ", @modifierList) . "\n)";
-	my $all = "(?:  \n" . join("|\n  ", @typeList) . "\n)";
+	my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
+	my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)";
 	$Modifier	= qr{(?:$Attribute|$Sparse|$mods)};
 	$NonptrType	= qr{
-			(?:const\s+)?
-			(?:$mods\s+)?
+			(?:$Modifier\s+|const\s+)*
 			(?:
 				(?:typeof|__typeof__)\s*\(\s*\**\s*$Ident\s*\)|
 				(?:${all}\b)
-- 
GitLab


From b8f96a31f38c8e9fc75f0a89c6815e7cbc402858 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:07 -0700
Subject: [PATCH 262/853] checkpatch: macro complexity checks are meaningless
 in linker scripts

Exclude vmlinux.lds.h from the macro complexity checks.  They will never
apply sanely here.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 53ec3946670..775f2b146aa 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1860,7 +1860,8 @@ sub process {
 # multi-statement macros should be enclosed in a do while loop, grab the
 # first statement and ensure its the whole macro if its not enclosed
 # in a known good container
-		if ($line =~ /^.\s*\#\s*define\s*$Ident(\()?/) {
+		if ($realfile !~ m@/vmlinux.lds.h$@ &&
+		    $line =~ /^.\s*\#\s*define\s*$Ident(\()?/) {
 			my $ln = $linenr;
 			my $cnt = $realcnt;
 			my ($off, $dstat, $dcond, $rest);
-- 
GitLab


From 8ea3eb9a20f39d5afa52900a34092b4b5f6b55cb Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:08 -0700
Subject: [PATCH 263/853] checkpatch: handle return types of pointers to
 functions

Make sure we correctly mark the return type of the pointer to a function
declaration.

    const void *(*sb_tag)(struct sysfs_tag_info *info);

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 775f2b146aa..6d07b6778c9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -715,7 +715,7 @@ sub annotate_values {
 				$av_preprocessor = 0;
 			}
 
-		} elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\))/) {
+		} elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\()/) {
 			print "DECLARE($1)\n" if ($dbg_values > 1);
 			$type = 'T';
 
-- 
GitLab


From 0221f55c142b0ac8baf6f0b6c4e1ec89f0c98e96 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:08 -0700
Subject: [PATCH 264/853] checkpatch: possible types -- known modifiers cannot
 be types

Ensure we do not inadvertently load known modifiers up as possible types.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 6d07b6778c9..9c209165f25 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -847,7 +847,7 @@ sub possible {
 	my ($possible, $line) = @_;
 
 	print "CHECK<$possible> ($line)\n" if ($dbg_possible > 1);
-	if ($possible !~ /^(?:$Storage|$Type|DEFINE_\S+)$/ &&
+	if ($possible !~ /^(?:$Modifier|$Storage|$Type|DEFINE_\S+)$/ &&
 	    $possible ne 'goto' && $possible ne 'return' &&
 	    $possible ne 'case' && $possible ne 'else' &&
 	    $possible ne 'asm' && $possible ne '__asm__' &&
-- 
GitLab


From d2506586586c59f5db0e2ce00d5d31ccec6260b8 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:09 -0700
Subject: [PATCH 265/853] checkpatch: possible modifiers -- handle multiple
 modifiers and trailing

Add support for multiple modifiers such as:

	int __one __two foo;

Also handle trailing known modifiers when detecting modifiers:

	int __one foo __read_mostly;

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 9c209165f25..8a3b0fd67ad 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -859,8 +859,10 @@ sub possible {
 
 		} elsif ($possible =~ /\s/) {
 			$possible =~ s/\s*$Type\s*//g;
-			warn "MODIFIER: $possible ($line)\n" if ($dbg_possible);
-			push(@modifierList, $possible);
+			for my $modifier (split(' ', $possible)) {
+				warn "MODIFIER: $modifier ($possible) ($line)\n" if ($dbg_possible);
+				push(@modifierList, $modifier);
+			}
 
 		} else {
 			warn "POSSIBLE: $possible ($line)\n" if ($dbg_possible);
@@ -1186,7 +1188,7 @@ sub process {
 			} elsif ($s =~ /^.\s*$Ident\s*\(/s) {
 
 			# declarations always start with types
-			} elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))\s*(?:;|=|,|\()/s) {
+			} elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) {
 				my $type = $1;
 				$type =~ s/\s+/ /g;
 				possible($type, "A:" . $s);
-- 
GitLab


From 1f65f947a6a875e1fe7867dc08e981c4101d435d Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:10 -0700
Subject: [PATCH 266/853] checkpatch: add checks for question mark and colon
 spacing

Add checks for the question mark colon operator spacing, and also check
the other uses of colon.  Colon means a number of things:

 - it introduces the else part of the ?: operator,
 - it terminates a goto label,
 - it terminates the case value,
 - it separates the identifier from the bit size on bit fields, and
 - it is used to introduce option types in asm().

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 81 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 9 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 8a3b0fd67ad..88027f237cd 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -689,17 +689,20 @@ sub cat_vet {
 my $av_preprocessor = 0;
 my $av_pending;
 my @av_paren_type;
+my $av_pend_colon;
 
 sub annotate_reset {
 	$av_preprocessor = 0;
 	$av_pending = '_';
 	@av_paren_type = ('E');
+	$av_pend_colon = 'O';
 }
 
 sub annotate_values {
 	my ($stream, $type) = @_;
 
 	my $res;
+	my $var = '_' x length($stream);
 	my $cur = $stream;
 
 	print "$stream\n" if ($dbg_values > 1);
@@ -784,7 +787,12 @@ sub annotate_values {
 			$av_pending = 'N';
 			$type = 'N';
 
-		} elsif ($cur =~/^(return|case|else|goto)/o) {
+		} elsif ($cur =~/^(case)/o) {
+			print "CASE($1)\n" if ($dbg_values > 1);
+			$av_pend_colon = 'C';
+			$type = 'N';
+
+		} elsif ($cur =~/^(return|else|goto)/o) {
 			print "KEYWORD($1)\n" if ($dbg_values > 1);
 			$type = 'N';
 
@@ -809,6 +817,15 @@ sub annotate_values {
 			$type = 'V';
 			$av_pending = 'V';
 
+		} elsif ($cur =~ /^($Ident\s*):/) {
+			if ($type eq 'E') {
+				$av_pend_colon = 'L';
+			} elsif ($type eq 'T') {
+				$av_pend_colon = 'B';
+			}
+			print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1);
+			$type = 'V';
+
 		} elsif ($cur =~ /^($Ident|$Constant)/o) {
 			print "IDENT($1)\n" if ($dbg_values > 1);
 			$type = 'V';
@@ -820,8 +837,24 @@ sub annotate_values {
 		} elsif ($cur =~/^(;|{|})/) {
 			print "END($1)\n" if ($dbg_values > 1);
 			$type = 'E';
+			$av_pend_colon = 'O';
+
+		} elsif ($cur =~ /^(\?)/o) {
+			print "QUESTION($1)\n" if ($dbg_values > 1);
+			$type = 'N';
+
+		} elsif ($cur =~ /^(:)/o) {
+			print "COLON($1,$av_pend_colon)\n" if ($dbg_values > 1);
+
+			substr($var, length($res), 1, $av_pend_colon);
+			if ($av_pend_colon eq 'C' || $av_pend_colon eq 'L') {
+				$type = 'E';
+			} else {
+				$type = 'N';
+			}
+			$av_pend_colon = 'O';
 
-		} elsif ($cur =~ /^(;|\?|:|\[)/o) {
+		} elsif ($cur =~ /^(;|\[)/o) {
 			print "CLOSE($1)\n" if ($dbg_values > 1);
 			$type = 'N';
 
@@ -840,7 +873,7 @@ sub annotate_values {
 		}
 	}
 
-	return $res;
+	return ($res, $var);
 }
 
 sub possible {
@@ -1294,12 +1327,14 @@ sub process {
 
 		# Track the 'values' across context and added lines.
 		my $opline = $line; $opline =~ s/^./ /;
-		my $curr_values = annotate_values($opline . "\n", $prev_values);
+		my ($curr_values, $curr_vars) =
+				annotate_values($opline . "\n", $prev_values);
 		$curr_values = $prev_values . $curr_values;
 		if ($dbg_values) {
 			my $outline = $opline; $outline =~ s/\t/ /g;
 			print "$linenr > .$outline\n";
 			print "$linenr > $curr_values\n";
+			print "$linenr >  $curr_vars\n";
 		}
 		$prev_values = substr($curr_values, -1);
 
@@ -1490,7 +1525,8 @@ sub process {
 				<<=|>>=|<=|>=|==|!=|
 				\+=|-=|\*=|\/=|%=|\^=|\|=|&=|
 				=>|->|<<|>>|<|>|=|!|~|
-				&&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%
+				&&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%|
+				\?|:
 			}x;
 			my @elements = split(/($ops|;)/, $opline);
 			my $off = 0;
@@ -1554,6 +1590,9 @@ sub process {
 				#	print "UNARY: <$op_left$op_type $is_unary $a:$op:$c> <$ca:$op:$cc> <$unary_ctx>\n";
 				#}
 
+				# Get the full operator variant.
+				my $opv = $op . substr($curr_vars, $off, 1);
+
 				# Ignore operators passed as parameters.
 				if ($op_type ne 'V' &&
 				    $ca =~ /\s$/ && $cc =~ /^\s*,/) {
@@ -1571,8 +1610,10 @@ sub process {
 				# // is a comment
 				} elsif ($op eq '//') {
 
-				# -> should have no spaces
-				} elsif ($op eq '->') {
+				# No spaces for:
+				#   ->
+				#   :   when part of a bitfield
+				} elsif ($op eq '->' || $opv eq ':B') {
 					if ($ctx =~ /Wx.|.xW/) {
 						ERROR("spaces prohibited around that '$op' $at\n" . $hereptr);
 					}
@@ -1628,11 +1669,33 @@ sub process {
 							$hereptr);
 					}
 
+				# A colon needs no spaces before when it is
+				# terminating a case value or a label.
+				} elsif ($opv eq ':C' || $opv eq ':L') {
+					if ($ctx =~ /Wx./) {
+						ERROR("space prohibited before that '$op' $at\n" . $hereptr);
+					}
+
 				# All the others need spaces both sides.
 				} elsif ($ctx !~ /[EWC]x[CWE]/) {
+					my $ok = 0;
+
 					# Ignore email addresses <foo@bar>
-					if (!($op eq '<' && $cb =~ /$;\S+\@\S+>/) &&
-					    !($op eq '>' && $cb =~ /<\S+\@\S+$;/)) {
+					if (($op eq '<' &&
+					     $cc =~ /^\S+\@\S+>/) ||
+					    ($op eq '>' &&
+					     $ca =~ /<\S+\@\S+$/))
+					{
+					    	$ok = 1;
+					}
+
+					# Ignore ?:
+					if (($opv eq ':O' && $ca =~ /\?$/) ||
+					    ($op eq '?' && $cc =~ /^:/)) {
+					    	$ok = 1;
+					}
+
+					if ($ok == 0) {
 						ERROR("spaces required around that '$op' $at\n" . $hereptr);
 					}
 				}
-- 
GitLab


From 74048ed811152a995a88945ba9e0dded34adfff4 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:10 -0700
Subject: [PATCH 267/853] checkpatch: variants -- move the main unary/binary
 operators to use variants

Now that we have a variants system, move to using that to carry the
unary/binary designation for +, -, &, and *.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 88027f237cd..8afa88aaed9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -858,6 +858,19 @@ sub annotate_values {
 			print "CLOSE($1)\n" if ($dbg_values > 1);
 			$type = 'N';
 
+		} elsif ($cur =~ /^(-(?![->])|\+(?!\+)|\*|\&(?!\&))/o) {
+			my $variant;
+
+			print "OPV($1)\n" if ($dbg_values > 1);
+			if ($type eq 'V') {
+				$variant = 'B';
+			} else {
+				$variant = 'U';
+			}
+
+			substr($var, length($res), 1, $variant);
+			$type = 'N';
+
 		} elsif ($cur =~ /^($Operators)/o) {
 			print "OP($1)\n" if ($dbg_values > 1);
 			if ($1 ne '++' && $1 ne '--') {
@@ -1573,22 +1586,8 @@ sub process {
 				my $ptr = substr($blank, 0, $off) . "^";
 				my $hereptr = "$hereline$ptr\n";
 
-				# Classify operators into binary, unary, or
-				# definitions (* only) where they have more
-				# than one mode.
+				# Pull out the value of this operator.
 				my $op_type = substr($curr_values, $off + 1, 1);
-				my $op_left = substr($curr_values, $off, 1);
-				my $is_unary;
-				if ($op_type eq 'T') {
-					$is_unary = 2;
-				} elsif ($op_left eq 'V') {
-					$is_unary = 0;
-				} else {
-					$is_unary = 1;
-				}
-				#if ($op eq '-' || $op eq '&' || $op eq '*') {
-				#	print "UNARY: <$op_left$op_type $is_unary $a:$op:$c> <$ca:$op:$cc> <$unary_ctx>\n";
-				#}
 
 				# Get the full operator variant.
 				my $opv = $op . substr($curr_vars, $off, 1);
@@ -1625,18 +1624,19 @@ sub process {
 					}
 
 				# '*' as part of a type definition -- reported already.
-				} elsif ($op eq '*' && $is_unary == 2) {
+				} elsif ($opv eq '*_') {
 					#warn "'*' is part of type\n";
 
 				# unary operators should have a space before and
 				# none after.  May be left adjacent to another
 				# unary operator, or a cast
 				} elsif ($op eq '!' || $op eq '~' ||
-				         ($is_unary && ($op eq '*' || $op eq '-' || $op eq '&'))) {
+					 $opv eq '*U' || $opv eq '-U' ||
+					 $opv eq '&U') {
 					if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) {
 						ERROR("space required before that '$op' $at\n" . $hereptr);
 					}
-					if ($op  eq '*' && $cc =~/\s*const\b/) {
+					if ($op eq '*' && $cc =~/\s*const\b/) {
 						# A unary '*' may be const
 
 					} elsif ($ctx =~ /.xW/) {
-- 
GitLab


From 292f1a9b342d763f94ea3915726a48905be4acd1 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:11 -0700
Subject: [PATCH 268/853] checkpatch: complex macros need to ignore comments

Ensure we ignore comments in complex macro detection else we incorrectly
report this:

	#define PFM_GROUP_PERM_ANY     -1      /* any user/group */

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 8afa88aaed9..96a762be574 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1972,6 +1972,7 @@ sub process {
 			} else {
 				$dstat =~ s/^.\s*\#\s*define\s+$Ident\s*//;
 			}
+			$dstat =~ s/$;//g;
 			$dstat =~ s/\\\n.//g;
 			$dstat =~ s/^\s*//s;
 			$dstat =~ s/\s*$//s;
-- 
GitLab


From 234fff6515a11cf3e67c793146689da426787fea Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:12 -0700
Subject: [PATCH 269/853] checkpatch: types cannot start mid word for pointer
 tests

When checking spacing for pointer checks the type cannot start in the
middle of a word, i.e. this is not 'int * bar':

	x = fooint * bar;

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 96a762be574..022ee557b68 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1435,11 +1435,11 @@ sub process {
 			ERROR("\"(foo $1 )\" should be \"(foo $1)\"\n" .
 				$herecurr);
 
-		} elsif ($line =~ m{$NonptrType(\*+)(?:\s+(?:$Attribute|$Sparse))?\s+[A-Za-z\d_]+}) {
+		} elsif ($line =~ m{\b$NonptrType(\*+)(?:\s+(?:$Attribute|$Sparse))?\s+[A-Za-z\d_]+}) {
 			ERROR("\"foo$1 bar\" should be \"foo $1bar\"\n" .
 				$herecurr);
 
-		} elsif ($line =~ m{$NonptrType\s+(\*+)(?!\s+(?:$Attribute|$Sparse))\s+[A-Za-z\d_]+}) {
+		} elsif ($line =~ m{\b$NonptrType\s+(\*+)(?!\s+(?:$Attribute|$Sparse))\s+[A-Za-z\d_]+}) {
 			ERROR("\"foo $1 bar\" should be \"foo $1bar\"\n" .
 				$herecurr);
 		}
-- 
GitLab


From 33cba0657393a75e18e1781e3e13613303f18124 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:29:12 -0700
Subject: [PATCH 270/853] checkpatch: version 0.21

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 022ee557b68..bc677939822 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -9,7 +9,7 @@ use strict;
 my $P = $0;
 $P =~ s@.*/@@g;
 
-my $V = '0.20';
+my $V = '0.21';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
-- 
GitLab


From 7102ed519a08b70eadc8fea9d8765d2d990241d1 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:29:13 -0700
Subject: [PATCH 271/853] remove the OSS trident driver

SOUND_TRIDENT was the last PCI OSS driver, and since there's already an
ALSA driver for the same hardware we can remove it.

[muli@il.ibm.com: update CREDITS]
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 CREDITS             |    8 +
 MAINTAINERS         |    6 -
 sound/oss/Kconfig   |   41 -
 sound/oss/Makefile  |    1 -
 sound/oss/trident.c | 4654 -------------------------------------------
 sound/oss/trident.h |  358 ----
 6 files changed, 8 insertions(+), 5060 deletions(-)
 delete mode 100644 sound/oss/trident.c
 delete mode 100644 sound/oss/trident.h

diff --git a/CREDITS b/CREDITS
index 077b147388b..c62dcb3b7e2 100644
--- a/CREDITS
+++ b/CREDITS
@@ -317,6 +317,14 @@ S: 2322 37th Ave SW
 S: Seattle, Washington 98126-2010
 S: USA
 
+N: Muli Ben-Yehuda
+E: mulix@mulix.org
+E: muli@il.ibm.com
+W: http://www.mulix.org
+D: trident OSS sound driver, x86-64 dma-ops and Calgary IOMMU,
+D: KVM and Xen bits and other misc. hackery.
+S: Haifa, Israel
+
 N: Johannes Berg
 E: johannes@sipsolutions.net
 W: http://johannes.sipsolutions.net/
diff --git a/MAINTAINERS b/MAINTAINERS
index 7e5c7b0290b..5ecb97e13e5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4080,12 +4080,6 @@ W:	http://www.prosec.rub.de/tpm/
 L:	tpmdd-devel@lists.sourceforge.net
 S:	Maintained
 
-TRIDENT 4DWAVE/SIS 7018 PCI AUDIO CORE
-P:	Muli Ben-Yehuda
-M:	mulix@mulix.org
-L:	linux-kernel@vger.kernel.org
-S:	Maintained
-
 TRIVIAL PATCHES
 P:	Jesper Juhl
 M:	trivial@kernel.org
diff --git a/sound/oss/Kconfig b/sound/oss/Kconfig
index 33940139844..d4fafb6eec6 100644
--- a/sound/oss/Kconfig
+++ b/sound/oss/Kconfig
@@ -35,47 +35,6 @@ config SOUND_AU1550_AC97
 	tristate "Au1550/Au1200 AC97 Sound"
 	depends on SOC_AU1550 || SOC_AU1200
 
-config SOUND_TRIDENT
-	tristate "Trident 4DWave DX/NX, SiS 7018 or ALi 5451 PCI Audio Core"
-	depends on PCI
-	---help---
-	  Say Y or M if you have a PCI sound card utilizing the Trident
-	  4DWave-DX/NX chipset or your mother board chipset has SiS 7018
-	  or ALi 5451 built-in. The SiS 7018 PCI Audio Core is embedded
-	  in SiS960 Super South Bridge and SiS540/630 Single Chipset.
-	  The ALi 5451 PCI Audio Core is embedded in ALi M1535, M1535D,
-	  M1535+ or M1535D+ South Bridge.
-
-	  Use lspci -n to find out if your sound card or chipset uses
-	  Trident 4DWave or SiS 7018. PCI ID 1023:2000 or 1023:2001 stands
-	  for Trident 4Dwave. PCI ID 1039:7018 stands for SiS7018. PCI ID
-	  10B9:5451 stands for ALi5451.
-
-	  This driver supports S/PDIF in/out (record/playback) for ALi 5451
-	  embedded in ALi M1535+ and M1535D+. Note that they aren't all
-	  enabled by default; you can enable them by saying Y to "/proc file
-	  system support" and "Sysctl support", and after the /proc file
-	  system has been mounted, executing the command
-
-		command			what is enabled
-
-	  echo 0>/proc/ALi5451	pcm out is also set to S/PDIF out. (Default).
-
-	  echo 1>/proc/ALi5451	use S/PDIF out to output pcm data.
-
-	  echo 2>/proc/ALi5451	use S/PDIF out to output non-pcm data.
-	  (AC3...).
-
-	  echo 3>/proc/ALi5451	record from Ac97 in(MIC, Line in...).
-	  (Default).
-
-	  echo 4>/proc/ALi5451	no matter Ac97 settings, record from S/PDIF
-	  in.
-
-
-	  This driver differs slightly from OSS/Free, so PLEASE READ the
-	  comments at the top of <file:sound/oss/trident.c>.
-
 config SOUND_MSNDCLAS
 	tristate "Support for Turtle Beach MultiSound Classic, Tahiti, Monterey"
 	depends on (m || !STANDALONE) && ISA
diff --git a/sound/oss/Makefile b/sound/oss/Makefile
index 1f86299fae4..3a141474fb7 100644
--- a/sound/oss/Makefile
+++ b/sound/oss/Makefile
@@ -29,7 +29,6 @@ obj-$(CONFIG_SOUND_MSNDCLAS)	+= msnd.o msnd_classic.o
 obj-$(CONFIG_SOUND_MSNDPIN)	+= msnd.o msnd_pinnacle.o
 obj-$(CONFIG_SOUND_VWSND)	+= vwsnd.o
 obj-$(CONFIG_SOUND_AU1550_AC97)	+= au1550_ac97.o ac97_codec.o
-obj-$(CONFIG_SOUND_TRIDENT)	+= trident.o ac97_codec.o
 obj-$(CONFIG_SOUND_BCM_CS4297A)	+= swarm_cs4297a.o
 
 obj-$(CONFIG_SOUND_WM97XX)	+= ac97_plugin_wm97xx.o
diff --git a/sound/oss/trident.c b/sound/oss/trident.c
deleted file mode 100644
index f43f91ef86c..00000000000
--- a/sound/oss/trident.c
+++ /dev/null
@@ -1,4654 +0,0 @@
-/*
- *	OSS driver for Linux 2.[46].x for
- *
- *	Trident 4D-Wave
- *	SiS 7018
- *	ALi 5451
- *	Tvia/IGST CyberPro 5050
- *
- *	Driver: Alan Cox <alan@redhat.com>
- *
- *  Built from:
- *	Low level code: <audio@tridentmicro.com> from ALSA
- *	Framework: Thomas Sailer <sailer@ife.ee.ethz.ch>
- *	Extended by: Zach Brown <zab@redhat.com>
- *
- *  Hacked up by:
- *	Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *	Ollie Lho <ollie@sis.com.tw> SiS 7018 Audio Core Support
- *	Ching-Ling Lee <cling-li@ali.com.tw> ALi 5451 Audio Core Support
- *	Matt Wu <mattwu@acersoftech.com.cn> ALi 5451 Audio Core Support
- *	Peter Wächtler <pwaechtler@loewe-komp.de> CyberPro5050 support
- *      Muli Ben-Yehuda <mulix@mulix.org>
- *
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License as published by
- *	the Free Software Foundation; either version 2 of the License, or
- *	(at your option) any later version.
- *
- *	This program is distributed in the hope that it will be useful,
- *	but WITHOUT ANY WARRANTY; without even the implied warranty of
- *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *	GNU General Public License for more details.
- *
- *	You should have received a copy of the GNU General Public License
- *	along with this program; if not, write to the Free Software
- *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- *  History
- *  v0.14.10j
- *  	January 3 2004 Eugene Teo <eugeneteo@eugeneteo.net>
- *  	minor cleanup to use pr_debug instead of TRDBG since it is already
- *  	defined in linux/kernel.h.
- *  v0.14.10i
- *      December 29 2003 Muli Ben-Yehuda <mulix@mulix.org>
- *      major cleanup for 2.6, fix a few error patch buglets
- *      with returning without properly cleaning up first,
- *      get rid of lock_kernel().
- *  v0.14.10h
- *	Sept 10 2002 Pascal Schmidt <der.eremit@email.de>
- *	added support for ALi 5451 joystick port
- *  v0.14.10g
- *	Sept 05 2002 Alan Cox <alan@redhat.com>
- *	adapt to new pci joystick attachment interface
- *  v0.14.10f
- *      July 24 2002 Muli Ben-Yehuda <mulix@actcom.co.il>
- *      patch from Eric Lemar (via Ian Soboroff): in suspend and resume,
- *      fix wrong cast from pci_dev* to struct trident_card*.
- *  v0.14.10e
- *      July 19 2002 Muli Ben-Yehuda <mulix@actcom.co.il>
- *      rewrite the DMA buffer allocation/deallcoation functions, to make it
- *      modular and fix a bug where we would call free_pages on memory
- *      obtained with pci_alloc_consistent. Also remove unnecessary #ifdef
- *      CONFIG_PROC_FS and various other cleanups.
- *  v0.14.10d
- *      July 19 2002 Muli Ben-Yehuda <mulix@actcom.co.il>
- *      made several printk(KERN_NOTICE...) into TRDBG(...), to avoid spamming
- *      my syslog with hundreds of messages.
- *  v0.14.10c
- *      July 16 2002 Muli Ben-Yehuda <mulix@actcom.co.il>
- *      Cleaned up Lei Hu's 0.4.10 driver to conform to Documentation/CodingStyle
- *      and the coding style used in the rest of the file.
- *  v0.14.10b
- *      June 23 2002 Muli Ben-Yehuda <mulix@actcom.co.il>
- *      add a missing unlock_set_fmt, remove a superflous lock/unlock pair
- *      with nothing in between.
- *  v0.14.10a
- *      June 21 2002 Muli Ben-Yehuda <mulix@actcom.co.il>
- *      use a debug macro instead of #ifdef CONFIG_DEBUG, trim to 80 columns
- *      per line, use 'do {} while (0)' in statement macros.
- *  v0.14.10
- *      June 6 2002 Lei Hu <Lei_hu@ali.com.tw>
- *      rewrite the part to read/write registers of audio codec for Ali5451
- *  v0.14.9e
- *      January 2 2002 Vojtech Pavlik <vojtech@ucw.cz> added gameport
- *      support to avoid resource conflict with pcigame.c
- *  v0.14.9d
- *  	October 8 2001 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *	use set_current_state, properly release resources on failure in
- *	trident_probe, get rid of check_region
- *  v0.14.9c
- *	August 10 2001 Peter Wächtler <pwaechtler@loewe-komp.de>
- *	added support for Tvia (formerly Integraphics/IGST) CyberPro5050
- *	this chip is often found in settop boxes (combined video+audio)
- *  v0.14.9b
- *	Switch to static inline not extern inline (gcc 3)
- *  v0.14.9a
- *	Aug 6 2001 Alan Cox
- *	0.14.9 crashed on rmmod due to a timer/bh left running. Simplified
- *	the existing logic (the BH doesn't help as ac97 is lock_irqsave)
- *	and used del_timer_sync to clean up
- *	Fixed a problem where the ALi change broke my generic card
- *  v0.14.9
- *	Jul 10 2001 Matt Wu
- *	Add H/W Volume Control
- *  v0.14.8a
- *	July 7 2001 Alan Cox
- *	Moved Matt Wu's ac97 register cache into the card structure
- *  v0.14.8
- *	Apr 30 2001 Matt Wu
- *	Set EBUF1 and EBUF2 to still mode
- *	Add dc97/ac97 reset function
- *	Fix power management: ali_restore_regs
- *  unreleased
- *	Mar 09 2001 Matt Wu
- *	Add cache for ac97 access
- *  v0.14.7
- *	Feb 06 2001 Matt Wu
- *	Fix ac97 initialization
- *	Fix bug: an extra tail will be played when playing
- *	Jan 05 2001 Matt Wu
- *	Implement multi-channels and S/PDIF in support for ALi 1535+
- *  v0.14.6
- *	Nov 1 2000 Ching-Ling Lee
- *	Fix the bug of memory leak when switching 5.1-channels to 2 channels.
- *	Add lock protection into dynamic changing format of data.
- *	Oct 18 2000 Ching-Ling Lee
- *	5.1-channels support for ALi
- *	June 28 2000 Ching-Ling Lee
- *	S/PDIF out/in(playback/record) support for ALi 1535+, using /proc to be selected by user
- *	Simple Power Management support for ALi
- *  v0.14.5 May 23 2000 Ollie Lho
- *  	Misc bug fix from the Net
- *  v0.14.4 May 20 2000 Aaron Holtzman
- *  	Fix kfree'd memory access in release
- *  	Fix race in open while looking for a free virtual channel slot
- *  	remove open_wait wq (which appears to be unused)
- *  v0.14.3 May 10 2000 Ollie Lho
- *	fixed a small bug in trident_update_ptr, xmms 1.0.1 no longer uses 100% CPU
- *  v0.14.2 Mar 29 2000 Ching-Ling Lee
- *	Add clear to silence advance in trident_update_ptr
- *	fix invalid data of the end of the sound
- *  v0.14.1 Mar 24 2000 Ching-Ling Lee
- *	ALi 5451 support added, playback and recording O.K.
- *	ALi 5451 originally developed and structured based on sonicvibes, and
- *	suggested to merge into this file by Alan Cox.
- *  v0.14 Mar 15 2000 Ollie Lho
- *	5.1 channel output support with channel binding. What's the Matrix ?
- *  v0.13.1 Mar 10 2000 Ollie Lho
- *	few minor bugs on dual codec support, needs more testing
- *  v0.13 Mar 03 2000 Ollie Lho
- *	new pci_* for 2.4 kernel, back ported to 2.2
- *  v0.12 Feb 23 2000 Ollie Lho
- *	Preliminary Recording support
- *  v0.11.2 Feb 19 2000 Ollie Lho
- *	removed incomplete full-dulplex support
- *  v0.11.1 Jan 28 2000 Ollie Lho
- *	small bug in setting sample rate for 4d-nx (reported by Aaron)
- *  v0.11 Jan 27 2000 Ollie Lho
- *	DMA bug, scheduler latency, second try
- *  v0.10 Jan 24 2000 Ollie Lho
- *	DMA bug fixed, found kernel scheduling problem
- *  v0.09 Jan 20 2000 Ollie Lho
- *	Clean up of channel register access routine (prepare for channel binding)
- *  v0.08 Jan 14 2000 Ollie Lho
- *	Isolation of AC97 codec code
- *  v0.07 Jan 13 2000 Ollie Lho
- *	Get rid of ugly old low level access routines (e.g. CHRegs.lp****)
- *  v0.06 Jan 11 2000 Ollie Lho
- *	Preliminary support for dual (more ?) AC97 codecs
- *  v0.05 Jan 08 2000 Luca Montecchiani <m.luca@iname.com>
- *	adapt to 2.3.x new __setup/__init call
- *  v0.04 Dec 31 1999 Ollie Lho
- *	Multiple Open, using Middle Loop Interrupt to smooth playback
- *  v0.03 Dec 24 1999 Ollie Lho
- *	mem leak in prog_dmabuf and dealloc_dmabuf removed
- *  v0.02 Dec 15 1999 Ollie Lho
- *	SiS 7018 support added, playback O.K.
- *  v0.01 Alan Cox et. al.
- *	Initial Release in kernel 2.3.30, does not work
- *
- *  ToDo
- *	Clean up of low level channel register access code. (done)
- *	Fix the bug on dma buffer management in update_ptr, read/write, drain_dac (done)
- *	Dual AC97 codecs support (done)
- *	Recording support (done)
- *	Mmap support
- *	"Channel Binding" ioctl extension (done)
- *	new pci device driver interface for 2.4 kernel (done)
- *
- *	Lock order (high->low)
- *		lock	-	hardware lock
- *		open_mutex - 	guard opens
- *		sem	-	guard dmabuf, write re-entry etc
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/ioport.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/sound.h>
-#include <linux/slab.h>
-#include <linux/soundcard.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/spinlock.h>
-#include <linux/ac97_codec.h>
-#include <linux/bitops.h>
-#include <linux/proc_fs.h>
-#include <linux/interrupt.h>
-#include <linux/pm.h>
-#include <linux/gameport.h>
-#include <linux/kernel.h>
-#include <linux/mutex.h>
-#include <linux/mm.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/dma.h>
-
-#if defined(CONFIG_ALPHA_NAUTILUS) || defined(CONFIG_ALPHA_GENERIC)
-#include <asm/hwrpb.h>
-#endif
-
-#include "trident.h"
-
-#define DRIVER_VERSION "0.14.10j-2.6"
-
-#if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE))
-#define SUPPORT_JOYSTICK 1
-#endif
-
-/* magic numbers to protect our data structures */
-#define TRIDENT_CARD_MAGIC	0x5072696E	/* "Prin" */
-#define TRIDENT_STATE_MAGIC	0x63657373	/* "cess" */
-
-#define TRIDENT_DMA_MASK	0x3fffffff	/* DMA buffer mask for pci_alloc_consist */
-#define ALI_DMA_MASK		0x7fffffff	/* ALI Tridents have 31-bit DMA. Wow. */
-
-#define NR_HW_CH		32
-
-/* maximum number of AC97 codecs connected, AC97 2.0 defined 4, but 7018 and 4D-NX only
-   have 2 SDATA_IN lines (currently) */
-#define NR_AC97		2
-
-/* minor number of /dev/swmodem (temporary, experimental) */
-#define SND_DEV_SWMODEM	7
-
-static const unsigned ali_multi_channels_5_1[] = {
-	/*ALI_SURR_LEFT_CHANNEL, ALI_SURR_RIGHT_CHANNEL, */
-	ALI_CENTER_CHANNEL,
-	ALI_LEF_CHANNEL,
-	ALI_SURR_LEFT_CHANNEL,
-	ALI_SURR_RIGHT_CHANNEL
-};
-
-static const unsigned sample_size[] = { 1, 2, 2, 4 };
-static const unsigned sample_shift[] = { 0, 1, 1, 2 };
-
-static const char invalid_magic[] = KERN_CRIT "trident: invalid magic value in %s\n";
-
-enum {
-	TRIDENT_4D_DX = 0,
-	TRIDENT_4D_NX,
-	SIS_7018,
-	ALI_5451,
-	CYBER5050
-};
-
-static char *card_names[] = {
-	"Trident 4DWave DX",
-	"Trident 4DWave NX",
-	"SiS 7018 PCI Audio",
-	"ALi Audio Accelerator",
-	"Tvia/IGST CyberPro 5050"
-};
-
-static struct pci_device_id trident_pci_tbl[] = {
-	{PCI_DEVICE(PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_TRIDENT_4DWAVE_DX),
-		PCI_CLASS_MULTIMEDIA_AUDIO << 8, 0xffff00, TRIDENT_4D_DX},
-	{PCI_DEVICE(PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_TRIDENT_4DWAVE_NX),
-		0, 0, TRIDENT_4D_NX},
-	{PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_7018), 0, 0, SIS_7018},
-	{PCI_DEVICE(PCI_VENDOR_ID_ALI, PCI_DEVICE_ID_ALI_5451), 0, 0, ALI_5451},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_5050),
-		0, 0, CYBER5050},
-	{0,}
-};
-
-MODULE_DEVICE_TABLE(pci, trident_pci_tbl);
-
-/* "software" or virtual channel, an instance of opened /dev/dsp */
-struct trident_state {
-	unsigned int magic;
-	struct trident_card *card;	/* Card info */
-
-	/* file mode */
-	mode_t open_mode;
-
-	/* virtual channel number */
-	int virt;
-
-	struct dmabuf {
-		/* wave sample stuff */
-		unsigned int rate;
-		unsigned char fmt, enable;
-
-		/* hardware channel */
-		struct trident_channel *channel;
-
-		/* OSS buffer management stuff */
-		void *rawbuf;
-		dma_addr_t dma_handle;
-		unsigned buforder;
-		unsigned numfrag;
-		unsigned fragshift;
-
-		/* our buffer acts like a circular ring */
-		unsigned hwptr;	/* where dma last started, updated by update_ptr */
-		unsigned swptr;	/* where driver last clear/filled, updated by read/write */
-		int count;	/* bytes to be comsumed or been generated by dma machine */
-		unsigned total_bytes;	/* total bytes dmaed by hardware */
-
-		unsigned error;	/* number of over/underruns */
-                /* put process on wait queue when no more space in buffer */
-		wait_queue_head_t wait;
-
-		/* redundant, but makes calculations easier */
-		unsigned fragsize;
-		unsigned dmasize;
-		unsigned fragsamples;
-
-		/* OSS stuff */
-		unsigned mapped:1;
-		unsigned ready:1;
-		unsigned endcleared:1;
-		unsigned update_flag;
-		unsigned ossfragshift;
-		int ossmaxfrags;
-		unsigned subdivision;
-
-	} dmabuf;
-
-	/* 5.1 channels */
-	struct trident_state *other_states[4];
-	int multi_channels_adjust_count;
-	unsigned chans_num;
-	unsigned long fmt_flag;
-	/* Guard against mmap/write/read races */
-	struct mutex sem;
-
-};
-
-/* hardware channels */
-struct trident_channel {
-	int num; /* channel number */
-	u32 lba; /* Loop Begine Address, where dma buffer starts */
-	u32 eso; /* End Sample Offset, wehre dma buffer ends */
-	         /* (in the unit of samples) */
-	u32 delta; /* delta value, sample rate / 48k for playback, */
-	           /* 48k/sample rate for recording */
-	u16 attribute; /* control where PCM data go and come  */
-	u16 fm_vol;
-	u32 control; /* signed/unsigned, 8/16 bits, mono/stereo */
-};
-
-struct trident_pcm_bank_address {
-	u32 start;
-	u32 stop;
-	u32 aint;
-	u32 aint_en;
-};
-
-static struct trident_pcm_bank_address bank_a_addrs = {
-	T4D_START_A,
-	T4D_STOP_A,
-	T4D_AINT_A,
-	T4D_AINTEN_A
-};
-
-static struct trident_pcm_bank_address bank_b_addrs = {
-	T4D_START_B,
-	T4D_STOP_B,
-	T4D_AINT_B,
-	T4D_AINTEN_B
-};
-
-struct trident_pcm_bank {
-	/* register addresses to control bank operations */
-	struct trident_pcm_bank_address *addresses;
-	/* each bank has 32 channels */
-	u32 bitmap;		/* channel allocation bitmap */
-	struct trident_channel channels[32];
-};
-
-struct trident_card {
-	unsigned int magic;
-
-	/* We keep trident cards in a linked list */
-	struct trident_card *next;
-
-	/* single open lock mechanism, only used for recording */
-	struct mutex open_mutex;
-
-	/* The trident has a certain amount of cross channel interaction
-	   so we use a single per card lock */
-	spinlock_t lock;
-
-	/* PCI device stuff */
-	struct pci_dev *pci_dev;
-	u16 pci_id;
-	u8 revision;
-
-	/* soundcore stuff */
-	int dev_audio;
-
-	/* structures for abstraction of hardware facilities, codecs, */
-	/* banks and channels */
-	struct ac97_codec *ac97_codec[NR_AC97];
-	struct trident_pcm_bank banks[NR_BANKS];
-	struct trident_state *states[NR_HW_CH];
-
-	/* hardware resources */
-	unsigned long iobase;
-	u32 irq;
-
-	/* Function support */
-	struct trident_channel *(*alloc_pcm_channel) (struct trident_card *);
-	struct trident_channel *(*alloc_rec_pcm_channel) (struct trident_card *);
-	void (*free_pcm_channel) (struct trident_card *, unsigned int chan);
-	void (*address_interrupt) (struct trident_card *);
-
-	/* Added by Matt Wu 01-05-2001 for spdif in */
-	int multi_channel_use_count;
-	int rec_channel_use_count;
-	u16 mixer_regs[64][NR_AC97];	/* Made card local by Alan */
-	int mixer_regs_ready;
-
-	/* Added for hardware volume control */
-	int hwvolctl;
-	struct timer_list timer;
-
-	/* Game port support */
-	struct gameport *gameport;
-};
-
-enum dmabuf_mode {
-	DM_PLAYBACK = 0,
-	DM_RECORD
-};
-
-/* table to map from CHANNELMASK to channel attribute for SiS 7018 */
-static u16 mask2attr[] = {
-	PCM_LR, PCM_LR, SURR_LR, CENTER_LFE,
-	HSET, MIC, MODEM_LINE1, MODEM_LINE2,
-	I2S_LR, SPDIF_LR
-};
-
-/* table to map from channel attribute to CHANNELMASK for SiS 7018 */
-static int attr2mask[] = {
-	DSP_BIND_MODEM1, DSP_BIND_MODEM2, DSP_BIND_FRONT, DSP_BIND_HANDSET,
-	DSP_BIND_I2S, DSP_BIND_CENTER_LFE, DSP_BIND_SURR, DSP_BIND_SPDIF
-};
-
-/* Added by Matt Wu 01-05-2001 for spdif in */
-static int ali_close_multi_channels(void);
-static void ali_delay(struct trident_card *card, int interval);
-static void ali_detect_spdif_rate(struct trident_card *card);
-
-static void ali_ac97_write(struct ac97_codec *codec, u8 reg, u16 val);
-static u16 ali_ac97_read(struct ac97_codec *codec, u8 reg);
-
-static struct trident_card *devs;
-
-static void trident_ac97_set(struct ac97_codec *codec, u8 reg, u16 val);
-static u16 trident_ac97_get(struct ac97_codec *codec, u8 reg);
-
-static int trident_open_mixdev(struct inode *inode, struct file *file);
-static int trident_ioctl_mixdev(struct inode *inode, struct file *file,
-				unsigned int cmd, unsigned long arg);
-
-static void ali_ac97_set(struct trident_card *card, int secondary, u8 reg, u16 val);
-static u16 ali_ac97_get(struct trident_card *card, int secondary, u8 reg);
-static void ali_set_spdif_out_rate(struct trident_card *card, unsigned int rate);
-static void ali_enable_special_channel(struct trident_state *stat);
-static struct trident_channel *ali_alloc_rec_pcm_channel(struct trident_card *card);
-static struct trident_channel *ali_alloc_pcm_channel(struct trident_card *card);
-static void ali_free_pcm_channel(struct trident_card *card, unsigned int channel);
-static int ali_setup_multi_channels(struct trident_card *card, int chan_nums);
-static unsigned int ali_get_spdif_in_rate(struct trident_card *card);
-static void ali_setup_spdif_in(struct trident_card *card);
-static void ali_disable_spdif_in(struct trident_card *card);
-static void ali_disable_special_channel(struct trident_card *card, int ch);
-static void ali_setup_spdif_out(struct trident_card *card, int flag);
-static int ali_write_5_1(struct trident_state *state,
-			 const char __user *buffer,
-			 int cnt_for_multi_channel, unsigned int *copy_count,
-			 unsigned int *state_cnt);
-static int ali_allocate_other_states_resources(struct trident_state *state,
-					       int chan_nums);
-static void ali_free_other_states_resources(struct trident_state *state);
-
-#define seek_offset(dma_ptr, buffer, cnt, offset, copy_count)	do { \
-        (dma_ptr) += (offset);	  \
-	(buffer) += (offset);	  \
-        (cnt) -= (offset);	  \
-	(copy_count) += (offset); \
-} while (0)
-
-static inline int lock_set_fmt(struct trident_state* state)
-{
-	if (test_and_set_bit(0, &state->fmt_flag))
-		return -EFAULT;
-
-	return 0;
-}
-
-static inline void unlock_set_fmt(struct trident_state* state)
-{
-	clear_bit(0, &state->fmt_flag);
-}
-
-static int
-trident_enable_loop_interrupts(struct trident_card *card)
-{
-	u32 global_control;
-
-	global_control = inl(TRID_REG(card, T4D_LFO_GC_CIR));
-
-	switch (card->pci_id) {
-	case PCI_DEVICE_ID_SI_7018:
-		global_control |= (ENDLP_IE | MIDLP_IE | BANK_B_EN);
-		break;
-	case PCI_DEVICE_ID_ALI_5451:
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX:
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_NX:
-	case PCI_DEVICE_ID_INTERG_5050:
-		global_control |= (ENDLP_IE | MIDLP_IE);
-		break;
-	default:
-		return 0;
-	}
-
-	outl(global_control, TRID_REG(card, T4D_LFO_GC_CIR));
-
-	pr_debug("trident: Enable Loop Interrupts, globctl = 0x%08X\n",
-		 inl(TRID_REG(card, T4D_LFO_GC_CIR)));
-
-	return 1;
-}
-
-static int
-trident_disable_loop_interrupts(struct trident_card *card)
-{
-	u32 global_control;
-
-	global_control = inl(TRID_REG(card, T4D_LFO_GC_CIR));
-	global_control &= ~(ENDLP_IE | MIDLP_IE);
-	outl(global_control, TRID_REG(card, T4D_LFO_GC_CIR));
-
-	pr_debug("trident: Disabled Loop Interrupts, globctl = 0x%08X\n",
-		 global_control);
-
-	return 1;
-}
-
-static void
-trident_enable_voice_irq(struct trident_card *card, unsigned int channel)
-{
-	unsigned int mask = 1 << (channel & 0x1f);
-	struct trident_pcm_bank *bank = &card->banks[channel >> 5];
-	u32 reg, addr = bank->addresses->aint_en;
-
-	reg = inl(TRID_REG(card, addr));
-	reg |= mask;
-	outl(reg, TRID_REG(card, addr));
-
-#ifdef DEBUG
-	reg = inl(TRID_REG(card, addr));
-	pr_debug("trident: enabled IRQ on channel %d, %s = 0x%08x(addr:%X)\n",
-		 channel, addr == T4D_AINTEN_B ? "AINTEN_B" : "AINTEN_A",
-		 reg, addr);
-#endif /* DEBUG */
-}
-
-static void
-trident_disable_voice_irq(struct trident_card *card, unsigned int channel)
-{
-	unsigned int mask = 1 << (channel & 0x1f);
-	struct trident_pcm_bank *bank = &card->banks[channel >> 5];
-	u32 reg, addr = bank->addresses->aint_en;
-
-	reg = inl(TRID_REG(card, addr));
-	reg &= ~mask;
-	outl(reg, TRID_REG(card, addr));
-
-	/* Ack the channel in case the interrupt was set before we disable it. */
-	outl(mask, TRID_REG(card, bank->addresses->aint));
-
-#ifdef DEBUG
-	reg = inl(TRID_REG(card, addr));
-	pr_debug("trident: disabled IRQ on channel %d, %s = 0x%08x(addr:%X)\n",
-		 channel, addr == T4D_AINTEN_B ? "AINTEN_B" : "AINTEN_A",
-		 reg, addr);
-#endif /* DEBUG */
-}
-
-static void
-trident_start_voice(struct trident_card *card, unsigned int channel)
-{
-	unsigned int mask = 1 << (channel & 0x1f);
-	struct trident_pcm_bank *bank = &card->banks[channel >> 5];
-	u32 addr = bank->addresses->start;
-
-#ifdef DEBUG
-	u32 reg;
-#endif /* DEBUG */
-
-	outl(mask, TRID_REG(card, addr));
-
-#ifdef DEBUG
-	reg = inl(TRID_REG(card, addr));
-	pr_debug("trident: start voice on channel %d, %s = 0x%08x(addr:%X)\n",
-		 channel, addr == T4D_START_B ? "START_B" : "START_A",
-		 reg, addr);
-#endif /* DEBUG */
-}
-
-static void
-trident_stop_voice(struct trident_card *card, unsigned int channel)
-{
-	unsigned int mask = 1 << (channel & 0x1f);
-	struct trident_pcm_bank *bank = &card->banks[channel >> 5];
-	u32 addr = bank->addresses->stop;
-
-#ifdef DEBUG
-	u32 reg;
-#endif /* DEBUG */
-
-	outl(mask, TRID_REG(card, addr));
-
-#ifdef DEBUG
-	reg = inl(TRID_REG(card, addr));
-	pr_debug("trident: stop voice on channel %d, %s = 0x%08x(addr:%X)\n",
-		 channel, addr == T4D_STOP_B ? "STOP_B" : "STOP_A",
-		 reg, addr);
-#endif /* DEBUG */
-}
-
-static u32
-trident_get_interrupt_mask(struct trident_card *card, unsigned int channel)
-{
-	struct trident_pcm_bank *bank = &card->banks[channel];
-	u32 addr = bank->addresses->aint;
-	return inl(TRID_REG(card, addr));
-}
-
-static int
-trident_check_channel_interrupt(struct trident_card *card, unsigned int channel)
-{
-	unsigned int mask = 1 << (channel & 0x1f);
-	u32 reg = trident_get_interrupt_mask(card, channel >> 5);
-
-#ifdef DEBUG
-	if (reg & mask)
-		pr_debug("trident: channel %d has interrupt, %s = 0x%08x\n",
-			 channel, reg == T4D_AINT_B ? "AINT_B" : "AINT_A",
-			 reg);
-#endif /* DEBUG */
-	return (reg & mask) ? 1 : 0;
-}
-
-static void
-trident_ack_channel_interrupt(struct trident_card *card, unsigned int channel)
-{
-	unsigned int mask = 1 << (channel & 0x1f);
-	struct trident_pcm_bank *bank = &card->banks[channel >> 5];
-	u32 reg, addr = bank->addresses->aint;
-
-	reg = inl(TRID_REG(card, addr));
-	reg &= mask;
-	outl(reg, TRID_REG(card, addr));
-
-#ifdef DEBUG
-	reg = inl(TRID_REG(card, T4D_AINT_B));
-	pr_debug("trident: Ack channel %d interrupt, AINT_B = 0x%08x\n",
-		 channel, reg);
-#endif /* DEBUG */
-}
-
-static struct trident_channel *
-trident_alloc_pcm_channel(struct trident_card *card)
-{
-	struct trident_pcm_bank *bank;
-	int idx;
-
-	bank = &card->banks[BANK_B];
-
-	for (idx = 31; idx >= 0; idx--) {
-		if (!(bank->bitmap & (1 << idx))) {
-			struct trident_channel *channel = &bank->channels[idx];
-			bank->bitmap |= 1 << idx;
-			channel->num = idx + 32;
-			return channel;
-		}
-	}
-
-	/* no more free channels available */
-	printk(KERN_ERR "trident: no more channels available on Bank B.\n");
-	return NULL;
-}
-
-static void
-trident_free_pcm_channel(struct trident_card *card, unsigned int channel)
-{
-	int bank;
-	unsigned char b;
-
-	if (channel < 31 || channel > 63)
-		return;
-
-	if (card->pci_id == PCI_DEVICE_ID_TRIDENT_4DWAVE_DX ||
-	    card->pci_id == PCI_DEVICE_ID_TRIDENT_4DWAVE_NX) {
-		b = inb(TRID_REG(card, T4D_REC_CH));
-		if ((b & ~0x80) == channel)
-			outb(0x0, TRID_REG(card, T4D_REC_CH));
-	}
-
-	bank = channel >> 5;
-	channel = channel & 0x1f;
-
-	card->banks[bank].bitmap &= ~(1 << (channel));
-}
-
-static struct trident_channel *
-cyber_alloc_pcm_channel(struct trident_card *card)
-{
-	struct trident_pcm_bank *bank;
-	int idx;
-
-	/* The cyberpro 5050 has only 32 voices and one bank */
-	/* .. at least they are not documented (if you want to call that
-	 * crap documentation), perhaps broken ? */
-
-	bank = &card->banks[BANK_A];
-
-	for (idx = 31; idx >= 0; idx--) {
-		if (!(bank->bitmap & (1 << idx))) {
-			struct trident_channel *channel = &bank->channels[idx];
-			bank->bitmap |= 1 << idx;
-			channel->num = idx;
-			return channel;
-		}
-	}
-
-	/* no more free channels available */
-	printk(KERN_ERR "cyberpro5050: no more channels available on Bank A.\n");
-	return NULL;
-}
-
-static void
-cyber_free_pcm_channel(struct trident_card *card, unsigned int channel)
-{
-	if (channel > 31)
-		return;
-	card->banks[BANK_A].bitmap &= ~(1 << (channel));
-}
-
-static inline void
-cyber_outidx(int port, int idx, int data)
-{
-	outb(idx, port);
-	outb(data, port + 1);
-}
-
-static inline int
-cyber_inidx(int port, int idx)
-{
-	outb(idx, port);
-	return inb(port + 1);
-}
-
-static int
-cyber_init_ritual(struct trident_card *card)
-{
-	/* some black magic, taken from SDK samples */
-	/* remove this and nothing will work */
-	int portDat;
-	int ret = 0;
-	unsigned long flags;
-
-	/*
-	 *      Keep interrupts off for the configure - we don't want to
-	 *      clash with another cyberpro config event
-	 */
-
-	spin_lock_irqsave(&card->lock, flags);
-	portDat = cyber_inidx(CYBER_PORT_AUDIO, CYBER_IDX_AUDIO_ENABLE);
-	/* enable, if it was disabled */
-	if ((portDat & CYBER_BMSK_AUENZ) != CYBER_BMSK_AUENZ_ENABLE) {
-		printk(KERN_INFO "cyberpro5050: enabling audio controller\n");
-		cyber_outidx(CYBER_PORT_AUDIO, CYBER_IDX_AUDIO_ENABLE,
-			     portDat | CYBER_BMSK_AUENZ_ENABLE);
-		/* check again if hardware is enabled now */
-		portDat = cyber_inidx(CYBER_PORT_AUDIO, CYBER_IDX_AUDIO_ENABLE);
-	}
-	if ((portDat & CYBER_BMSK_AUENZ) != CYBER_BMSK_AUENZ_ENABLE) {
-		printk(KERN_ERR "cyberpro5050: initAudioAccess: no success\n");
-		ret = -1;
-	} else {
-		cyber_outidx(CYBER_PORT_AUDIO, CYBER_IDX_IRQ_ENABLE,
-			     CYBER_BMSK_AUDIO_INT_ENABLE);
-		cyber_outidx(CYBER_PORT_AUDIO, 0xbf, 0x01);
-		cyber_outidx(CYBER_PORT_AUDIO, 0xba, 0x20);
-		cyber_outidx(CYBER_PORT_AUDIO, 0xbb, 0x08);
-		cyber_outidx(CYBER_PORT_AUDIO, 0xbf, 0x02);
-		cyber_outidx(CYBER_PORT_AUDIO, 0xb3, 0x06);
-		cyber_outidx(CYBER_PORT_AUDIO, 0xbf, 0x00);
-	}
-	spin_unlock_irqrestore(&card->lock, flags);
-	return ret;
-}
-
-/*  called with spin lock held */
-
-static int
-trident_load_channel_registers(struct trident_card *card, u32 * data,
-			       unsigned int channel)
-{
-	int i;
-
-	if (channel > 63)
-		return 0;
-
-	/* select hardware channel to write */
-	outb(channel, TRID_REG(card, T4D_LFO_GC_CIR));
-
-	/* Output the channel registers, but don't write register
-	   three to an ALI chip. */
-	for (i = 0; i < CHANNEL_REGS; i++) {
-		if (i == 3 && card->pci_id == PCI_DEVICE_ID_ALI_5451)
-			continue;
-		outl(data[i], TRID_REG(card, CHANNEL_START + 4 * i));
-	}
-	if (card->pci_id == PCI_DEVICE_ID_ALI_5451 ||
-	    card->pci_id == PCI_DEVICE_ID_INTERG_5050) {
-		outl(ALI_EMOD_Still, TRID_REG(card, ALI_EBUF1));
-		outl(ALI_EMOD_Still, TRID_REG(card, ALI_EBUF2));
-	}
-	return 1;
-}
-
-/* called with spin lock held */
-static int
-trident_write_voice_regs(struct trident_state *state)
-{
-	unsigned int data[CHANNEL_REGS + 1];
-	struct trident_channel *channel;
-
-	channel = state->dmabuf.channel;
-
-	data[1] = channel->lba;
-	data[4] = channel->control;
-
-	switch (state->card->pci_id) {
-	case PCI_DEVICE_ID_ALI_5451:
-		data[0] = 0;	/* Current Sample Offset */
-		data[2] = (channel->eso << 16) | (channel->delta & 0xffff);
-		data[3] = 0;
-		break;
-	case PCI_DEVICE_ID_SI_7018:
-	case PCI_DEVICE_ID_INTERG_5050:
-		data[0] = 0;	/* Current Sample Offset */
-		data[2] = (channel->eso << 16) | (channel->delta & 0xffff);
-		data[3] = (channel->attribute << 16) | (channel->fm_vol & 0xffff);
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX:
-		data[0] = 0;	/* Current Sample Offset */
-		data[2] = (channel->eso << 16) | (channel->delta & 0xffff);
-		data[3] = channel->fm_vol & 0xffff;
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_NX:
-		data[0] = (channel->delta << 24);
-		data[2] = ((channel->delta << 16) & 0xff000000) |
-			(channel->eso & 0x00ffffff);
-		data[3] = channel->fm_vol & 0xffff;
-		break;
-	default:
-		return 0;
-	}
-
-	return trident_load_channel_registers(state->card, data, channel->num);
-}
-
-static int
-compute_rate_play(u32 rate)
-{
-	int delta;
-	/* We special case 44100 and 8000 since rounding with the equation
-	   does not give us an accurate enough value. For 11025 and 22050
-	   the equation gives us the best answer. All other frequencies will
-	   also use the equation. JDW */
-	if (rate == 44100)
-		delta = 0xeb3;
-	else if (rate == 8000)
-		delta = 0x2ab;
-	else if (rate == 48000)
-		delta = 0x1000;
-	else
-		delta = (((rate << 12) + rate) / 48000) & 0x0000ffff;
-	return delta;
-}
-
-static int
-compute_rate_rec(u32 rate)
-{
-	int delta;
-
-	if (rate == 44100)
-		delta = 0x116a;
-	else if (rate == 8000)
-		delta = 0x6000;
-	else if (rate == 48000)
-		delta = 0x1000;
-	else
-		delta = ((48000 << 12) / rate) & 0x0000ffff;
-
-	return delta;
-}
-
-/* set playback sample rate */
-static unsigned int
-trident_set_dac_rate(struct trident_state *state, unsigned int rate)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-
-	if (rate > 48000)
-		rate = 48000;
-	if (rate < 4000)
-		rate = 4000;
-
-	dmabuf->rate = rate;
-	dmabuf->channel->delta = compute_rate_play(rate);
-
-	trident_write_voice_regs(state);
-
-	pr_debug("trident: called trident_set_dac_rate : rate = %d\n", rate);
-
-	return rate;
-}
-
-/* set recording sample rate */
-static unsigned int
-trident_set_adc_rate(struct trident_state *state, unsigned int rate)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-
-	if (rate > 48000)
-		rate = 48000;
-	if (rate < 4000)
-		rate = 4000;
-
-	dmabuf->rate = rate;
-	dmabuf->channel->delta = compute_rate_rec(rate);
-
-	trident_write_voice_regs(state);
-
-	pr_debug("trident: called trident_set_adc_rate : rate = %d\n", rate);
-
-	return rate;
-}
-
-/* prepare channel attributes for playback */
-static void
-trident_play_setup(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	struct trident_channel *channel = dmabuf->channel;
-
-	channel->lba = dmabuf->dma_handle;
-	channel->delta = compute_rate_play(dmabuf->rate);
-
-	channel->eso = dmabuf->dmasize >> sample_shift[dmabuf->fmt];
-	channel->eso -= 1;
-
-	if (state->card->pci_id != PCI_DEVICE_ID_SI_7018) {
-		channel->attribute = 0;
-		if (state->card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-			if ((channel->num == ALI_SPDIF_IN_CHANNEL) ||
-			    (channel->num == ALI_PCM_IN_CHANNEL))
-				ali_disable_special_channel(state->card, channel->num);
-			else if ((inl(TRID_REG(state->card, ALI_GLOBAL_CONTROL))
-				  & ALI_SPDIF_OUT_CH_ENABLE)
-				 && (channel->num == ALI_SPDIF_OUT_CHANNEL)) {
-				ali_set_spdif_out_rate(state->card,
-						       state->dmabuf.rate);
-				state->dmabuf.channel->delta = 0x1000;
-			}
-		}
-	}
-
-	channel->fm_vol = 0x0;
-
-	channel->control = CHANNEL_LOOP;
-	if (dmabuf->fmt & TRIDENT_FMT_16BIT) {
-		/* 16-bits */
-		channel->control |= CHANNEL_16BITS;
-		/* signed */
-		channel->control |= CHANNEL_SIGNED;
-	}
-	if (dmabuf->fmt & TRIDENT_FMT_STEREO)
-		/* stereo */
-		channel->control |= CHANNEL_STEREO;
-
-	pr_debug("trident: trident_play_setup, LBA = 0x%08x, Delta = 0x%08x, "
-		 "ESO = 0x%08x, Control = 0x%08x\n", channel->lba,
-		 channel->delta, channel->eso, channel->control);
-
-	trident_write_voice_regs(state);
-}
-
-/* prepare channel attributes for recording */
-static void
-trident_rec_setup(struct trident_state *state)
-{
-	u16 w;
-	u8 bval;
-
-	struct trident_card *card = state->card;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	struct trident_channel *channel = dmabuf->channel;
-	unsigned int rate;
-
-	/* Enable AC-97 ADC (capture) */
-	switch (card->pci_id) {
-	case PCI_DEVICE_ID_ALI_5451:
-		ali_enable_special_channel(state);
-		break;
-	case PCI_DEVICE_ID_SI_7018:
-		/* for 7018, the ac97 is always in playback/record (duplex) mode */
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX:
-		w = inb(TRID_REG(card, DX_ACR2_AC97_COM_STAT));
-		outb(w | 0x48, TRID_REG(card, DX_ACR2_AC97_COM_STAT));
-		/* enable and set record channel */
-		outb(0x80 | channel->num, TRID_REG(card, T4D_REC_CH));
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_NX:
-		w = inw(TRID_REG(card, T4D_MISCINT));
-		outw(w | 0x1000, TRID_REG(card, T4D_MISCINT));
-		/* enable and set record channel */
-		outb(0x80 | channel->num, TRID_REG(card, T4D_REC_CH));
-		break;
-	case PCI_DEVICE_ID_INTERG_5050:
-		/* don't know yet, using special channel 22 in GC1(0xd4)? */
-		break;
-	default:
-		return;
-	}
-
-	channel->lba = dmabuf->dma_handle;
-	channel->delta = compute_rate_rec(dmabuf->rate);
-	if ((card->pci_id == PCI_DEVICE_ID_ALI_5451) &&
-	    (channel->num == ALI_SPDIF_IN_CHANNEL)) {
-		rate = ali_get_spdif_in_rate(card);
-		if (rate == 0) {
-			printk(KERN_WARNING "trident: ALi 5451 "
-			       "S/PDIF input setup error!\n");
-			rate = 48000;
-		}
-		bval = inb(TRID_REG(card, ALI_SPDIF_CTRL));
-		if (bval & 0x10) {
-			outb(bval, TRID_REG(card, ALI_SPDIF_CTRL));
-			printk(KERN_WARNING "trident: cleared ALi "
-			       "5451 S/PDIF parity error flag.\n");
-		}
-
-		if (rate != 48000)
-			channel->delta = ((rate << 12) / dmabuf->rate) & 0x0000ffff;
-	}
-
-	channel->eso = dmabuf->dmasize >> sample_shift[dmabuf->fmt];
-	channel->eso -= 1;
-
-	if (state->card->pci_id != PCI_DEVICE_ID_SI_7018) {
-		channel->attribute = 0;
-	}
-
-	channel->fm_vol = 0x0;
-
-	channel->control = CHANNEL_LOOP;
-	if (dmabuf->fmt & TRIDENT_FMT_16BIT) {
-		/* 16-bits */
-		channel->control |= CHANNEL_16BITS;
-		/* signed */
-		channel->control |= CHANNEL_SIGNED;
-	}
-	if (dmabuf->fmt & TRIDENT_FMT_STEREO)
-		/* stereo */
-		channel->control |= CHANNEL_STEREO;
-
-	pr_debug("trident: trident_rec_setup, LBA = 0x%08x, Delat = 0x%08x, "
-		 "ESO = 0x%08x, Control = 0x%08x\n", channel->lba,
-		 channel->delta, channel->eso, channel->control);
-
-	trident_write_voice_regs(state);
-}
-
-/* get current playback/recording dma buffer pointer (byte offset from LBA),
-   called with spinlock held! */
-static inline unsigned
-trident_get_dma_addr(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	u32 cso;
-
-	if (!dmabuf->enable)
-		return 0;
-
-	outb(dmabuf->channel->num, TRID_REG(state->card, T4D_LFO_GC_CIR));
-
-	switch (state->card->pci_id) {
-	case PCI_DEVICE_ID_ALI_5451:
-	case PCI_DEVICE_ID_SI_7018:
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX:
-	case PCI_DEVICE_ID_INTERG_5050:
-		/* 16 bits ESO, CSO for 7018 and DX */
-		cso = inw(TRID_REG(state->card, CH_DX_CSO_ALPHA_FMS + 2));
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_NX:
-		/* 24 bits ESO, CSO for NX */
-		cso = inl(TRID_REG(state->card, CH_NX_DELTA_CSO)) & 0x00ffffff;
-		break;
-	default:
-		return 0;
-	}
-
-	pr_debug("trident: trident_get_dma_addr: chip reported channel: %d, "
-		 "cso = 0x%04x\n", dmabuf->channel->num, cso);
-
-	/* ESO and CSO are in units of Samples, convert to byte offset */
-	cso <<= sample_shift[dmabuf->fmt];
-
-	return (cso % dmabuf->dmasize);
-}
-
-/* Stop recording (lock held) */
-static inline void
-__stop_adc(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned int chan_num = dmabuf->channel->num;
-	struct trident_card *card = state->card;
-
-	dmabuf->enable &= ~ADC_RUNNING;
-	trident_stop_voice(card, chan_num);
-	trident_disable_voice_irq(card, chan_num);
-}
-
-static void
-stop_adc(struct trident_state *state)
-{
-	struct trident_card *card = state->card;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	__stop_adc(state);
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-static void
-start_adc(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned int chan_num = dmabuf->channel->num;
-	struct trident_card *card = state->card;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	if ((dmabuf->mapped ||
-	     dmabuf->count < (signed) dmabuf->dmasize) &&
-	    dmabuf->ready) {
-		dmabuf->enable |= ADC_RUNNING;
-		trident_enable_voice_irq(card, chan_num);
-		trident_start_voice(card, chan_num);
-	}
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-/* stop playback (lock held) */
-static inline void
-__stop_dac(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned int chan_num = dmabuf->channel->num;
-	struct trident_card *card = state->card;
-
-	dmabuf->enable &= ~DAC_RUNNING;
-	trident_stop_voice(card, chan_num);
-	if (state->chans_num == 6) {
-		trident_stop_voice(card, state->other_states[0]->
-				   dmabuf.channel->num);
-		trident_stop_voice(card, state->other_states[1]->
-				   dmabuf.channel->num);
-		trident_stop_voice(card, state->other_states[2]->
-				   dmabuf.channel->num);
-		trident_stop_voice(card, state->other_states[3]->
-				   dmabuf.channel->num);
-	}
-	trident_disable_voice_irq(card, chan_num);
-}
-
-static void
-stop_dac(struct trident_state *state)
-{
-	struct trident_card *card = state->card;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	__stop_dac(state);
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-static void
-start_dac(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned int chan_num = dmabuf->channel->num;
-	struct trident_card *card = state->card;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	if ((dmabuf->mapped || dmabuf->count > 0) && dmabuf->ready) {
-		dmabuf->enable |= DAC_RUNNING;
-		trident_enable_voice_irq(card, chan_num);
-		trident_start_voice(card, chan_num);
-		if (state->chans_num == 6) {
-			trident_start_voice(card, state->other_states[0]->
-					    dmabuf.channel->num);
-			trident_start_voice(card, state->other_states[1]->
-					    dmabuf.channel->num);
-			trident_start_voice(card, state->other_states[2]->
-					    dmabuf.channel->num);
-			trident_start_voice(card, state->other_states[3]->
-					    dmabuf.channel->num);
-		}
-	}
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-#define DMABUF_DEFAULTORDER (15-PAGE_SHIFT)
-#define DMABUF_MINORDER 1
-
-/* alloc a DMA buffer of with a buffer of this order */
-static int
-alloc_dmabuf(struct dmabuf *dmabuf, struct pci_dev *pci_dev, int order)
-{
-	void *rawbuf = NULL;
-	struct page *page, *pend;
-
-	if (!(rawbuf = pci_alloc_consistent(pci_dev, PAGE_SIZE << order,
-					    &dmabuf->dma_handle)))
-		return -ENOMEM;
-
-	pr_debug("trident: allocated %ld (order = %d) bytes at %p\n",
-		 PAGE_SIZE << order, order, rawbuf);
-
-	dmabuf->ready = dmabuf->mapped = 0;
-	dmabuf->rawbuf = rawbuf;
-	dmabuf->buforder = order;
-
-	/* now mark the pages as reserved; otherwise */
-	/* remap_pfn_range doesn't do what we want */
-	pend = virt_to_page(rawbuf + (PAGE_SIZE << order) - 1);
-	for (page = virt_to_page(rawbuf); page <= pend; page++)
-		SetPageReserved(page);
-
-	return 0;
-}
-
-/* allocate the main DMA buffer, playback and recording buffer should be */
-/* allocated separately */
-static int
-alloc_main_dmabuf(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	int order;
-	int ret = -ENOMEM;
-
-	/* alloc as big a chunk as we can, FIXME: is this necessary ?? */
-	for (order = DMABUF_DEFAULTORDER; order >= DMABUF_MINORDER; order--) {
-		if (!(ret = alloc_dmabuf(dmabuf, state->card->pci_dev, order)))
-			return 0;
-		/* else try again */
-	}
-	return ret;
-}
-
-/* deallocate a DMA buffer */
-static void
-dealloc_dmabuf(struct dmabuf *dmabuf, struct pci_dev *pci_dev)
-{
-	struct page *page, *pend;
-
-	if (dmabuf->rawbuf) {
-		/* undo marking the pages as reserved */
-		pend = virt_to_page(dmabuf->rawbuf + (PAGE_SIZE << dmabuf->buforder) - 1);
-		for (page = virt_to_page(dmabuf->rawbuf); page <= pend; page++)
-			ClearPageReserved(page);
-		pci_free_consistent(pci_dev, PAGE_SIZE << dmabuf->buforder,
-				    dmabuf->rawbuf, dmabuf->dma_handle);
-		dmabuf->rawbuf = NULL;
-	}
-	dmabuf->mapped = dmabuf->ready = 0;
-}
-
-static int
-prog_dmabuf(struct trident_state *state, enum dmabuf_mode rec)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned bytepersec;
-	struct trident_state *s = state;
-	unsigned bufsize, dma_nums;
-	unsigned long flags;
-	int ret, i, order;
-
-	if ((ret = lock_set_fmt(state)) < 0)
-		return ret;
-
-	if (state->chans_num == 6)
-		dma_nums = 5;
-	else
-		dma_nums = 1;
-
-	for (i = 0; i < dma_nums; i++) {
-		if (i > 0) {
-			s = state->other_states[i - 1];
-			dmabuf = &s->dmabuf;
-			dmabuf->fmt = state->dmabuf.fmt;
-			dmabuf->rate = state->dmabuf.rate;
-		}
-
-		spin_lock_irqsave(&s->card->lock, flags);
-		dmabuf->hwptr = dmabuf->swptr = dmabuf->total_bytes = 0;
-		dmabuf->count = dmabuf->error = 0;
-		spin_unlock_irqrestore(&s->card->lock, flags);
-
-		/* allocate DMA buffer if not allocated yet */
-		if (!dmabuf->rawbuf) {
-			if (i == 0) {
-				if ((ret = alloc_main_dmabuf(state))) {
-					unlock_set_fmt(state);
-					return ret;
-				}
-			} else {
-				ret = -ENOMEM;
-				order = state->dmabuf.buforder - 1;
-				if (order >= DMABUF_MINORDER) {
-					ret = alloc_dmabuf(dmabuf,
-							   state->card->pci_dev,
-							   order);
-				}
-				if (ret) {
-					/* release the main DMA buffer */
-					dealloc_dmabuf(&state->dmabuf, state->card->pci_dev);
-					/* release the auxiliary DMA buffers */
-					for (i -= 2; i >= 0; i--)
-						dealloc_dmabuf(&state->other_states[i]->dmabuf,
-							       state->card->pci_dev);
-					unlock_set_fmt(state);
-					return ret;
-				}
-			}
-		}
-		/* FIXME: figure out all this OSS fragment stuff */
-		bytepersec = dmabuf->rate << sample_shift[dmabuf->fmt];
-		bufsize = PAGE_SIZE << dmabuf->buforder;
-		if (dmabuf->ossfragshift) {
-			if ((1000 << dmabuf->ossfragshift) < bytepersec)
-				dmabuf->fragshift = ld2(bytepersec / 1000);
-			else
-				dmabuf->fragshift = dmabuf->ossfragshift;
-		} else {
-			/* lets hand out reasonable big ass buffers by default */
-			dmabuf->fragshift = (dmabuf->buforder + PAGE_SHIFT - 2);
-		}
-		dmabuf->numfrag = bufsize >> dmabuf->fragshift;
-		while (dmabuf->numfrag < 4 && dmabuf->fragshift > 3) {
-			dmabuf->fragshift--;
-			dmabuf->numfrag = bufsize >> dmabuf->fragshift;
-		}
-		dmabuf->fragsize = 1 << dmabuf->fragshift;
-		if (dmabuf->ossmaxfrags >= 4 && dmabuf->ossmaxfrags < dmabuf->numfrag)
-			dmabuf->numfrag = dmabuf->ossmaxfrags;
-		dmabuf->fragsamples = dmabuf->fragsize >> sample_shift[dmabuf->fmt];
-		dmabuf->dmasize = dmabuf->numfrag << dmabuf->fragshift;
-
-		memset(dmabuf->rawbuf, (dmabuf->fmt & TRIDENT_FMT_16BIT) ? 0 : 0x80,
-		       dmabuf->dmasize);
-
-		spin_lock_irqsave(&s->card->lock, flags);
-		if (rec == DM_RECORD)
-			trident_rec_setup(s);
-		else /* DM_PLAYBACK */
-			trident_play_setup(s);
-
-		spin_unlock_irqrestore(&s->card->lock, flags);
-
-		/* set the ready flag for the dma buffer */
-		dmabuf->ready = 1;
-
-		pr_debug("trident: prog_dmabuf(%d), sample rate = %d, "
-			 "format = %d, numfrag = %d, fragsize = %d "
-			 "dmasize = %d\n", dmabuf->channel->num,
-			 dmabuf->rate, dmabuf->fmt, dmabuf->numfrag,
-			 dmabuf->fragsize, dmabuf->dmasize);
-	}
-	unlock_set_fmt(state);
-	return 0;
-}
-
-
-static inline int prog_dmabuf_record(struct trident_state* state)
-{
-	return prog_dmabuf(state, DM_RECORD);
-}
-
-static inline int prog_dmabuf_playback(struct trident_state* state)
-{
-	return prog_dmabuf(state, DM_PLAYBACK);
-}
-
-/* we are doing quantum mechanics here, the buffer can only be empty, half or full filled i.e.
-   |------------|------------|   or   |xxxxxxxxxxxx|------------|   or   |xxxxxxxxxxxx|xxxxxxxxxxxx|
-   but we almost always get this
-   |xxxxxx------|------------|   or   |xxxxxxxxxxxx|xxxxx-------|
-   so we have to clear the tail space to "silence"
-   |xxxxxx000000|------------|   or   |xxxxxxxxxxxx|xxxxxx000000|
-*/
-static void
-trident_clear_tail(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned swptr;
-	unsigned char silence = (dmabuf->fmt & TRIDENT_FMT_16BIT) ? 0 : 0x80;
-	unsigned int len;
-	unsigned long flags;
-
-	spin_lock_irqsave(&state->card->lock, flags);
-	swptr = dmabuf->swptr;
-	spin_unlock_irqrestore(&state->card->lock, flags);
-
-	if (swptr == 0 || swptr == dmabuf->dmasize / 2 ||
-	    swptr == dmabuf->dmasize)
-		return;
-
-	if (swptr < dmabuf->dmasize / 2)
-		len = dmabuf->dmasize / 2 - swptr;
-	else
-		len = dmabuf->dmasize - swptr;
-
-	memset(dmabuf->rawbuf + swptr, silence, len);
-	if (state->card->pci_id != PCI_DEVICE_ID_ALI_5451) {
-		spin_lock_irqsave(&state->card->lock, flags);
-		dmabuf->swptr += len;
-		dmabuf->count += len;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-	}
-
-	/* restart the dma machine in case it is halted */
-	start_dac(state);
-}
-
-static int
-drain_dac(struct trident_state *state, int nonblock)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned long flags;
-	unsigned long tmo;
-	int count;
-	unsigned long diff = 0;
-
-	if (dmabuf->mapped || !dmabuf->ready)
-		return 0;
-
-	add_wait_queue(&dmabuf->wait, &wait);
-	for (;;) {
-		/* It seems that we have to set the current state to TASK_INTERRUPTIBLE
-		   every time to make the process really go to sleep */
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		spin_lock_irqsave(&state->card->lock, flags);
-		count = dmabuf->count;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-
-		if (count <= 0)
-			break;
-
-		if (signal_pending(current))
-			break;
-
-		if (nonblock) {
-			remove_wait_queue(&dmabuf->wait, &wait);
-			set_current_state(TASK_RUNNING);
-			return -EBUSY;
-		}
-
-		/* No matter how much data is left in the buffer, we have to wait until
-		   CSO == ESO/2 or CSO == ESO when address engine interrupts */
-		if (state->card->pci_id == PCI_DEVICE_ID_ALI_5451 ||
-		    state->card->pci_id == PCI_DEVICE_ID_INTERG_5050) {
-			diff = dmabuf->swptr - trident_get_dma_addr(state) + dmabuf->dmasize;
-			diff = diff % (dmabuf->dmasize);
-			tmo = (diff * HZ) / dmabuf->rate;
-		} else {
-			tmo = (dmabuf->dmasize * HZ) / dmabuf->rate;
-		}
-		tmo >>= sample_shift[dmabuf->fmt];
-		if (!schedule_timeout(tmo ? tmo : 1) && tmo) {
-			break;
-		}
-	}
-	remove_wait_queue(&dmabuf->wait, &wait);
-	set_current_state(TASK_RUNNING);
-	if (signal_pending(current))
-		return -ERESTARTSYS;
-
-	return 0;
-}
-
-/* update buffer manangement pointers, especially, */
-/* dmabuf->count and dmabuf->hwptr */
-static void
-trident_update_ptr(struct trident_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned hwptr, swptr;
-	int clear_cnt = 0;
-	int diff;
-	unsigned char silence;
-	unsigned half_dmasize;
-
-	/* update hardware pointer */
-	hwptr = trident_get_dma_addr(state);
-	diff = (dmabuf->dmasize + hwptr - dmabuf->hwptr) % dmabuf->dmasize;
-	dmabuf->hwptr = hwptr;
-	dmabuf->total_bytes += diff;
-
-	/* error handling and process wake up for ADC */
-	if (dmabuf->enable == ADC_RUNNING) {
-		if (dmabuf->mapped) {
-			dmabuf->count -= diff;
-			if (dmabuf->count >= (signed) dmabuf->fragsize)
-				wake_up(&dmabuf->wait);
-		} else {
-			dmabuf->count += diff;
-
-			if (dmabuf->count < 0 ||
-			    dmabuf->count > dmabuf->dmasize) {
-				/* buffer underrun or buffer overrun, */
-				/* we have no way to recover it here, just */
-				/* stop the machine and let the process */
-				/* force hwptr and swptr to sync */
-				__stop_adc(state);
-				dmabuf->error++;
-			}
-			if (dmabuf->count < (signed) dmabuf->dmasize / 2)
-				wake_up(&dmabuf->wait);
-		}
-	}
-
-	/* error handling and process wake up for DAC */
-	if (dmabuf->enable == DAC_RUNNING) {
-		if (dmabuf->mapped) {
-			dmabuf->count += diff;
-			if (dmabuf->count >= (signed) dmabuf->fragsize)
-				wake_up(&dmabuf->wait);
-		} else {
-			dmabuf->count -= diff;
-
-			if (dmabuf->count < 0 ||
-			    dmabuf->count > dmabuf->dmasize) {
-				/* buffer underrun or buffer overrun, we have no way to recover
-				   it here, just stop the machine and let the process force hwptr
-				   and swptr to sync */
-				__stop_dac(state);
-				dmabuf->error++;
-			} else if (!dmabuf->endcleared) {
-				swptr = dmabuf->swptr;
-				silence = (dmabuf->fmt & TRIDENT_FMT_16BIT ? 0 : 0x80);
-				if (dmabuf->update_flag & ALI_ADDRESS_INT_UPDATE) {
-					/* We must clear end data of 1/2 dmabuf if needed.
-					   According to 1/2 algorithm of Address Engine Interrupt,
-					   check the validation of the data of half dmasize. */
-					half_dmasize = dmabuf->dmasize / 2;
-					if ((diff = hwptr - half_dmasize) < 0)
-						diff = hwptr;
-					if ((dmabuf->count + diff) < half_dmasize) {
-						//there is invalid data in the end of half buffer
-						if ((clear_cnt = half_dmasize - swptr) < 0)
-							clear_cnt += half_dmasize;
-						//clear the invalid data
-						memset(dmabuf->rawbuf + swptr, silence, clear_cnt);
-						if (state->chans_num == 6) {
-							clear_cnt = clear_cnt / 2;
-							swptr = swptr / 2;
-							memset(state->other_states[0]->dmabuf.rawbuf + swptr,
-							       silence, clear_cnt);
-							memset(state->other_states[1]->dmabuf.rawbuf + swptr,
-							       silence, clear_cnt);
-							memset(state->other_states[2]->dmabuf.rawbuf + swptr,
-							       silence, clear_cnt);
-							memset(state->other_states[3]->dmabuf.rawbuf + swptr,
-							       silence, clear_cnt);
-						}
-						dmabuf->endcleared = 1;
-					}
-				} else if (dmabuf->count < (signed) dmabuf->fragsize) {
-					clear_cnt = dmabuf->fragsize;
-					if ((swptr + clear_cnt) > dmabuf->dmasize)
-						clear_cnt = dmabuf->dmasize - swptr;
-					memset(dmabuf->rawbuf + swptr, silence, clear_cnt);
-					if (state->chans_num == 6) {
-						clear_cnt = clear_cnt / 2;
-						swptr = swptr / 2;
-						memset(state->other_states[0]->dmabuf.rawbuf + swptr,
-						       silence, clear_cnt);
-						memset(state->other_states[1]->dmabuf.rawbuf + swptr,
-						       silence, clear_cnt);
-						memset(state->other_states[2]->dmabuf.rawbuf + swptr,
-						       silence, clear_cnt);
-						memset(state->other_states[3]->dmabuf.rawbuf + swptr,
-						       silence, clear_cnt);
-					}
-					dmabuf->endcleared = 1;
-				}
-			}
-			/* trident_update_ptr is called by interrupt handler or by process via
-			   ioctl/poll, we only wake up the waiting process when we have more
-			   than 1/2 buffer free (always true for interrupt handler) */
-			if (dmabuf->count < (signed) dmabuf->dmasize / 2)
-				wake_up(&dmabuf->wait);
-		}
-	}
-	dmabuf->update_flag &= ~ALI_ADDRESS_INT_UPDATE;
-}
-
-static void
-trident_address_interrupt(struct trident_card *card)
-{
-	int i;
-	struct trident_state *state;
-	unsigned int channel;
-
-	/* Update the pointers for all channels we are running. */
-	/* FIXME: should read interrupt status only once */
-	for (i = 0; i < NR_HW_CH; i++) {
-		channel = 63 - i;
-		if (trident_check_channel_interrupt(card, channel)) {
-			trident_ack_channel_interrupt(card, channel);
-			if ((state = card->states[i]) != NULL) {
-				trident_update_ptr(state);
-			} else {
-				printk(KERN_WARNING "trident: spurious channel "
-				       "irq %d.\n", channel);
-				trident_stop_voice(card, channel);
-				trident_disable_voice_irq(card, channel);
-			}
-		}
-	}
-}
-
-static void
-ali_hwvol_control(struct trident_card *card, int opt)
-{
-	u16 dwTemp, volume[2], mute, diff, *pVol[2];
-
-	dwTemp = ali_ac97_read(card->ac97_codec[0], 0x02);
-	mute = dwTemp & 0x8000;
-	volume[0] = dwTemp & 0x001f;
-	volume[1] = (dwTemp & 0x1f00) >> 8;
-	if (volume[0] < volume[1]) {
-		pVol[0] = &volume[0];
-		pVol[1] = &volume[1];
-	} else {
-		pVol[1] = &volume[0];
-		pVol[0] = &volume[1];
-	}
-	diff = *(pVol[1]) - *(pVol[0]);
-
-	if (opt == 1) {		// MUTE
-		dwTemp ^= 0x8000;
-		ali_ac97_write(card->ac97_codec[0],
-			       0x02, dwTemp);
-	} else if (opt == 2) {	// Down
-		if (mute)
-			return;
-		if (*(pVol[1]) < 0x001f) {
-			(*pVol[1])++;
-			*(pVol[0]) = *(pVol[1]) - diff;
-		}
-		dwTemp &= 0xe0e0;
-		dwTemp |= (volume[0]) | (volume[1] << 8);
-		ali_ac97_write(card->ac97_codec[0], 0x02, dwTemp);
-		card->ac97_codec[0]->mixer_state[0] = ((32 - volume[0]) * 25 / 8) |
-			(((32 - volume[1]) * 25 / 8) << 8);
-	} else if (opt == 4) {	// Up
-		if (mute)
-			return;
-		if (*(pVol[0]) > 0) {
-			(*pVol[0])--;
-			*(pVol[1]) = *(pVol[0]) + diff;
-		}
-		dwTemp &= 0xe0e0;
-		dwTemp |= (volume[0]) | (volume[1] << 8);
-		ali_ac97_write(card->ac97_codec[0], 0x02, dwTemp);
-		card->ac97_codec[0]->mixer_state[0] = ((32 - volume[0]) * 25 / 8) |
-			(((32 - volume[1]) * 25 / 8) << 8);
-	} else {
-		/* Nothing needs doing */
-	}
-}
-
-/*
- *	Re-enable reporting of vol change after 0.1 seconds
- */
-
-static void
-ali_timeout(unsigned long ptr)
-{
-	struct trident_card *card = (struct trident_card *) ptr;
-	u16 temp = 0;
-
-	/* Enable GPIO IRQ (MISCINT bit 18h) */
-	temp = inw(TRID_REG(card, T4D_MISCINT + 2));
-	temp |= 0x0004;
-	outw(temp, TRID_REG(card, T4D_MISCINT + 2));
-}
-
-/*
- *	Set up the timer to clear the vol change notification
- */
-
-static void
-ali_set_timer(struct trident_card *card)
-{
-	/* Add Timer Routine to Enable GPIO IRQ */
-	del_timer(&card->timer);	/* Never queue twice */
-	card->timer.function = ali_timeout;
-	card->timer.data = (unsigned long) card;
-	card->timer.expires = jiffies + HZ / 10;
-	add_timer(&card->timer);
-}
-
-/*
- *	Process a GPIO event
- */
-
-static void
-ali_queue_task(struct trident_card *card, int opt)
-{
-	u16 temp;
-
-	/* Disable GPIO IRQ (MISCINT bit 18h) */
-	temp = inw(TRID_REG(card, T4D_MISCINT + 2));
-	temp &= (u16) (~0x0004);
-	outw(temp, TRID_REG(card, T4D_MISCINT + 2));
-
-	/* Adjust the volume */
-	ali_hwvol_control(card, opt);
-
-	/* Set the timer for 1/10th sec */
-	ali_set_timer(card);
-}
-
-static void
-cyber_address_interrupt(struct trident_card *card)
-{
-	int i, irq_status;
-	struct trident_state *state;
-	unsigned int channel;
-
-	/* Update the pointers for all channels we are running. */
-	/* FIXED: read interrupt status only once */
-	irq_status = inl(TRID_REG(card, T4D_AINT_A));
-
-	pr_debug("cyber_address_interrupt: irq_status 0x%X\n", irq_status);
-
-	for (i = 0; i < NR_HW_CH; i++) {
-		channel = 31 - i;
-		if (irq_status & (1 << channel)) {
-			/* clear bit by writing a 1, zeroes are ignored */
-			outl((1 << channel), TRID_REG(card, T4D_AINT_A));
-
-			pr_debug("cyber_interrupt: channel %d\n", channel);
-
-			if ((state = card->states[i]) != NULL) {
-				trident_update_ptr(state);
-			} else {
-				printk(KERN_WARNING "cyber5050: spurious "
-				       "channel irq %d.\n", channel);
-				trident_stop_voice(card, channel);
-				trident_disable_voice_irq(card, channel);
-			}
-		}
-	}
-}
-
-static irqreturn_t
-trident_interrupt(int irq, void *dev_id)
-{
-	struct trident_card *card = (struct trident_card *) dev_id;
-	u32 event;
-	u32 gpio;
-
-	spin_lock(&card->lock);
-	event = inl(TRID_REG(card, T4D_MISCINT));
-
-	pr_debug("trident: trident_interrupt called, MISCINT = 0x%08x\n",
-		 event);
-
-	if (event & ADDRESS_IRQ) {
-		card->address_interrupt(card);
-	}
-
-	if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-		/* GPIO IRQ (H/W Volume Control) */
-		event = inl(TRID_REG(card, T4D_MISCINT));
-		if (event & (1 << 25)) {
-			gpio = inl(TRID_REG(card, ALI_GPIO));
-			if (!timer_pending(&card->timer))
-				ali_queue_task(card, gpio & 0x07);
-		}
-		event = inl(TRID_REG(card, T4D_MISCINT));
-		outl(event | (ST_TARGET_REACHED | MIXER_OVERFLOW | MIXER_UNDERFLOW),
-		     TRID_REG(card, T4D_MISCINT));
-		spin_unlock(&card->lock);
-		return IRQ_HANDLED;
-	}
-
-	/* manually clear interrupt status, bad hardware design, blame T^2 */
-	outl((ST_TARGET_REACHED | MIXER_OVERFLOW | MIXER_UNDERFLOW),
-	     TRID_REG(card, T4D_MISCINT));
-	spin_unlock(&card->lock);
-	return IRQ_HANDLED;
-}
-
-/* in this loop, dmabuf.count signifies the amount of data that is waiting */
-/* to be copied to the user's buffer.  it is filled by the dma machine and */
-/* drained by this loop. */
-static ssize_t
-trident_read(struct file *file, char __user *buffer, size_t count, loff_t * ppos)
-{
-	struct trident_state *state = (struct trident_state *)file->private_data;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	ssize_t ret = 0;
-	unsigned long flags;
-	unsigned swptr;
-	int cnt;
-
-	pr_debug("trident: trident_read called, count = %zd\n", count);
-
-	VALIDATE_STATE(state);
-
-	if (dmabuf->mapped)
-		return -ENXIO;
-	if (!access_ok(VERIFY_WRITE, buffer, count))
-		return -EFAULT;
-
-	mutex_lock(&state->sem);
-	if (!dmabuf->ready && (ret = prog_dmabuf_record(state)))
-		goto out;
-
-	while (count > 0) {
-		spin_lock_irqsave(&state->card->lock, flags);
-		if (dmabuf->count > (signed) dmabuf->dmasize) {
-			/* buffer overrun, we are recovering from */
-			/* sleep_on_timeout, resync hwptr and swptr, */
-			/* make process flush the buffer */
-			dmabuf->count = dmabuf->dmasize;
-			dmabuf->swptr = dmabuf->hwptr;
-		}
-		swptr = dmabuf->swptr;
-		cnt = dmabuf->dmasize - swptr;
-		if (dmabuf->count < cnt)
-			cnt = dmabuf->count;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-
-		if (cnt > count)
-			cnt = count;
-		if (cnt <= 0) {
-			unsigned long tmo;
-			/* buffer is empty, start the dma machine and */
-			/* wait for data to be recorded */
-			start_adc(state);
-			if (file->f_flags & O_NONBLOCK) {
-				if (!ret)
-					ret = -EAGAIN;
-				goto out;
-			}
-
-			mutex_unlock(&state->sem);
-			/* No matter how much space left in the buffer, */
-			/* we have to wait until CSO == ESO/2 or CSO == ESO */
-			/* when address engine interrupts */
-			tmo = (dmabuf->dmasize * HZ) / (dmabuf->rate * 2);
-			tmo >>= sample_shift[dmabuf->fmt];
-			/* There are two situations when sleep_on_timeout returns, one is when
-			   the interrupt is serviced correctly and the process is waked up by
-			   ISR ON TIME. Another is when timeout is expired, which means that
-			   either interrupt is NOT serviced correctly (pending interrupt) or it
-			   is TOO LATE for the process to be scheduled to run (scheduler latency)
-			   which results in a (potential) buffer overrun. And worse, there is
-			   NOTHING we can do to prevent it. */
-			if (!interruptible_sleep_on_timeout(&dmabuf->wait, tmo)) {
-				pr_debug(KERN_ERR "trident: recording schedule timeout, "
-					 "dmasz %u fragsz %u count %i hwptr %u swptr %u\n",
-					 dmabuf->dmasize, dmabuf->fragsize, dmabuf->count,
-					 dmabuf->hwptr, dmabuf->swptr);
-
-				/* a buffer overrun, we delay the recovery until next time the
-				   while loop begin and we REALLY have space to record */
-			}
-			if (signal_pending(current)) {
-				if (!ret)
-					ret = -ERESTARTSYS;
-				goto out;
-			}
-			mutex_lock(&state->sem);
-			if (dmabuf->mapped) {
-				if (!ret)
-					ret = -ENXIO;
-				goto out;
-			}
-			continue;
-		}
-
-		if (copy_to_user(buffer, dmabuf->rawbuf + swptr, cnt)) {
-			if (!ret)
-				ret = -EFAULT;
-			goto out;
-		}
-
-		swptr = (swptr + cnt) % dmabuf->dmasize;
-
-		spin_lock_irqsave(&state->card->lock, flags);
-		dmabuf->swptr = swptr;
-		dmabuf->count -= cnt;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-
-		count -= cnt;
-		buffer += cnt;
-		ret += cnt;
-		start_adc(state);
-	}
-out:
-	mutex_unlock(&state->sem);
-	return ret;
-}
-
-/* in this loop, dmabuf.count signifies the amount of data that is waiting to be dma to
-   the soundcard.  it is drained by the dma machine and filled by this loop. */
-
-static ssize_t
-trident_write(struct file *file, const char __user *buffer, size_t count, loff_t * ppos)
-{
-	struct trident_state *state = (struct trident_state *)file->private_data;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	ssize_t ret;
-	unsigned long flags;
-	unsigned swptr;
-	int cnt;
-	unsigned int state_cnt;
-	unsigned int copy_count;
-	int lret; /* for lock_set_fmt */
-
-	pr_debug("trident: trident_write called, count = %zd\n", count);
-
-	VALIDATE_STATE(state);
-
-	/*
-	 *      Guard against an mmap or ioctl while writing
-	 */
-
-	mutex_lock(&state->sem);
-
-	if (dmabuf->mapped) {
-		ret = -ENXIO;
-		goto out;
-	}
-	if (!dmabuf->ready && (ret = prog_dmabuf_playback(state)))
-		goto out;
-
-	if (!access_ok(VERIFY_READ, buffer, count)) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	ret = 0;
-
-	while (count > 0) {
-		spin_lock_irqsave(&state->card->lock, flags);
-		if (dmabuf->count < 0) {
-			/* buffer underrun, we are recovering from */
-			/* sleep_on_timeout, resync hwptr and swptr */
-			dmabuf->count = 0;
-			dmabuf->swptr = dmabuf->hwptr;
-		}
-		swptr = dmabuf->swptr;
-		cnt = dmabuf->dmasize - swptr;
-		if (dmabuf->count + cnt > dmabuf->dmasize)
-			cnt = dmabuf->dmasize - dmabuf->count;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-
-		if (cnt > count)
-			cnt = count;
-		if (cnt <= 0) {
-			unsigned long tmo;
-			/* buffer is full, start the dma machine and */
-			/* wait for data to be played */
-			start_dac(state);
-			if (file->f_flags & O_NONBLOCK) {
-				if (!ret)
-					ret = -EAGAIN;
-				goto out;
-			}
-			/* No matter how much data left in the buffer, */
-			/* we have to wait until CSO == ESO/2 or CSO == ESO */
-			/* when address engine interrupts */
-			lock_set_fmt(state);
-			tmo = (dmabuf->dmasize * HZ) / (dmabuf->rate * 2);
-			tmo >>= sample_shift[dmabuf->fmt];
-			unlock_set_fmt(state);
-			mutex_unlock(&state->sem);
-
-			/* There are two situations when sleep_on_timeout */
-			/* returns, one is when the interrupt is serviced */
-			/* correctly and the process is waked up by ISR */
-			/* ON TIME. Another is when timeout is expired, which */
-			/* means that either interrupt is NOT serviced */
-			/* correctly (pending interrupt) or it is TOO LATE */
-			/* for the process to be scheduled to run */
-			/* (scheduler latency) which results in a (potential) */
-			/* buffer underrun. And worse, there is NOTHING we */
-			/* can do to prevent it. */
-			if (!interruptible_sleep_on_timeout(&dmabuf->wait, tmo)) {
-				pr_debug(KERN_ERR "trident: playback schedule "
-					 "timeout, dmasz %u fragsz %u count %i "
-					 "hwptr %u swptr %u\n", dmabuf->dmasize,
-					 dmabuf->fragsize, dmabuf->count,
-					 dmabuf->hwptr, dmabuf->swptr);
-
-				/* a buffer underrun, we delay the recovery */
-				/* until next time the while loop begin and */
-				/* we REALLY have data to play */
-			}
-			if (signal_pending(current)) {
-				if (!ret)
-					ret = -ERESTARTSYS;
-				goto out_nolock;
-			}
-			mutex_lock(&state->sem);
-			if (dmabuf->mapped) {
-				if (!ret)
-					ret = -ENXIO;
-				goto out;
-			}
-			continue;
-		}
-		if ((lret = lock_set_fmt(state)) < 0) {
-			ret = lret;
-			goto out;
-		}
-
-		if (state->chans_num == 6) {
-			copy_count = 0;
-			state_cnt = 0;
-			if (ali_write_5_1(state, buffer, cnt, &copy_count,
-					  &state_cnt) == -EFAULT) {
-				if (state_cnt) {
-					swptr = (swptr + state_cnt) % dmabuf->dmasize;
-					spin_lock_irqsave(&state->card->lock, flags);
-					dmabuf->swptr = swptr;
-					dmabuf->count += state_cnt;
-					dmabuf->endcleared = 0;
-					spin_unlock_irqrestore(&state->card->lock, flags);
-				}
-				ret += copy_count;
-				if (!ret)
-					ret = -EFAULT;
-				unlock_set_fmt(state);
-				goto out;
-			}
-		} else {
-			if (copy_from_user(dmabuf->rawbuf + swptr,
-					   buffer, cnt)) {
-				if (!ret)
-					ret = -EFAULT;
-				unlock_set_fmt(state);
-				goto out;
-			}
-			state_cnt = cnt;
-		}
-		unlock_set_fmt(state);
-
-		swptr = (swptr + state_cnt) % dmabuf->dmasize;
-
-		spin_lock_irqsave(&state->card->lock, flags);
-		dmabuf->swptr = swptr;
-		dmabuf->count += state_cnt;
-		dmabuf->endcleared = 0;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-
-		count -= cnt;
-		buffer += cnt;
-		ret += cnt;
-		start_dac(state);
-	}
-out:
-	mutex_unlock(&state->sem);
-out_nolock:
-	return ret;
-}
-
-/* No kernel lock - we have our own spinlock */
-static unsigned int
-trident_poll(struct file *file, struct poll_table_struct *wait)
-{
-	struct trident_state *state = (struct trident_state *)file->private_data;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned long flags;
-	unsigned int mask = 0;
-
-	VALIDATE_STATE(state);
-
-	/*
-	 *      Guard against a parallel poll and write causing multiple
-	 *      prog_dmabuf events
-	 */
-
-	mutex_lock(&state->sem);
-
-	if (file->f_mode & FMODE_WRITE) {
-		if (!dmabuf->ready && prog_dmabuf_playback(state)) {
-			mutex_unlock(&state->sem);
-			return 0;
-		}
-		poll_wait(file, &dmabuf->wait, wait);
-	}
-	if (file->f_mode & FMODE_READ) {
-		if (!dmabuf->ready && prog_dmabuf_record(state)) {
-			mutex_unlock(&state->sem);
-			return 0;
-		}
-		poll_wait(file, &dmabuf->wait, wait);
-	}
-
-	mutex_unlock(&state->sem);
-
-	spin_lock_irqsave(&state->card->lock, flags);
-	trident_update_ptr(state);
-	if (file->f_mode & FMODE_READ) {
-		if (dmabuf->count >= (signed) dmabuf->fragsize)
-			mask |= POLLIN | POLLRDNORM;
-	}
-	if (file->f_mode & FMODE_WRITE) {
-		if (dmabuf->mapped) {
-			if (dmabuf->count >= (signed) dmabuf->fragsize)
-				mask |= POLLOUT | POLLWRNORM;
-		} else {
-			if ((signed) dmabuf->dmasize >= dmabuf->count +
-			    (signed) dmabuf->fragsize)
-				mask |= POLLOUT | POLLWRNORM;
-		}
-	}
-	spin_unlock_irqrestore(&state->card->lock, flags);
-
-	return mask;
-}
-
-static int
-trident_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct trident_state *state = (struct trident_state *)file->private_data;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	int ret = -EINVAL;
-	unsigned long size;
-
-	VALIDATE_STATE(state);
-
-	/*
-	 *      Lock against poll read write or mmap creating buffers. Also lock
-	 *      a read or write against an mmap.
-	 */
-
-	mutex_lock(&state->sem);
-
-	if (vma->vm_flags & VM_WRITE) {
-		if ((ret = prog_dmabuf_playback(state)) != 0)
-			goto out;
-	} else if (vma->vm_flags & VM_READ) {
-		if ((ret = prog_dmabuf_record(state)) != 0)
-			goto out;
-	} else
-		goto out;
-
-	ret = -EINVAL;
-	if (vma->vm_pgoff != 0)
-		goto out;
-	size = vma->vm_end - vma->vm_start;
-	if (size > (PAGE_SIZE << dmabuf->buforder))
-		goto out;
-	ret = -EAGAIN;
-	if (remap_pfn_range(vma, vma->vm_start,
-			     virt_to_phys(dmabuf->rawbuf) >> PAGE_SHIFT,
-			     size, vma->vm_page_prot))
-		goto out;
-	dmabuf->mapped = 1;
-	ret = 0;
-out:
-	mutex_unlock(&state->sem);
-	return ret;
-}
-
-static int
-trident_ioctl(struct inode *inode, struct file *file,
-	      unsigned int cmd, unsigned long arg)
-{
-	struct trident_state *state = (struct trident_state *)file->private_data;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned long flags;
-	audio_buf_info abinfo;
-	count_info cinfo;
-	int val, mapped, ret = 0;
-	struct trident_card *card = state->card;
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-
-	VALIDATE_STATE(state);
-
-
-	mapped = ((file->f_mode & (FMODE_WRITE | FMODE_READ)) && dmabuf->mapped);
-
-	pr_debug("trident: trident_ioctl, command = %2d, arg = 0x%08x\n",
-		 _IOC_NR(cmd), arg ? *p : 0);
-
-	switch (cmd) {
-	case OSS_GETVERSION:
-		ret = put_user(SOUND_VERSION, p);
-		break;
-
-	case SNDCTL_DSP_RESET:
-		/* FIXME: spin_lock ? */
-		if (file->f_mode & FMODE_WRITE) {
-			stop_dac(state);
-			synchronize_irq(card->irq);
-			dmabuf->ready = 0;
-			dmabuf->swptr = dmabuf->hwptr = 0;
-			dmabuf->count = dmabuf->total_bytes = 0;
-		}
-		if (file->f_mode & FMODE_READ) {
-			stop_adc(state);
-			synchronize_irq(card->irq);
-			dmabuf->ready = 0;
-			dmabuf->swptr = dmabuf->hwptr = 0;
-			dmabuf->count = dmabuf->total_bytes = 0;
-		}
-		break;
-
-	case SNDCTL_DSP_SYNC:
-		if (file->f_mode & FMODE_WRITE)
-			ret = drain_dac(state, file->f_flags & O_NONBLOCK);
-		break;
-
-	case SNDCTL_DSP_SPEED:	/* set smaple rate */
-		if (get_user(val, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if (val >= 0) {
-			if (file->f_mode & FMODE_WRITE) {
-				stop_dac(state);
-				dmabuf->ready = 0;
-				spin_lock_irqsave(&state->card->lock, flags);
-				trident_set_dac_rate(state, val);
-				spin_unlock_irqrestore(&state->card->lock, flags);
-			}
-			if (file->f_mode & FMODE_READ) {
-				stop_adc(state);
-				dmabuf->ready = 0;
-				spin_lock_irqsave(&state->card->lock, flags);
-				trident_set_adc_rate(state, val);
-				spin_unlock_irqrestore(&state->card->lock, flags);
-			}
-		}
-		ret = put_user(dmabuf->rate, p);
-		break;
-
-	case SNDCTL_DSP_STEREO:	/* set stereo or mono channel */
-		if (get_user(val, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if ((ret = lock_set_fmt(state)) < 0)
-			return ret;
-
-		if (file->f_mode & FMODE_WRITE) {
-			stop_dac(state);
-			dmabuf->ready = 0;
-			if (val)
-				dmabuf->fmt |= TRIDENT_FMT_STEREO;
-			else
-				dmabuf->fmt &= ~TRIDENT_FMT_STEREO;
-		}
-		if (file->f_mode & FMODE_READ) {
-			stop_adc(state);
-			dmabuf->ready = 0;
-			if (val)
-				dmabuf->fmt |= TRIDENT_FMT_STEREO;
-			else
-				dmabuf->fmt &= ~TRIDENT_FMT_STEREO;
-		}
-		unlock_set_fmt(state);
-		break;
-
-	case SNDCTL_DSP_GETBLKSIZE:
-		if (file->f_mode & FMODE_WRITE) {
-			if ((val = prog_dmabuf_playback(state)))
-				ret = val;
-			else
-				ret = put_user(dmabuf->fragsize, p);
-			break;
-		}
-		if (file->f_mode & FMODE_READ) {
-			if ((val = prog_dmabuf_record(state)))
-				ret = val;
-			else
-				ret = put_user(dmabuf->fragsize, p);
-			break;
-		}
-		/* neither READ nor WRITE? is this even possible? */
-		ret = -EINVAL;
-		break;
-
-
-	case SNDCTL_DSP_GETFMTS: /* Returns a mask of supported sample format */
-		ret = put_user(AFMT_S16_LE | AFMT_U16_LE | AFMT_S8 |
-			       AFMT_U8, p);
-		break;
-
-	case SNDCTL_DSP_SETFMT:	/* Select sample format */
-		if (get_user(val, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if ((ret = lock_set_fmt(state)) < 0)
-			return ret;
-
-		if (val != AFMT_QUERY) {
-			if (file->f_mode & FMODE_WRITE) {
-				stop_dac(state);
-				dmabuf->ready = 0;
-				if (val == AFMT_S16_LE)
-					dmabuf->fmt |= TRIDENT_FMT_16BIT;
-				else
-					dmabuf->fmt &= ~TRIDENT_FMT_16BIT;
-			}
-			if (file->f_mode & FMODE_READ) {
-				stop_adc(state);
-				dmabuf->ready = 0;
-				if (val == AFMT_S16_LE)
-					dmabuf->fmt |= TRIDENT_FMT_16BIT;
-				else
-					dmabuf->fmt &= ~TRIDENT_FMT_16BIT;
-			}
-		}
-		unlock_set_fmt(state);
-		ret = put_user((dmabuf->fmt & TRIDENT_FMT_16BIT) ? AFMT_S16_LE :
-			       AFMT_U8, p);
-		break;
-
-	case SNDCTL_DSP_CHANNELS:
-		if (get_user(val, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if (val != 0) {
-			if ((ret = lock_set_fmt(state)) < 0)
-				return ret;
-
-			if (file->f_mode & FMODE_WRITE) {
-				stop_dac(state);
-				dmabuf->ready = 0;
-
-				//prevent from memory leak
-				if ((state->chans_num > 2) && (state->chans_num != val)) {
-					ali_free_other_states_resources(state);
-					state->chans_num = 1;
-				}
-
-				if (val >= 2) {
-
-					dmabuf->fmt |= TRIDENT_FMT_STEREO;
-					if ((val == 6) && (state->card->pci_id == PCI_DEVICE_ID_ALI_5451)) {
-						if (card->rec_channel_use_count > 0) {
-							printk(KERN_ERR "trident: Record is "
-							       "working on the card!\n");
-							ret = -EBUSY;
-							unlock_set_fmt(state);
-							break;
-						}
-
-						ret = ali_setup_multi_channels(state->card, 6);
-						if (ret < 0) {
-							unlock_set_fmt(state);
-							break;
-						}
-						mutex_lock(&state->card->open_mutex);
-						ret = ali_allocate_other_states_resources(state, 6);
-						if (ret < 0) {
-							mutex_unlock(&state->card->open_mutex);
-							unlock_set_fmt(state);
-							break;
-						}
-						state->card->multi_channel_use_count++;
-						mutex_unlock(&state->card->open_mutex);
-					} else
-						val = 2;	/*yield to 2-channels */
-				} else
-					dmabuf->fmt &= ~TRIDENT_FMT_STEREO;
-				state->chans_num = val;
-			}
-			if (file->f_mode & FMODE_READ) {
-				stop_adc(state);
-				dmabuf->ready = 0;
-				if (val >= 2) {
-					if (!((file->f_mode & FMODE_WRITE) &&
-					      (val == 6)))
-						val = 2;
-					dmabuf->fmt |= TRIDENT_FMT_STEREO;
-				} else
-					dmabuf->fmt &= ~TRIDENT_FMT_STEREO;
-				state->chans_num = val;
-			}
-			unlock_set_fmt(state);
-		}
-		ret = put_user(val, p);
-		break;
-
-	case SNDCTL_DSP_POST:
-		/* Cause the working fragment to be output */
-		break;
-
-	case SNDCTL_DSP_SUBDIVIDE:
-		if (dmabuf->subdivision) {
-			ret = -EINVAL;
-			break;
-		}
-		if (get_user(val, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if (val != 1 && val != 2 && val != 4) {
-			ret = -EINVAL;
-			break;
-		}
-		dmabuf->subdivision = val;
-		break;
-
-	case SNDCTL_DSP_SETFRAGMENT:
-		if (get_user(val, p)) {
-			ret = -EFAULT;
-			break;
-		}
-
-		dmabuf->ossfragshift = val & 0xffff;
-		dmabuf->ossmaxfrags = (val >> 16) & 0xffff;
-		if (dmabuf->ossfragshift < 4)
-			dmabuf->ossfragshift = 4;
-		if (dmabuf->ossfragshift > 15)
-			dmabuf->ossfragshift = 15;
-		if (dmabuf->ossmaxfrags < 4)
-			dmabuf->ossmaxfrags = 4;
-
-		break;
-
-	case SNDCTL_DSP_GETOSPACE:
-		if (!(file->f_mode & FMODE_WRITE)) {
-			ret = -EINVAL;
-			break;
-		}
-		if (!dmabuf->ready && (val = prog_dmabuf_playback(state)) != 0) {
-			ret = val;
-			break;
-		}
-		spin_lock_irqsave(&state->card->lock, flags);
-		trident_update_ptr(state);
-		abinfo.fragsize = dmabuf->fragsize;
-		abinfo.bytes = dmabuf->dmasize - dmabuf->count;
-		abinfo.fragstotal = dmabuf->numfrag;
-		abinfo.fragments = abinfo.bytes >> dmabuf->fragshift;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-		ret = copy_to_user(argp, &abinfo, sizeof (abinfo)) ?
-			-EFAULT : 0;
-		break;
-
-	case SNDCTL_DSP_GETISPACE:
-		if (!(file->f_mode & FMODE_READ)) {
-			ret = -EINVAL;
-			break;
-		}
-		if (!dmabuf->ready && (val = prog_dmabuf_record(state)) != 0) {
-			ret = val;
-			break;
-		}
-		spin_lock_irqsave(&state->card->lock, flags);
-		trident_update_ptr(state);
-		abinfo.fragsize = dmabuf->fragsize;
-		abinfo.bytes = dmabuf->count;
-		abinfo.fragstotal = dmabuf->numfrag;
-		abinfo.fragments = abinfo.bytes >> dmabuf->fragshift;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-		ret = copy_to_user(argp, &abinfo, sizeof (abinfo)) ?
-			-EFAULT : 0;
-		break;
-
-	case SNDCTL_DSP_NONBLOCK:
-		file->f_flags |= O_NONBLOCK;
-		break;
-
-	case SNDCTL_DSP_GETCAPS:
-		ret = put_user(DSP_CAP_REALTIME | DSP_CAP_TRIGGER |
-			       DSP_CAP_MMAP | DSP_CAP_BIND, p);
-		break;
-
-	case SNDCTL_DSP_GETTRIGGER:
-		val = 0;
-		if ((file->f_mode & FMODE_READ) && dmabuf->enable)
-			val |= PCM_ENABLE_INPUT;
-		if ((file->f_mode & FMODE_WRITE) && dmabuf->enable)
-			val |= PCM_ENABLE_OUTPUT;
-		ret = put_user(val, p);
-		break;
-
-	case SNDCTL_DSP_SETTRIGGER:
-		if (get_user(val, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if (file->f_mode & FMODE_READ) {
-			if (val & PCM_ENABLE_INPUT) {
-				if (!dmabuf->ready &&
-				    (ret = prog_dmabuf_record(state)))
-					break;
-				start_adc(state);
-			} else
-				stop_adc(state);
-		}
-		if (file->f_mode & FMODE_WRITE) {
-			if (val & PCM_ENABLE_OUTPUT) {
-				if (!dmabuf->ready &&
-				    (ret = prog_dmabuf_playback(state)))
-					break;
-				start_dac(state);
-			} else
-				stop_dac(state);
-		}
-		break;
-
-	case SNDCTL_DSP_GETIPTR:
-		if (!(file->f_mode & FMODE_READ)) {
-			ret = -EINVAL;
-			break;
-		}
-		if (!dmabuf->ready && (val = prog_dmabuf_record(state))
-		    != 0) {
-			ret = val;
-			break;
-		}
-		spin_lock_irqsave(&state->card->lock, flags);
-		trident_update_ptr(state);
-		cinfo.bytes = dmabuf->total_bytes;
-		cinfo.blocks = dmabuf->count >> dmabuf->fragshift;
-		cinfo.ptr = dmabuf->hwptr;
-		if (dmabuf->mapped)
-			dmabuf->count &= dmabuf->fragsize - 1;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-		ret = copy_to_user(argp, &cinfo, sizeof (cinfo)) ?
-			-EFAULT : 0;
-		break;
-
-	case SNDCTL_DSP_GETOPTR:
-		if (!(file->f_mode & FMODE_WRITE)) {
-			ret = -EINVAL;
-			break;
-		}
-		if (!dmabuf->ready && (val = prog_dmabuf_playback(state))
-		    != 0) {
-			ret = val;
-			break;
-		}
-
-		spin_lock_irqsave(&state->card->lock, flags);
-		trident_update_ptr(state);
-		cinfo.bytes = dmabuf->total_bytes;
-		cinfo.blocks = dmabuf->count >> dmabuf->fragshift;
-		cinfo.ptr = dmabuf->hwptr;
-		if (dmabuf->mapped)
-			dmabuf->count &= dmabuf->fragsize - 1;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-		ret = copy_to_user(argp, &cinfo, sizeof (cinfo)) ?
-			-EFAULT : 0;
-		break;
-
-	case SNDCTL_DSP_SETDUPLEX:
-		ret = -EINVAL;
-		break;
-
-	case SNDCTL_DSP_GETODELAY:
-		if (!(file->f_mode & FMODE_WRITE)) {
-			ret = -EINVAL;
-			break;
-		}
-		if (!dmabuf->ready && (val = prog_dmabuf_playback(state)) != 0) {
-			ret = val;
-			break;
-		}
-		spin_lock_irqsave(&state->card->lock, flags);
-		trident_update_ptr(state);
-		val = dmabuf->count;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-		ret = put_user(val, p);
-		break;
-
-	case SOUND_PCM_READ_RATE:
-		ret = put_user(dmabuf->rate, p);
-		break;
-
-	case SOUND_PCM_READ_CHANNELS:
-		ret = put_user((dmabuf->fmt & TRIDENT_FMT_STEREO) ? 2 : 1,
-			       p);
-		break;
-
-	case SOUND_PCM_READ_BITS:
-		ret = put_user((dmabuf->fmt & TRIDENT_FMT_16BIT) ? AFMT_S16_LE :
-			       AFMT_U8, p);
-		break;
-
-	case SNDCTL_DSP_GETCHANNELMASK:
-		ret = put_user(DSP_BIND_FRONT | DSP_BIND_SURR |
-			       DSP_BIND_CENTER_LFE,  p);
-		break;
-
-	case SNDCTL_DSP_BIND_CHANNEL:
-		if (state->card->pci_id != PCI_DEVICE_ID_SI_7018) {
-			ret = -EINVAL;
-			break;
-		}
-
-		if (get_user(val, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if (val == DSP_BIND_QUERY) {
-			val = dmabuf->channel->attribute | 0x3c00;
-			val = attr2mask[val >> 8];
-		} else {
-			dmabuf->ready = 0;
-			if (file->f_mode & FMODE_READ)
-				dmabuf->channel->attribute = (CHANNEL_REC |
-							      SRC_ENABLE);
-			if (file->f_mode & FMODE_WRITE)
-				dmabuf->channel->attribute = (CHANNEL_SPC_PB |
-							      SRC_ENABLE);
-			dmabuf->channel->attribute |= mask2attr[ffs(val)];
-		}
-		ret = put_user(val, p);
-		break;
-
-	case SNDCTL_DSP_MAPINBUF:
-	case SNDCTL_DSP_MAPOUTBUF:
-	case SNDCTL_DSP_SETSYNCRO:
-	case SOUND_PCM_WRITE_FILTER:
-	case SOUND_PCM_READ_FILTER:
-	default:
-		ret = -EINVAL;
-		break;
-
-	}
-	return ret;
-}
-
-static int
-trident_open(struct inode *inode, struct file *file)
-{
-	int i = 0;
-	int minor = iminor(inode);
-	struct trident_card *card = devs;
-	struct trident_state *state = NULL;
-	struct dmabuf *dmabuf = NULL;
-	unsigned long flags;
-
-	/* Added by Matt Wu 01-05-2001 */
-	/* TODO: there's some redundacy here wrt the check below */
-	/* for multi_use_count > 0. Should we return -EBUSY or find */
-	/* a different card? for now, don't break current behaviour */
-	/* -- mulix */
-	if (file->f_mode & FMODE_READ) {
-		if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-			if (card->multi_channel_use_count > 0)
-				return -EBUSY;
-		}
-	}
-
-	/* find an available virtual channel (instance of /dev/dsp) */
-	while (card != NULL) {
-		mutex_lock(&card->open_mutex);
-		if (file->f_mode & FMODE_READ) {
-			/* Skip opens on cards that are in 6 channel mode */
-			if (card->multi_channel_use_count > 0) {
-				mutex_unlock(&card->open_mutex);
-				card = card->next;
-				continue;
-			}
-		}
-		for (i = 0; i < NR_HW_CH; i++) {
-			if (card->states[i] == NULL) {
-				state = card->states[i] = kzalloc(sizeof(*state), GFP_KERNEL);
-				if (state == NULL) {
-					mutex_unlock(&card->open_mutex);
-					return -ENOMEM;
-				}
-				mutex_init(&state->sem);
-				dmabuf = &state->dmabuf;
-				goto found_virt;
-			}
-		}
-		mutex_unlock(&card->open_mutex);
-		card = card->next;
-	}
-	/* no more virtual channel avaiable */
-	if (!state) {
-		return -ENODEV;
-	}
-      found_virt:
-	/* found a free virtual channel, allocate hardware channels */
-	if (file->f_mode & FMODE_READ)
-		dmabuf->channel = card->alloc_rec_pcm_channel(card);
-	else
-		dmabuf->channel = card->alloc_pcm_channel(card);
-
-	if (dmabuf->channel == NULL) {
-		kfree(card->states[i]);
-		card->states[i] = NULL;
-		return -ENODEV;
-	}
-
-	/* initialize the virtual channel */
-	state->virt = i;
-	state->card = card;
-	state->magic = TRIDENT_STATE_MAGIC;
-	init_waitqueue_head(&dmabuf->wait);
-	file->private_data = state;
-
-	/* set default sample format. According to OSS Programmer's */
-	/* Guide  /dev/dsp should be default to unsigned 8-bits, mono, */
-	/* with sample rate 8kHz and /dev/dspW will accept 16-bits sample */
-	if (file->f_mode & FMODE_WRITE) {
-		dmabuf->fmt &= ~TRIDENT_FMT_MASK;
-		if ((minor & 0x0f) == SND_DEV_DSP16)
-			dmabuf->fmt |= TRIDENT_FMT_16BIT;
-		dmabuf->ossfragshift = 0;
-		dmabuf->ossmaxfrags = 0;
-		dmabuf->subdivision = 0;
-		if (card->pci_id == PCI_DEVICE_ID_SI_7018) {
-			/* set default channel attribute to normal playback */
-			dmabuf->channel->attribute = CHANNEL_PB;
-		}
-		spin_lock_irqsave(&card->lock, flags);
-		trident_set_dac_rate(state, 8000);
-		spin_unlock_irqrestore(&card->lock, flags);
-	}
-
-	if (file->f_mode & FMODE_READ) {
-		/* FIXME: Trident 4d can only record in signed 16-bits stereo, */
-		/* 48kHz sample, to be dealed with in trident_set_adc_rate() ?? */
-		dmabuf->fmt &= ~TRIDENT_FMT_MASK;
-		if ((minor & 0x0f) == SND_DEV_DSP16)
-			dmabuf->fmt |= TRIDENT_FMT_16BIT;
-		dmabuf->ossfragshift = 0;
-		dmabuf->ossmaxfrags = 0;
-		dmabuf->subdivision = 0;
-		if (card->pci_id == PCI_DEVICE_ID_SI_7018) {
-			/* set default channel attribute to 0x8a80, record from
-			   PCM L/R FIFO and mono = (left + right + 1)/2 */
-			dmabuf->channel->attribute = (CHANNEL_REC | PCM_LR |
-						      MONO_MIX);
-		}
-		spin_lock_irqsave(&card->lock, flags);
-		trident_set_adc_rate(state, 8000);
-		spin_unlock_irqrestore(&card->lock, flags);
-
-		/* Added by Matt Wu 01-05-2001 */
-		if (card->pci_id == PCI_DEVICE_ID_ALI_5451)
-			card->rec_channel_use_count++;
-	}
-
-	state->open_mode |= file->f_mode & (FMODE_READ | FMODE_WRITE);
-	mutex_unlock(&card->open_mutex);
-
-	pr_debug("trident: open virtual channel %d, hard channel %d\n",
-		 state->virt, dmabuf->channel->num);
-
-	return nonseekable_open(inode, file);
-}
-
-static int
-trident_release(struct inode *inode, struct file *file)
-{
-	struct trident_state *state = (struct trident_state *)file->private_data;
-	struct trident_card *card;
-	struct dmabuf *dmabuf;
-
-	VALIDATE_STATE(state);
-
-	card = state->card;
-	dmabuf = &state->dmabuf;
-
-	if (file->f_mode & FMODE_WRITE) {
-		trident_clear_tail(state);
-		drain_dac(state, file->f_flags & O_NONBLOCK);
-	}
-
-	pr_debug("trident: closing virtual channel %d, hard channel %d\n",
-		 state->virt, dmabuf->channel->num);
-
-	/* stop DMA state machine and free DMA buffers/channels */
-	mutex_lock(&card->open_mutex);
-
-	if (file->f_mode & FMODE_WRITE) {
-		stop_dac(state);
-		dealloc_dmabuf(&state->dmabuf, state->card->pci_dev);
-		state->card->free_pcm_channel(state->card, dmabuf->channel->num);
-
-		/* Added by Matt Wu */
-		if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-			if (state->chans_num > 2) {
-				if (card->multi_channel_use_count-- < 0)
-					card->multi_channel_use_count = 0;
-				if (card->multi_channel_use_count == 0)
-					ali_close_multi_channels();
-				ali_free_other_states_resources(state);
-			}
-		}
-	}
-	if (file->f_mode & FMODE_READ) {
-		stop_adc(state);
-		dealloc_dmabuf(&state->dmabuf, state->card->pci_dev);
-		state->card->free_pcm_channel(state->card, dmabuf->channel->num);
-
-		/* Added by Matt Wu */
-		if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-			if (card->rec_channel_use_count-- < 0)
-				card->rec_channel_use_count = 0;
-		}
-	}
-
-	card->states[state->virt] = NULL;
-	kfree(state);
-
-	/* we're covered by the open_mutex */
-	mutex_unlock(&card->open_mutex);
-
-	return 0;
-}
-
-static const struct file_operations trident_audio_fops = {
-	.owner = THIS_MODULE,
-	.llseek = no_llseek,
-	.read = trident_read,
-	.write = trident_write,
-	.poll = trident_poll,
-	.ioctl = trident_ioctl,
-	.mmap = trident_mmap,
-	.open = trident_open,
-	.release = trident_release,
-};
-
-/* trident specific AC97 functions */
-/* Write AC97 codec registers */
-static void
-trident_ac97_set(struct ac97_codec *codec, u8 reg, u16 val)
-{
-	struct trident_card *card = (struct trident_card *)codec->private_data;
-	unsigned int address, mask, busy;
-	unsigned short count = 0xffff;
-	unsigned long flags;
-	u32 data;
-
-	data = ((u32) val) << 16;
-
-	switch (card->pci_id) {
-	default:
-	case PCI_DEVICE_ID_SI_7018:
-		address = SI_AC97_WRITE;
-		mask = SI_AC97_BUSY_WRITE | SI_AC97_AUDIO_BUSY;
-		if (codec->id)
-			mask |= SI_AC97_SECONDARY;
-		busy = SI_AC97_BUSY_WRITE;
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX:
-		address = DX_ACR0_AC97_W;
-		mask = busy = DX_AC97_BUSY_WRITE;
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_NX:
-		address = NX_ACR1_AC97_W;
-		mask = NX_AC97_BUSY_WRITE;
-		if (codec->id)
-			mask |= NX_AC97_WRITE_SECONDARY;
-		busy = NX_AC97_BUSY_WRITE;
-		break;
-	case PCI_DEVICE_ID_INTERG_5050:
-		address = SI_AC97_WRITE;
-		mask = busy = SI_AC97_BUSY_WRITE;
-		if (codec->id)
-			mask |= SI_AC97_SECONDARY;
-		break;
-	}
-
-	spin_lock_irqsave(&card->lock, flags);
-	do {
-		if ((inw(TRID_REG(card, address)) & busy) == 0)
-			break;
-	} while (--count);
-
-	data |= (mask | (reg & AC97_REG_ADDR));
-
-	if (count == 0) {
-		printk(KERN_ERR "trident: AC97 CODEC write timed out.\n");
-		spin_unlock_irqrestore(&card->lock, flags);
-		return;
-	}
-
-	outl(data, TRID_REG(card, address));
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-/* Read AC97 codec registers */
-static u16
-trident_ac97_get(struct ac97_codec *codec, u8 reg)
-{
-	struct trident_card *card = (struct trident_card *)codec->private_data;
-	unsigned int address, mask, busy;
-	unsigned short count = 0xffff;
-	unsigned long flags;
-	u32 data;
-
-	switch (card->pci_id) {
-	default:
-	case PCI_DEVICE_ID_SI_7018:
-		address = SI_AC97_READ;
-		mask = SI_AC97_BUSY_READ | SI_AC97_AUDIO_BUSY;
-		if (codec->id)
-			mask |= SI_AC97_SECONDARY;
-		busy = SI_AC97_BUSY_READ;
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX:
-		address = DX_ACR1_AC97_R;
-		mask = busy = DX_AC97_BUSY_READ;
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_NX:
-		if (codec->id)
-			address = NX_ACR3_AC97_R_SECONDARY;
-		else
-			address = NX_ACR2_AC97_R_PRIMARY;
-		mask = NX_AC97_BUSY_READ;
-		busy = NX_AC97_BUSY_READ | NX_AC97_BUSY_DATA;
-		break;
-	case PCI_DEVICE_ID_INTERG_5050:
-		address = SI_AC97_READ;
-		mask = busy = SI_AC97_BUSY_READ;
-		if (codec->id)
-			mask |= SI_AC97_SECONDARY;
-		break;
-	}
-
-	data = (mask | (reg & AC97_REG_ADDR));
-
-	spin_lock_irqsave(&card->lock, flags);
-	outl(data, TRID_REG(card, address));
-	do {
-		data = inl(TRID_REG(card, address));
-		if ((data & busy) == 0)
-			break;
-	} while (--count);
-	spin_unlock_irqrestore(&card->lock, flags);
-
-	if (count == 0) {
-		printk(KERN_ERR "trident: AC97 CODEC read timed out.\n");
-		data = 0;
-	}
-	return ((u16) (data >> 16));
-}
-
-/* rewrite ac97 read and write mixer register by hulei for ALI*/
-static int
-acquirecodecaccess(struct trident_card *card)
-{
-	u16 wsemamask = 0x6000;	/* bit 14..13 */
-	u16 wsemabits;
-	u16 wcontrol;
-	int block = 0;
-	int ncount = 25;
-	while (1) {
-		wcontrol = inw(TRID_REG(card, ALI_AC97_WRITE));
-		wsemabits = wcontrol & wsemamask;
-
-		if (wsemabits == 0x4000)
-			return 1;	/* 0x4000 is audio ,then success */
-		if (ncount-- < 0)
-			break;
-		if (wsemabits == 0) {
-		      unlock:
-			outl(((u32) (wcontrol & 0x1eff) | 0x00004000),
-			     TRID_REG(card, ALI_AC97_WRITE));
-			continue;
-		}
-		udelay(20);
-	}
-	if (!block) {
-		pr_debug("accesscodecsemaphore: try unlock\n");
-		block = 1;
-		goto unlock;
-	}
-	return 0;
-}
-
-static void
-releasecodecaccess(struct trident_card *card)
-{
-	unsigned long wcontrol;
-	wcontrol = inl(TRID_REG(card, ALI_AC97_WRITE));
-	outl((wcontrol & 0xffff1eff), TRID_REG(card, ALI_AC97_WRITE));
-}
-
-static int
-waitforstimertick(struct trident_card *card)
-{
-	unsigned long chk1, chk2;
-	unsigned int wcount = 0xffff;
-	chk1 = inl(TRID_REG(card, ALI_STIMER));
-
-	while (1) {
-		chk2 = inl(TRID_REG(card, ALI_STIMER));
-		if ((wcount > 0) && chk1 != chk2)
-			return 1;
-		if (wcount <= 0)
-			break;
-		udelay(50);
-	}
-	return 0;
-}
-
-/* Read AC97 codec registers for ALi*/
-static u16
-ali_ac97_get(struct trident_card *card, int secondary, u8 reg)
-{
-	unsigned int address, mask;
-	unsigned int ncount;
-	unsigned long aud_reg;
-	u32 data;
-	u16 wcontrol;
-	unsigned long flags;
-
-	BUG_ON(!card);
-
-	address = ALI_AC97_READ;
-	if (card->revision == ALI_5451_V02) {
-		address = ALI_AC97_WRITE;
-	}
-	mask = ALI_AC97_READ_ACTION | ALI_AC97_AUDIO_BUSY;
-	if (secondary)
-		mask |= ALI_AC97_SECONDARY;
-
-	spin_lock_irqsave(&card->lock, flags);
-
-	if (!acquirecodecaccess(card))
-		printk(KERN_ERR "access codec fail\n");
-
-	wcontrol = inw(TRID_REG(card, ALI_AC97_WRITE));
-	wcontrol &= 0xfe00;
-	wcontrol |= (0x8000 | reg);
-	outw(wcontrol, TRID_REG(card, ALI_AC97_WRITE));
-
-	data = (mask | (reg & AC97_REG_ADDR));
-
-	if (!waitforstimertick(card)) {
-		printk(KERN_ERR "ali_ac97_read: BIT_CLOCK is dead\n");
-		goto releasecodec;
-	}
-
-	udelay(20);
-
-	ncount = 10;
-
-	while (1) {
-		if ((inw(TRID_REG(card, ALI_AC97_WRITE)) & ALI_AC97_BUSY_READ)
-		    != 0)
-			break;
-		if (ncount <= 0)
-			break;
-		if (ncount-- == 1) {
-			pr_debug("ali_ac97_read :try clear busy flag\n");
-			aud_reg = inl(TRID_REG(card, ALI_AC97_WRITE));
-			outl((aud_reg & 0xffff7fff),
-			     TRID_REG(card, ALI_AC97_WRITE));
-		}
-		udelay(10);
-	}
-
-	data = inl(TRID_REG(card, address));
-
-	spin_unlock_irqrestore(&card->lock, flags);
-
-	return ((u16) (data >> 16));
-
-      releasecodec:
-	releasecodecaccess(card);
-	spin_unlock_irqrestore(&card->lock, flags);
-	printk(KERN_ERR "ali_ac97_read: AC97 CODEC read timed out.\n");
-	return 0;
-}
-
-/* Write AC97 codec registers for hulei*/
-static void
-ali_ac97_set(struct trident_card *card, int secondary, u8 reg, u16 val)
-{
-	unsigned int address, mask;
-	unsigned int ncount;
-	u32 data;
-	u16 wcontrol;
-	unsigned long flags;
-
-	data = ((u32) val) << 16;
-
-	BUG_ON(!card);
-
-	address = ALI_AC97_WRITE;
-	mask = ALI_AC97_WRITE_ACTION | ALI_AC97_AUDIO_BUSY;
-	if (secondary)
-		mask |= ALI_AC97_SECONDARY;
-	if (card->revision == ALI_5451_V02)
-		mask |= ALI_AC97_WRITE_MIXER_REGISTER;
-
-	spin_lock_irqsave(&card->lock, flags);
-	if (!acquirecodecaccess(card))
-		printk(KERN_ERR "ali_ac97_write: access codec fail\n");
-
-	wcontrol = inw(TRID_REG(card, ALI_AC97_WRITE));
-	wcontrol &= 0xff00;
-	wcontrol |= (0x8100 | reg); /* bit 8=1: (ali1535 )reserved/ */
-	                            /* ali1535+ write */
-	outl((data | wcontrol), TRID_REG(card, ALI_AC97_WRITE));
-
-	if (!waitforstimertick(card)) {
-		printk(KERN_ERR "BIT_CLOCK is dead\n");
-		goto releasecodec;
-	}
-
-	ncount = 10;
-	while (1) {
-		wcontrol = inw(TRID_REG(card, ALI_AC97_WRITE));
-		if (!(wcontrol & 0x8000))
-			break;
-		if (ncount <= 0)
-			break;
-		if (ncount-- == 1) {
-			pr_debug("ali_ac97_set :try clear busy flag!!\n");
-			outw(wcontrol & 0x7fff,
-			     TRID_REG(card, ALI_AC97_WRITE));
-		}
-		udelay(10);
-	}
-
-      releasecodec:
-	releasecodecaccess(card);
-	spin_unlock_irqrestore(&card->lock, flags);
-	return;
-}
-
-static void
-ali_enable_special_channel(struct trident_state *stat)
-{
-	struct trident_card *card = stat->card;
-	unsigned long s_channels;
-
-	s_channels = inl(TRID_REG(card, ALI_GLOBAL_CONTROL));
-	s_channels |= (1 << stat->dmabuf.channel->num);
-	outl(s_channels, TRID_REG(card, ALI_GLOBAL_CONTROL));
-}
-
-static u16
-ali_ac97_read(struct ac97_codec *codec, u8 reg)
-{
-	int id;
-	u16 data;
-	struct trident_card *card = NULL;
-
-	/* Added by Matt Wu */
-	BUG_ON(!codec);
-
-	card = (struct trident_card *) codec->private_data;
-
-	if (!card->mixer_regs_ready)
-		return ali_ac97_get(card, codec->id, reg);
-
-	/*
-	 *      FIXME: need to stop this caching some registers
-	 */
-	if (codec->id)
-		id = 1;
-	else
-		id = 0;
-
-	data = card->mixer_regs[reg / 2][id];
-	return data;
-}
-
-static void
-ali_ac97_write(struct ac97_codec *codec, u8 reg, u16 val)
-{
-	int id;
-	struct trident_card *card;
-
-	/*  Added by Matt Wu */
-	BUG_ON(!codec);
-
-	card = (struct trident_card *) codec->private_data;
-
-	if (!card->mixer_regs_ready) {
-		ali_ac97_set(card, codec->id, reg, val);
-		return;
-	}
-
-	if (codec->id)
-		id = 1;
-	else
-		id = 0;
-
-	card->mixer_regs[reg / 2][id] = val;
-	ali_ac97_set(card, codec->id, reg, val);
-}
-
-/*
-flag:	ALI_SPDIF_OUT_TO_SPDIF_OUT
-	ALI_PCM_TO_SPDIF_OUT
-*/
-
-static void
-ali_setup_spdif_out(struct trident_card *card, int flag)
-{
-	unsigned long spdif;
-	unsigned char ch;
-
-	char temp;
-	struct pci_dev *pci_dev = NULL;
-
-	pci_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533,
-				 pci_dev);
-	if (pci_dev == NULL)
-		return;
-	pci_read_config_byte(pci_dev, 0x61, &temp);
-	temp |= 0x40;
-	pci_write_config_byte(pci_dev, 0x61, temp);
-	pci_read_config_byte(pci_dev, 0x7d, &temp);
-	temp |= 0x01;
-	pci_write_config_byte(pci_dev, 0x7d, temp);
-	pci_read_config_byte(pci_dev, 0x7e, &temp);
-	temp &= (~0x20);
-	temp |= 0x10;
-	pci_write_config_byte(pci_dev, 0x7e, temp);
-
-	pci_dev_put(pci_dev);
-
-	ch = inb(TRID_REG(card, ALI_SCTRL));
-	outb(ch | ALI_SPDIF_OUT_ENABLE, TRID_REG(card, ALI_SCTRL));
-	ch = inb(TRID_REG(card, ALI_SPDIF_CTRL));
-	outb(ch & ALI_SPDIF_OUT_CH_STATUS, TRID_REG(card, ALI_SPDIF_CTRL));
-
-	if (flag & ALI_SPDIF_OUT_TO_SPDIF_OUT) {
-		spdif = inw(TRID_REG(card, ALI_GLOBAL_CONTROL));
-		spdif |= ALI_SPDIF_OUT_CH_ENABLE;
-		spdif &= ALI_SPDIF_OUT_SEL_SPDIF;
-		outw(spdif, TRID_REG(card, ALI_GLOBAL_CONTROL));
-		spdif = inw(TRID_REG(card, ALI_SPDIF_CS));
-		if (flag & ALI_SPDIF_OUT_NON_PCM)
-			spdif |= 0x0002;
-		else
-			spdif &= (~0x0002);
-		outw(spdif, TRID_REG(card, ALI_SPDIF_CS));
-	} else {
-		spdif = inw(TRID_REG(card, ALI_GLOBAL_CONTROL));
-		spdif |= ALI_SPDIF_OUT_SEL_PCM;
-		outw(spdif, TRID_REG(card, ALI_GLOBAL_CONTROL));
-	}
-}
-
-static void
-ali_disable_special_channel(struct trident_card *card, int ch)
-{
-	unsigned long sc;
-
-	sc = inl(TRID_REG(card, ALI_GLOBAL_CONTROL));
-	sc &= ~(1 << ch);
-	outl(sc, TRID_REG(card, ALI_GLOBAL_CONTROL));
-}
-
-static void
-ali_disable_spdif_in(struct trident_card *card)
-{
-	unsigned long spdif;
-
-	spdif = inl(TRID_REG(card, ALI_GLOBAL_CONTROL));
-	spdif &= (~ALI_SPDIF_IN_SUPPORT);
-	outl(spdif, TRID_REG(card, ALI_GLOBAL_CONTROL));
-
-	ali_disable_special_channel(card, ALI_SPDIF_IN_CHANNEL);
-}
-
-static void
-ali_setup_spdif_in(struct trident_card *card)
-{
-	unsigned long spdif;
-
-	//Set SPDIF IN Supported
-	spdif = inl(TRID_REG(card, ALI_GLOBAL_CONTROL));
-	spdif |= ALI_SPDIF_IN_SUPPORT;
-	outl(spdif, TRID_REG(card, ALI_GLOBAL_CONTROL));
-
-	//Set SPDIF IN Rec
-	spdif = inl(TRID_REG(card, ALI_GLOBAL_CONTROL));
-	spdif |= ALI_SPDIF_IN_CH_ENABLE;
-	outl(spdif, TRID_REG(card, ALI_GLOBAL_CONTROL));
-
-	spdif = inb(TRID_REG(card, ALI_SPDIF_CTRL));
-	spdif |= ALI_SPDIF_IN_CH_STATUS;
-	outb(spdif, TRID_REG(card, ALI_SPDIF_CTRL));
-/*
-	spdif = inb(TRID_REG(card, ALI_SPDIF_CTRL));
-	spdif |= ALI_SPDIF_IN_FUNC_ENABLE;
-	outb(spdif, TRID_REG(card, ALI_SPDIF_CTRL));
-*/
-}
-
-static void
-ali_delay(struct trident_card *card, int interval)
-{
-	unsigned long begintimer, currenttimer;
-
-	begintimer = inl(TRID_REG(card, ALI_STIMER));
-	currenttimer = inl(TRID_REG(card, ALI_STIMER));
-
-	while (currenttimer < begintimer + interval)
-		currenttimer = inl(TRID_REG(card, ALI_STIMER));
-}
-
-static void
-ali_detect_spdif_rate(struct trident_card *card)
-{
-	u16 wval = 0;
-	u16 count = 0;
-	u8 bval = 0, R1 = 0, R2 = 0;
-
-	bval = inb(TRID_REG(card, ALI_SPDIF_CTRL));
-	bval |= 0x02;
-	outb(bval, TRID_REG(card, ALI_SPDIF_CTRL));
-
-	bval = inb(TRID_REG(card, ALI_SPDIF_CTRL + 1));
-	bval |= 0x1F;
-	outb(bval, TRID_REG(card, ALI_SPDIF_CTRL + 1));
-
-	while (((R1 < 0x0B) || (R1 > 0x0E)) && (R1 != 0x12) &&
-	       count <= 50000) {
-		count++;
-
-		ali_delay(card, 6);
-
-		bval = inb(TRID_REG(card, ALI_SPDIF_CTRL + 1));
-		R1 = bval & 0x1F;
-	}
-
-	if (count > 50000) {
-		printk(KERN_WARNING "trident: Error in "
-		       "ali_detect_spdif_rate!\n");
-		return;
-	}
-
-	count = 0;
-
-	while (count <= 50000) {
-		count++;
-
-		ali_delay(card, 6);
-
-		bval = inb(TRID_REG(card, ALI_SPDIF_CTRL + 1));
-		R2 = bval & 0x1F;
-
-		if (R2 != R1)
-			R1 = R2;
-		else
-			break;
-	}
-
-	if (count > 50000) {
-		printk(KERN_WARNING "trident: Error in "
-		       "ali_detect_spdif_rate!\n");
-		return;
-	}
-
-	switch (R2) {
-	case 0x0b:
-	case 0x0c:
-	case 0x0d:
-	case 0x0e:
-		wval = inw(TRID_REG(card, ALI_SPDIF_CTRL + 2));
-		wval &= 0xE0F0;
-		wval |= (u16) 0x09 << 8 | (u16) 0x05;
-		outw(wval, TRID_REG(card, ALI_SPDIF_CTRL + 2));
-
-		bval = inb(TRID_REG(card, ALI_SPDIF_CS + 3)) & 0xF0;
-		outb(bval | 0x02, TRID_REG(card, ALI_SPDIF_CS + 3));
-		break;
-
-	case 0x12:
-		wval = inw(TRID_REG(card, ALI_SPDIF_CTRL + 2));
-		wval &= 0xE0F0;
-		wval |= (u16) 0x0E << 8 | (u16) 0x08;
-		outw(wval, TRID_REG(card, ALI_SPDIF_CTRL + 2));
-
-		bval = inb(TRID_REG(card, ALI_SPDIF_CS + 3)) & 0xF0;
-		outb(bval | 0x03, TRID_REG(card, ALI_SPDIF_CS + 3));
-		break;
-
-	default:
-		break;
-	}
-
-}
-
-static unsigned int
-ali_get_spdif_in_rate(struct trident_card *card)
-{
-	u32 dwRate = 0;
-	u8 bval = 0;
-
-	ali_detect_spdif_rate(card);
-
-	bval = inb(TRID_REG(card, ALI_SPDIF_CTRL));
-	bval &= 0x7F;
-	bval |= 0x40;
-	outb(bval, TRID_REG(card, ALI_SPDIF_CTRL));
-
-	bval = inb(TRID_REG(card, ALI_SPDIF_CS + 3));
-	bval &= 0x0F;
-
-	switch (bval) {
-	case 0:
-		dwRate = 44100;
-		break;
-	case 1:
-		dwRate = 48000;
-		break;
-	case 2:
-		dwRate = 32000;
-		break;
-	default:
-		// Error occurs
-		break;
-	}
-
-	return dwRate;
-
-}
-
-static int
-ali_close_multi_channels(void)
-{
-	char temp = 0;
-	struct pci_dev *pci_dev = NULL;
-
-	pci_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533,
-				 pci_dev);
-	if (pci_dev == NULL)
-		return -1;
-
-	pci_read_config_byte(pci_dev, 0x59, &temp);
-	temp &= ~0x80;
-	pci_write_config_byte(pci_dev, 0x59, temp);
-
-	pci_dev_put(pci_dev);
-
-	pci_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101,
-				 NULL);
-	if (pci_dev == NULL)
-		return -1;
-
-	pci_read_config_byte(pci_dev, 0xB8, &temp);
-	temp &= ~0x20;
-	pci_write_config_byte(pci_dev, 0xB8, temp);
-
-	pci_dev_put(pci_dev);
-
-	return 0;
-}
-
-static int
-ali_setup_multi_channels(struct trident_card *card, int chan_nums)
-{
-	unsigned long dwValue;
-	char temp = 0;
-	struct pci_dev *pci_dev = NULL;
-
-	pci_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533,
-				 pci_dev);
-	if (pci_dev == NULL)
-		return -1;
-	pci_read_config_byte(pci_dev, 0x59, &temp);
-	temp |= 0x80;
-	pci_write_config_byte(pci_dev, 0x59, temp);
-
-	pci_dev_put(pci_dev);
-
-	pci_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101,
-				 NULL);
-	if (pci_dev == NULL)
-		return -1;
-	pci_read_config_byte(pci_dev, (int) 0xB8, &temp);
-	temp |= 0x20;
-	pci_write_config_byte(pci_dev, (int) 0xB8, (u8) temp);
-
-	pci_dev_put(pci_dev);
-
-	if (chan_nums == 6) {
-		dwValue = inl(TRID_REG(card, ALI_SCTRL)) | 0x000f0000;
-		outl(dwValue, TRID_REG(card, ALI_SCTRL));
-		mdelay(4);
-		dwValue = inl(TRID_REG(card, ALI_SCTRL));
-		if (dwValue & 0x2000000) {
-			ali_ac97_write(card->ac97_codec[0], 0x02, 8080);
-			ali_ac97_write(card->ac97_codec[0], 0x36, 0);
-			ali_ac97_write(card->ac97_codec[0], 0x38, 0);
-			/*
-			 *      On a board with a single codec you won't get the
-			 *      surround. On other boards configure it.
-			 */
-			if (card->ac97_codec[1] != NULL) {
-				ali_ac97_write(card->ac97_codec[1], 0x36, 0);
-				ali_ac97_write(card->ac97_codec[1], 0x38, 0);
-				ali_ac97_write(card->ac97_codec[1], 0x02, 0x0606);
-				ali_ac97_write(card->ac97_codec[1], 0x18, 0x0303);
-				ali_ac97_write(card->ac97_codec[1], 0x74, 0x3);
-			}
-			return 1;
-		}
-	}
-	return -EINVAL;
-}
-
-static void
-ali_free_pcm_channel(struct trident_card *card, unsigned int channel)
-{
-	int bank;
-
-	if (channel > 31)
-		return;
-
-	bank = channel >> 5;
-	channel = channel & 0x1f;
-
-	card->banks[bank].bitmap &= ~(1 << (channel));
-}
-
-static int
-ali_allocate_other_states_resources(struct trident_state *state, int chan_nums)
-{
-	struct trident_card *card = state->card;
-	struct trident_state *s;
-	int i, state_count = 0;
-	struct trident_pcm_bank *bank;
-	struct trident_channel *channel;
-	unsigned long num;
-
-	bank = &card->banks[BANK_A];
-
-	if (chan_nums != 6)
-		return 0;
-
-	for (i = 0; (i < ALI_CHANNELS) && (state_count != 4); i++) {
-		if (card->states[i])
-			continue;
-
-		num = ali_multi_channels_5_1[state_count];
-		if (!(bank->bitmap & (1 << num))) {
-			bank->bitmap |= 1 << num;
-			channel = &bank->channels[num];
-			channel->num = num;
-		} else {
-			state_count--;
-			for (; state_count >= 0; state_count--) {
-				kfree(state->other_states[state_count]);
-				num = ali_multi_channels_5_1[state_count];
-					ali_free_pcm_channel(card, num);
-			}
-			return -EBUSY;
-		}
-		s = card->states[i] = kzalloc(sizeof(*state), GFP_KERNEL);
-		if (!s) {
-			num = ali_multi_channels_5_1[state_count];
-			ali_free_pcm_channel(card, num);
-			state_count--;
-			for (; state_count >= 0; state_count--) {
-				num = ali_multi_channels_5_1[state_count];
-				ali_free_pcm_channel(card, num);
-				kfree(state->other_states[state_count]);
-			}
-			return -ENOMEM;
-		}
-
-		s->dmabuf.channel = channel;
-		s->dmabuf.ossfragshift = s->dmabuf.ossmaxfrags =
-			s->dmabuf.subdivision = 0;
-		init_waitqueue_head(&s->dmabuf.wait);
-		s->magic = card->magic;
-		s->card = card;
-		s->virt = i;
-		ali_enable_special_channel(s);
-		state->other_states[state_count++] = s;
-	}
-
-	if (state_count != 4) {
-		state_count--;
-		for (; state_count >= 0; state_count--) {
-			kfree(state->other_states[state_count]);
-			num = ali_multi_channels_5_1[state_count];
-			ali_free_pcm_channel(card, num);
-		}
-		return -EBUSY;
-	}
-	return 0;
-}
-
-#ifdef CONFIG_PM
-/* save registers for ALi Power Management */
-static struct ali_saved_registers {
-	unsigned long global_regs[ALI_GLOBAL_REGS];
-	unsigned long channel_regs[ALI_CHANNELS][ALI_CHANNEL_REGS];
-	unsigned mixer_regs[ALI_MIXER_REGS];
-} ali_registers;
-
-static void
-ali_save_regs(struct trident_card *card)
-{
-	unsigned long flags;
-	int i, j;
-
-	spin_lock_irqsave(&card->lock, flags);
-
-	ali_registers.global_regs[0x2c] = inl(TRID_REG(card, T4D_MISCINT));
-	//ali_registers.global_regs[0x20] = inl(TRID_REG(card,T4D_START_A));
-	ali_registers.global_regs[0x21] = inl(TRID_REG(card, T4D_STOP_A));
-
-	//disable all IRQ bits
-	outl(ALI_DISABLE_ALL_IRQ, TRID_REG(card, T4D_MISCINT));
-
-	for (i = 1; i < ALI_MIXER_REGS; i++)
-		ali_registers.mixer_regs[i] = ali_ac97_read(card->ac97_codec[0],
-							    i * 2);
-
-	for (i = 0; i < ALI_GLOBAL_REGS; i++) {
-		if ((i * 4 == T4D_MISCINT) || (i * 4 == T4D_STOP_A))
-			continue;
-		ali_registers.global_regs[i] = inl(TRID_REG(card, i * 4));
-	}
-
-	for (i = 0; i < ALI_CHANNELS; i++) {
-		outb(i, TRID_REG(card, T4D_LFO_GC_CIR));
-		for (j = 0; j < ALI_CHANNEL_REGS; j++)
-			ali_registers.channel_regs[i][j] = inl(TRID_REG(card,
-									j * 4 + 0xe0));
-	}
-
-	//Stop all HW channel
-	outl(ALI_STOP_ALL_CHANNELS, TRID_REG(card, T4D_STOP_A));
-
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-static void
-ali_restore_regs(struct trident_card *card)
-{
-	unsigned long flags;
-	int i, j;
-
-	spin_lock_irqsave(&card->lock, flags);
-
-	for (i = 1; i < ALI_MIXER_REGS; i++)
-		ali_ac97_write(card->ac97_codec[0], i * 2,
-			       ali_registers.mixer_regs[i]);
-
-	for (i = 0; i < ALI_CHANNELS; i++) {
-		outb(i, TRID_REG(card, T4D_LFO_GC_CIR));
-		for (j = 0; j < ALI_CHANNEL_REGS; j++)
-			outl(ali_registers.channel_regs[i][j],
-			     TRID_REG(card, j * 4 + 0xe0));
-	}
-
-	for (i = 0; i < ALI_GLOBAL_REGS; i++) {
-		if ((i * 4 == T4D_MISCINT) || (i * 4 == T4D_STOP_A) ||
-		    (i * 4 == T4D_START_A))
-			continue;
-		outl(ali_registers.global_regs[i], TRID_REG(card, i * 4));
-	}
-
-	//start HW channel
-	outl(ali_registers.global_regs[0x20], TRID_REG(card, T4D_START_A));
-	//restore IRQ enable bits
-	outl(ali_registers.global_regs[0x2c], TRID_REG(card, T4D_MISCINT));
-
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-static int
-trident_suspend(struct pci_dev *dev, pm_message_t unused)
-{
-	struct trident_card *card = pci_get_drvdata(dev);
-
-	if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-		ali_save_regs(card);
-	}
-	return 0;
-}
-
-static int
-trident_resume(struct pci_dev *dev)
-{
-	struct trident_card *card = pci_get_drvdata(dev);
-
-	if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-		ali_restore_regs(card);
-	}
-	return 0;
-}
-#endif
-
-static struct trident_channel *
-ali_alloc_pcm_channel(struct trident_card *card)
-{
-	struct trident_pcm_bank *bank;
-	int idx;
-
-	bank = &card->banks[BANK_A];
-
-	if (inl(TRID_REG(card, ALI_GLOBAL_CONTROL)) &
-	    (ALI_SPDIF_OUT_CH_ENABLE)) {
-		idx = ALI_SPDIF_OUT_CHANNEL;
-		if (!(bank->bitmap & (1 << idx))) {
-			struct trident_channel *channel = &bank->channels[idx];
-			bank->bitmap |= 1 << idx;
-			channel->num = idx;
-			return channel;
-		}
-	}
-
-	for (idx = ALI_PCM_OUT_CHANNEL_FIRST; idx <= ALI_PCM_OUT_CHANNEL_LAST;
-	     idx++) {
-		if (!(bank->bitmap & (1 << idx))) {
-			struct trident_channel *channel = &bank->channels[idx];
-			bank->bitmap |= 1 << idx;
-			channel->num = idx;
-			return channel;
-		}
-	}
-
-	/* no more free channels avaliable */
-#if 0
-	printk(KERN_ERR "ali: no more channels available on Bank A.\n");
-#endif /* 0 */
-	return NULL;
-}
-
-static struct trident_channel *
-ali_alloc_rec_pcm_channel(struct trident_card *card)
-{
-	struct trident_pcm_bank *bank;
-	int idx;
-
-	if (inl(TRID_REG(card, ALI_GLOBAL_CONTROL)) & ALI_SPDIF_IN_SUPPORT)
-		idx = ALI_SPDIF_IN_CHANNEL;
-	else
-		idx = ALI_PCM_IN_CHANNEL;
-
-	bank = &card->banks[BANK_A];
-
-	if (!(bank->bitmap & (1 << idx))) {
-		struct trident_channel *channel = &bank->channels[idx];
-		bank->bitmap |= 1 << idx;
-		channel->num = idx;
-		return channel;
-	}
-
-	/* no free recordable channels avaliable */
-#if 0
-	printk(KERN_ERR "ali: no recordable channels available on Bank A.\n");
-#endif /* 0 */
-	return NULL;
-}
-
-static void
-ali_set_spdif_out_rate(struct trident_card *card, unsigned int rate)
-{
-	unsigned char ch_st_sel;
-	unsigned short status_rate;
-
-	switch (rate) {
-	case 44100:
-		status_rate = 0;
-		break;
-	case 32000:
-		status_rate = 0x300;
-		break;
-	case 48000:
-	default:
-		status_rate = 0x200;
-		break;
-	}
-
-	/* select spdif_out */
-	ch_st_sel = inb(TRID_REG(card, ALI_SPDIF_CTRL)) & ALI_SPDIF_OUT_CH_STATUS;
-
-	ch_st_sel |= 0x80;	/* select right */
-	outb(ch_st_sel, TRID_REG(card, ALI_SPDIF_CTRL));
-	outb(status_rate | 0x20, TRID_REG(card, ALI_SPDIF_CS + 2));
-
-	ch_st_sel &= (~0x80);	/* select left */
-	outb(ch_st_sel, TRID_REG(card, ALI_SPDIF_CTRL));
-	outw(status_rate | 0x10, TRID_REG(card, ALI_SPDIF_CS + 2));
-}
-
-static void
-ali_address_interrupt(struct trident_card *card)
-{
-	int i, channel;
-	struct trident_state *state;
-	u32 mask, channel_mask;
-
-	mask = trident_get_interrupt_mask(card, 0);
-	for (i = 0; i < NR_HW_CH; i++) {
-		if ((state = card->states[i]) == NULL)
-			continue;
-		channel = state->dmabuf.channel->num;
-		if ((channel_mask = 1 << channel) & mask) {
-			mask &= ~channel_mask;
-			trident_ack_channel_interrupt(card, channel);
-			udelay(100);
-			state->dmabuf.update_flag |= ALI_ADDRESS_INT_UPDATE;
-			trident_update_ptr(state);
-		}
-	}
-	if (mask) {
-		for (i = 0; i < NR_HW_CH; i++) {
-			if (mask & (1 << i)) {
-				printk("ali: spurious channel irq %d.\n", i);
-				trident_ack_channel_interrupt(card, i);
-				trident_stop_voice(card, i);
-				trident_disable_voice_irq(card, i);
-			}
-		}
-	}
-}
-
-/* Updating the values of counters of other_states' DMAs without lock
-protection is no harm because all DMAs of multi-channels and interrupt
-depend on a master state's DMA, and changing the counters of the master
-state DMA is protected by a spinlock.
-*/
-static int
-ali_write_5_1(struct trident_state *state, const char __user *buf,
-	      int cnt_for_multi_channel, unsigned int *copy_count,
-	      unsigned int *state_cnt)
-{
-
-	struct dmabuf *dmabuf = &state->dmabuf;
-	struct dmabuf *dmabuf_temp;
-	const char __user *buffer = buf;
-	unsigned swptr, other_dma_nums, sample_s;
-	unsigned int i, loop;
-
-	other_dma_nums = 4;
-	sample_s = sample_size[dmabuf->fmt] >> 1;
-	swptr = dmabuf->swptr;
-
-	if ((i = state->multi_channels_adjust_count) > 0) {
-		if (i == 1) {
-			if (copy_from_user(dmabuf->rawbuf + swptr,
-					   buffer, sample_s))
-				return -EFAULT;
-			seek_offset(swptr, buffer, cnt_for_multi_channel,
-				    sample_s, *copy_count);
-			i--;
-			(*state_cnt) += sample_s;
-			state->multi_channels_adjust_count++;
-		} else
-			i = i - (state->chans_num - other_dma_nums);
-		for (; (i < other_dma_nums) && (cnt_for_multi_channel > 0); i++) {
-			dmabuf_temp = &state->other_states[i]->dmabuf;
-			if (copy_from_user(dmabuf_temp->rawbuf + dmabuf_temp->swptr,
-					   buffer, sample_s))
-				return -EFAULT;
-			seek_offset(dmabuf_temp->swptr, buffer, cnt_for_multi_channel,
-				    sample_s, *copy_count);
-		}
-		if (cnt_for_multi_channel == 0)
-			state->multi_channels_adjust_count += i;
-	}
-	if (cnt_for_multi_channel > 0) {
-		loop = cnt_for_multi_channel / (state->chans_num * sample_s);
-		for (i = 0; i < loop; i++) {
-			if (copy_from_user(dmabuf->rawbuf + swptr, buffer,
-					   sample_s * 2))
-				return -EFAULT;
-			seek_offset(swptr, buffer, cnt_for_multi_channel,
-				    sample_s * 2, *copy_count);
-			(*state_cnt) += (sample_s * 2);
-
-			dmabuf_temp = &state->other_states[0]->dmabuf;
-			if (copy_from_user(dmabuf_temp->rawbuf + dmabuf_temp->swptr,
-					   buffer, sample_s))
-				return -EFAULT;
-			seek_offset(dmabuf_temp->swptr, buffer, cnt_for_multi_channel,
-				    sample_s, *copy_count);
-
-			dmabuf_temp = &state->other_states[1]->dmabuf;
-			if (copy_from_user(dmabuf_temp->rawbuf + dmabuf_temp->swptr,
-					   buffer, sample_s))
-				return -EFAULT;
-			seek_offset(dmabuf_temp->swptr, buffer, cnt_for_multi_channel,
-				    sample_s, *copy_count);
-
-			dmabuf_temp = &state->other_states[2]->dmabuf;
-			if (copy_from_user(dmabuf_temp->rawbuf + dmabuf_temp->swptr,
-					   buffer, sample_s))
-				return -EFAULT;
-			seek_offset(dmabuf_temp->swptr, buffer, cnt_for_multi_channel,
-				    sample_s, *copy_count);
-
-			dmabuf_temp = &state->other_states[3]->dmabuf;
-			if (copy_from_user(dmabuf_temp->rawbuf + dmabuf_temp->swptr,
-					   buffer, sample_s))
-				return -EFAULT;
-			seek_offset(dmabuf_temp->swptr, buffer, cnt_for_multi_channel,
-				    sample_s, *copy_count);
-		}
-
-		if (cnt_for_multi_channel > 0) {
-			state->multi_channels_adjust_count = cnt_for_multi_channel / sample_s;
-
-			if (copy_from_user(dmabuf->rawbuf + swptr, buffer, sample_s))
-				return -EFAULT;
-			seek_offset(swptr, buffer, cnt_for_multi_channel,
-				    sample_s, *copy_count);
-			(*state_cnt) += sample_s;
-
-			if (cnt_for_multi_channel > 0) {
-				if (copy_from_user(dmabuf->rawbuf + swptr,
-						   buffer, sample_s))
-					return -EFAULT;
-				seek_offset(swptr, buffer, cnt_for_multi_channel,
-					    sample_s, *copy_count);
-				(*state_cnt) += sample_s;
-
-				if (cnt_for_multi_channel > 0) {
-					int diff = state->chans_num - other_dma_nums;
-					loop = state->multi_channels_adjust_count - diff;
-					for (i = 0; i < loop; i++) {
-						dmabuf_temp = &state->other_states[i]->dmabuf;
-						if (copy_from_user(dmabuf_temp->rawbuf +
-								   dmabuf_temp->swptr,
-								   buffer, sample_s))
-							return -EFAULT;
-						seek_offset(dmabuf_temp->swptr, buffer,
-							    cnt_for_multi_channel,
-							    sample_s, *copy_count);
-					}
-				}
-			}
-		} else
-			state->multi_channels_adjust_count = 0;
-	}
-	for (i = 0; i < other_dma_nums; i++) {
-		dmabuf_temp = &state->other_states[i]->dmabuf;
-		dmabuf_temp->swptr = dmabuf_temp->swptr % dmabuf_temp->dmasize;
-	}
-	return *state_cnt;
-}
-
-static void
-ali_free_other_states_resources(struct trident_state *state)
-{
-	int i;
-	struct trident_card *card = state->card;
-	struct trident_state *s;
-	unsigned other_states_count;
-
-	other_states_count = state->chans_num - 2; /* except PCM L/R channels */
-	for (i = 0; i < other_states_count; i++) {
-		s = state->other_states[i];
-		dealloc_dmabuf(&s->dmabuf, card->pci_dev);
-		ali_disable_special_channel(s->card, s->dmabuf.channel->num);
-		state->card->free_pcm_channel(s->card, s->dmabuf.channel->num);
-		card->states[s->virt] = NULL;
-		kfree(s);
-	}
-}
-
-static struct proc_dir_entry *res;
-
-static int
-ali_write_proc(struct file *file, const char __user *buffer, unsigned long count, void *data)
-{
-	struct trident_card *card = (struct trident_card *) data;
-	unsigned long flags;
-	char c;
-
-	if (count < 0)
-		return -EINVAL;
-	if (count == 0)
-		return 0;
-	if (get_user(c, buffer))
-		return -EFAULT;
-
-	spin_lock_irqsave(&card->lock, flags);
-	switch (c) {
-	case '0':
-		ali_setup_spdif_out(card, ALI_PCM_TO_SPDIF_OUT);
-		ali_disable_special_channel(card, ALI_SPDIF_OUT_CHANNEL);
-		break;
-	case '1':
-		ali_setup_spdif_out(card, ALI_SPDIF_OUT_TO_SPDIF_OUT |
-				    ALI_SPDIF_OUT_PCM);
-		break;
-	case '2':
-		ali_setup_spdif_out(card, ALI_SPDIF_OUT_TO_SPDIF_OUT |
-				    ALI_SPDIF_OUT_NON_PCM);
-		break;
-	case '3':
-		ali_disable_spdif_in(card);	//default
-		break;
-	case '4':
-		ali_setup_spdif_in(card);
-		break;
-	}
-	spin_unlock_irqrestore(&card->lock, flags);
-
-	return count;
-}
-
-/* OSS /dev/mixer file operation methods */
-static int
-trident_open_mixdev(struct inode *inode, struct file *file)
-{
-	int i = 0;
-	int minor = iminor(inode);
-	struct trident_card *card = devs;
-
-	for (card = devs; card != NULL; card = card->next)
-		for (i = 0; i < NR_AC97; i++)
-			if (card->ac97_codec[i] != NULL &&
-			    card->ac97_codec[i]->dev_mixer == minor)
-				goto match;
-
-	if (!card) {
-		return -ENODEV;
-	}
-      match:
-	file->private_data = card->ac97_codec[i];
-
-	return nonseekable_open(inode, file);
-}
-
-static int
-trident_ioctl_mixdev(struct inode *inode, struct file *file, unsigned int cmd,
-		     unsigned long arg)
-{
-	struct ac97_codec *codec = (struct ac97_codec *) file->private_data;
-
-	return codec->mixer_ioctl(codec, cmd, arg);
-}
-
-static const struct file_operations trident_mixer_fops = {
-	.owner = THIS_MODULE,
-	.llseek = no_llseek,
-	.ioctl = trident_ioctl_mixdev,
-	.open = trident_open_mixdev,
-};
-
-static int
-ali_reset_5451(struct trident_card *card)
-{
-	struct pci_dev *pci_dev = NULL;
-	unsigned int dwVal;
-	unsigned short wCount, wReg;
-
-	pci_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533,
-				 pci_dev);
-	if (pci_dev == NULL)
-		return -1;
-
-	pci_read_config_dword(pci_dev, 0x7c, &dwVal);
-	pci_write_config_dword(pci_dev, 0x7c, dwVal | 0x08000000);
-	udelay(5000);
-	pci_read_config_dword(pci_dev, 0x7c, &dwVal);
-	pci_write_config_dword(pci_dev, 0x7c, dwVal & 0xf7ffffff);
-	udelay(5000);
-	pci_dev_put(pci_dev);
-
-	pci_dev = card->pci_dev;
-	if (pci_dev == NULL)
-		return -1;
-
-	pci_read_config_dword(pci_dev, 0x44, &dwVal);
-	pci_write_config_dword(pci_dev, 0x44, dwVal | 0x000c0000);
-	udelay(500);
-	pci_read_config_dword(pci_dev, 0x44, &dwVal);
-	pci_write_config_dword(pci_dev, 0x44, dwVal & 0xfffbffff);
-	udelay(5000);
-
-	/* TODO: recognize if we have a PM capable codec and only do this */
-	/* if the codec is PM capable */
-	wCount = 2000;
-	while (wCount--) {
-		wReg = ali_ac97_get(card, 0, AC97_POWER_CONTROL);
-		if ((wReg & 0x000f) == 0x000f)
-			return 0;
-		udelay(5000);
-	}
-	/* This is non fatal if you have a non PM capable codec.. */
-	return 0;
-}
-
-/* AC97 codec initialisation. */
-static int __devinit
-trident_ac97_init(struct trident_card *card)
-{
-	int num_ac97 = 0;
-	unsigned long ready_2nd = 0;
-	struct ac97_codec *codec;
-	int i = 0;
-
-	/* initialize controller side of AC link, and find out if secondary codes
-	   really exist */
-	switch (card->pci_id) {
-	case PCI_DEVICE_ID_ALI_5451:
-		if (ali_reset_5451(card)) {
-			printk(KERN_ERR "trident_ac97_init: error "
-			       "resetting 5451.\n");
-			return -1;
-		}
-		outl(0x80000001, TRID_REG(card, ALI_GLOBAL_CONTROL));
-		outl(0x00000000, TRID_REG(card, T4D_AINTEN_A));
-		outl(0xffffffff, TRID_REG(card, T4D_AINT_A));
-		outl(0x00000000, TRID_REG(card, T4D_MUSICVOL_WAVEVOL));
-		outb(0x10, TRID_REG(card, ALI_MPUR2));
-		ready_2nd = inl(TRID_REG(card, ALI_SCTRL));
-		ready_2nd &= 0x3fff;
-		outl(ready_2nd | PCMOUT | 0x8000, TRID_REG(card, ALI_SCTRL));
-		ready_2nd = inl(TRID_REG(card, ALI_SCTRL));
-		ready_2nd &= SI_AC97_SECONDARY_READY;
-		if (card->revision < ALI_5451_V02)
-			ready_2nd = 0;
-		break;
-	case PCI_DEVICE_ID_SI_7018:
-		/* disable AC97 GPIO interrupt */
-		outl(0x00, TRID_REG(card, SI_AC97_GPIO));
-		/* when power up the AC link is in cold reset mode so stop it */
-		outl(PCMOUT | SURROUT | CENTEROUT | LFEOUT | SECONDARY_ID,
-		     TRID_REG(card, SI_SERIAL_INTF_CTRL));
-		/* it take a long time to recover from a cold reset */
-		/* (especially when you have more than one codec) */
-		udelay(2000);
-		ready_2nd = inl(TRID_REG(card, SI_SERIAL_INTF_CTRL));
-		ready_2nd &= SI_AC97_SECONDARY_READY;
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_DX:
-		/* playback on */
-		outl(DX_AC97_PLAYBACK, TRID_REG(card, DX_ACR2_AC97_COM_STAT));
-		break;
-	case PCI_DEVICE_ID_TRIDENT_4DWAVE_NX:
-		/* enable AC97 Output Slot 3,4 (PCM Left/Right Playback) */
-		outl(NX_AC97_PCM_OUTPUT, TRID_REG(card, NX_ACR0_AC97_COM_STAT));
-		ready_2nd = inl(TRID_REG(card, NX_ACR0_AC97_COM_STAT));
-		ready_2nd &= NX_AC97_SECONDARY_READY;
-		break;
-	case PCI_DEVICE_ID_INTERG_5050:
-		/* disable AC97 GPIO interrupt */
-		outl(0x00, TRID_REG(card, SI_AC97_GPIO));
-		/* when power up, the AC link is in cold reset mode, so stop it */
-		outl(PCMOUT | SURROUT | CENTEROUT | LFEOUT,
-		     TRID_REG(card, SI_SERIAL_INTF_CTRL));
-		/* it take a long time to recover from a cold reset (especially */
-		/* when you have more than one codec) */
-		udelay(2000);
-		ready_2nd = inl(TRID_REG(card, SI_SERIAL_INTF_CTRL));
-		ready_2nd &= SI_AC97_SECONDARY_READY;
-		break;
-	}
-
-	for (num_ac97 = 0; num_ac97 < NR_AC97; num_ac97++) {
-		if ((codec = ac97_alloc_codec()) == NULL)
-			return -ENOMEM;
-
-		/* initialize some basic codec information, other fields */
-		/* will be filled in ac97_probe_codec */
-		codec->private_data = card;
-		codec->id = num_ac97;
-
-		if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-			codec->codec_read = ali_ac97_read;
-			codec->codec_write = ali_ac97_write;
-		} else {
-			codec->codec_read = trident_ac97_get;
-			codec->codec_write = trident_ac97_set;
-		}
-
-		if (ac97_probe_codec(codec) == 0)
-			break;
-
-		codec->dev_mixer = register_sound_mixer(&trident_mixer_fops, -1);
-		if (codec->dev_mixer < 0) {
-			printk(KERN_ERR "trident: couldn't register mixer!\n");
-			ac97_release_codec(codec);
-			break;
-		}
-
-		card->ac97_codec[num_ac97] = codec;
-
-		/* if there is no secondary codec at all, don't probe any more */
-		if (!ready_2nd)
-			break;
-	}
-
-	if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-		for (num_ac97 = 0; num_ac97 < NR_AC97; num_ac97++) {
-			if (card->ac97_codec[num_ac97] == NULL)
-				break;
-			for (i = 0; i < 64; i++) {
-				u16 reg = ali_ac97_get(card, num_ac97, i * 2);
-				card->mixer_regs[i][num_ac97] = reg;
-			}
-		}
-	}
-	return num_ac97 + 1;
-}
-
-#ifdef SUPPORT_JOYSTICK
-/* Gameport functions for the cards ADC gameport */
-
-static unsigned char trident_game_read(struct gameport *gameport)
-{
-	struct trident_card *card = gameport->port_data;
-
-	return inb(TRID_REG(card, T4D_GAME_LEG));
-}
-
-static void trident_game_trigger(struct gameport *gameport)
-{
-	struct trident_card *card = gameport->port_data;
-
-	outb(0xff, TRID_REG(card, T4D_GAME_LEG));
-}
-
-static int trident_game_cooked_read(struct gameport *gameport,
-				    int *axes, int *buttons)
-{
-	struct trident_card *card = gameport->port_data;
-	int i;
-
-	*buttons = (~inb(TRID_REG(card, T4D_GAME_LEG)) >> 4) & 0xf;
-
-	for (i = 0; i < 4; i++) {
-		axes[i] = inw(TRID_REG(card, T4D_GAME_AXD) + i * sizeof (u16));
-		if (axes[i] == 0xffff)
-			axes[i] = -1;
-	}
-
-	return 0;
-}
-
-static int trident_game_open(struct gameport *gameport, int mode)
-{
-	struct trident_card *card = gameport->port_data;
-
-	switch (mode) {
-	case GAMEPORT_MODE_COOKED:
-		outb(0x80, TRID_REG(card, T4D_GAME_CR));
-		msleep(20);
-		return 0;
-	case GAMEPORT_MODE_RAW:
-		outb(0x00, TRID_REG(card, T4D_GAME_CR));
-		return 0;
-	default:
-		return -1;
-	}
-
-	return 0;
-}
-
-static int __devinit trident_register_gameport(struct trident_card *card)
-{
-	struct gameport *gp;
-
-	card->gameport = gp = gameport_allocate_port();
-	if (!gp) {
-		printk(KERN_ERR "trident: can not allocate memory for gameport\n");
-		return -ENOMEM;
-	}
-
-	gameport_set_name(gp, "Trident 4DWave");
-	gameport_set_phys(gp, "pci%s/gameport0", pci_name(card->pci_dev));
-	gp->read = trident_game_read;
-	gp->trigger = trident_game_trigger;
-	gp->cooked_read = trident_game_cooked_read;
-	gp->open = trident_game_open;
-	gp->fuzz = 64;
-	gp->port_data = card;
-
-	gameport_register_port(gp);
-
-	return 0;
-}
-
-static inline void trident_unregister_gameport(struct trident_card *card)
-{
-	if (card->gameport)
-		gameport_unregister_port(card->gameport);
-}
-
-#else
-static inline int trident_register_gameport(struct trident_card *card) { return -ENOSYS; }
-static inline void trident_unregister_gameport(struct trident_card *card) { }
-#endif /* SUPPORT_JOYSTICK */
-
-/* install the driver, we do not allocate hardware channel nor DMA buffer */
-/* now, they are defered until "ACCESS" time (in prog_dmabuf called by */
-/* open/read/write/ioctl/mmap) */
-static int __devinit
-trident_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_id)
-{
-	unsigned long iobase;
-	struct trident_card *card;
-	u8 bits;
-	u8 revision;
-	int i = 0;
-	u16 temp;
-	struct pci_dev *pci_dev_m1533 = NULL;
-	int rc = -ENODEV;
-	u64 dma_mask;
-
-	if (pci_enable_device(pci_dev))
-		goto out;
-
-	if (pci_dev->device == PCI_DEVICE_ID_ALI_5451)
-		dma_mask = ALI_DMA_MASK;
-	else
-		dma_mask = TRIDENT_DMA_MASK;
-	if (pci_set_dma_mask(pci_dev, dma_mask)) {
-		printk(KERN_ERR "trident: architecture does not support"
-		       " %s PCI busmaster DMA\n",
-		       pci_dev->device == PCI_DEVICE_ID_ALI_5451 ?
-		       "32-bit" : "30-bit");
-		goto out;
-	}
-	pci_read_config_byte(pci_dev, PCI_CLASS_REVISION, &revision);
-
-	if (pci_id->device == PCI_DEVICE_ID_INTERG_5050)
-		iobase = pci_resource_start(pci_dev, 1);
-	else
-		iobase = pci_resource_start(pci_dev, 0);
-
-	if (!request_region(iobase, 256, card_names[pci_id->driver_data])) {
-		printk(KERN_ERR "trident: can't allocate I/O space at "
-		       "0x%4.4lx\n", iobase);
-		goto out;
-	}
-
-	rc = -ENOMEM;
-	if ((card = kzalloc(sizeof(*card), GFP_KERNEL)) == NULL) {
-		printk(KERN_ERR "trident: out of memory\n");
-		goto out_release_region;
-	}
-
-	init_timer(&card->timer);
-	card->iobase = iobase;
-	card->pci_dev = pci_dev_get(pci_dev);
-	card->pci_id = pci_id->device;
-	card->revision = revision;
-	card->irq = pci_dev->irq;
-	card->next = devs;
-	card->magic = TRIDENT_CARD_MAGIC;
-	card->banks[BANK_A].addresses = &bank_a_addrs;
-	card->banks[BANK_A].bitmap = 0UL;
-	card->banks[BANK_B].addresses = &bank_b_addrs;
-	card->banks[BANK_B].bitmap = 0UL;
-
-	mutex_init(&card->open_mutex);
-	spin_lock_init(&card->lock);
-	init_timer(&card->timer);
-
-	devs = card;
-
-	pci_set_master(pci_dev);
-
-	printk(KERN_INFO "trident: %s found at IO 0x%04lx, IRQ %d\n",
-	       card_names[pci_id->driver_data], card->iobase, card->irq);
-
-	if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-		/* ALi channel Management */
-		card->alloc_pcm_channel = ali_alloc_pcm_channel;
-		card->alloc_rec_pcm_channel = ali_alloc_rec_pcm_channel;
-		card->free_pcm_channel = ali_free_pcm_channel;
-
-		card->address_interrupt = ali_address_interrupt;
-
-		/* Added by Matt Wu 01-05-2001 for spdif in */
-		card->multi_channel_use_count = 0;
-		card->rec_channel_use_count = 0;
-
-		/* ALi SPDIF OUT function */
-		if (card->revision == ALI_5451_V02) {
-			ali_setup_spdif_out(card, ALI_PCM_TO_SPDIF_OUT);
-			res = create_proc_entry("ALi5451", 0, NULL);
-			if (res) {
-				res->write_proc = ali_write_proc;
-				res->data = card;
-			}
-		}
-
-		/* Add H/W Volume Control By Matt Wu Jul. 06, 2001 */
-		card->hwvolctl = 0;
-		pci_dev_m1533 = pci_get_device(PCI_VENDOR_ID_AL,
-					       PCI_DEVICE_ID_AL_M1533,
-					       pci_dev_m1533);
-		rc = -ENODEV;
-		if (pci_dev_m1533 == NULL)
-			goto out_proc_fs;
-		pci_read_config_byte(pci_dev_m1533, 0x63, &bits);
-		if (bits & (1 << 5))
-			card->hwvolctl = 1;
-		if (card->hwvolctl) {
-			/* Clear m1533 pci cfg 78h bit 30 to zero, which makes
-			   GPIO11/12/13 work as ACGP_UP/DOWN/MUTE. */
-			pci_read_config_byte(pci_dev_m1533, 0x7b, &bits);
-			bits &= 0xbf;	/*clear bit 6 */
-			pci_write_config_byte(pci_dev_m1533, 0x7b, bits);
-		}
-		pci_dev_put(pci_dev_m1533);
-
-	} else if (card->pci_id == PCI_DEVICE_ID_INTERG_5050) {
-		card->alloc_pcm_channel = cyber_alloc_pcm_channel;
-		card->alloc_rec_pcm_channel = cyber_alloc_pcm_channel;
-		card->free_pcm_channel = cyber_free_pcm_channel;
-		card->address_interrupt = cyber_address_interrupt;
-		cyber_init_ritual(card);
-	} else {
-		card->alloc_pcm_channel = trident_alloc_pcm_channel;
-		card->alloc_rec_pcm_channel = trident_alloc_pcm_channel;
-		card->free_pcm_channel = trident_free_pcm_channel;
-		card->address_interrupt = trident_address_interrupt;
-	}
-
-	/* claim our irq */
-	rc = -ENODEV;
-	if (request_irq(card->irq, &trident_interrupt, IRQF_SHARED,
-			card_names[pci_id->driver_data], card)) {
-		printk(KERN_ERR "trident: unable to allocate irq %d\n",
-		       card->irq);
-		goto out_proc_fs;
-	}
-	/* register /dev/dsp */
-	if ((card->dev_audio = register_sound_dsp(&trident_audio_fops, -1)) < 0) {
-		printk(KERN_ERR "trident: couldn't register DSP device!\n");
-		goto out_free_irq;
-	}
-	card->mixer_regs_ready = 0;
-	/* initialize AC97 codec and register /dev/mixer */
-	if (trident_ac97_init(card) <= 0) {
-		/* unregister audio devices */
-		for (i = 0; i < NR_AC97; i++) {
-			if (card->ac97_codec[i] != NULL) {
-				struct ac97_codec* codec = card->ac97_codec[i];
-				unregister_sound_mixer(codec->dev_mixer);
-				ac97_release_codec(codec);
-			}
-		}
-		goto out_unregister_sound_dsp;
-	}
-	card->mixer_regs_ready = 1;
-	outl(0x00, TRID_REG(card, T4D_MUSICVOL_WAVEVOL));
-
-	if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-		/* Add H/W Volume Control By Matt Wu Jul. 06, 2001 */
-		if (card->hwvolctl) {
-			/* Enable GPIO IRQ (MISCINT bit 18h) */
-			temp = inw(TRID_REG(card, T4D_MISCINT + 2));
-			temp |= 0x0004;
-			outw(temp, TRID_REG(card, T4D_MISCINT + 2));
-
-			/* Enable H/W Volume Control GLOVAL CONTROL bit 0 */
-			temp = inw(TRID_REG(card, ALI_GLOBAL_CONTROL));
-			temp |= 0x0001;
-			outw(temp, TRID_REG(card, ALI_GLOBAL_CONTROL));
-
-		}
-		if (card->revision == ALI_5451_V02)
-			ali_close_multi_channels();
-		/* edited by HMSEO for GT sound */
-#if defined(CONFIG_ALPHA_NAUTILUS) || defined(CONFIG_ALPHA_GENERIC)
-		{
-			u16 ac97_data;
-			extern struct hwrpb_struct *hwrpb;
-
-			if ((hwrpb->sys_type) == 201) {
-				printk(KERN_INFO "trident: Running on Alpha system "
-				       "type Nautilus\n");
-				ac97_data = ali_ac97_get(card, 0, AC97_POWER_CONTROL);
-				ali_ac97_set(card, 0, AC97_POWER_CONTROL,
-					     ac97_data | ALI_EAPD_POWER_DOWN);
-			}
-		}
-#endif				/* CONFIG_ALPHA_NAUTILUS || CONFIG_ALPHA_GENERIC */
-		/* edited by HMSEO for GT sound */
-	}
-	rc = 0;
-	pci_set_drvdata(pci_dev, card);
-
-	/* Enable Address Engine Interrupts */
-	trident_enable_loop_interrupts(card);
-
-	/* Register gameport */
-	trident_register_gameport(card);
-
-out:
-	return rc;
-
-out_unregister_sound_dsp:
-	unregister_sound_dsp(card->dev_audio);
-out_free_irq:
-	free_irq(card->irq, card);
-out_proc_fs:
-	pci_dev_put(card->pci_dev);
-	if (res) {
-		remove_proc_entry("ALi5451", NULL);
-		res = NULL;
-	}
-	kfree(card);
-	devs = NULL;
-out_release_region:
-	release_region(iobase, 256);
-	return rc;
-}
-
-static void __devexit
-trident_remove(struct pci_dev *pci_dev)
-{
-	int i;
-	struct trident_card *card = pci_get_drvdata(pci_dev);
-
-	/*
-	 *      Kill running timers before unload. We can't have them
-	 *      going off after rmmod!
-	 */
-	if (card->hwvolctl)
-		del_timer_sync(&card->timer);
-
-	/* ALi S/PDIF and Power Management */
-	if (card->pci_id == PCI_DEVICE_ID_ALI_5451) {
-		ali_setup_spdif_out(card, ALI_PCM_TO_SPDIF_OUT);
-		ali_disable_special_channel(card, ALI_SPDIF_OUT_CHANNEL);
-		ali_disable_spdif_in(card);
-		remove_proc_entry("ALi5451", NULL);
-	}
-
-	/* Unregister gameport */
-	trident_unregister_gameport(card);
-
-	/* Kill interrupts, and SP/DIF */
-	trident_disable_loop_interrupts(card);
-
-	/* free hardware resources */
-	free_irq(card->irq, card);
-	release_region(card->iobase, 256);
-
-	/* unregister audio devices */
-	for (i = 0; i < NR_AC97; i++)
-		if (card->ac97_codec[i] != NULL) {
-			unregister_sound_mixer(card->ac97_codec[i]->dev_mixer);
-			ac97_release_codec(card->ac97_codec[i]);
-		}
-	unregister_sound_dsp(card->dev_audio);
-
-	pci_set_drvdata(pci_dev, NULL);
-	pci_dev_put(card->pci_dev);
-	kfree(card);
-}
-
-MODULE_AUTHOR("Alan Cox, Aaron Holtzman, Ollie Lho, Ching Ling Lee, Muli Ben-Yehuda");
-MODULE_DESCRIPTION("Trident 4DWave/SiS 7018/ALi 5451 and Tvia/IGST CyberPro5050 PCI "
-		   "Audio Driver");
-MODULE_LICENSE("GPL");
-
-#define TRIDENT_MODULE_NAME "trident"
-
-static struct pci_driver trident_pci_driver = {
-	.name = TRIDENT_MODULE_NAME,
-	.id_table = trident_pci_tbl,
-	.probe = trident_probe,
-	.remove = __devexit_p(trident_remove),
-#ifdef CONFIG_PM
-	.suspend = trident_suspend,
-	.resume = trident_resume
-#endif
-};
-
-static int __init
-trident_init_module(void)
-{
-	printk(KERN_INFO "Trident 4DWave/SiS 7018/ALi 5451,Tvia CyberPro "
-	       "5050 PCI Audio, version " DRIVER_VERSION ", " __TIME__ " "
-	       __DATE__ "\n");
-
-	return pci_register_driver(&trident_pci_driver);
-}
-
-static void __exit
-trident_cleanup_module(void)
-{
-	pci_unregister_driver(&trident_pci_driver);
-}
-
-module_init(trident_init_module);
-module_exit(trident_cleanup_module);
diff --git a/sound/oss/trident.h b/sound/oss/trident.h
deleted file mode 100644
index ff30a1d7c2f..00000000000
--- a/sound/oss/trident.h
+++ /dev/null
@@ -1,358 +0,0 @@
-#ifndef __TRID4DWAVE_H
-#define __TRID4DWAVE_H
-
-/*
- *  audio@tridentmicro.com
- *  Fri Feb 19 15:55:28 MST 1999
- *  Definitions for Trident 4DWave DX/NX chips
- *
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-/* PCI vendor and device ID */ 
-#ifndef PCI_VENDOR_ID_TRIDENT
-#define PCI_VENDOR_ID_TRIDENT		0x1023
-#endif
-
-#ifndef PCI_VENDOR_ID_SI
-#define PCI_VENDOR_ID_SI			0x1039
-#endif
-
-#ifndef PCI_VENDOR_ID_ALI
-#define PCI_VENDOR_ID_ALI			0x10b9
-#endif
-
-#ifndef PCI_DEVICE_ID_TRIDENT_4DWAVE_DX
-#define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX	0x2000
-#endif
-
-#ifndef PCI_DEVICE_ID_TRIDENT_4DWAVE_NX
-#define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX	0x2001
-#endif
-
-#ifndef PCI_DEVICE_ID_SI_7018
-#define PCI_DEVICE_ID_SI_7018		0x7018
-#endif
-
-#ifndef PCI_DEVICE_ID_ALI_5451
-#define PCI_DEVICE_ID_ALI_5451		0x5451
-#endif
-
-#ifndef PCI_DEVICE_ID_ALI_1533
-#define PCI_DEVICE_ID_ALI_1533		0x1533
-#endif
-
-#define CHANNEL_REGS	5
-#define CHANNEL_START	0xe0   // The first bytes of the contiguous register space.
-
-#define BANK_A 		0
-#define BANK_B 		1
-#define NR_BANKS		2
-
-#define TRIDENT_FMT_STEREO     0x01
-#define TRIDENT_FMT_16BIT      0x02
-#define TRIDENT_FMT_MASK       0x03
-
-#define DAC_RUNNING	0x01
-#define ADC_RUNNING	0x02
-
-/* Register Addresses */
-
-/* operational registers common to DX, NX, 7018 */
-enum trident_op_registers {
-	T4D_GAME_CR	= 0x30, T4D_GAME_LEG	= 0x31,
-	T4D_GAME_AXD	= 0x34,
-	T4D_REC_CH	= 0x70,
-	T4D_START_A     = 0x80, T4D_STOP_A      = 0x84,
-	T4D_DLY_A       = 0x88, T4D_SIGN_CSO_A  = 0x8c,
-	T4D_CSPF_A      = 0x90, T4D_CEBC_A      = 0x94,
-	T4D_AINT_A      = 0x98, T4D_EINT_A	= 0x9c,
-	T4D_LFO_GC_CIR	= 0xa0, T4D_AINTEN_A    = 0xa4,
-	T4D_MUSICVOL_WAVEVOL = 0xa8, T4D_SBDELTA_DELTA_R = 0xac,
-	T4D_MISCINT	= 0xb0, T4D_START_B     = 0xb4,
-	T4D_STOP_B      = 0xb8, T4D_CSPF_B	= 0xbc,
-	T4D_SBBL_SBCL	= 0xc0, T4D_SBCTRL_SBE2R_SBDD    = 0xc4,
-	T4D_STIMER	= 0xc8, T4D_LFO_B_I2S_DELTA      = 0xcc,
-	T4D_AINT_B	= 0xd8, T4D_AINTEN_B	= 0xdc,
-	ALI_MPUR2	= 0x22,	ALI_GPIO	= 0x7c,
-	ALI_EBUF1 = 0xf4,
-	ALI_EBUF2 = 0xf8
-};
-
-enum ali_op_registers {
-	ALI_SCTRL		= 0x48,
-	ALI_GLOBAL_CONTROL	= 0xd4,
-	ALI_STIMER		= 0xc8,
-	ALI_SPDIF_CS		= 0x70,
-	ALI_SPDIF_CTRL		= 0x74
-};
-
-enum ali_registers_number {
-	ALI_GLOBAL_REGS		= 56,
-	ALI_CHANNEL_REGS	= 8,
-	ALI_MIXER_REGS		= 20
-};
-
-enum ali_sctrl_control_bit {
-	ALI_SPDIF_OUT_ENABLE	= 0x20
-};
-
-enum ali_global_control_bit {
-	ALI_SPDIF_OUT_SEL_PCM	= 0x00000400,
-	ALI_SPDIF_IN_SUPPORT	= 0x00000800,
-	ALI_SPDIF_OUT_CH_ENABLE	= 0x00008000,
-	ALI_SPDIF_IN_CH_ENABLE	= 0x00080000,
-	ALI_PCM_IN_DISABLE	= 0x7fffffff,
-	ALI_PCM_IN_ENABLE	= 0x80000000,
-	ALI_SPDIF_IN_CH_DISABLE	= 0xfff7ffff,
-	ALI_SPDIF_OUT_CH_DISABLE = 0xffff7fff,
-	ALI_SPDIF_OUT_SEL_SPDIF	= 0xfffffbff
-	
-};
-
-enum ali_spdif_control_bit {
-	ALI_SPDIF_IN_FUNC_ENABLE	= 0x02,
-	ALI_SPDIF_IN_CH_STATUS		= 0x40,
-	ALI_SPDIF_OUT_CH_STATUS		= 0xbf
-	
-};
-
-enum ali_control_all {
-	ALI_DISABLE_ALL_IRQ	= 0,
-	ALI_CHANNELS		= 32,
-	ALI_STOP_ALL_CHANNELS	= 0xffffffff,
-	ALI_MULTI_CHANNELS_START_STOP	= 0x07800000
-};
-
-enum ali_EMOD_control_bit {
-	ALI_EMOD_DEC	= 0x00000000,
-	ALI_EMOD_INC	= 0x10000000,
-	ALI_EMOD_Delay	= 0x20000000,
-	ALI_EMOD_Still	= 0x30000000
-};
-
-enum ali_pcm_in_channel_num {
-	ALI_NORMAL_CHANNEL	= 0,
-	ALI_SPDIF_OUT_CHANNEL	= 15,
-	ALI_SPDIF_IN_CHANNEL    = 19,
-	ALI_LEF_CHANNEL		= 23,
-	ALI_CENTER_CHANNEL	= 24,
-	ALI_SURR_RIGHT_CHANNEL	= 25,
-	ALI_SURR_LEFT_CHANNEL	= 26,
-	ALI_PCM_IN_CHANNEL	= 31
-};
-
-enum ali_pcm_out_channel_num {
-	ALI_PCM_OUT_CHANNEL_FIRST = 0,
-	ALI_PCM_OUT_CHANNEL_LAST = 31
-};
-
-enum ali_ac97_power_control_bit {
-	ALI_EAPD_POWER_DOWN	= 0x8000
-};
-
-enum ali_update_ptr_flags {
-	ALI_ADDRESS_INT_UPDATE	= 0x01
-};
-
-enum ali_revision {
-	ALI_5451_V02	= 0x02
-};
-
-enum ali_spdif_out_control {
-	ALI_PCM_TO_SPDIF_OUT		= 0,
-	ALI_SPDIF_OUT_TO_SPDIF_OUT	= 1,
-	ALI_SPDIF_OUT_PCM		= 0,
-	ALI_SPDIF_OUT_NON_PCM		= 2
-};
-
-/* S/PDIF Operational Registers for 4D-NX */
-enum nx_spdif_registers {
-	NX_SPCTRL_SPCSO	= 0x24, NX_SPLBA = 0x28,
-	NX_SPESO	= 0x2c, NX_SPCSTATUS = 0x64
-};
-
-/* OP registers to access each hardware channel */
-enum channel_registers {
-	CH_DX_CSO_ALPHA_FMS = 0xe0, CH_DX_ESO_DELTA = 0xe8,
-	CH_DX_FMC_RVOL_CVOL = 0xec,
-	CH_NX_DELTA_CSO     = 0xe0, CH_NX_DELTA_ESO = 0xe8,
-	CH_NX_ALPHA_FMS_FMC_RVOL_CVOL = 0xec,
-	CH_LBA              = 0xe4,
-	CH_GVSEL_PAN_VOL_CTRL_EC      = 0xf0
-};
-
-/* registers to read/write/control AC97 codec */
-enum dx_ac97_registers {
-	DX_ACR0_AC97_W        = 0x40, DX_ACR1_AC97_R = 0x44,
-	DX_ACR2_AC97_COM_STAT = 0x48
-};
-
-enum nx_ac97_registers {
-	NX_ACR0_AC97_COM_STAT  = 0x40, NX_ACR1_AC97_W           = 0x44,
-	NX_ACR2_AC97_R_PRIMARY = 0x48, NX_ACR3_AC97_R_SECONDARY	= 0x4c
-};
-
-enum si_ac97_registers {
-	SI_AC97_WRITE       = 0x40, SI_AC97_READ = 0x44,
-	SI_SERIAL_INTF_CTRL = 0x48, SI_AC97_GPIO = 0x4c
-};
-
-enum ali_ac97_registers {
-	ALI_AC97_WRITE       = 0x40, ALI_AC97_READ = 0x44
-};
-
-/* Bit mask for operational registers */
-#define AC97_REG_ADDR      0x000000ff
-
-enum ali_ac97_bits {
-	ALI_AC97_BUSY_WRITE = 0x8000, ALI_AC97_BUSY_READ = 0x8000,
-	ALI_AC97_WRITE_ACTION = 0x8000, ALI_AC97_READ_ACTION = 0x8000,
-	ALI_AC97_AUDIO_BUSY = 0x4000, ALI_AC97_SECONDARY  = 0x0080,
-	ALI_AC97_READ_MIXER_REGISTER = 0xfeff,
-	ALI_AC97_WRITE_MIXER_REGISTER = 0x0100
-};
-
-enum sis7018_ac97_bits {
-	SI_AC97_BUSY_WRITE = 0x8000, SI_AC97_BUSY_READ = 0x8000,
-	SI_AC97_AUDIO_BUSY = 0x4000, SI_AC97_MODEM_BUSY = 0x2000,
-	SI_AC97_SECONDARY  = 0x0080
-};
-
-enum trident_dx_ac97_bits {
-	DX_AC97_BUSY_WRITE = 0x8000, DX_AC97_BUSY_READ = 0x8000,
-	DX_AC97_READY      = 0x0010, DX_AC97_RECORD    = 0x0008,
-	DX_AC97_PLAYBACK   = 0x0002
-};
-
-enum trident_nx_ac97_bits {
-	/* ACR1-3 */
-	NX_AC97_BUSY_WRITE = 0x0800, NX_AC97_BUSY_READ = 0x0800,
-	NX_AC97_BUSY_DATA  = 0x0400, NX_AC97_WRITE_SECONDARY = 0x0100,
-	/* ACR0 */
-	NX_AC97_SECONDARY_READY = 0x0040, NX_AC97_SECONDARY_RECORD = 0x0020,
-	NX_AC97_SURROUND_OUTPUT = 0x0010,
-	NX_AC97_PRIMARY_READY   = 0x0008, NX_AC97_PRIMARY_RECORD   = 0x0004,
-	NX_AC97_PCM_OUTPUT      = 0x0002,
-	NX_AC97_WARM_RESET      = 0x0001
-};
-
-enum serial_intf_ctrl_bits {
-	WARM_REST   = 0x00000001, COLD_RESET  = 0x00000002,
-	I2S_CLOCK   = 0x00000004, PCM_SEC_AC97= 0x00000008,
-	AC97_DBL_RATE = 0x00000010, SPDIF_EN  = 0x00000020,
-	I2S_OUTPUT_EN = 0x00000040, I2S_INPUT_EN = 0x00000080,
-	PCMIN       = 0x00000100, LINE1IN     = 0x00000200,
-	MICIN       = 0x00000400, LINE2IN     = 0x00000800,
-	HEAD_SET_IN = 0x00001000, GPIOIN      = 0x00002000,
-	/* 7018 spec says id = 01 but the demo board routed to 10 
-	   SECONDARY_ID= 0x00004000, */
-	SECONDARY_ID= 0x00004000,
-	PCMOUT      = 0x00010000, SURROUT     = 0x00020000,
-	CENTEROUT   = 0x00040000, LFEOUT      = 0x00080000,
-	LINE1OUT    = 0x00100000, LINE2OUT    = 0x00200000,
-	GPIOOUT     = 0x00400000,
-	SI_AC97_PRIMARY_READY   = 0x01000000,
-	SI_AC97_SECONDARY_READY = 0x02000000,
-};
-
-enum global_control_bits {
-	CHANNLE_IDX = 0x0000003f, PB_RESET    = 0x00000100,
-	PAUSE_ENG   = 0x00000200,
-	OVERRUN_IE  = 0x00000400, UNDERRUN_IE = 0x00000800,
-	ENDLP_IE    = 0x00001000, MIDLP_IE    = 0x00002000,
-	ETOG_IE     = 0x00004000,
-	EDROP_IE    = 0x00008000, BANK_B_EN   = 0x00010000
-};
-
-enum channel_control_bits {
-	CHANNEL_LOOP   = 0x00001000, CHANNEL_SIGNED = 0x00002000,
-	CHANNEL_STEREO = 0x00004000, CHANNEL_16BITS = 0x00008000,
-};
-
-enum channel_attribute {
-	/* playback/record select */
-	CHANNEL_PB     = 0x0000, CHANNEL_SPC_PB = 0x4000,
-	CHANNEL_REC    = 0x8000, CHANNEL_REC_PB = 0xc000,
-	/* playback destination/record source select */
-	MODEM_LINE1    = 0x0000, MODEM_LINE2    = 0x0400,
-	PCM_LR         = 0x0800, HSET           = 0x0c00,
-	I2S_LR         = 0x1000, CENTER_LFE     = 0x1400,
-	SURR_LR        = 0x1800, SPDIF_LR       = 0x1c00,
-	MIC            = 0x1400,
-	/* mist stuff */
-	MONO_LEFT      = 0x0000, MONO_RIGHT     = 0x0100,
-	MONO_MIX       = 0x0200, SRC_ENABLE     = 0x0080,
-};
-
-enum miscint_bits {
-	PB_UNDERRUN_IRO = 0x00000001, REC_OVERRUN_IRQ = 0x00000002,
-	SB_IRQ          = 0x00000004, MPU401_IRQ      = 0x00000008,
-	OPL3_IRQ        = 0x00000010, ADDRESS_IRQ     = 0x00000020,
-	ENVELOPE_IRQ    = 0x00000040, ST_IRQ          = 0x00000080,
-	PB_UNDERRUN     = 0x00000100, REC_OVERRUN     = 0x00000200,
-	MIXER_UNDERFLOW = 0x00000400, MIXER_OVERFLOW  = 0x00000800,
-	ST_TARGET_REACHED = 0x00008000, PB_24K_MODE   = 0x00010000, 
-	ST_IRQ_EN       = 0x00800000, ACGPIO_IRQ      = 0x01000000
-};
-
-#define TRID_REG( trident, x ) ( (trident) -> iobase + (x) )
-
-#define		CYBER_PORT_AUDIO		0x3CE
-#define		CYBER_IDX_AUDIO_ENABLE          0x7B
-#define		CYBER_BMSK_AUDIO_INT_ENABLE	0x09
-#define		CYBER_BMSK_AUENZ		0x01
-#define		CYBER_BMSK_AUENZ_ENABLE		0x00
-#define		CYBER_IDX_IRQ_ENABLE		0x12
-      
-#define VALIDATE_MAGIC(FOO,MAG)				\
-({						  	\
-	if (!(FOO) || (FOO)->magic != MAG) { 		\
-		printk(invalid_magic,__func__);	\
-		return -ENXIO;			  	\
-	}					  	\
-})
-
-#define VALIDATE_STATE(a) VALIDATE_MAGIC(a,TRIDENT_STATE_MAGIC)
-#define VALIDATE_CARD(a) VALIDATE_MAGIC(a,TRIDENT_CARD_MAGIC)
-
-static inline unsigned ld2(unsigned int x)
-{
-	unsigned r = 0;
-	
-	if (x >= 0x10000) {
-		x >>= 16;
-		r += 16;
-	}
-	if (x >= 0x100) {
-		x >>= 8;
-		r += 8;
-	}
-	if (x >= 0x10) {
-		x >>= 4;
-		r += 4;
-	}
-	if (x >= 4) {
-		x >>= 2;
-		r += 2;
-	}
-	if (x >= 2)
-		r++;
-	return r;
-}
-
-#endif /* __TRID4DWAVE_H */
-- 
GitLab


From 76a6f3dc9a7108785c145a298f82c72f9208fe17 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:29:15 -0700
Subject: [PATCH 272/853] CONFIG_SOUND_WM97XX: remove stale makefile line

The driver has been gone for a long time.

Reported-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 sound/oss/Makefile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sound/oss/Makefile b/sound/oss/Makefile
index 3a141474fb7..c611514f7ff 100644
--- a/sound/oss/Makefile
+++ b/sound/oss/Makefile
@@ -31,8 +31,6 @@ obj-$(CONFIG_SOUND_VWSND)	+= vwsnd.o
 obj-$(CONFIG_SOUND_AU1550_AC97)	+= au1550_ac97.o ac97_codec.o
 obj-$(CONFIG_SOUND_BCM_CS4297A)	+= swarm_cs4297a.o
 
-obj-$(CONFIG_SOUND_WM97XX)	+= ac97_plugin_wm97xx.o
-
 obj-$(CONFIG_DMASOUND)		+= dmasound/
 
 # Declare multi-part drivers.
-- 
GitLab


From 6e2c10a12a2170856f5582d62d583cbcd1cb5eaf Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Jul 2008 21:29:15 -0700
Subject: [PATCH 273/853] binfmt_misc: use simple_read_from_buffer()

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 7191306367c..756205314c2 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/syscalls.h>
+#include <linux/fs.h>
 
 #include <asm/uaccess.h>
 
@@ -535,31 +536,16 @@ static ssize_t
 bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
 {
 	Node *e = file->f_path.dentry->d_inode->i_private;
-	loff_t pos = *ppos;
 	ssize_t res;
 	char *page;
-	int len;
 
 	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
 		return -ENOMEM;
 
 	entry_status(e, page);
-	len = strlen(page);
 
-	res = -EINVAL;
-	if (pos < 0)
-		goto out;
-	res = 0;
-	if (pos >= len)
-		goto out;
-	if (len < pos + nbytes)
-		nbytes = len - pos;
-	res = -EFAULT;
-	if (copy_to_user(buf, page + pos, nbytes))
-		goto out;
-	*ppos = pos + nbytes;
-	res = nbytes;
-out:
+	res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
+
 	free_page((unsigned long) page);
 	return res;
 }
-- 
GitLab


From a677a039be7243357d93502bff2b40850c942e2d Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:17 -0700
Subject: [PATCH 274/853] flag parameters: socket and socketpair

This patch adds support for flag values which are ORed to the type passed
to socket and socketpair.  The additional code is minimal.  The flag
values in this implementation can and must match the O_* flags.  This
avoids overhead in the conversion.

The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>

#define PORT 57392

/* For Linux these must be the same.  */
#define SOCK_CLOEXEC O_CLOEXEC

/* Verify the close-on-exec behaviour of socket() and socketpair():
   the flag must stay clear for a plain type argument and must be set
   when SOCK_CLOEXEC is ORed into it.  Prints "OK" and returns 0 when
   every check passes; otherwise prints the failing step and returns 1.  */
int
main (void)
{
  int sock;
  int fdflags;
  int pair[2];
  int idx;

  /* socket() without flags: FD_CLOEXEC has to be clear.  */
  sock = socket (PF_INET, SOCK_STREAM, 0);
  if (sock == -1)
    {
      puts ("socket(0) failed");
      return 1;
    }
  if ((fdflags = fcntl (sock, F_GETFD)) == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fdflags & FD_CLOEXEC) != 0)
    {
      puts ("socket(0) set close-on-exec flag");
      return 1;
    }
  close (sock);

  /* socket() with SOCK_CLOEXEC: FD_CLOEXEC has to be set.  */
  sock = socket (PF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
  if (sock == -1)
    {
      puts ("socket(SOCK_CLOEXEC) failed");
      return 1;
    }
  if ((fdflags = fcntl (sock, F_GETFD)) == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (!(fdflags & FD_CLOEXEC))
    {
      puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (sock);

  /* socketpair() without flags: both descriptors must have the flag
     clear.  */
  if (socketpair (PF_UNIX, SOCK_STREAM, 0, pair) == -1)
    {
      puts ("socketpair(0) failed");
      return 1;
    }
  idx = 0;
  while (idx < 2)
    {
      if ((fdflags = fcntl (pair[idx], F_GETFD)) == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((fdflags & FD_CLOEXEC) != 0)
        {
          printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", idx);
          return 1;
        }
      close (pair[idx]);
      ++idx;
    }

  /* socketpair() with SOCK_CLOEXEC: both descriptors must have the
     flag set.  */
  if (socketpair (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, pair) == -1)
    {
      puts ("socketpair(SOCK_CLOEXEC) failed");
      return 1;
    }
  idx = 0;
  while (idx < 2)
    {
      if ((fdflags = fcntl (pair[idx], F_GETFD)) == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (!(fdflags & FD_CLOEXEC))
        {
          printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", idx);
          return 1;
        }
      close (pair[idx]);
      ++idx;
    }

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-mips/socket.h |  7 +++++++
 include/linux/net.h       |  9 ++++++++-
 net/9p/trans_fd.c         |  2 +-
 net/sctp/socket.c         |  2 +-
 net/socket.c              | 28 ++++++++++++++++++++--------
 5 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h
index 63f60254d30..facc2d7a87c 100644
--- a/include/asm-mips/socket.h
+++ b/include/asm-mips/socket.h
@@ -102,6 +102,13 @@ enum sock_type {
 };
 
 #define SOCK_MAX (SOCK_PACKET + 1)
+/* Mask which covers at least up to SOCK_MASK-1.  The
+ *  * remaining bits are used as flags. */
+#define SOCK_TYPE_MASK 0xf
+
+/* Flags for socket, socketpair, paccept */
+#define SOCK_CLOEXEC	O_CLOEXEC
+#define SOCK_NONBLOCK	O_NONBLOCK
 
 #define ARCH_HAS_SOCKET_TYPES 1
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 150a48c68d5..8b5383c45b4 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -20,6 +20,7 @@
 
 #include <linux/wait.h>
 #include <linux/socket.h>
+#include <linux/fcntl.h>	/* For O_CLOEXEC */
 #include <asm/socket.h>
 
 struct poll_table_struct;
@@ -94,6 +95,12 @@ enum sock_type {
 };
 
 #define SOCK_MAX (SOCK_PACKET + 1)
+/* Mask which covers at least up to SOCK_MASK-1.  The
+ * remaining bits are used as flags. */
+#define SOCK_TYPE_MASK 0xf
+
+/* Flags for socket, socketpair, paccept */
+#define SOCK_CLOEXEC	O_CLOEXEC
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
@@ -208,7 +215,7 @@ extern int   	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
 				  size_t len);
 extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
 				  size_t size, int flags);
-extern int 	     sock_map_fd(struct socket *sock);
+extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 4507f744f44..cdf137af7ad 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1285,7 +1285,7 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
 	int fd, ret;
 
 	csocket->sk->sk_allocation = GFP_NOIO;
-	fd = sock_map_fd(csocket);
+	fd = sock_map_fd(csocket, 0);
 	if (fd < 0) {
 		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n");
 		return fd;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 79bece16aed..dbb79adf8f3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3910,7 +3910,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
 		goto out;
 
 	/* Map the socket to an unused fd that can be returned to the user.  */
-	retval = sock_map_fd(newsock);
+	retval = sock_map_fd(newsock, 0);
 	if (retval < 0) {
 		sock_release(newsock);
 		goto out;
diff --git a/net/socket.c b/net/socket.c
index 1ba57d88898..64601f90035 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -349,11 +349,11 @@ static struct dentry_operations sockfs_dentry_operations = {
  *	but we take care of internal coherence yet.
  */
 
-static int sock_alloc_fd(struct file **filep)
+static int sock_alloc_fd(struct file **filep, int flags)
 {
 	int fd;
 
-	fd = get_unused_fd();
+	fd = get_unused_fd_flags(flags);
 	if (likely(fd >= 0)) {
 		struct file *file = get_empty_filp();
 
@@ -396,10 +396,10 @@ static int sock_attach_fd(struct socket *sock, struct file *file)
 	return 0;
 }
 
-int sock_map_fd(struct socket *sock)
+int sock_map_fd(struct socket *sock, int flags)
 {
 	struct file *newfile;
-	int fd = sock_alloc_fd(&newfile);
+	int fd = sock_alloc_fd(&newfile, flags);
 
 	if (likely(fd >= 0)) {
 		int err = sock_attach_fd(sock, newfile);
@@ -1218,12 +1218,18 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 {
 	int retval;
 	struct socket *sock;
+	int flags;
+
+	flags = type & ~SOCK_TYPE_MASK;
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+	type &= SOCK_TYPE_MASK;
 
 	retval = sock_create(family, type, protocol, &sock);
 	if (retval < 0)
 		goto out;
 
-	retval = sock_map_fd(sock);
+	retval = sock_map_fd(sock, flags & O_CLOEXEC);
 	if (retval < 0)
 		goto out_release;
 
@@ -1246,6 +1252,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	struct socket *sock1, *sock2;
 	int fd1, fd2, err;
 	struct file *newfile1, *newfile2;
+	int flags;
+
+	flags = type & ~SOCK_TYPE_MASK;
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+	type &= SOCK_TYPE_MASK;
 
 	/*
 	 * Obtain the first socket and check if the underlying protocol
@@ -1264,13 +1276,13 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	if (err < 0)
 		goto out_release_both;
 
-	fd1 = sock_alloc_fd(&newfile1);
+	fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
 	if (unlikely(fd1 < 0)) {
 		err = fd1;
 		goto out_release_both;
 	}
 
-	fd2 = sock_alloc_fd(&newfile2);
+	fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
 	if (unlikely(fd2 < 0)) {
 		err = fd2;
 		put_filp(newfile1);
@@ -1426,7 +1438,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	 */
 	__module_get(newsock->ops->owner);
 
-	newfd = sock_alloc_fd(&newfile);
+	newfd = sock_alloc_fd(&newfile, 0);
 	if (unlikely(newfd < 0)) {
 		err = newfd;
 		sock_release(newsock);
-- 
GitLab


From aaca0bdca573f3f51ea03139f9c7289541e7bca3 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:20 -0700
Subject: [PATCH 275/853] flag parameters: paccept

This patch is by far the most complex in the series.  It adds a new syscall
paccept.  This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:

- a signal mask
- a flags value

The flags parameter can be used to set flags like SOCK_CLOEXEC.  This is
implemented here as well.  Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX.  Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc).  So an interface change is inevitable.

The flag value is the same as for socket and socketpair.  I think diverging
here will only create confusion.  Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.

The signal mask is handled as for pselect etc.  The mask is temporarily
installed for the thread and removed before the call returns.  I modeled the
code after pselect.  If there is a problem it's likely also in pselect.

For architectures which use socketcall I maintained this interface instead of
adding a system call.  The symmetry shouldn't be broken.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

/* Fallback for C libraries whose headers do not provide __NR_paccept:
   x86-64 gets a dedicated syscall number, i386 is routed through
   socketcall with the SYS_PACCEPT sub-call, and every other
   architecture has to supply its own number.  */
#ifndef __NR_paccept
# ifdef __x86_64__
#  define __NR_paccept 288
# elif defined __i386__
#  define SYS_PACCEPT 18
#  define USE_SOCKETCALL 1
# else
#  error "need __NR_paccept"
# endif
#endif

/* paccept (fd, addr, addrlen, mask, flags): thin wrapper around the raw
   system call.  The literal 8 is passed in the size argument that sits
   between the signal mask and the flags; presumably it is the kernel's
   sigset size in bytes on x86/x86-64 -- confirm when porting.  In the
   socketcall variant all six arguments are packed into an array of
   longs.  The macro arguments are not parenthesized, so pass simple
   expressions only.  */
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif

#define PORT 57392

#define SOCK_CLOEXEC O_CLOEXEC

static pthread_barrier_t b;

/* Client thread.  ARG carries the pthread_t of the thread that created
   us (main passes pthread_self () cast to void *).  Each
   pthread_barrier_wait here pairs with one in main, so the statement
   order must not be changed.  Return values of socket/connect are
   deliberately not inspected; a failure shows up as a failed or hanging
   paccept on the other side.  Always returns NULL.  */
static void *
tf (void *arg)
{
  /* Phase 1: once main is ready, connect to PORT on loopback and drop
     the connection again.  */
  pthread_barrier_wait (&b);
  int s = socket (AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in sin;
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);

  /* Phase 2: same again with a fresh socket; SIN is reused (the port
     is stored once more although it has not changed).  */
  pthread_barrier_wait (&b);
  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  /* Phase 3: no connection this time.  After the last rendezvous wait
     two seconds and then send SIGUSR1 to the thread identified by
     ARG.  */
  pthread_barrier_wait (&b);
  sleep (2);
  pthread_kill ((pthread_t) arg, SIGUSR1);

  return NULL;
}

/* SIGUSR1 handler; intentionally empty.  main installs it with
   sa_flags = 0 and then expects the blocked paccept call to fail with
   EINTR when the signal arrives.  S is the signal number and is
   unused.  */
static void
handler (int s)
{
}

/* Exercise paccept in three phases, each synchronized with the client
   thread TF through the two-party barrier B:

   1. paccept with flags 0 must return a descriptor without FD_CLOEXEC.
   2. paccept with SOCK_CLOEXEC must return a descriptor with FD_CLOEXEC.
   3. paccept with a signal mask that unblocks SIGUSR1 must fail with
      EINTR once TF sends that signal.

   Prints "OK" and returns 0 when all phases pass; otherwise prints the
   failing step and returns 1.  The statement order relative to the
   barrier waits matters because TF performs the matching waits.  */
int
main (void)
{
  pthread_barrier_init (&b, NULL, 2);

  struct sockaddr_in sin;
  pthread_t th;
  if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  /* If the listening socket cannot be set up on PORT the client's
     connect has nothing to reach, so fail here with a diagnostic
     instead of waiting in paccept.  */
  if (bind (s, (struct sockaddr *) &sin, sizeof (sin)) != 0)
    {
      puts ("bind failed");
      return 1;
    }
  if (listen (s, SOMAXCONN) != 0)
    {
      puts ("listen failed");
      return 1;
    }

  pthread_barrier_wait (&b);

  /* Phase 1: no flags.  */
  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  int coe = fcntl (s2, F_GETFD);
  /* -1 has every bit set, so an unchecked fcntl failure would be
     misreported as "flag set".  */
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("paccept(0) set close-on-exec-flag");
      return 1;
    }
  close (s2);

  pthread_barrier_wait (&b);

  /* Phase 2: SOCK_CLOEXEC.  */
  s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_CLOEXEC) failed");
      return 1;
    }

  coe = fcntl (s2, F_GETFD);
  /* Without this check a failing fcntl would let the test pass, since
     -1 & FD_CLOEXEC is nonzero.  */
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (s2);

  pthread_barrier_wait (&b);

  /* Phase 3: install a no-op SIGUSR1 handler without SA_RESTART.  */
  struct sigaction sa;
  sa.sa_handler = handler;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  sigaction (SIGUSR1, &sa, NULL);

  /* Block SIGUSR1 in this thread ...  */
  sigset_t ss;
  pthread_sigmask (SIG_SETMASK, NULL, &ss);
  sigaddset (&ss, SIGUSR1);
  pthread_sigmask (SIG_SETMASK, &ss, NULL);

  /* ... and prepare a mask for paccept in which it is unblocked.  The
     alarm is armed before the blocking call; no SIGALRM handler is
     installed here.  */
  sigdelset (&ss, SIGUSR1);
  alarm (4);
  pthread_barrier_wait (&b);

  errno = 0 ;
  s2 = paccept (s, NULL, 0, &ss, 0);
  if (s2 != -1 || errno != EINTR)
    {
      puts ("paccept did not fail with EINTR");
      return 1;
    }

  close (s);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-alpha/socket.h  |  5 +++
 include/asm-parisc/socket.h |  5 +++
 include/asm-x86/unistd_64.h |  2 +
 include/linux/net.h         |  3 ++
 include/linux/syscalls.h    |  2 +
 kernel/sys_ni.c             |  1 +
 net/compat.c                | 52 ++++++++++++++++++++++--
 net/socket.c                | 81 +++++++++++++++++++++++++++++++++----
 8 files changed, 139 insertions(+), 12 deletions(-)

diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h
index 08c97931992..a1057c2d95e 100644
--- a/include/asm-alpha/socket.h
+++ b/include/asm-alpha/socket.h
@@ -62,4 +62,9 @@
 
 #define SO_MARK			36
 
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK	0x40000000
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h
index 69a7a0d30b0..fba402c95ac 100644
--- a/include/asm-parisc/socket.h
+++ b/include/asm-parisc/socket.h
@@ -54,4 +54,9 @@
 
 #define SO_MARK			0x401f
 
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK   0x40000000
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index 9c1a4a3470d..e323994a370 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -639,6 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
+#define __NR_paccept				288
+__SYSCALL(__NR_paccept, sys_paccept)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/net.h b/include/linux/net.h
index 8b5383c45b4..3a9b06d4d0f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -47,6 +47,7 @@ struct net;
 #define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
 #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
+#define SYS_PACCEPT	18		/* sys_paccept(2)		*/
 
 typedef enum {
 	SS_FREE = 0,			/* not allocated		*/
@@ -219,6 +220,8 @@ extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
+extern long	     do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			       int __user *upeer_addrlen, int flags);
 
 #define net_random()		random32()
 #define net_srandom(seed)	srandom32((__force u32)seed)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4394dadff81..2a2a40af6b2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -409,6 +409,8 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname,
 asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
+asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
+			    const sigset_t *, size_t, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0fea0ee12da..2f0b8a2e600 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,6 +31,7 @@ cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
 cond_syscall(sys_listen);
 cond_syscall(sys_accept);
+cond_syscall(sys_paccept);
 cond_syscall(sys_connect);
 cond_syscall(sys_getsockname);
 cond_syscall(sys_getpeername);
diff --git a/net/compat.c b/net/compat.c
index 6e1b03b5193..67fb6a3834a 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -722,9 +722,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
 
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
-static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
+				AL(6)};
 #undef AL
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -737,13 +738,52 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns
 	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
+asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+				   int __user *upeer_addrlen,
+				   const compat_sigset_t __user *sigmask,
+				   compat_size_t sigsetsize, int flags)
+{
+	compat_sigset_t ss32;
+	sigset_t ksigmask, sigsaved;
+	int ret;
+
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &ss32);
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+
+	if (ret == -ERESTARTNOHAND) {
+		/*
+		 * Don't restore the signal mask yet. Let do_signal() deliver
+		 * the signal on the way back to userspace, before the signal
+		 * mask is restored.
+		 */
+		if (sigmask) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+			       sizeof(sigsaved));
+			set_restore_sigmask();
+		}
+	} else if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 {
 	int ret;
 	u32 a[6];
 	u32 a0, a1;
 
-	if (call < SYS_SOCKET || call > SYS_RECVMSG)
+	if (call < SYS_SOCKET || call > SYS_PACCEPT)
 		return -EINVAL;
 	if (copy_from_user(a, args, nas[call]))
 		return -EFAULT;
@@ -764,7 +804,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 		ret = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		ret = sys_accept(a0, compat_ptr(a1), compat_ptr(a[2]));
+		ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
 		break;
 	case SYS_GETSOCKNAME:
 		ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
@@ -804,6 +844,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	case SYS_RECVMSG:
 		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
+	case SYS_PACCEPT:
+		ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
+					 compat_ptr(a[3]), a[4], a[5]);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/net/socket.c b/net/socket.c
index 64601f90035..a0ce8ad7225 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -63,6 +63,7 @@
 #include <linux/file.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
+#include <linux/thread_info.h>
 #include <linux/rcupdate.h>
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
@@ -1225,6 +1226,9 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	retval = sock_create(family, type, protocol, &sock);
 	if (retval < 0)
 		goto out;
@@ -1259,6 +1263,9 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	/*
 	 * Obtain the first socket and check if the underlying protocol
 	 * supports the socketpair call.
@@ -1413,14 +1420,20 @@ asmlinkage long sys_listen(int fd, int backlog)
  *	clean when we restucture accept also.
  */
 
-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-			   int __user *upeer_addrlen)
+long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+	       int __user *upeer_addrlen, int flags)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
 	int err, len, newfd, fput_needed;
 	struct sockaddr_storage address;
 
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1438,7 +1451,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	 */
 	__module_get(newsock->ops->owner);
 
-	newfd = sock_alloc_fd(&newfile, 0);
+	newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
 	if (unlikely(newfd < 0)) {
 		err = newfd;
 		sock_release(newsock);
@@ -1491,6 +1504,50 @@ out_fd:
 	goto out_put;
 }
 
+asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen,
+			    const sigset_t __user *sigmask,
+			    size_t sigsetsize, int flags)
+{
+	sigset_t ksigmask, sigsaved;
+	int ret;
+
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+        }
+
+	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+
+	if (ret < 0 && signal_pending(current)) {
+		/*
+		 * Don't restore the signal mask yet. Let do_signal() deliver
+		 * the signal on the way back to userspace, before the signal
+		 * mask is restored.
+		 */
+		if (sigmask) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+			       sizeof(sigsaved));
+			set_restore_sigmask();
+		}
+	} else if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			   int __user *upeer_addrlen)
+{
+	return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
+}
+
 /*
  *	Attempt to connect to a socket with the server address.  The address
  *	is in user space so we verify it is OK and move it to kernel space.
@@ -2011,10 +2068,11 @@ out:
 
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
-static const unsigned char nargs[18]={
+static const unsigned char nargs[19]={
 	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
+	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
+	AL(6)
 };
 
 #undef AL
@@ -2033,7 +2091,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	unsigned long a0, a1;
 	int err;
 
-	if (call < 1 || call > SYS_RECVMSG)
+	if (call < 1 || call > SYS_PACCEPT)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
@@ -2062,8 +2120,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 		break;
 	case SYS_ACCEPT:
 		err =
-		    sys_accept(a0, (struct sockaddr __user *)a1,
-			       (int __user *)a[2]);
+		    do_accept(a0, (struct sockaddr __user *)a1,
+			      (int __user *)a[2], 0);
 		break;
 	case SYS_GETSOCKNAME:
 		err =
@@ -2110,6 +2168,13 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	case SYS_RECVMSG:
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
+	case SYS_PACCEPT:
+		err =
+		    sys_paccept(a0, (struct sockaddr __user *)a1,
+			        (int __user *)a[2],
+				(const sigset_t __user *) a[3],
+				a[4], a[5]);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
GitLab


From c019bbc612f6633ede7ed67725cbf68de45ae8a4 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:21 -0700
Subject: [PATCH 276/853] flag parameters: paccept w/out set_restore_sigmask

Some platforms do not have support to restore the signal mask in the
return path from a syscall.  For those platforms syscalls like pselect are
not defined at all.  This is, I think, not a good choice for paccept()
since paccept() adds more value on top of accept() than just the signal
mask handling.

Therefore this patch defines a scaled down version of the sys_paccept
function for those platforms.  It returns -EINVAL in case the signal mask
is non-NULL but behaves the same otherwise.

Note that I explicitly included <linux/thread_info.h>.  I saw that it is
currently included but indirectly two levels down.  There is too much risk
in relying on this.  The header might change and then suddenly the
function definition would change without anyone immediately noticing.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/net.h |  3 +++
 net/socket.c        | 17 +++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 3a9b06d4d0f..39a23af059b 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -102,6 +102,9 @@ enum sock_type {
 
 /* Flags for socket, socketpair, paccept */
 #define SOCK_CLOEXEC	O_CLOEXEC
+#ifndef SOCK_NONBLOCK
+#define SOCK_NONBLOCK	O_NONBLOCK
+#endif
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
diff --git a/net/socket.c b/net/socket.c
index a0ce8ad7225..d163adff95b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -69,6 +69,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/thread_info.h>
 #include <linux/wanrouter.h>
 #include <linux/if_bridge.h>
 #include <linux/if_frad.h>
@@ -1504,6 +1505,7 @@ out_fd:
 	goto out_put;
 }
 
+#ifdef HAVE_SET_RESTORE_SIGMASK
 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 			    int __user *upeer_addrlen,
 			    const sigset_t __user *sigmask,
@@ -1541,6 +1543,21 @@ asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 
 	return ret;
 }
+#else
+asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen,
+			    const sigset_t __user *sigmask,
+			    size_t sigsetsize, int flags)
+{
+	/* The platform does not support restoring the signal mask in the
+	 * return path.  So we do not allow using paccept() with a signal
+	 * mask.  */
+	if (sigmask)
+		return -EINVAL;
+
+	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+}
+#endif
 
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 			   int __user *upeer_addrlen)
-- 
GitLab


From 7d9dbca34240ebb6ff88d8a29c6c7bffd098f0c1 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:22 -0700
Subject: [PATCH 277/853] flag parameters: anon_inode_getfd extension

This patch just extends the anon_inode_getfd interface to take an additional
parameter with a flag value.  The flag value is passed on to
get_unused_fd_flags in anticipation of its use with the O_CLOEXEC flag.

No actual semantic changes here, the changed callers all pass 0 for now.

[akpm@linux-foundation.org: KVM fix]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/anon_inodes.c            | 9 +++++----
 fs/eventfd.c                | 2 +-
 fs/eventpoll.c              | 2 +-
 fs/signalfd.c               | 3 ++-
 fs/timerfd.c                | 2 +-
 include/linux/anon_inodes.h | 2 +-
 virt/kvm/kvm_main.c         | 4 ++--
 7 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 977ef208c05..1a4eee620b0 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
  *                    of the file
  *
  * @name:    [in]    name of the "class" of the new file
- * @fops     [in]    file operations for the new file
- * @priv     [in]    private data for the new file (will be file's private_data)
+ * @fops:    [in]    file operations for the new file
+ * @priv:    [in]    private data for the new file (will be file's private_data)
+ * @flags:   [in]    flags
  *
  * Creates a new file by hooking it on a single inode. This is useful for files
  * that do not need to have a full-fledged inode in order to operate correctly.
@@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
  * setup.  Returns new descriptor or -error.
  */
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
-		     void *priv)
+		     void *priv, int flags)
 {
 	struct qstr this;
 	struct dentry *dentry;
@@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
 	if (IS_ERR(anon_inode_inode))
 		return -ENODEV;
 
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		return error;
 	fd = error;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 343942deeec..6094265ca40 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -214,7 +214,7 @@ asmlinkage long sys_eventfd(unsigned int count)
 	 * When we call this, the initialization must be complete, since
 	 * anon_inode_getfd() will install the fd.
 	 */
-	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
+	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 0);
 	if (fd < 0)
 		kfree(ctx);
 	return fd;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 990c01d2d66..9392dd96812 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1068,7 +1068,7 @@ asmlinkage long sys_epoll_create(int size)
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
+	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 0);
 	if (fd < 0)
 		ep_free(ep);
 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 619725644c7..ddb328b74bd 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -227,7 +227,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 		 * When we call this, the initialization must be complete, since
 		 * anon_inode_getfd() will install the fd.
 		 */
-		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
+		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
+				       0);
 		if (ufd < 0)
 			kfree(ctx);
 	} else {
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d87d354ec42..77c2bc92cbe 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -198,7 +198,7 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	ctx->clockid = clockid;
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
 
-	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
+	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 0);
 	if (ufd < 0)
 		kfree(ctx);
 
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index 6129e58ca7c..e0a0cdc2da4 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -9,7 +9,7 @@
 #define _LINUX_ANON_INODES_H
 
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
-		     void *priv);
+		     void *priv, int flags);
 
 #endif /* _LINUX_ANON_INODES_H */
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 904d7b7bd78..a845890b680 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -902,7 +902,7 @@ static const struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu);
+	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
 	if (fd < 0)
 		kvm_put_kvm(vcpu->kvm);
 	return fd;
@@ -1261,7 +1261,7 @@ static int kvm_dev_ioctl_create_vm(void)
 	kvm = kvm_create_vm();
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
-	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm);
+	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0);
 	if (fd < 0)
 		kvm_put_kvm(kvm);
 
-- 
GitLab


From 9deb27baedb79759c3ab9435a7d8b841842d56e9 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:24 -0700
Subject: [PATCH 278/853] flag parameters: signalfd

This patch adds the new signalfd4 syscall.  It extends the old signalfd
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag supported is SFD_CLOEXEC, which causes the
close-on-exec flag for the returned file descriptor to be set.

A new name SFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_signalfd4
# ifdef __x86_64__
#  define __NR_signalfd4 289
# elif defined __i386__
#  define __NR_signalfd4 327
# else
#  error "need __NR_signalfd4"
# endif
#endif

#define SFD_CLOEXEC O_CLOEXEC

int
main (void)
{
  sigset_t ss;
  sigemptyset (&ss);
  sigaddset (&ss, SIGUSR1);
  int fd = syscall (__NR_signalfd4, -1, &ss, 8, 0);
  if (fd == -1)
    {
      puts ("signalfd4(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("signalfd4(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_signalfd4, -1, &ss, 8, SFD_CLOEXEC);
  if (fd == -1)
    {
      puts ("signalfd4(SFD_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("signalfd4(SFD_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/compat.c                        | 14 ++++++++++----
 fs/signalfd.c                      | 14 ++++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/signalfd.h           |  5 +++++
 include/linux/syscalls.h           |  1 +
 kernel/sys_ni.c                    |  1 +
 9 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 021d71bc69b..c308128b925 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -826,4 +826,5 @@ ia32_sys_call_table:
 	.quad sys32_fallocate
 	.quad compat_sys_timerfd_settime	/* 325 */
 	.quad compat_sys_timerfd_gettime
+	.quad compat_sys_signalfd4
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index adff5562f5f..c12a36c9fd5 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -326,3 +326,4 @@ ENTRY(sys_call_table)
 	.long sys_fallocate
 	.long sys_timerfd_settime	/* 325 */
 	.long sys_timerfd_gettime
+	.long sys_signalfd4
diff --git a/fs/compat.c b/fs/compat.c
index b4660428176..106eba28ec5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
 
 #ifdef CONFIG_SIGNALFD
 
-asmlinkage long compat_sys_signalfd(int ufd,
-				    const compat_sigset_t __user *sigmask,
-				    compat_size_t sigsetsize)
+asmlinkage long compat_sys_signalfd4(int ufd,
+				     const compat_sigset_t __user *sigmask,
+				     compat_size_t sigsetsize, int flags)
 {
 	compat_sigset_t ss32;
 	sigset_t tmp;
@@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd,
 	if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
 		return -EFAULT;
 
-	return sys_signalfd(ufd, ksigmask, sizeof(sigset_t));
+	return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
 }
 
+asmlinkage long compat_sys_signalfd(int ufd,
+				    const compat_sigset_t __user *sigmask,
+				    compat_size_t sigsetsize)
+{
+	return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
+}
 #endif /* CONFIG_SIGNALFD */
 
 #ifdef CONFIG_TIMERFD
diff --git a/fs/signalfd.c b/fs/signalfd.c
index ddb328b74bd..c8609fa51a1 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -205,11 +205,15 @@ static const struct file_operations signalfd_fops = {
 	.read		= signalfd_read,
 };
 
-asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
+asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
+			      size_t sizemask, int flags)
 {
 	sigset_t sigmask;
 	struct signalfd_ctx *ctx;
 
+	if (flags & ~SFD_CLOEXEC)
+		return -EINVAL;
+
 	if (sizemask != sizeof(sigset_t) ||
 	    copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
 		return -EINVAL;
@@ -228,7 +232,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 		 * anon_inode_getfd() will install the fd.
 		 */
 		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
-				       0);
+				       flags & O_CLOEXEC);
 		if (ufd < 0)
 			kfree(ctx);
 	} else {
@@ -250,3 +254,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 
 	return ufd;
 }
+
+asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
+			     size_t sizemask)
+{
+	return sys_signalfd4(ufd, user_mask, sizemask, 0);
+}
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 8317d94771d..c310371f561 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -332,6 +332,7 @@
 #define __NR_fallocate		324
 #define __NR_timerfd_settime	325
 #define __NR_timerfd_gettime	326
+#define __NR_signalfd4		327
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index e323994a370..e0a9b45b234 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -641,6 +641,8 @@ __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
 #define __NR_paccept				288
 __SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_signalfd4				289
+__SYSCALL(__NR_signalfd4, sys_signalfd4)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h
index ea037f28df9..8b3f7b7420a 100644
--- a/include/linux/signalfd.h
+++ b/include/linux/signalfd.h
@@ -8,6 +8,11 @@
 #ifndef _LINUX_SIGNALFD_H
 #define _LINUX_SIGNALFD_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
+
+/* Flags for signalfd4.  */
+#define SFD_CLOEXEC O_CLOEXEC
 
 struct signalfd_siginfo {
 	__u32 ssi_signo;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2a2a40af6b2..1c270779784 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,6 +610,7 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
 asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
 asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask);
+asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, int flags);
 asmlinkage long sys_timerfd_create(int clockid, int flags);
 asmlinkage long sys_timerfd_settime(int ufd, int flags,
 				    const struct itimerspec __user *utmr,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2f0b8a2e600..8627c89ae9e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -156,6 +156,7 @@ cond_syscall(sys_ioprio_get);
 
 /* New file descriptors */
 cond_syscall(sys_signalfd);
+cond_syscall(sys_signalfd4);
 cond_syscall(compat_sys_signalfd);
 cond_syscall(sys_timerfd_create);
 cond_syscall(sys_timerfd_settime);
-- 
GitLab


From b087498eb5605673b0f260a7620d91818cd72304 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:25 -0700
Subject: [PATCH 279/853] flag parameters: eventfd

This patch adds the new eventfd2 syscall.  It extends the old eventfd
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag supported is EFD_CLOEXEC, which causes the
close-on-exec flag for the returned file descriptor to be set.

A new name EFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_eventfd2
# ifdef __x86_64__
#  define __NR_eventfd2 290
# elif defined __i386__
#  define __NR_eventfd2 328
# else
#  error "need __NR_eventfd2"
# endif
#endif

#define EFD_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_eventfd2, 1, 0);
  if (fd == -1)
    {
      puts ("eventfd2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("eventfd2(0) sets close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_eventfd2, 1, EFD_CLOEXEC);
  if (fd == -1)
    {
      puts ("eventfd2(EFD_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("eventfd2(EFD_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/eventfd.c                       | 13 +++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/eventfd.h            |  6 ++++++
 include/linux/syscalls.h           |  1 +
 kernel/sys_ni.c                    |  1 +
 8 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index c308128b925..cf0eb31745c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -827,4 +827,5 @@ ia32_sys_call_table:
 	.quad compat_sys_timerfd_settime	/* 325 */
 	.quad compat_sys_timerfd_gettime
 	.quad compat_sys_signalfd4
+	.quad sys_eventfd2
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index c12a36c9fd5..cf112cb11c3 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -327,3 +327,4 @@ ENTRY(sys_call_table)
 	.long sys_timerfd_settime	/* 325 */
 	.long sys_timerfd_gettime
 	.long sys_signalfd4
+	.long sys_eventfd2
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 6094265ca40..bd420e6478a 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -198,11 +198,14 @@ struct file *eventfd_fget(int fd)
 	return file;
 }
 
-asmlinkage long sys_eventfd(unsigned int count)
+asmlinkage long sys_eventfd2(unsigned int count, int flags)
 {
 	int fd;
 	struct eventfd_ctx *ctx;
 
+	if (flags & ~EFD_CLOEXEC)
+		return -EINVAL;
+
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
@@ -214,9 +217,15 @@ asmlinkage long sys_eventfd(unsigned int count)
 	 * When we call this, the initialization must be complete, since
 	 * anon_inode_getfd() will install the fd.
 	 */
-	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 0);
+	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
+			      flags & O_CLOEXEC);
 	if (fd < 0)
 		kfree(ctx);
 	return fd;
 }
 
+asmlinkage long sys_eventfd(unsigned int count)
+{
+	return sys_eventfd2(count, 0);
+}
+
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index c310371f561..edbd8723c93 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -333,6 +333,7 @@
 #define __NR_timerfd_settime	325
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
+#define __NR_eventfd2		328
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index e0a9b45b234..fb059a6feeb 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -643,6 +643,8 @@ __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
 __SYSCALL(__NR_paccept, sys_paccept)
 #define __NR_signalfd4				289
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
+#define __NR_eventfd2				290
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index a701399b7fe..a6c0eaedb1b 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,6 +10,12 @@
 
 #ifdef CONFIG_EVENTFD
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
+
+/* Flags for eventfd2.  */
+#define EFD_CLOEXEC O_CLOEXEC
+
 struct file *eventfd_fget(int fd);
 int eventfd_signal(struct file *file, int n);
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1c270779784..9ab09926a7f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -617,6 +617,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
 				    struct itimerspec __user *otmr);
 asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
 asmlinkage long sys_eventfd(unsigned int count);
+asmlinkage long sys_eventfd2(unsigned int count, int flags);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 8627c89ae9e..2a361ccdc7c 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -164,3 +164,4 @@ cond_syscall(sys_timerfd_gettime);
 cond_syscall(compat_sys_timerfd_settime);
 cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
+cond_syscall(sys_eventfd2);
-- 
GitLab


From 11fcb6c14676023d0bd437841f5dcd670e7990a0 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:26 -0700
Subject: [PATCH 280/853] flag parameters: timerfd_create

The timerfd_create syscall already has a flags parameter.  It just is
unused so far.  This patch changes this by introducing the TFD_CLOEXEC
flag to set the close-on-exec flag for the returned file descriptor.

A new name TFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_timerfd_create
# ifdef __x86_64__
#  define __NR_timerfd_create 283
# elif defined __i386__
#  define __NR_timerfd_create 322
# else
#  error "need __NR_timerfd_create"
# endif
#endif

#define TFD_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, 0);
  if (fd == -1)
    {
      puts ("timerfd_create(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("timerfd_create(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, TFD_CLOEXEC);
  if (fd == -1)
    {
      puts ("timerfd_create(TFD_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("timerfd_create(TFD_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/timerfd.c            | 5 +++--
 include/linux/timerfd.h | 5 +++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/timerfd.c b/fs/timerfd.c
index 77c2bc92cbe..c6ef5e33cb3 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,7 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	int ufd;
 	struct timerfd_ctx *ctx;
 
-	if (flags)
+	if (flags & ~TFD_CLOEXEC)
 		return -EINVAL;
 	if (clockid != CLOCK_MONOTONIC &&
 	    clockid != CLOCK_REALTIME)
@@ -198,7 +198,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	ctx->clockid = clockid;
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
 
-	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 0);
+	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
+			       flags & O_CLOEXEC);
 	if (ufd < 0)
 		kfree(ctx);
 
diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index cf2b10d7573..96ed97dff00 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -8,9 +8,14 @@
 #ifndef _LINUX_TIMERFD_H
 #define _LINUX_TIMERFD_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
 
+/* Flags for timerfd_settime.  */
 #define TFD_TIMER_ABSTIME (1 << 0)
 
+/* Flags for timerfd_create.  */
+#define TFD_CLOEXEC O_CLOEXEC
 
 
 #endif /* _LINUX_TIMERFD_H */
-- 
GitLab


From a0998b50c3f0b8fdd265c63e0032f86ebe377dbf Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:27 -0700
Subject: [PATCH 281/853] flag parameters: epoll_create

This patch adds the new epoll_create2 syscall.  It extends the old epoll_create
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag supported is EPOLL_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name EPOLL_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_epoll_create2
# ifdef __x86_64__
#  define __NR_epoll_create2 291
# elif defined __i386__
#  define __NR_epoll_create2 329
# else
#  error "need __NR_epoll_create2"
# endif
#endif

#define EPOLL_CLOEXEC O_CLOEXEC

/* Test for the epoll_create2 syscall.

   First calls it with flags == 0 and checks that FD_CLOEXEC is clear on
   the returned descriptor, then calls it with EPOLL_CLOEXEC and checks
   that FD_CLOEXEC is set.  Prints "OK" and returns 0 when both checks
   pass; prints a diagnostic and returns 1 at the first failure.  */
int
main (void)
{
  /* Without the flag the descriptor must not be close-on-exec.  */
  int fd = syscall (__NR_epoll_create2, 1, 0);
  if (fd == -1)
    {
      puts ("epoll_create2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("epoll_create2(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  /* With EPOLL_CLOEXEC the descriptor must be close-on-exec.  */
  fd = syscall (__NR_epoll_create2, 1, EPOLL_CLOEXEC);
  if (fd == -1)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      /* This branch means the flag is missing, so say so.  */
      puts ("epoll_create2(EPOLL_CLOEXEC) did not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/eventpoll.c                     | 13 +++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/eventpoll.h          |  4 ++++
 include/linux/syscalls.h           |  1 +
 7 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index cf0eb31745c..04366f08f42 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -828,4 +828,5 @@ ia32_sys_call_table:
 	.quad compat_sys_timerfd_gettime
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
+	.quad sys_epoll_create2
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index cf112cb11c3..4d7007ca263 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -328,3 +328,4 @@ ENTRY(sys_call_table)
 	.long sys_timerfd_gettime
 	.long sys_signalfd4
 	.long sys_eventfd2
+	.long sys_epoll_create2
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9392dd96812..3fd4014f3c5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1046,11 +1046,14 @@ retry:
  * RB tree. With the current implementation, the "size" parameter is ignored
  * (besides sanity checks).
  */
-asmlinkage long sys_epoll_create(int size)
+asmlinkage long sys_epoll_create2(int size, int flags)
 {
 	int error, fd = -1;
 	struct eventpoll *ep;
 
+	if (flags & ~EPOLL_CLOEXEC)
+		return -EINVAL;
+
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
 		     current, size));
 
@@ -1068,7 +1071,8 @@ asmlinkage long sys_epoll_create(int size)
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 0);
+	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+			      flags & O_CLOEXEC);
 	if (fd < 0)
 		ep_free(ep);
 
@@ -1079,6 +1083,11 @@ error_return:
 	return fd;
 }
 
+asmlinkage long sys_epoll_create(int size)
+{
+	return sys_epoll_create2(size, 0);
+}
+
 /*
  * The following function implements the controller interface for
  * the eventpoll file that enables the insertion/removal/change of
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index edbd8723c93..a37d6b0c4e1 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -334,6 +334,7 @@
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
+#define __NR_epoll_create2	329
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index fb059a6feeb..a1a4a5b6e5e 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -645,6 +645,8 @@ __SYSCALL(__NR_paccept, sys_paccept)
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
+#define __NR_epoll_create2			291
+__SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index cf79853967f..1cfaa40059c 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -14,8 +14,12 @@
 #ifndef _LINUX_EVENTPOLL_H
 #define _LINUX_EVENTPOLL_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
 #include <linux/types.h>
 
+/* Flags for epoll_create2.  */
+#define EPOLL_CLOEXEC O_CLOEXEC
 
 /* Valid opcodes to issue to sys_epoll_ctl() */
 #define EPOLL_CTL_ADD 1
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9ab09926a7f..85953240f28 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -430,6 +430,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_epoll_create(int size);
+asmlinkage long sys_epoll_create2(int size, int flags);
 asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
 				struct epoll_event __user *event);
 asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
-- 
GitLab


From 336dd1f70ff62d7dd8655228caed4c5bfc818c56 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:29 -0700
Subject: [PATCH 282/853] flag parameters: dup2

This patch adds the new dup3 syscall.  It extends the old dup2 syscall by one
parameter which is meant to hold a flag value.  Support for the O_CLOEXEC flag
is added in this patch.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_dup3
# ifdef __x86_64__
#  define __NR_dup3 292
# elif defined __i386__
#  define __NR_dup3 330
# else
#  error "need __NR_dup3"
# endif
#endif

/* Test for the dup3 syscall.

   Duplicates descriptor 1 onto descriptor 4, first with flags == 0
   (FD_CLOEXEC must be clear on the result) and then with O_CLOEXEC
   (FD_CLOEXEC must be set).  Prints "OK" and returns 0 when both checks
   pass; prints a diagnostic and returns 1 at the first failure.  */
int
main (void)
{
  /* Without the flag the new descriptor must not be close-on-exec.  */
  int fd = syscall (__NR_dup3, 1, 4, 0);
  if (fd == -1)
    {
      puts ("dup3(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("dup3(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  /* With O_CLOEXEC the new descriptor must be close-on-exec.  */
  fd = syscall (__NR_dup3, 1, 4, O_CLOEXEC);
  if (fd == -1)
    {
      puts ("dup3(O_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      /* This branch means the flag is missing, so say so.  */
      puts ("dup3(O_CLOEXEC) did not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/fcntl.c                         | 15 +++++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/syscalls.h           |  1 +
 6 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 04366f08f42..5614a8f7bed 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -829,4 +829,5 @@ ia32_sys_call_table:
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
 	.quad sys_epoll_create2
+	.quad sys_dup3			/* 330 */
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 4d7007ca263..24a3f1ea6a0 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -329,3 +329,4 @@ ENTRY(sys_call_table)
 	.long sys_signalfd4
 	.long sys_eventfd2
 	.long sys_epoll_create2
+	.long sys_dup3			/* 330 */
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 330a7d78259..9679fcbdeaa 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -125,13 +125,16 @@ static int dupfd(struct file *file, unsigned int start, int cloexec)
 	return fd;
 }
 
-asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
 	int err = -EBADF;
 	struct file * file, *tofree;
 	struct files_struct * files = current->files;
 	struct fdtable *fdt;
 
+	if ((flags & ~O_CLOEXEC) != 0)
+		return -EINVAL;
+
 	spin_lock(&files->file_lock);
 	if (!(file = fcheck(oldfd)))
 		goto out_unlock;
@@ -163,7 +166,10 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 
 	rcu_assign_pointer(fdt->fd[newfd], file);
 	FD_SET(newfd, fdt->open_fds);
-	FD_CLR(newfd, fdt->close_on_exec);
+	if (flags & O_CLOEXEC)
+		FD_SET(newfd, fdt->close_on_exec);
+	else
+		FD_CLR(newfd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
 
 	if (tofree)
@@ -181,6 +187,11 @@ out_fput:
 	goto out;
 }
 
+asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+{
+	return sys_dup3(oldfd, newfd, 0);
+}
+
 asmlinkage long sys_dup(unsigned int fildes)
 {
 	int ret = -EBADF;
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index a37d6b0c4e1..a1f6383bf69 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -335,6 +335,7 @@
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
 #define __NR_epoll_create2	329
+#define __NR_dup3		330
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index a1a4a5b6e5e..f0fb2bd40cd 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -647,6 +647,8 @@ __SYSCALL(__NR_signalfd4, sys_signalfd4)
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
 #define __NR_epoll_create2			291
 __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
+#define __NR_dup3				292
+__SYSCALL(__NR_dup3, sys_dup3)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 85953240f28..034d3358549 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -305,6 +305,7 @@ asmlinkage long sys_fcntl64(unsigned int fd,
 #endif
 asmlinkage long sys_dup(unsigned int fildes);
 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd);
+asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags);
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
 asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd,
 				unsigned long arg);
-- 
GitLab


From ed8cae8ba01348bfd83333f4648dd807b04d7f08 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:30 -0700
Subject: [PATCH 283/853] flag parameters: pipe

This patch introduces the new syscall pipe2 which is like pipe but it also
takes an additional parameter which holds a flag value.  This patch implements
the handling of O_CLOEXEC for the flag.  I did not add support for the new
syscall for the architectures which have a special sys_pipe implementation.  I
think the maintainers of those archs have the chance to go with the unified
implementation but that's up to them.

The implementation introduces do_pipe_flags.  I did that instead of changing
all callers of do_pipe because some of the callers are written in assembler.
I would probably screw up changing the assembly code.  To avoid breaking code
do_pipe is now a small wrapper around do_pipe_flags.  Once all callers are
changed over to do_pipe_flags the old do_pipe function can be removed.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_pipe2
# ifdef __x86_64__
#  define __NR_pipe2 293
# elif defined __i386__
#  define __NR_pipe2 331
# else
#  error "need __NR_pipe2"
# endif
#endif

/* Test for the pipe2 syscall.

   Creates a pipe with flags == 0 and checks that neither end has
   FD_CLOEXEC set, then creates one with O_CLOEXEC and checks that both
   ends have it set.  Prints "OK" and returns 0 when all checks pass;
   prints a diagnostic and returns 1 at the first failure.  */
int
main (void)
{
  int fd[2];

  /* Without the flag neither end may be close-on-exec.  */
  if (syscall (__NR_pipe2, fd, 0) != 0)
    {
      puts ("pipe2(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      int coe = fcntl (fd[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (coe & FD_CLOEXEC)
        {
          printf ("pipe2(0) set close-on-exec for fd[%d]\n", i);
          return 1;
        }
    }
  close (fd[0]);
  close (fd[1]);

  /* With O_CLOEXEC both ends must be close-on-exec.  */
  if (syscall (__NR_pipe2, fd, O_CLOEXEC) != 0)
    {
      puts ("pipe2(O_CLOEXEC) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      int coe = fcntl (fd[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((coe & FD_CLOEXEC) == 0)
        {
          printf ("pipe2(O_CLOEXEC) does not set close-on-exec for fd[%d]\n", i);
          return 1;
        }
    }
  close (fd[0]);
  close (fd[1]);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/ia32/sys_ia32.c          |  2 +-
 arch/ia64/kernel/sys_ia64.c        |  2 +-
 arch/mips/kernel/syscall.c         |  2 +-
 arch/parisc/hpux/sys_hpux.c        |  2 +-
 arch/sh/kernel/sys_sh32.c          |  2 +-
 arch/sparc/kernel/sys_sparc.c      |  2 +-
 arch/sparc64/kernel/sys_sparc.c    |  2 +-
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/ia32/sys_ia32.c           |  2 +-
 arch/x86/kernel/syscall_table_32.S |  1 +
 arch/xtensa/kernel/syscall.c       |  2 +-
 fs/pipe.c                          | 23 ++++++++++++++++++-----
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/fs.h                 |  1 +
 15 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 7e028ceb93b..465116aecb8 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1139,7 +1139,7 @@ sys32_pipe (int __user *fd)
 	int retval;
 	int fds[2];
 
-	retval = do_pipe(fds);
+	retval = do_pipe_flags(fds, 0);
 	if (retval)
 		goto out;
 	if (copy_to_user(fd, fds, sizeof(fds)))
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 1eda194b955..bcbb6d8792d 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -160,7 +160,7 @@ sys_pipe (void)
 	int fd[2];
 	int retval;
 
-	retval = do_pipe(fd);
+	retval = do_pipe_flags(fd, 0);
 	if (retval)
 		goto out;
 	retval = fd[0];
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 3523c8d12ed..343015a2f41 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -52,7 +52,7 @@ asmlinkage int sysm_pipe(nabi_no_regargs volatile struct pt_regs regs)
 	int fd[2];
 	int error, res;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (error) {
 		res = error;
 		goto out;
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 0c5b9dabb47..be255ebb609 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -448,7 +448,7 @@ int hpux_pipe(int *kstack_fildes)
 	int error;
 
 	lock_kernel();
-	error = do_pipe(kstack_fildes);
+	error = do_pipe_flags(kstack_fildes, 0);
 	unlock_kernel();
 	return error;
 }
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index 125e493ead8..f0aa5c39865 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -29,7 +29,7 @@ asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (!error) {
 		regs->regs[1] = fd[1];
 		return fd[0];
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 3c6b49a53ae..4d73421559c 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -97,7 +97,7 @@ asmlinkage int sparc_pipe(struct pt_regs *regs)
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (error)
 		goto out;
 	regs->u_regs[UREG_I1] = fd[1];
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index e1f4eba2e57..39749e32dc7 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -418,7 +418,7 @@ asmlinkage long sparc_pipe(struct pt_regs *regs)
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (error)
 		goto out;
 	regs->u_regs[UREG_I1] = fd[1];
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 5614a8f7bed..18808b16457 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -830,4 +830,5 @@ ia32_sys_call_table:
 	.quad sys_eventfd2
 	.quad sys_epoll_create2
 	.quad sys_dup3			/* 330 */
+	.quad sys_pipe2
 ia32_syscall_end:
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index f00afdf61e6..d3c64088b98 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -238,7 +238,7 @@ asmlinkage long sys32_pipe(int __user *fd)
 	int retval;
 	int fds[2];
 
-	retval = do_pipe(fds);
+	retval = do_pipe_flags(fds, 0);
 	if (retval)
 		goto out;
 	if (copy_to_user(fd, fds, sizeof(fds)))
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 24a3f1ea6a0..66154769d52 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -330,3 +330,4 @@ ENTRY(sys_call_table)
 	.long sys_eventfd2
 	.long sys_epoll_create2
 	.long sys_dup3			/* 330 */
+	.long sys_pipe2
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index f3e16efcd47..ac15ecbdf91 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -49,7 +49,7 @@ asmlinkage long xtensa_pipe(int __user *userfds)
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (!error) {
 		if (copy_to_user(userfds, fd, 2 * sizeof(int)))
 			error = -EFAULT;
diff --git a/fs/pipe.c b/fs/pipe.c
index 700f4e0d957..68e82061070 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1027,12 +1027,15 @@ struct file *create_read_pipe(struct file *wrf)
 	return f;
 }
 
-int do_pipe(int *fd)
+int do_pipe_flags(int *fd, int flags)
 {
 	struct file *fw, *fr;
 	int error;
 	int fdw, fdr;
 
+	if (flags & ~O_CLOEXEC)
+		return -EINVAL;
+
 	fw = create_write_pipe();
 	if (IS_ERR(fw))
 		return PTR_ERR(fw);
@@ -1041,12 +1044,12 @@ int do_pipe(int *fd)
 	if (IS_ERR(fr))
 		goto err_write_pipe;
 
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		goto err_read_pipe;
 	fdr = error;
 
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		goto err_fdr;
 	fdw = error;
@@ -1074,16 +1077,21 @@ int do_pipe(int *fd)
 	return error;
 }
 
+int do_pipe(int *fd)
+{
+	return do_pipe_flags(fd, 0);
+}
+
 /*
  * sys_pipe() is the normal C calling standard for creating
  * a pipe. It's not the way Unix traditionally does this, though.
  */
-asmlinkage long __weak sys_pipe(int __user *fildes)
+asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
 {
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, flags);
 	if (!error) {
 		if (copy_to_user(fildes, fd, sizeof(fd))) {
 			sys_close(fd[0]);
@@ -1094,6 +1102,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes)
 	return error;
 }
 
+asmlinkage long __weak sys_pipe(int __user *fildes)
+{
+	return sys_pipe2(fildes, 0);
+}
+
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
  * no real gain from having the whole whorehouse mounted. So we don't need
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index a1f6383bf69..748a05c77da 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -336,6 +336,7 @@
 #define __NR_eventfd2		328
 #define __NR_epoll_create2	329
 #define __NR_dup3		330
+#define __NR_pipe2		331
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index f0fb2bd40cd..d2284b43ad5 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -649,6 +649,8 @@ __SYSCALL(__NR_eventfd2, sys_eventfd2)
 __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 #define __NR_dup3				292
 __SYSCALL(__NR_dup3, sys_dup3)
+#define __NR_pipe2				293
+__SYSCALL(__NR_pipe2, sys_pipe2)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e5e6a244096..0e80cd717d3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1777,6 +1777,7 @@ static inline void allow_write_access(struct file *file)
 		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
 }
 extern int do_pipe(int *);
+extern int do_pipe_flags(int *, int);
 extern struct file *create_read_pipe(struct file *f);
 extern struct file *create_write_pipe(void);
 extern void free_write_pipe(struct file *);
-- 
GitLab


From 4006553b06306b34054529477b06b68a1c66249b Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:32 -0700
Subject: [PATCH 284/853] flag parameters: inotify_init

This patch introduces the new syscall inotify_init1 (note: the 1 stands for
the one parameter the syscall takes, as opposed to no parameter before).  The
values accepted for this parameter are function-specific and defined in the
inotify.h header.  Here the values must match the O_* flags, though.  In this
patch CLOEXEC support is introduced.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_inotify_init1
# ifdef __x86_64__
#  define __NR_inotify_init1 294
# elif defined __i386__
#  define __NR_inotify_init1 332
# else
#  error "need __NR_inotify_init1"
# endif
#endif

#define IN_CLOEXEC O_CLOEXEC

/* Test for the inotify_init1 syscall.

   Calls it with flags == 0 and checks that FD_CLOEXEC is clear on the
   returned descriptor, then calls it with IN_CLOEXEC and checks that
   FD_CLOEXEC is set.  Prints "OK" and returns 0 when both checks pass;
   prints a diagnostic and returns 1 at the first failure.  */
int
main (void)
{
  int fd;

  /* Without the flag the descriptor must not be close-on-exec.  */
  fd = syscall (__NR_inotify_init1, 0);
  if (fd == -1)
    {
      puts ("inotify_init1(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("inotify_init1(0) set close-on-exec");
      return 1;
    }
  close (fd);

  /* With IN_CLOEXEC the descriptor must be close-on-exec.  */
  fd = syscall (__NR_inotify_init1, IN_CLOEXEC);
  if (fd == -1)
    {
      puts ("inotify_init1(IN_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      /* Name the flag that was actually passed above.  */
      puts ("inotify_init1(IN_CLOEXEC) does not set close-on-exec");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/inotify_user.c                  | 12 ++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/inotify.h            |  5 +++++
 include/linux/syscalls.h           |  1 +
 kernel/sys_ni.c                    |  1 +
 8 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 18808b16457..4541073dd83 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -831,4 +831,5 @@ ia32_sys_call_table:
 	.quad sys_epoll_create2
 	.quad sys_dup3			/* 330 */
 	.quad sys_pipe2
+	.quad sys_inotify_init1
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 66154769d52..f59aba5ff0f 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -331,3 +331,4 @@ ENTRY(sys_call_table)
 	.long sys_epoll_create2
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
+	.long sys_inotify_init1
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 6676c06bb7c..851005998cd 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = {
 	.destroy_watch	= free_inotify_user_watch,
 };
 
-asmlinkage long sys_inotify_init(void)
+asmlinkage long sys_inotify_init1(int flags)
 {
 	struct inotify_device *dev;
 	struct inotify_handle *ih;
@@ -574,7 +574,10 @@ asmlinkage long sys_inotify_init(void)
 	struct file *filp;
 	int fd, ret;
 
-	fd = get_unused_fd();
+	if (flags & ~IN_CLOEXEC)
+		return -EINVAL;
+
+	fd = get_unused_fd_flags(flags & O_CLOEXEC);
 	if (fd < 0)
 		return fd;
 
@@ -638,6 +641,11 @@ out_put_fd:
 	return ret;
 }
 
+asmlinkage long sys_inotify_init(void)
+{
+	return sys_inotify_init1(0);
+}
+
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 {
 	struct inode *inode;
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 748a05c77da..b3daf503ab9 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -337,6 +337,7 @@
 #define __NR_epoll_create2	329
 #define __NR_dup3		330
 #define __NR_pipe2		331
+#define __NR_inotify_init1	332
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index d2284b43ad5..c8cb88d70c6 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -651,6 +651,8 @@ __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 __SYSCALL(__NR_dup3, sys_dup3)
 #define __NR_pipe2				293
 __SYSCALL(__NR_pipe2, sys_pipe2)
+#define __NR_inotify_init1			294
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 742b917e7d1..72ef8212051 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -7,6 +7,8 @@
 #ifndef _LINUX_INOTIFY_H
 #define _LINUX_INOTIFY_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
 #include <linux/types.h>
 
 /*
@@ -63,6 +65,9 @@ struct inotify_event {
 			 IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF | \
 			 IN_MOVE_SELF)
 
+/* Flags for sys_inotify_init1.  */
+#define IN_CLOEXEC O_CLOEXEC
+
 #ifdef __KERNEL__
 
 #include <linux/dcache.h>
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 034d3358549..93a7e7f017a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -547,6 +547,7 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 				unsigned long addr, unsigned long flags);
 
 asmlinkage long sys_inotify_init(void);
+asmlinkage long sys_inotify_init1(int flags);
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path,
 					u32 mask);
 asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2a361ccdc7c..bd66ac5406f 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -96,6 +96,7 @@ cond_syscall(sys_keyctl);
 cond_syscall(compat_sys_keyctl);
 cond_syscall(compat_sys_socketcall);
 cond_syscall(sys_inotify_init);
+cond_syscall(sys_inotify_init1);
 cond_syscall(sys_inotify_add_watch);
 cond_syscall(sys_inotify_rm_watch);
 cond_syscall(sys_migrate_pages);
-- 
GitLab


From 99829b832997d907c30669bfd17da32151e18f04 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:33 -0700
Subject: [PATCH 285/853] flag parameters: NONBLOCK in anon_inode_getfd

Building on the previous change to anon_inode_getfd, this patch introduces
support for handling of O_NONBLOCK in addition to the already supported
O_CLOEXEC.  Following patches will take advantage of this support.  As can be
seen, the additional code needed to support this functionality is minimal.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/anon_inodes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 1a4eee620b0..3662dd44896 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -116,7 +116,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
 	file->f_mapping = anon_inode_inode->i_mapping;
 
 	file->f_pos = 0;
-	file->f_flags = O_RDWR;
+	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->f_version = 0;
 	file->private_data = priv;
 
-- 
GitLab


From 77d2720059618b9b6e827a8b73831eb6c6fad63c Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:35 -0700
Subject: [PATCH 286/853] flag parameters: NONBLOCK in socket and socketpair

This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and paccept.  To do this the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.

Given that in modern, scalable programs almost all socket connections are
non-blocking and the minimal additional cost for the new functionality
I see no reason not to add this code.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

/* Fallback for when __NR_paccept is not already defined: x86-64 gets a
   dedicated syscall number, while i386 is routed through __NR_socketcall
   using the SYS_PACCEPT sub-call number.  Other architectures must supply
   their own value.  */
#ifndef __NR_paccept
# ifdef __x86_64__
#  define __NR_paccept 288
# elif defined __i386__
#  define SYS_PACCEPT 18
#  define USE_SOCKETCALL 1
# else
#  error "need __NR_paccept"
# endif
#endif

/* paccept wrapper.  With USE_SOCKETCALL the six arguments are packed into
   a long array handed to __NR_socketcall; otherwise __NR_paccept is invoked
   directly.  The literal 8 fills the slot between mask and flags --
   presumably the size of the signal mask in bytes; confirm against the
   kernel's paccept prototype.  */
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif

#define PORT 57392

#define SOCK_NONBLOCK O_NONBLOCK

static pthread_barrier_t b;

/* Client thread for the paccept checks in main.  It runs two identical
   rounds, one per paccept call: wait on the barrier, connect to PORT on
   the loopback address, close the connection, and wait on the barrier
   again.  Return values of socket and connect are deliberately ignored;
   a failed connection surfaces as a paccept failure in main.  ARG is
   unused.  Always returns NULL.  */
static void *
tf (void *arg)
{
  struct sockaddr_in peer;
  peer.sin_family = AF_INET;
  peer.sin_addr.s_addr = htonl (INADDR_LOOPBACK);

  for (int round = 0; round < 2; ++round)
    {
      /* Do not connect before main has reached its matching barrier.  */
      pthread_barrier_wait (&b);

      int conn = socket (AF_INET, SOCK_STREAM, 0);
      peer.sin_port = htons (PORT);
      connect (conn, (const struct sockaddr *) &peer, sizeof (peer));
      close (conn);

      /* End of this round for both threads.  */
      pthread_barrier_wait (&b);
    }

  return NULL;
}

/* Exercise SOCK_NONBLOCK in socket, socketpair, and paccept.  For each
   call, first verify that omitting the flag leaves O_NONBLOCK clear on
   the new descriptor(s), then that passing the flag sets O_NONBLOCK.
   Prints "OK" and returns 0 on success; prints a diagnostic and returns
   1 on the first failure.  */
int
main (void)
{
  /* socket without the flag: descriptor must be blocking.  */
  int fd;
  fd = socket (PF_INET, SOCK_STREAM, 0);
  if (fd == -1)
    {
      puts ("socket(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("socket(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  /* socket with SOCK_NONBLOCK: descriptor must be non-blocking.  */
  fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
  if (fd == -1)
    {
      puts ("socket(SOCK_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (fd);

  /* socketpair without the flag: both ends must be blocking.  */
  int fds[2];
  if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
    {
      puts ("socketpair(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (fl & O_NONBLOCK)
        {
          printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  /* socketpair with SOCK_NONBLOCK: both ends must be non-blocking.  */
  if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
    {
      puts ("socketpair(SOCK_NONBLOCK) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((fl & O_NONBLOCK) == 0)
        {
          printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  /* The paccept checks need a peer: thread tf connects once per round.
     In each round the first barrier guarantees listen has been called
     before the client connects; the second marks the end of the round
     for both threads.  */
  pthread_barrier_init (&b, NULL, 2);

  struct sockaddr_in sin;
  pthread_t th;
  if (pthread_create (&th, NULL, tf, NULL) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  /* paccept without the flag: accepted socket must be blocking.  */
  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  fl = fcntl (s2, F_GETFL);
  /* A failed fcntl returns -1, whose bits include O_NONBLOCK; report it
     as an fcntl failure rather than as a wrong mode.  */
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("paccept(0) set non-blocking mode");
      return 1;
    }
  close (s2);
  close (s);

  pthread_barrier_wait (&b);

  /* Second round: fresh listener on the same port.  */
  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  /* paccept with SOCK_NONBLOCK: accepted socket must be non-blocking.  */
  s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_NONBLOCK) failed");
      return 1;
    }

  fl = fcntl (s2, F_GETFL);
  /* Without this check a failed fcntl (-1) would look like O_NONBLOCK
     being set and the test would pass falsely.  */
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (s2);
  close (s);

  pthread_barrier_wait (&b);
  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/net.h |  2 +-
 net/socket.c        | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 39a23af059b..2f999fbb188 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -20,7 +20,7 @@
 
 #include <linux/wait.h>
 #include <linux/socket.h>
-#include <linux/fcntl.h>	/* For O_CLOEXEC */
+#include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
 #include <asm/socket.h>
 
 struct poll_table_struct;
diff --git a/net/socket.c b/net/socket.c
index d163adff95b..31105f9048a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -369,7 +369,7 @@ static int sock_alloc_fd(struct file **filep, int flags)
 	return fd;
 }
 
-static int sock_attach_fd(struct socket *sock, struct file *file)
+static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
 {
 	struct dentry *dentry;
 	struct qstr name = { .name = "" };
@@ -391,7 +391,7 @@ static int sock_attach_fd(struct socket *sock, struct file *file)
 	init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
-	file->f_flags = O_RDWR;
+	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->f_pos = 0;
 	file->private_data = sock;
 
@@ -404,7 +404,7 @@ int sock_map_fd(struct socket *sock, int flags)
 	int fd = sock_alloc_fd(&newfile, flags);
 
 	if (likely(fd >= 0)) {
-		int err = sock_attach_fd(sock, newfile);
+		int err = sock_attach_fd(sock, newfile, flags);
 
 		if (unlikely(err < 0)) {
 			put_filp(newfile);
@@ -1223,7 +1223,7 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 	int flags;
 
 	flags = type & ~SOCK_TYPE_MASK;
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
@@ -1234,7 +1234,7 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 	if (retval < 0)
 		goto out;
 
-	retval = sock_map_fd(sock, flags & O_CLOEXEC);
+	retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
 	if (retval < 0)
 		goto out_release;
 
@@ -1260,7 +1260,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	int flags;
 
 	flags = type & ~SOCK_TYPE_MASK;
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
@@ -1298,12 +1298,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 		goto out_release_both;
 	}
 
-	err = sock_attach_fd(sock1, newfile1);
+	err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
 	if (unlikely(err < 0)) {
 		goto out_fd2;
 	}
 
-	err = sock_attach_fd(sock2, newfile2);
+	err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
 	if (unlikely(err < 0)) {
 		fput(newfile1);
 		goto out_fd1;
@@ -1429,7 +1429,7 @@ long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	int err, len, newfd, fput_needed;
 	struct sockaddr_storage address;
 
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 
 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
@@ -1459,7 +1459,7 @@ long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 		goto out_put;
 	}
 
-	err = sock_attach_fd(newsock, newfile);
+	err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
 	if (err < 0)
 		goto out_fd_simple;
 
-- 
GitLab


From 5fb5e04926a54bc1c22bba7ca166840f4476196f Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:37 -0700
Subject: [PATCH 287/853] flag parameters: NONBLOCK in signalfd

This patch adds support for the SFD_NONBLOCK flag to signalfd4.  The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_signalfd4
# ifdef __x86_64__
#  define __NR_signalfd4 289
# elif defined __i386__
#  define __NR_signalfd4 327
# else
#  error "need __NR_signalfd4"
# endif
#endif

#define SFD_NONBLOCK O_NONBLOCK

/* Verify that signalfd4 leaves O_NONBLOCK clear when called without
   flags and sets it when called with SFD_NONBLOCK.  Prints "OK" and
   returns 0 on success; otherwise prints a diagnostic and returns 1.  */
int
main (void)
{
  static const struct
  {
    int flags;                  /* flags argument passed to signalfd4 */
    int want_nonblock;          /* expected O_NONBLOCK state afterwards */
    const char *create_failed;  /* printed when the syscall fails */
    const char *wrong_mode;     /* printed when O_NONBLOCK is wrong */
  } cases[] =
    {
      { 0, 0,
        "signalfd4(0) failed",
        "signalfd4(0) set non-blocking mode" },
      { SFD_NONBLOCK, 1,
        "signalfd4(SFD_NONBLOCK) failed",
        "signalfd4(SFD_NONBLOCK) does not set non-blocking mode" },
    };

  sigset_t sigs;
  sigemptyset (&sigs);
  sigaddset (&sigs, SIGUSR1);

  for (size_t c = 0; c < sizeof (cases) / sizeof (cases[0]); ++c)
    {
      /* -1 asks for a new descriptor; 8 is the mask-size argument.  */
      int sfd = syscall (__NR_signalfd4, -1, &sigs, 8, cases[c].flags);
      if (sfd == -1)
        {
          puts (cases[c].create_failed);
          return 1;
        }

      int status = fcntl (sfd, F_GETFL);
      if (status == -1)
        {
          puts ("fcntl failed");
          return 1;
        }

      if (((status & O_NONBLOCK) != 0) != cases[c].want_nonblock)
        {
          puts (cases[c].wrong_mode);
          return 1;
        }
      close (sfd);
    }

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/signalfd.c            | 4 ++--
 include/linux/signalfd.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/signalfd.c b/fs/signalfd.c
index c8609fa51a1..5441a4bca77 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -211,7 +211,7 @@ asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
 	sigset_t sigmask;
 	struct signalfd_ctx *ctx;
 
-	if (flags & ~SFD_CLOEXEC)
+	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
 		return -EINVAL;
 
 	if (sizemask != sizeof(sigset_t) ||
@@ -232,7 +232,7 @@ asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
 		 * anon_inode_getfd() will install the fd.
 		 */
 		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
-				       flags & O_CLOEXEC);
+				       flags & (O_CLOEXEC | O_NONBLOCK));
 		if (ufd < 0)
 			kfree(ctx);
 	} else {
diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h
index 8b3f7b7420a..bef0c46d471 100644
--- a/include/linux/signalfd.h
+++ b/include/linux/signalfd.h
@@ -8,11 +8,12 @@
 #ifndef _LINUX_SIGNALFD_H
 #define _LINUX_SIGNALFD_H
 
-/* For O_CLOEXEC */
+/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 
 /* Flags for signalfd4.  */
 #define SFD_CLOEXEC O_CLOEXEC
+#define SFD_NONBLOCK O_NONBLOCK
 
 struct signalfd_siginfo {
 	__u32 ssi_signo;
-- 
GitLab


From e7d476dfdf0bcfed478a207aecfdc84f81efecaf Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:38 -0700
Subject: [PATCH 288/853] flag parameters: NONBLOCK in eventfd

This patch adds support for the EFD_NONBLOCK flag to eventfd2.  The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_eventfd2
# ifdef __x86_64__
#  define __NR_eventfd2 290
# elif defined __i386__
#  define __NR_eventfd2 328
# else
#  error "need __NR_eventfd2"
# endif
#endif

#define EFD_NONBLOCK O_NONBLOCK

/* Verify that eventfd2 leaves O_NONBLOCK clear when called without
   flags and sets it when called with EFD_NONBLOCK.  Prints "OK" and
   returns 0 on success; otherwise prints a diagnostic and returns 1.  */
int
main (void)
{
  static const struct
  {
    int flags;                  /* flags argument passed to eventfd2 */
    int want_nonblock;          /* expected O_NONBLOCK state afterwards */
    const char *create_failed;  /* printed when the syscall fails */
    const char *wrong_mode;     /* printed when O_NONBLOCK is wrong */
  } cases[] =
    {
      { 0, 0,
        "eventfd2(0) failed",
        "eventfd2(0) sets non-blocking mode" },
      { EFD_NONBLOCK, 1,
        "eventfd2(EFD_NONBLOCK) failed",
        "eventfd2(EFD_NONBLOCK) does not set non-blocking mode" },
    };

  for (size_t c = 0; c < sizeof (cases) / sizeof (cases[0]); ++c)
    {
      /* The first argument (1) is the initial counter value.  */
      int efd = syscall (__NR_eventfd2, 1, cases[c].flags);
      if (efd == -1)
        {
          puts (cases[c].create_failed);
          return 1;
        }

      int status = fcntl (efd, F_GETFL);
      if (status == -1)
        {
          puts ("fcntl failed");
          return 1;
        }

      if (((status & O_NONBLOCK) != 0) != cases[c].want_nonblock)
        {
          puts (cases[c].wrong_mode);
          return 1;
        }
      close (efd);
    }

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventfd.c            | 4 ++--
 include/linux/eventfd.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index bd420e6478a..3ed4466177a 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -203,7 +203,7 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags)
 	int fd;
 	struct eventfd_ctx *ctx;
 
-	if (flags & ~EFD_CLOEXEC)
+	if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
 		return -EINVAL;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
@@ -218,7 +218,7 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags)
 	 * anon_inode_getfd() will install the fd.
 	 */
 	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
-			      flags & O_CLOEXEC);
+			      flags & (O_CLOEXEC | O_NONBLOCK));
 	if (fd < 0)
 		kfree(ctx);
 	return fd;
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index a6c0eaedb1b..a667637b54e 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,11 +10,12 @@
 
 #ifdef CONFIG_EVENTFD
 
-/* For O_CLOEXEC */
+/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 
 /* Flags for eventfd2.  */
 #define EFD_CLOEXEC O_CLOEXEC
+#define EFD_NONBLOCK O_NONBLOCK
 
 struct file *eventfd_fget(int fd);
 int eventfd_signal(struct file *file, int n);
-- 
GitLab


From 6b1ef0e60d42f2fdaec26baee8327eb156347b4f Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:39 -0700
Subject: [PATCH 289/853] flag parameters: NONBLOCK in timerfd_create

This patch adds support for the TFD_NONBLOCK flag to timerfd_create.  The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_timerfd_create
# ifdef __x86_64__
#  define __NR_timerfd_create 283
# elif defined __i386__
#  define __NR_timerfd_create 322
# else
#  error "need __NR_timerfd_create"
# endif
#endif

#define TFD_NONBLOCK O_NONBLOCK

/* Verify that timerfd_create leaves O_NONBLOCK clear when called
   without flags and sets it when called with TFD_NONBLOCK.  Prints "OK"
   and returns 0 on success; otherwise prints a diagnostic and returns
   1.  */
int
main (void)
{
  /* No flags: the descriptor must be in blocking mode.  */
  int fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, 0);
  if (fd == -1)
    {
      puts ("timerfd_create(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("timerfd_create(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  /* TFD_NONBLOCK: the descriptor must be in non-blocking mode.  */
  fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, TFD_NONBLOCK);
  if (fd == -1)
    {
      puts ("timerfd_create(TFD_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      /* This branch fires when the flag is missing, so say so.  */
      puts ("timerfd_create(TFD_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/timerfd.c            | 4 ++--
 include/linux/timerfd.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/timerfd.c b/fs/timerfd.c
index c6ef5e33cb3..75d44efe346 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,7 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	int ufd;
 	struct timerfd_ctx *ctx;
 
-	if (flags & ~TFD_CLOEXEC)
+	if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
 		return -EINVAL;
 	if (clockid != CLOCK_MONOTONIC &&
 	    clockid != CLOCK_REALTIME)
@@ -199,7 +199,7 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
 
 	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
-			       flags & O_CLOEXEC);
+			       flags & (O_CLOEXEC | O_NONBLOCK));
 	if (ufd < 0)
 		kfree(ctx);
 
diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index 96ed97dff00..86cb0501d3e 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -8,7 +8,7 @@
 #ifndef _LINUX_TIMERFD_H
 #define _LINUX_TIMERFD_H
 
-/* For O_CLOEXEC */
+/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 
 /* Flags for timerfd_settime.  */
@@ -16,6 +16,7 @@
 
 /* Flags for timerfd_create.  */
 #define TFD_CLOEXEC O_CLOEXEC
+#define TFD_NONBLOCK O_NONBLOCK
 
 
 #endif /* _LINUX_TIMERFD_H */
-- 
GitLab


From be61a86d7237dd80510615f38ae21d6e1e98660c Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:40 -0700
Subject: [PATCH 290/853] flag parameters: NONBLOCK in pipe

This patch adds O_NONBLOCK support to pipe2.  It is minimally more involved
than the patches for eventfd et al. but still trivial.  The interfaces of the
create_write_pipe and create_read_pipe helper functions were changed, and
their one other caller was updated as well.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_pipe2
# ifdef __x86_64__
#  define __NR_pipe2 293
# elif defined __i386__
#  define __NR_pipe2 331
# else
#  error "need __NR_pipe2"
# endif
#endif

/* Verify that pipe2 leaves O_NONBLOCK clear on both pipe ends when
   called without flags and sets it on both ends when called with
   O_NONBLOCK.  Prints "OK" and returns 0 on success; otherwise prints a
   diagnostic and returns 1.  */
int
main (void)
{
  static const int flag_cases[2] = { 0, O_NONBLOCK };
  int ends[2];

  for (int c = 0; c < 2; ++c)
    {
      int want_nonblock = flag_cases[c] != 0;

      if (syscall (__NR_pipe2, ends, flag_cases[c]) == -1)
        {
          if (want_nonblock)
            puts ("pipe2(O_NONBLOCK) failed");
          else
            puts ("pipe2(0) failed");
          return 1;
        }

      /* Both the read end and the write end must agree with the flag.  */
      for (int idx = 0; idx < 2; ++idx)
        {
          int status = fcntl (ends[idx], F_GETFL);
          if (status == -1)
            {
              puts ("fcntl failed");
              return 1;
            }

          int is_nonblock = (status & O_NONBLOCK) != 0;
          if (is_nonblock && !want_nonblock)
            {
              printf ("pipe2(0) set non-blocking mode for fds[%d]\n", idx);
              return 1;
            }
          if (!is_nonblock && want_nonblock)
            {
              printf ("pipe2(O_NONBLOCK) does not set non-blocking mode for fds[%d]\n", idx);
              return 1;
            }
          close (ends[idx]);
        }
    }

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c          | 14 +++++++-------
 include/linux/fs.h |  4 ++--
 kernel/kmod.c      |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 68e82061070..10c4e9aa5c4 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -950,7 +950,7 @@ fail_inode:
 	return NULL;
 }
 
-struct file *create_write_pipe(void)
+struct file *create_write_pipe(int flags)
 {
 	int err;
 	struct inode *inode;
@@ -983,7 +983,7 @@ struct file *create_write_pipe(void)
 		goto err_dentry;
 	f->f_mapping = inode->i_mapping;
 
-	f->f_flags = O_WRONLY;
+	f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
 	f->f_version = 0;
 
 	return f;
@@ -1007,7 +1007,7 @@ void free_write_pipe(struct file *f)
 	put_filp(f);
 }
 
-struct file *create_read_pipe(struct file *wrf)
+struct file *create_read_pipe(struct file *wrf, int flags)
 {
 	struct file *f = get_empty_filp();
 	if (!f)
@@ -1019,7 +1019,7 @@ struct file *create_read_pipe(struct file *wrf)
 	f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
 
 	f->f_pos = 0;
-	f->f_flags = O_RDONLY;
+	f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 	f->f_op = &read_pipe_fops;
 	f->f_mode = FMODE_READ;
 	f->f_version = 0;
@@ -1033,13 +1033,13 @@ int do_pipe_flags(int *fd, int flags)
 	int error;
 	int fdw, fdr;
 
-	if (flags & ~O_CLOEXEC)
+	if (flags & ~(O_CLOEXEC | O_NONBLOCK))
 		return -EINVAL;
 
-	fw = create_write_pipe();
+	fw = create_write_pipe(flags);
 	if (IS_ERR(fw))
 		return PTR_ERR(fw);
-	fr = create_read_pipe(fw);
+	fr = create_read_pipe(fw, flags);
 	error = PTR_ERR(fr);
 	if (IS_ERR(fr))
 		goto err_write_pipe;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0e80cd717d3..4b86f806014 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1778,8 +1778,8 @@ static inline void allow_write_access(struct file *file)
 }
 extern int do_pipe(int *);
 extern int do_pipe_flags(int *, int);
-extern struct file *create_read_pipe(struct file *f);
-extern struct file *create_write_pipe(void);
+extern struct file *create_read_pipe(struct file *f, int flags);
+extern struct file *create_write_pipe(int flags);
 extern void free_write_pipe(struct file *);
 
 extern struct file *do_filp_open(int dfd, const char *pathname,
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 90d7af1c165..2989f67c444 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -417,12 +417,12 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
 {
 	struct file *f;
 
-	f = create_write_pipe();
+	f = create_write_pipe(0);
 	if (IS_ERR(f))
 		return PTR_ERR(f);
 	*filp = f;
 
-	f = create_read_pipe(f);
+	f = create_read_pipe(f, 0);
 	if (IS_ERR(f)) {
 		free_write_pipe(*filp);
 		return PTR_ERR(f);
-- 
GitLab


From 510df2dd482496083e1c3b1a8c9b6afd5fa4c7d7 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:41 -0700
Subject: [PATCH 291/853] flag parameters: NONBLOCK in inotify_init

This patch adds non-blocking support for inotify_init1.  The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_inotify_init1
# ifdef __x86_64__
#  define __NR_inotify_init1 294
# elif defined __i386__
#  define __NR_inotify_init1 332
# else
#  error "need __NR_inotify_init1"
# endif
#endif

#define IN_NONBLOCK O_NONBLOCK

/* Verify that inotify_init1 leaves O_NONBLOCK clear when called without
   flags and sets it when called with IN_NONBLOCK.  Prints "OK" and
   returns 0 on success; otherwise prints a diagnostic and returns 1.  */
int
main (void)
{
  /* No flags: the descriptor must be in blocking mode.  */
  int fd = syscall (__NR_inotify_init1, 0);
  if (fd == -1)
    {
      puts ("inotify_init1(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("inotify_init1(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  /* IN_NONBLOCK: the descriptor must be in non-blocking mode.  */
  fd = syscall (__NR_inotify_init1, IN_NONBLOCK);
  if (fd == -1)
    {
      puts ("inotify_init1(IN_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      /* This branch fires when the flag is missing, so say so.  */
      puts ("inotify_init1(IN_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inotify_user.c       | 4 ++--
 include/linux/inotify.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 851005998cd..dc7e1f61974 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -574,7 +574,7 @@ asmlinkage long sys_inotify_init1(int flags)
 	struct file *filp;
 	int fd, ret;
 
-	if (flags & ~IN_CLOEXEC)
+	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
 		return -EINVAL;
 
 	fd = get_unused_fd_flags(flags & O_CLOEXEC);
@@ -613,7 +613,7 @@ asmlinkage long sys_inotify_init1(int flags)
 	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
 	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
 	filp->f_mode = FMODE_READ;
-	filp->f_flags = O_RDONLY;
+	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 	filp->private_data = dev;
 
 	INIT_LIST_HEAD(&dev->events);
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 72ef8212051..bd578578a8b 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -7,7 +7,7 @@
 #ifndef _LINUX_INOTIFY_H
 #define _LINUX_INOTIFY_H
 
-/* For O_CLOEXEC */
+/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 #include <linux/types.h>
 
@@ -67,6 +67,7 @@ struct inotify_event {
 
 /* Flags for sys_inotify_init1.  */
 #define IN_CLOEXEC O_CLOEXEC
+#define IN_NONBLOCK O_NONBLOCK
 
 #ifdef __KERNEL__
 
-- 
GitLab


From e38b36f325153eaadd1c2a7abc5762079233e540 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:42 -0700
Subject: [PATCH 292/853] flag parameters: check magic constants

This patch adds tests that ensure the boundary conditions for the various
constants introduced in the previous patches are met.  No code is generated.

[akpm@linux-foundation.org: fix alpha]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventfd.c      | 4 ++++
 fs/eventpoll.c    | 3 +++
 fs/inotify_user.c | 4 ++++
 fs/signalfd.c     | 4 ++++
 fs/timerfd.c      | 4 ++++
 net/socket.c      | 6 ++++++
 6 files changed, 25 insertions(+)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3ed4466177a..08bf558d040 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -203,6 +203,10 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags)
 	int fd;
 	struct eventfd_ctx *ctx;
 
+	/* Check the EFD_* constants for consistency.  */
+	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
+
 	if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
 		return -EINVAL;
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3fd4014f3c5..2fdad420404 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1051,6 +1051,9 @@ asmlinkage long sys_epoll_create2(int size, int flags)
 	int error, fd = -1;
 	struct eventpoll *ep;
 
+	/* Check the EPOLL_* constant for consistency.  */
+	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
+
 	if (flags & ~EPOLL_CLOEXEC)
 		return -EINVAL;
 
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index dc7e1f61974..fe79c25d95d 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -574,6 +574,10 @@ asmlinkage long sys_inotify_init1(int flags)
 	struct file *filp;
 	int fd, ret;
 
+	/* Check the IN_* constants for consistency.  */
+	BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
+
 	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
 		return -EINVAL;
 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 5441a4bca77..9c39bc7f843 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -211,6 +211,10 @@ asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
 	sigset_t sigmask;
 	struct signalfd_ctx *ctx;
 
+	/* Check the SFD_* constants for consistency.  */
+	BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+
 	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
 		return -EINVAL;
 
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 75d44efe346..c502c60e4f5 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,6 +184,10 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	int ufd;
 	struct timerfd_ctx *ctx;
 
+	/* Check the TFD_* constants for consistency.  */
+	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
+
 	if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
 		return -EINVAL;
 	if (clockid != CLOCK_MONOTONIC &&
diff --git a/net/socket.c b/net/socket.c
index 31105f9048a..1310a82cbba 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1222,6 +1222,12 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 	struct socket *sock;
 	int flags;
 
+	/* Check the SOCK_* constants for consistency.  */
+	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
+	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
+	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
+
 	flags = type & ~SOCK_TYPE_MASK;
 	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
-- 
GitLab


From 9fe5ad9c8cef9ad5873d8ee55d1cf00d9b607df0 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:43 -0700
Subject: [PATCH 293/853] flag parameters add-on: remove epoll_create size
 param

Remove the size parameter from the new epoll_create syscall and rename the
syscall itself.  The updated test program follows.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_epoll_create2
# ifdef __x86_64__
#  define __NR_epoll_create2 291
# elif defined __i386__
#  define __NR_epoll_create2 329
# else
#  error "need __NR_epoll_create2"
# endif
#endif

#define EPOLL_CLOEXEC O_CLOEXEC

/* Verify that the flag-taking epoll_create variant leaves the
   close-on-exec flag clear when called without flags and sets it when
   called with EPOLL_CLOEXEC.  Prints "OK" and returns 0 on success;
   otherwise prints a diagnostic and returns 1.  */
int
main (void)
{
  /* No flags: FD_CLOEXEC must be clear.  */
  int fd = syscall (__NR_epoll_create2, 0);
  if (fd == -1)
    {
      puts ("epoll_create2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("epoll_create2(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  /* EPOLL_CLOEXEC: FD_CLOEXEC must be set.  */
  fd = syscall (__NR_epoll_create2, EPOLL_CLOEXEC);
  if (fd == -1)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      /* This branch fires when the flag is missing, so say so.  */
      puts ("epoll_create2(EPOLL_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  2 +-
 arch/x86/kernel/syscall_table_32.S |  2 +-
 fs/eventpoll.c                     | 18 ++++++++++--------
 include/asm-x86/unistd_32.h        |  2 +-
 include/asm-x86/unistd_64.h        |  4 ++--
 include/linux/eventpoll.h          |  2 +-
 include/linux/syscalls.h           |  2 +-
 7 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 4541073dd83..e4bd1793a5e 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -828,7 +828,7 @@ ia32_sys_call_table:
 	.quad compat_sys_timerfd_gettime
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
-	.quad sys_epoll_create2
+	.quad sys_epoll_create1
 	.quad sys_dup3			/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index f59aba5ff0f..d44395ff34c 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -328,7 +328,7 @@ ENTRY(sys_call_table)
 	.long sys_timerfd_gettime
 	.long sys_signalfd4
 	.long sys_eventfd2
-	.long sys_epoll_create2
+	.long sys_epoll_create1
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
 	.long sys_inotify_init1
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 2fdad420404..0c87474f791 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1046,7 +1046,7 @@ retry:
  * RB tree. With the current implementation, the "size" parameter is ignored
  * (besides sanity checks).
  */
-asmlinkage long sys_epoll_create2(int size, int flags)
+asmlinkage long sys_epoll_create1(int flags)
 {
 	int error, fd = -1;
 	struct eventpoll *ep;
@@ -1058,14 +1058,13 @@ asmlinkage long sys_epoll_create2(int size, int flags)
 		return -EINVAL;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
-		     current, size));
+		     current, flags));
 
 	/*
-	 * Sanity check on the size parameter, and create the internal data
-	 * structure ( "struct eventpoll" ).
+	 * Create the internal data structure ( "struct eventpoll" ).
 	 */
-	error = -EINVAL;
-	if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
+	error = ep_alloc(&ep);
+	if (error < 0) {
 		fd = error;
 		goto error_return;
 	}
@@ -1081,14 +1080,17 @@ asmlinkage long sys_epoll_create2(int size, int flags)
 
 error_return:
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
-		     current, size, fd));
+		     current, flags, fd));
 
 	return fd;
 }
 
 asmlinkage long sys_epoll_create(int size)
 {
-	return sys_epoll_create2(size, 0);
+	if (size <= 0)
+		return -EINVAL;
+
+	return sys_epoll_create1(0);
 }
 
 /*
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index b3daf503ab9..d7394673b77 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -334,7 +334,7 @@
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
-#define __NR_epoll_create2	329
+#define __NR_epoll_create1	329
 #define __NR_dup3		330
 #define __NR_pipe2		331
 #define __NR_inotify_init1	332
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index c8cb88d70c6..3a341d79179 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -645,8 +645,8 @@ __SYSCALL(__NR_paccept, sys_paccept)
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
-#define __NR_epoll_create2			291
-__SYSCALL(__NR_epoll_create2, sys_epoll_create2)
+#define __NR_epoll_create1			291
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
 #define __NR_dup3				292
 __SYSCALL(__NR_dup3, sys_dup3)
 #define __NR_pipe2				293
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 1cfaa40059c..f1e1d3c4712 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -18,7 +18,7 @@
 #include <linux/fcntl.h>
 #include <linux/types.h>
 
-/* Flags for epoll_create2.  */
+/* Flags for epoll_create1.  */
 #define EPOLL_CLOEXEC O_CLOEXEC
 
 /* Valid opcodes to issue to sys_epoll_ctl() */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 93a7e7f017a..06f2bf76c03 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -431,7 +431,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_epoll_create(int size);
-asmlinkage long sys_epoll_create2(int size, int flags);
+asmlinkage long sys_epoll_create1(int flags);
 asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
 				struct epoll_event __user *event);
 asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
-- 
GitLab


From 920519c1c31ca46ef6caab1a4be102ed0dfb5fbc Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:29:44 -0700
Subject: [PATCH 294/853] serial/8250_gsc.c: add MODULE_LICENSE

This patch adds the missing MODULE_LICENSE("GPL").

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_gsc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/serial/8250_gsc.c b/drivers/serial/8250_gsc.c
index 4eb7437a404..0416ad3bc12 100644
--- a/drivers/serial/8250_gsc.c
+++ b/drivers/serial/8250_gsc.c
@@ -119,3 +119,5 @@ int __init probe_serial_gsc(void)
 }
 
 module_init(probe_serial_gsc);
+
+MODULE_LICENSE("GPL");
-- 
GitLab


From 7500b1f602aad75901774a67a687ee985d85893f Mon Sep 17 00:00:00 2001
From: Aristeu Rozanski <arozansk@redhat.com>
Date: Wed, 23 Jul 2008 21:29:45 -0700
Subject: [PATCH 295/853] 8250: fix break handling for Intel 82571

Intel 82571 has a "Serial Over LAN" feature that doesn't properly
implement the reception of break characters.  When a break is received,
it doesn't set UART_LSR_DR and unless another character is received, the
break won't be received by the application.

Signed-off-by: Aristeu Rozanski <arozansk@redhat.com>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 27f34a9f9cb..a97f1ae11f7 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -1293,7 +1293,18 @@ receive_chars(struct uart_8250_port *up, unsigned int *status)
 	char flag;
 
 	do {
-		ch = serial_inp(up, UART_RX);
+		if (likely(lsr & UART_LSR_DR))
+			ch = serial_inp(up, UART_RX);
+		else
+			/*
+			 * Intel 82571 has a Serial Over Lan device that will
+			 * set UART_LSR_BI without setting UART_LSR_DR when
+			 * it receives a break. To avoid reading from the
+			 * receive buffer without UART_LSR_DR bit set, we
+			 * just force the read character to be 0
+			 */
+			ch = 0;
+
 		flag = TTY_NORMAL;
 		up->port.icount.rx++;
 
@@ -1342,7 +1353,7 @@ receive_chars(struct uart_8250_port *up, unsigned int *status)
 
 ignore_char:
 		lsr = serial_inp(up, UART_LSR);
-	} while ((lsr & UART_LSR_DR) && (max_count-- > 0));
+	} while ((lsr & (UART_LSR_DR | UART_LSR_BI)) && (max_count-- > 0));
 	spin_unlock(&up->port.lock);
 	tty_flip_buffer_push(tty);
 	spin_lock(&up->port.lock);
@@ -1425,7 +1436,7 @@ serial8250_handle_port(struct uart_8250_port *up)
 
 	DEBUG_INTR("status = %x...", status);
 
-	if (status & UART_LSR_DR)
+	if (status & (UART_LSR_DR | UART_LSR_BI))
 		receive_chars(up, &status);
 	check_modem_status(up);
 	if (status & UART_LSR_THRE)
-- 
GitLab


From b76c5a0717094f0a900d9afd8e36f7ad8dbba587 Mon Sep 17 00:00:00 2001
From: "Catalin(ux) M BOIE" <catab@embedromix.ro>
Date: Wed, 23 Jul 2008 21:29:46 -0700
Subject: [PATCH 296/853] serial: add support for a no-name 4 ports multiserial
 card

It is a no-name PCI card.  I found no reference to a producer so I used
"UNKNOWN_0x1584" as the name.

Full lspci:
01:07.0 0780: 10b5:9050 (rev 01)
        Subsystem: 10b5:1584
        Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- \
                ParErr- Stepping- SERR+ FastB2B-
        Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- \
                DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
        Interrupt: pin A routed to IRQ 10
        Region 1: I/O ports at ec00 [size=128]
        Region 2: I/O ports at e480 [size=32]
        Region 3: I/O ports at e400 [size=8]
        Capabilities: [40] Power Management version 1
                Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA \
                        PME(D0+,D1-,D2-,D3hot+,D3cold-)
                Status: D0 PME-Enable- DSel=0 DScale=0 PME-
        Capabilities: [48] #06 [0080]
        Capabilities: [4c] Vital Product Data

After:
0000:01:07.0: ttyS4 at I/O 0xe480 (irq = 10) is a 16550A
0000:01:07.0: ttyS5 at I/O 0xe488 (irq = 10) is a 16550A
0000:01:07.0: ttyS6 at I/O 0xe490 (irq = 10) is a 16550A
0000:01:07.0: ttyS7 at I/O 0xe498 (irq = 10) is a 16550A

Signed-off-by: Catalin(ux) M BOIE <catab@embedromix.ro>
Acked-by: Alan Cox <alan@redhat.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 1b36087665a..c2f23933155 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -767,6 +767,9 @@ pci_default_setup(struct serial_private *priv, struct pciserial_board *board,
 #define PCI_SUBDEVICE_ID_POCTAL232	0x0308
 #define PCI_SUBDEVICE_ID_POCTAL422	0x0408
 
+/* Unknown vendors/cards - this should not be in linux/pci_ids.h */
+#define PCI_SUBDEVICE_ID_UNKNOWN_0x1584	0x1584
+
 /*
  * Master list of serial port init/setup/exit quirks.
  * This does not describe the general nature of the port.
@@ -880,6 +883,15 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.setup		= pci_default_setup,
 		.exit		= __devexit_p(pci_plx9050_exit),
 	},
+	{
+		.vendor		= PCI_VENDOR_ID_PLX,
+		.device		= PCI_DEVICE_ID_PLX_9050,
+		.subvendor	= PCI_VENDOR_ID_PLX,
+		.subdevice	= PCI_SUBDEVICE_ID_UNKNOWN_0x1584,
+		.init		= pci_plx9050_init,
+		.setup		= pci_default_setup,
+		.exit		= __devexit_p(pci_plx9050_exit),
+	},
 	{
 		.vendor		= PCI_VENDOR_ID_PLX,
 		.device		= PCI_DEVICE_ID_PLX_ROMULUS,
@@ -2197,6 +2209,11 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_1077,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b2_4_921600 },
+	/* Unknown card - subdevice 0x1584 */
+	{	PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
+		PCI_VENDOR_ID_PLX,
+		PCI_SUBDEVICE_ID_UNKNOWN_0x1584, 0, 0,
+		pbn_b0_4_115200 },
 	{	PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
 		PCI_SUBVENDOR_ID_KEYSPAN,
 		PCI_SUBDEVICE_ID_KEYSPAN_SX2, 0, 0,
-- 
GitLab


From 377135912806ddc87d56d64fafa685f4063c45f1 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 23 Jul 2008 21:29:48 -0700
Subject: [PATCH 297/853] serial: Z85C30: avoid a hang at console switch-over

Changes to the generic console support code that happened a while ago
introduced a scenario where the initial console is used in parallel with
the final console during a brief period when switching between the two is
in progress.  During that time a message about the switch-over is printed.

With some combinations of chips, firmware and drivers, such as the Zilog
Z85C30 SCC used with the DECstation, a hang may happen because the
firmware used for the initial console may not expect the state of the chip
after it has been initialised by the driver.  This is not a bug in the
firmware, as some registers it would have to examine are write-only.

This is a workaround for the Z85C30 which reuses the power-management
callback to keep the transmitter of the line associated with the console
enabled.  It reflects the consensus reached in a discussion a while ago.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/zs.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/serial/zs.c b/drivers/serial/zs.c
index bd45b6230fd..9e6a873f820 100644
--- a/drivers/serial/zs.c
+++ b/drivers/serial/zs.c
@@ -787,7 +787,6 @@ static int zs_startup(struct uart_port *uport)
 	zport->regs[1] &= ~RxINT_MASK;
 	zport->regs[1] |= RxINT_ALL | TxINT_ENAB | EXT_INT_ENAB;
 	zport->regs[3] |= RxENABLE;
-	zport->regs[5] |= TxENAB;
 	zport->regs[15] |= BRKIE;
 	write_zsreg(zport, R1, zport->regs[1]);
 	write_zsreg(zport, R3, zport->regs[3]);
@@ -814,7 +813,6 @@ static void zs_shutdown(struct uart_port *uport)
 
 	spin_lock_irqsave(&scc->zlock, flags);
 
-	zport->regs[5] &= ~TxENAB;
 	zport->regs[3] &= ~RxENABLE;
 	write_zsreg(zport, R5, zport->regs[5]);
 	write_zsreg(zport, R3, zport->regs[3]);
@@ -959,6 +957,23 @@ static void zs_set_termios(struct uart_port *uport, struct ktermios *termios,
 	spin_unlock_irqrestore(&scc->zlock, flags);
 }
 
+/*
+ * Hack alert!
+ * Required solely so that the initial PROM-based console
+ * works undisturbed in parallel with this one.
+ */
+static void zs_pm(struct uart_port *uport, unsigned int state,
+		  unsigned int oldstate)
+{
+	struct zs_port *zport = to_zport(uport);
+
+	if (state < 3)
+		zport->regs[5] |= TxENAB;
+	else
+		zport->regs[5] &= ~TxENAB;
+	write_zsreg(zport, R5, zport->regs[5]);
+}
+
 
 static const char *zs_type(struct uart_port *uport)
 {
@@ -1041,6 +1056,7 @@ static struct uart_ops zs_ops = {
 	.startup	= zs_startup,
 	.shutdown	= zs_shutdown,
 	.set_termios	= zs_set_termios,
+	.pm		= zs_pm,
 	.type		= zs_type,
 	.release_port	= zs_release_port,
 	.request_port	= zs_request_port,
@@ -1190,6 +1206,7 @@ static int __init zs_console_setup(struct console *co, char *options)
 		return ret;
 
 	zs_reset(zport);
+	zs_pm(uport, 0, -1);
 
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
-- 
GitLab


From e9a8f4d1de12633bfb71b5fee47745b32877b7b5 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 23 Jul 2008 21:29:49 -0700
Subject: [PATCH 298/853] serial: DZ11: avoid a hang at console switch-over

Changes to the generic console support code that happened a while ago
introduced a scenario where the initial console is used in parallel with
the final console during a brief period when switching between the two is
in progress.  During that time a message about the switch-over is printed.

With some combinations of chips, firmware and drivers, such as the DEC
DZ11 clone used with the DECstation, a hang may happen because the
firmware used for the initial console may not expect the state of the chip
after it has been initialised by the driver.

This is a workaround for the DZ11 which reuses the power-management
callback to keep the transmitter of the line associated with the console
enabled.  It reflects the consensus reached in a discussion a while ago.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/dz.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/serial/dz.c b/drivers/serial/dz.c
index a81d2c2ff8a..6042b87797a 100644
--- a/drivers/serial/dz.c
+++ b/drivers/serial/dz.c
@@ -642,6 +642,26 @@ static void dz_set_termios(struct uart_port *uport, struct ktermios *termios,
 	spin_unlock_irqrestore(&dport->port.lock, flags);
 }
 
+/*
+ * Hack alert!
+ * Required solely so that the initial PROM-based console
+ * works undisturbed in parallel with this one.
+ */
+static void dz_pm(struct uart_port *uport, unsigned int state,
+		  unsigned int oldstate)
+{
+	struct dz_port *dport = to_dport(uport);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dport->port.lock, flags);
+	if (state < 3)
+		dz_start_tx(&dport->port);
+	else
+		dz_stop_tx(&dport->port);
+	spin_unlock_irqrestore(&dport->port.lock, flags);
+}
+
+
 static const char *dz_type(struct uart_port *uport)
 {
 	return "DZ";
@@ -738,6 +758,7 @@ static struct uart_ops dz_ops = {
 	.startup	= dz_startup,
 	.shutdown	= dz_shutdown,
 	.set_termios	= dz_set_termios,
+	.pm		= dz_pm,
 	.type		= dz_type,
 	.release_port	= dz_release_port,
 	.request_port	= dz_request_port,
@@ -861,7 +882,10 @@ static int __init dz_console_setup(struct console *co, char *options)
 	if (ret)
 		return ret;
 
+	spin_lock_init(&dport->port.lock);	/* For dz_pm().  */
+
 	dz_reset(dport);
+	dz_pm(uport, 0, -1);
 
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
-- 
GitLab


From ae2d4c396e19f45918ed6e0900b031538d009823 Mon Sep 17 00:00:00 2001
From: Nye Liu <nyet@mrv.com>
Date: Wed, 23 Jul 2008 21:29:50 -0700
Subject: [PATCH 299/853] cpm1: don't send break on TX_STOP, don't interrupt
 RX/TX when adjusting termios parameters

Before setting STOP_TX, set _brkcr to 0 so the SMC does not send a break
character.  The driver appears to properly re-initialize _brkcr when the
SMC is restarted.

Do not interrupt RX/TX when the termios is being adjusted; it results in
corrupted characters appearing on the line.

Cc: Vitaly Bordug <vbordug@ru.mvista.com>
Cc: Scott Wood <scottwood@freescale.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/cpm_uart/cpm_uart_core.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index 1ff80de177d..a4f86927a74 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -435,10 +435,13 @@ static void cpm_uart_shutdown(struct uart_port *port)
 		}
 
 		/* Shut them really down and reinit buffer descriptors */
-		if (IS_SMC(pinfo))
+		if (IS_SMC(pinfo)) {
+			out_be16(&pinfo->smcup->smc_brkcr, 0);
 			cpm_line_cr_cmd(pinfo, CPM_CR_STOP_TX);
-		else
+		} else {
+			out_be16(&pinfo->sccup->scc_brkcr, 0);
 			cpm_line_cr_cmd(pinfo, CPM_CR_GRA_STOP_TX);
+		}
 
 		cpm_uart_initbd(pinfo);
 	}
@@ -554,9 +557,11 @@ static void cpm_uart_set_termios(struct uart_port *port,
 		 * enables, because we want to put them back if they were
 		 * present.
 		 */
-		prev_mode = in_be16(&smcp->smc_smcmr);
-		out_be16(&smcp->smc_smcmr, smcr_mk_clen(bits) | cval | SMCMR_SM_UART);
-		setbits16(&smcp->smc_smcmr, (prev_mode & (SMCMR_REN | SMCMR_TEN)));
+		prev_mode = in_be16(&smcp->smc_smcmr) & (SMCMR_REN | SMCMR_TEN);
+		/* Output in *one* operation, so we don't interrupt RX/TX if they
+		 * were already enabled. */
+		out_be16(&smcp->smc_smcmr, smcr_mk_clen(bits) | cval |
+		    SMCMR_SM_UART | prev_mode);
 	} else {
 		out_be16(&sccp->scc_psmr, (sbits << 12) | scval);
 	}
@@ -1198,12 +1203,14 @@ static int __init cpm_uart_console_setup(struct console *co, char *options)
 	udbg_putc = NULL;
 #endif
 
-	cpm_line_cr_cmd(pinfo, CPM_CR_STOP_TX);
-
 	if (IS_SMC(pinfo)) {
+		out_be16(&pinfo->smcup->smc_brkcr, 0);
+		cpm_line_cr_cmd(pinfo, CPM_CR_STOP_TX);
 		clrbits8(&pinfo->smcp->smc_smcm, SMCM_RX | SMCM_TX);
 		clrbits16(&pinfo->smcp->smc_smcmr, SMCMR_REN | SMCMR_TEN);
 	} else {
+		out_be16(&pinfo->sccup->scc_brkcr, 0);
+		cpm_line_cr_cmd(pinfo, CPM_CR_GRA_STOP_TX);
 		clrbits16(&pinfo->sccp->scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
 		clrbits32(&pinfo->sccp->scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
 	}
-- 
GitLab


From 708d8cefd0f6d8dc13027f899e865ccfa5f63871 Mon Sep 17 00:00:00 2001
From: Andre Haupt <andre@bitwigglers.org>
Date: Wed, 23 Jul 2008 21:29:51 -0700
Subject: [PATCH 300/853] stallion: removed unused variable

Signed-off-by: Andre Haupt <andre@bitwigglers.org>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/stallion.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index b976248e107..19db1eb87c2 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -1256,7 +1256,6 @@ static int stl_tiocmset(struct tty_struct *tty, struct file *file,
 static int stl_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct stlport	*portp;
-	unsigned int	ival;
 	int		rc;
 	void __user *argp = (void __user *)arg;
 
-- 
GitLab


From a61f5345eba34772a71523227de890a28410f320 Mon Sep 17 00:00:00 2001
From: Chen Gong <g.chen@freescale.com>
Date: Wed, 23 Jul 2008 21:29:52 -0700
Subject: [PATCH 301/853] spi: spi_mpc83xx clockrate fixes

This updates the SPI clock rate calculations for the spi_mpc83xx driver.
Some boundary conditions were wrong, and in several cases divide-by-16
wasn't always needed.

Signed-off-by: Chen Gong <g.chen@freescale.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/spi_mpc83xx.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c
index 6832da6f710..070c6219e2d 100644
--- a/drivers/spi/spi_mpc83xx.c
+++ b/drivers/spi/spi_mpc83xx.c
@@ -266,21 +266,24 @@ int mpc83xx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
 
 	cs->hw_mode |= SPMODE_LEN(bits_per_word);
 
-	if ((mpc83xx_spi->spibrg / hz) >= 64) {
-		pm = mpc83xx_spi->spibrg / (hz * 64) - 1;
-		if (pm > 0x0f) {
-			dev_err(&spi->dev, "Requested speed is too "
-				"low: %d Hz. Will use %d Hz instead.\n",
-				hz, mpc83xx_spi->spibrg / 1024);
-			pm = 0x0f;
+	if ((mpc83xx_spi->spibrg / hz) > 64) {
+		pm = mpc83xx_spi->spibrg / (hz * 64);
+		if (pm > 16) {
+			cs->hw_mode |= SPMODE_DIV16;
+			pm /= 16;
+			if (pm > 16) {
+				dev_err(&spi->dev, "Requested speed is too "
+					"low: %d Hz. Will use %d Hz instead.\n",
+					hz, mpc83xx_spi->spibrg / 1024);
+				pm = 16;
+			}
 		}
-		cs->hw_mode |= SPMODE_PM(pm) | SPMODE_DIV16;
-	} else {
+	} else
 		pm = mpc83xx_spi->spibrg / (hz * 4);
-		if (pm)
-			pm--;
-		cs->hw_mode |= SPMODE_PM(pm);
-	}
+	if (pm)
+		pm--;
+
+	cs->hw_mode |= SPMODE_PM(pm);
 	regval =  mpc83xx_spi_read_reg(&mpc83xx_spi->base->mode);
 	if (cs->hw_mode != regval) {
 		unsigned long flags;
-- 
GitLab


From 166a375b657b7af494f4ce3f72c4d2002180da44 Mon Sep 17 00:00:00 2001
From: Roel Kluin <12o3l@tiscali.nl>
Date: Wed, 23 Jul 2008 21:29:53 -0700
Subject: [PATCH 302/853] xilinx_spi: test below 0 on unsigned irq in
 xilinx_spi_probe()

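xilinx_spi->irq is unsigned, so the "< 0" check on the value returned by
platform_get_irq() can never be true.  Check the return value in a signed
variable before storing it.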

Signed-off-by: Roel Kluin <12o3l@tiscali.nl>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Andrei Konovalov <akonovalov@ru.mvista.com>
Cc: Yuri Frolov <yfrolov@ru.mvista.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/xilinx_spi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 113a0468ffc..68d6f4988fb 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -353,11 +353,12 @@ static int __init xilinx_spi_probe(struct platform_device *dev)
 		goto put_master;
 	}
 
-	xspi->irq = platform_get_irq(dev, 0);
-	if (xspi->irq < 0) {
+	ret = platform_get_irq(dev, 0);
+	if (ret < 0) {
 		ret = -ENXIO;
 		goto unmap_io;
 	}
+	xspi->irq = ret;
 
 	master->bus_num = pdata->bus_num;
 	master->num_chipselect = pdata->num_chipselect;
-- 
GitLab


From 6291fe2abce4689d6ee7cbaea16692c79bf0d01b Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 23 Jul 2008 21:29:53 -0700
Subject: [PATCH 303/853] SPI Kconfig simplifications

Use "if SPI_MASTER" to remove numerous dependencies.

[dbrownell@users.sourceforge.net: remove a couple now-needless EXPERIMENTAL dependencies too]
Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/Kconfig | 45 +++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 66ec5d8808d..2303521b4f0 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -49,25 +49,26 @@ config SPI_MASTER
 	  controller and the protocol drivers for the SPI slave chips
 	  that are connected.
 
+if SPI_MASTER
+
 comment "SPI Master Controller Drivers"
-	depends on SPI_MASTER
 
 config SPI_ATMEL
 	tristate "Atmel SPI Controller"
-	depends on (ARCH_AT91 || AVR32) && SPI_MASTER
+	depends on (ARCH_AT91 || AVR32)
 	help
 	  This selects a driver for the Atmel SPI Controller, present on
 	  many AT32 (AVR32) and AT91 (ARM) chips.
 
 config SPI_BFIN
 	tristate "SPI controller driver for ADI Blackfin5xx"
-	depends on SPI_MASTER && BLACKFIN
+	depends on BLACKFIN
 	help
 	  This is the SPI controller master driver for Blackfin 5xx processor.
 
 config SPI_AU1550
 	tristate "Au1550/Au12x0 SPI Controller"
-	depends on SPI_MASTER && (SOC_AU1550 || SOC_AU1200) && EXPERIMENTAL
+	depends on (SOC_AU1550 || SOC_AU1200) && EXPERIMENTAL
 	select SPI_BITBANG
 	help
 	  If you say yes to this option, support will be included for the
@@ -78,7 +79,6 @@ config SPI_AU1550
 
 config SPI_BITBANG
 	tristate "Bitbanging SPI master"
-	depends on SPI_MASTER && EXPERIMENTAL
 	help
 	  With a few GPIO pins, your system can bitbang the SPI protocol.
 	  Select this to get SPI support through I/O pins (GPIO, parallel
@@ -92,7 +92,7 @@ config SPI_BITBANG
 
 config SPI_BUTTERFLY
 	tristate "Parallel port adapter for AVR Butterfly (DEVELOPMENT)"
-	depends on SPI_MASTER && PARPORT && EXPERIMENTAL
+	depends on PARPORT
 	select SPI_BITBANG
 	help
 	  This uses a custom parallel port cable to connect to an AVR
@@ -102,14 +102,14 @@ config SPI_BUTTERFLY
 
 config SPI_IMX
 	tristate "Freescale iMX SPI controller"
-	depends on SPI_MASTER && ARCH_IMX && EXPERIMENTAL
+	depends on ARCH_IMX && EXPERIMENTAL
 	help
 	  This enables using the Freescale iMX SPI controller in master
 	  mode.
 
 config SPI_LM70_LLP
 	tristate "Parallel port adapter for LM70 eval board (DEVELOPMENT)"
-	depends on SPI_MASTER && PARPORT && EXPERIMENTAL
+	depends on PARPORT && EXPERIMENTAL
 	select SPI_BITBANG
 	help
 	  This driver supports the NS LM70 LLP Evaluation Board,
@@ -118,14 +118,14 @@ config SPI_LM70_LLP
 
 config SPI_MPC52xx_PSC
 	tristate "Freescale MPC52xx PSC SPI controller"
-	depends on SPI_MASTER && PPC_MPC52xx && EXPERIMENTAL
+	depends on PPC_MPC52xx && EXPERIMENTAL
 	help
 	  This enables using the Freescale MPC52xx Programmable Serial
 	  Controller in master SPI mode.
 
 config SPI_MPC83xx
 	tristate "Freescale MPC83xx/QUICC Engine SPI controller"
-	depends on SPI_MASTER && (PPC_83xx || QUICC_ENGINE) && EXPERIMENTAL
+	depends on (PPC_83xx || QUICC_ENGINE) && EXPERIMENTAL
 	help
 	  This enables using the Freescale MPC83xx and QUICC Engine SPI
 	  controllers in master mode.
@@ -137,21 +137,21 @@ config SPI_MPC83xx
 
 config SPI_OMAP_UWIRE
 	tristate "OMAP1 MicroWire"
-	depends on SPI_MASTER && ARCH_OMAP1
+	depends on ARCH_OMAP1
 	select SPI_BITBANG
 	help
 	  This hooks up to the MicroWire controller on OMAP1 chips.
 
 config SPI_OMAP24XX
 	tristate "McSPI driver for OMAP24xx/OMAP34xx"
-	depends on SPI_MASTER && (ARCH_OMAP24XX || ARCH_OMAP34XX)
+	depends on ARCH_OMAP24XX || ARCH_OMAP34XX
 	help
 	  SPI master controller for OMAP24xx/OMAP34xx Multichannel SPI
 	  (McSPI) modules.
 
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
-	depends on SPI_MASTER && ARCH_PXA && EXPERIMENTAL
+	depends on ARCH_PXA && EXPERIMENTAL
 	select PXA_SSP
 	help
 	  This enables using a PXA2xx SSP port as a SPI master controller.
@@ -160,14 +160,14 @@ config SPI_PXA2XX
 
 config SPI_S3C24XX
 	tristate "Samsung S3C24XX series SPI"
-	depends on SPI_MASTER && ARCH_S3C2410 && EXPERIMENTAL
+	depends on ARCH_S3C2410 && EXPERIMENTAL
 	select SPI_BITBANG
 	help
 	  SPI driver for Samsung S3C24XX series ARM SoCs
 
 config SPI_S3C24XX_GPIO
 	tristate "Samsung S3C24XX series SPI by GPIO"
-	depends on SPI_MASTER && ARCH_S3C2410 && EXPERIMENTAL
+	depends on ARCH_S3C2410 && EXPERIMENTAL
 	select SPI_BITBANG
 	help
 	  SPI driver for Samsung S3C24XX series ARM SoCs using
@@ -177,20 +177,20 @@ config SPI_S3C24XX_GPIO
 
 config SPI_SH_SCI
 	tristate "SuperH SCI SPI controller"
-	depends on SPI_MASTER && SUPERH
+	depends on SUPERH
 	select SPI_BITBANG
 	help
 	  SPI driver for SuperH SCI blocks.
 
 config SPI_TXX9
 	tristate "Toshiba TXx9 SPI controller"
-	depends on SPI_MASTER && GENERIC_GPIO && CPU_TX49XX
+	depends on GENERIC_GPIO && CPU_TX49XX
 	help
 	  SPI driver for Toshiba TXx9 MIPS SoCs
 
 config SPI_XILINX
 	tristate "Xilinx SPI controller"
-	depends on SPI_MASTER && XILINX_VIRTEX && EXPERIMENTAL
+	depends on XILINX_VIRTEX && EXPERIMENTAL
 	select SPI_BITBANG
 	help
 	  This exposes the SPI controller IP from the Xilinx EDK.
@@ -207,11 +207,10 @@ config SPI_XILINX
 # being probably the most widely used ones.
 #
 comment "SPI Protocol Masters"
-	depends on SPI_MASTER
 
 config SPI_AT25
 	tristate "SPI EEPROMs from most vendors"
-	depends on SPI_MASTER && SYSFS
+	depends on SYSFS
 	help
 	  Enable this driver to get read/write support to most SPI EEPROMs,
 	  after you configure the board init code to know about each eeprom
@@ -222,7 +221,7 @@ config SPI_AT25
 
 config SPI_SPIDEV
 	tristate "User mode SPI device driver support"
-	depends on SPI_MASTER && EXPERIMENTAL
+	depends on EXPERIMENTAL
 	help
 	  This supports user mode SPI protocol drivers.
 
@@ -231,7 +230,7 @@ config SPI_SPIDEV
 
 config SPI_TLE62X0
 	tristate "Infineon TLE62X0 (for power switching)"
-	depends on SPI_MASTER && SYSFS
+	depends on SYSFS
 	help
 	  SPI driver for Infineon TLE62X0 series line driver chips,
 	  such as the TLE6220, TLE6230 and TLE6240.  This provides a
@@ -242,6 +241,8 @@ config SPI_TLE62X0
 # Add new SPI protocol masters in alphabetical order above this line
 #
 
+endif # SPI_MASTER
+
 # (slave support would go here)
 
 endif # SPI
-- 
GitLab


From 102eb97564c73ea73645b38599c5cbe6f54b030c Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Wed, 23 Jul 2008 21:29:55 -0700
Subject: [PATCH 304/853] spi: make spi_board_info.modalias a char array

Currently, 'modalias' in the spi_device structure is a 'const char *'.
The spi_new_device() function fills in the modalias value from a passed in
spi_board_info data block.  Since it is a pointer copy, the new spi_device
remains dependent on the spi_board_info structure after the new spi_device
is registered (no other fields in spi_device directly depend on the
spi_board_info structure; all of the other data is copied).

This causes a problem when dynamically populating the list of attached
SPI devices.  For example, in arch/powerpc, the list of SPI devices can be
populated from data in the device tree.  With the current code, the device
tree adapter must kmalloc() a new spi_board_info structure for each new
SPI device it finds in the device tree, and there is no simple mechanism
in place for keeping track of these allocations.

This patch changes modalias from a 'const char *' to a fixed char array.
By copying the modalias string instead of referencing it, the dependency
on the spi_board_info structure is eliminated and an outside caller does
not need to maintain a separate spi_board_info allocation for each device.

I searched through the code to the best of my ability for any references
to modalias which may be affected by this change and haven't found
anything.  It has been tested with the lite5200b platform in arch/powerpc.

[dbrownell@users.sourceforge.net: cope with linux-next changes: KOBJ_NAME_LEN obliterated, etc]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/spi.c       | 4 +++-
 include/linux/spi/spi.h | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 1771b2456bf..ecca4a6a6f9 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -218,6 +218,8 @@ struct spi_device *spi_new_device(struct spi_master *master,
 	if (!spi_master_get(master))
 		return NULL;
 
+	WARN_ON(strlen(chip->modalias) >= sizeof(proxy->modalias));
+
 	proxy = kzalloc(sizeof *proxy, GFP_KERNEL);
 	if (!proxy) {
 		dev_err(dev, "can't alloc dev for cs%d\n",
@@ -229,7 +231,7 @@ struct spi_device *spi_new_device(struct spi_master *master,
 	proxy->max_speed_hz = chip->max_speed_hz;
 	proxy->mode = chip->mode;
 	proxy->irq = chip->irq;
-	proxy->modalias = chip->modalias;
+	strlcpy(proxy->modalias, chip->modalias, sizeof(proxy->modalias));
 
 	snprintf(proxy->dev.bus_id, sizeof proxy->dev.bus_id,
 			"%s.%u", master->dev.bus_id,
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index b9a76c97208..a9cc29d4665 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -82,7 +82,7 @@ struct spi_device {
 	int			irq;
 	void			*controller_state;
 	void			*controller_data;
-	const char		*modalias;
+	char			modalias[32];
 
 	/*
 	 * likely need more hooks for more protocol options affecting how
-- 
GitLab


From 4ef754b7d7971a704d5b1b4608839da1bae37e5e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Wed, 23 Jul 2008 21:29:55 -0700
Subject: [PATCH 305/853] spidev: BKL removal

Another step to removing ->ioctl and to removing the BKL

[dbrownell@users.sourceforge.net: take final step; BKL not needed]
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/spidev.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 2833fd772a2..e5e0cfed5e3 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -228,7 +228,6 @@ static int spidev_message(struct spidev_data *spidev,
 	 * We walk the array of user-provided transfers, using each one
 	 * to initialize a kernel version of the same transfer.
 	 */
-	mutex_lock(&spidev->buf_lock);
 	buf = spidev->buffer;
 	total = 0;
 	for (n = n_xfers, k_tmp = k_xfers, u_tmp = u_xfers;
@@ -296,14 +295,12 @@ static int spidev_message(struct spidev_data *spidev,
 	status = total;
 
 done:
-	mutex_unlock(&spidev->buf_lock);
 	kfree(k_xfers);
 	return status;
 }
 
-static int
-spidev_ioctl(struct inode *inode, struct file *filp,
-		unsigned int cmd, unsigned long arg)
+static long
+spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	int			err = 0;
 	int			retval = 0;
@@ -341,6 +338,14 @@ spidev_ioctl(struct inode *inode, struct file *filp,
 	if (spi == NULL)
 		return -ESHUTDOWN;
 
+	/* use the buffer lock here for triple duty:
+	 *  - prevent I/O (from us) so calling spi_setup() is safe;
+	 *  - prevent concurrent SPI_IOC_WR_* from morphing
+	 *    data fields while SPI_IOC_RD_* reads them;
+	 *  - SPI_IOC_MESSAGE needs the buffer locked "normally".
+	 */
+	mutex_lock(&spidev->buf_lock);
+
 	switch (cmd) {
 	/* read requests */
 	case SPI_IOC_RD_MODE:
@@ -456,6 +461,8 @@ spidev_ioctl(struct inode *inode, struct file *filp,
 		kfree(ioc);
 		break;
 	}
+
+	mutex_unlock(&spidev->buf_lock);
 	spi_dev_put(spi);
 	return retval;
 }
@@ -533,7 +540,7 @@ static struct file_operations spidev_fops = {
 	 */
 	.write =	spidev_write,
 	.read =		spidev_read,
-	.ioctl =	spidev_ioctl,
+	.unlocked_ioctl = spidev_ioctl,
 	.open =		spidev_open,
 	.release =	spidev_release,
 };
-- 
GitLab


From 3a93a159c61e38a12f7ecbb3a25cf3f012abcf7a Mon Sep 17 00:00:00 2001
From: Manuel Lauss <mano@roarinelk.homelinux.net>
Date: Wed, 23 Jul 2008 21:29:56 -0700
Subject: [PATCH 306/853] spi: au1550_spi: proper platform device

Remove the Au1550 resource table and instead extract MMIO/IRQ/DMA
resources from platform resource information like any well-behaved
platform driver.

Signed-off-by: Manuel Lauss <mano@roarinelk.homelinux.net>
Signed-off-by: Jan Nikitenko <jan.nikitenko@gmail.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/au1550_spi.c                  | 138 ++++++++++++----------
 include/asm-mips/mach-au1x00/au1550_spi.h |   1 -
 2 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c
index 072c4a59533..3860dd2fa5d 100644
--- a/drivers/spi/au1550_spi.c
+++ b/drivers/spi/au1550_spi.c
@@ -26,6 +26,7 @@
 #include <linux/errno.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
+#include <linux/resource.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/dma-mapping.h>
@@ -81,6 +82,7 @@ struct au1550_spi {
 	struct spi_master *master;
 	struct device *dev;
 	struct au1550_spi_info *pdata;
+	struct resource *ioarea;
 };
 
 
@@ -96,6 +98,8 @@ static dbdev_tab_t au1550_spi_mem_dbdev =
 	.dev_intpolarity	= 0
 };
 
+static int ddma_memid;	/* id to above mem dma device */
+
 static void au1550_spi_bits_handlers_set(struct au1550_spi *hw, int bpw);
 
 
@@ -732,6 +736,7 @@ static int __init au1550_spi_probe(struct platform_device *pdev)
 {
 	struct au1550_spi *hw;
 	struct spi_master *master;
+	struct resource *r;
 	int err = 0;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct au1550_spi));
@@ -753,76 +758,64 @@ static int __init au1550_spi_probe(struct platform_device *pdev)
 		goto err_no_pdata;
 	}
 
-	platform_set_drvdata(pdev, hw);
-
-	init_completion(&hw->master_done);
-
-	hw->bitbang.master = hw->master;
-	hw->bitbang.setup_transfer = au1550_spi_setupxfer;
-	hw->bitbang.chipselect = au1550_spi_chipsel;
-	hw->bitbang.master->setup = au1550_spi_setup;
-	hw->bitbang.txrx_bufs = au1550_spi_txrx_bufs;
+	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "no IRQ\n");
+		err = -ENODEV;
+		goto err_no_iores;
+	}
+	hw->irq = r->start;
+
+	hw->usedma = 0;
+	r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (r) {
+		hw->dma_tx_id = r->start;
+		r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+		if (r) {
+			hw->dma_rx_id = r->start;
+			if (usedma && ddma_memid) {
+				if (pdev->dev.dma_mask == NULL)
+					dev_warn(&pdev->dev, "no dma mask\n");
+				else
+					hw->usedma = 1;
+			}
+		}
+	}
 
-	switch (hw->pdata->bus_num) {
-	case 0:
-		hw->irq = AU1550_PSC0_INT;
-		hw->regs = (volatile psc_spi_t *)PSC0_BASE_ADDR;
-		hw->dma_rx_id = DSCR_CMD0_PSC0_RX;
-		hw->dma_tx_id = DSCR_CMD0_PSC0_TX;
-		break;
-	case 1:
-		hw->irq = AU1550_PSC1_INT;
-		hw->regs = (volatile psc_spi_t *)PSC1_BASE_ADDR;
-		hw->dma_rx_id = DSCR_CMD0_PSC1_RX;
-		hw->dma_tx_id = DSCR_CMD0_PSC1_TX;
-		break;
-	case 2:
-		hw->irq = AU1550_PSC2_INT;
-		hw->regs = (volatile psc_spi_t *)PSC2_BASE_ADDR;
-		hw->dma_rx_id = DSCR_CMD0_PSC2_RX;
-		hw->dma_tx_id = DSCR_CMD0_PSC2_TX;
-		break;
-	case 3:
-		hw->irq = AU1550_PSC3_INT;
-		hw->regs = (volatile psc_spi_t *)PSC3_BASE_ADDR;
-		hw->dma_rx_id = DSCR_CMD0_PSC3_RX;
-		hw->dma_tx_id = DSCR_CMD0_PSC3_TX;
-		break;
-	default:
-		dev_err(&pdev->dev, "Wrong bus_num of SPI\n");
-		err = -ENOENT;
-		goto err_no_pdata;
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "no mmio resource\n");
+		err = -ENODEV;
+		goto err_no_iores;
 	}
 
-	if (request_mem_region((unsigned long)hw->regs, sizeof(psc_spi_t),
-			pdev->name) == NULL) {
+	hw->ioarea = request_mem_region(r->start, sizeof(psc_spi_t),
+					pdev->name);
+	if (!hw->ioarea) {
 		dev_err(&pdev->dev, "Cannot reserve iomem region\n");
 		err = -ENXIO;
 		goto err_no_iores;
 	}
 
-
-	if (usedma) {
-		if (pdev->dev.dma_mask == NULL)
-			dev_warn(&pdev->dev, "no dma mask\n");
-		else
-			hw->usedma = 1;
+	hw->regs = (psc_spi_t __iomem *)ioremap(r->start, sizeof(psc_spi_t));
+	if (!hw->regs) {
+		dev_err(&pdev->dev, "cannot ioremap\n");
+		err = -ENXIO;
+		goto err_ioremap;
 	}
 
-	if (hw->usedma) {
-		/*
-		 * create memory device with 8 bits dev_devwidth
-		 * needed for proper byte ordering to spi fifo
-		 */
-		int memid = au1xxx_ddma_add_device(&au1550_spi_mem_dbdev);
-		if (!memid) {
-			dev_err(&pdev->dev,
-				"Cannot create dma 8 bit mem device\n");
-			err = -ENXIO;
-			goto err_dma_add_dev;
-		}
+	platform_set_drvdata(pdev, hw);
 
-		hw->dma_tx_ch = au1xxx_dbdma_chan_alloc(memid,
+	init_completion(&hw->master_done);
+
+	hw->bitbang.master = hw->master;
+	hw->bitbang.setup_transfer = au1550_spi_setupxfer;
+	hw->bitbang.chipselect = au1550_spi_chipsel;
+	hw->bitbang.master->setup = au1550_spi_setup;
+	hw->bitbang.txrx_bufs = au1550_spi_txrx_bufs;
+
+	if (hw->usedma) {
+		hw->dma_tx_ch = au1xxx_dbdma_chan_alloc(ddma_memid,
 			hw->dma_tx_id, NULL, (void *)hw);
 		if (hw->dma_tx_ch == 0) {
 			dev_err(&pdev->dev,
@@ -841,7 +834,7 @@ static int __init au1550_spi_probe(struct platform_device *pdev)
 
 
 		hw->dma_rx_ch = au1xxx_dbdma_chan_alloc(hw->dma_rx_id,
-			memid, NULL, (void *)hw);
+			ddma_memid, NULL, (void *)hw);
 		if (hw->dma_rx_ch == 0) {
 			dev_err(&pdev->dev,
 				"Cannot allocate rx dma channel\n");
@@ -874,7 +867,7 @@ static int __init au1550_spi_probe(struct platform_device *pdev)
 		goto err_no_irq;
 	}
 
-	master->bus_num = hw->pdata->bus_num;
+	master->bus_num = pdev->id;
 	master->num_chipselect = hw->pdata->num_chipselect;
 
 	/*
@@ -924,8 +917,11 @@ err_no_txdma_descr:
 		au1xxx_dbdma_chan_free(hw->dma_tx_ch);
 
 err_no_txdma:
-err_dma_add_dev:
-	release_mem_region((unsigned long)hw->regs, sizeof(psc_spi_t));
+	iounmap((void __iomem *)hw->regs);
+
+err_ioremap:
+	release_resource(hw->ioarea);
+	kfree(hw->ioarea);
 
 err_no_iores:
 err_no_pdata:
@@ -944,7 +940,9 @@ static int __exit au1550_spi_remove(struct platform_device *pdev)
 
 	spi_bitbang_stop(&hw->bitbang);
 	free_irq(hw->irq, hw);
-	release_mem_region((unsigned long)hw->regs, sizeof(psc_spi_t));
+	iounmap((void __iomem *)hw->regs);
+	release_resource(hw->ioarea);
+	kfree(hw->ioarea);
 
 	if (hw->usedma) {
 		au1550_spi_dma_rxtmp_free(hw);
@@ -971,12 +969,24 @@ static struct platform_driver au1550_spi_drv = {
 
 static int __init au1550_spi_init(void)
 {
+	/*
+	 * create memory device with 8 bits dev_devwidth
+	 * needed for proper byte ordering to spi fifo
+	 */
+	if (usedma) {
+		ddma_memid = au1xxx_ddma_add_device(&au1550_spi_mem_dbdev);
+		if (!ddma_memid)
+			printk(KERN_ERR "au1550-spi: cannot add memory"
+					"dbdma device\n");
+	}
 	return platform_driver_probe(&au1550_spi_drv, au1550_spi_probe);
 }
 module_init(au1550_spi_init);
 
 static void __exit au1550_spi_exit(void)
 {
+	if (usedma && ddma_memid)
+		au1xxx_ddma_del_device(ddma_memid);
 	platform_driver_unregister(&au1550_spi_drv);
 }
 module_exit(au1550_spi_exit);
diff --git a/include/asm-mips/mach-au1x00/au1550_spi.h b/include/asm-mips/mach-au1x00/au1550_spi.h
index 40e6c489833..08e1958e941 100644
--- a/include/asm-mips/mach-au1x00/au1550_spi.h
+++ b/include/asm-mips/mach-au1x00/au1550_spi.h
@@ -6,7 +6,6 @@
 #define _AU1550_SPI_H_
 
 struct au1550_spi_info {
-	s16 bus_num;		/* defines which PSC and IRQ to use */
 	u32 mainclk_hz;		/* main input clock frequency of PSC */
 	u16 num_chipselect;	/* number of chipselects supported */
 	void (*activate_cs)(struct au1550_spi_info *spi, int cs, int polarity);
-- 
GitLab


From bbe48ecc7f6559318cfc6c023da225a0b0e14ab3 Mon Sep 17 00:00:00 2001
From: Jan Nikitenko <jan.nikitenko@gmail.com>
Date: Wed, 23 Jul 2008 21:29:57 -0700
Subject: [PATCH 307/853] spi: au1550_spi: improve pio transfer mode

Improve the PIO transfer mode of the au1550 spi controller by continuing
the spi transfer, instead of aborting it, when a transmit underflow
interrupt occurs.

Verified by oscilloscope that the spi clock pauses on transmit underflow,
so transfer continuation is perfectly valid even though the au1550
datasheet says that on tx underflow zeroes will be transferred.

Also make some error messages more specific.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jan Nikitenko <jan.nikitenko@gmail.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/au1550_spi.c | 69 ++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 20 deletions(-)

diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c
index 3860dd2fa5d..9149689c79d 100644
--- a/drivers/spi/au1550_spi.c
+++ b/drivers/spi/au1550_spi.c
@@ -484,9 +484,13 @@ static irqreturn_t au1550_spi_dma_irq_callback(struct au1550_spi *hw)
 		au1xxx_dbdma_reset(hw->dma_tx_ch);
 		au1550_spi_reset_fifos(hw);
 
-		dev_err(hw->dev,
-			"Unexpected SPI error: event=0x%x stat=0x%x!\n",
-			evnt, stat);
+		if (evnt == PSC_SPIEVNT_RO)
+			dev_err(hw->dev,
+				"dma transfer: receive FIFO overflow!\n");
+		else
+			dev_err(hw->dev,
+				"dma transfer: unexpected SPI error "
+				"(event=0x%x stat=0x%x)!\n", evnt, stat);
 
 		complete(&hw->master_done);
 		return IRQ_HANDLED;
@@ -596,17 +600,17 @@ static irqreturn_t au1550_spi_pio_irq_callback(struct au1550_spi *hw)
 
 	if ((evnt & (PSC_SPIEVNT_MM | PSC_SPIEVNT_RO
 				| PSC_SPIEVNT_RU | PSC_SPIEVNT_TO
-				| PSC_SPIEVNT_TU | PSC_SPIEVNT_SD))
+				| PSC_SPIEVNT_SD))
 			!= 0) {
-		dev_err(hw->dev,
-			"Unexpected SPI error: event=0x%x stat=0x%x!\n",
-			evnt, stat);
 		/*
 		 * due to an error we consider transfer as done,
 		 * so mask all events until before next transfer start
 		 */
 		au1550_spi_mask_ack_all(hw);
 		au1550_spi_reset_fifos(hw);
+		dev_err(hw->dev,
+			"pio transfer: unexpected SPI error "
+			"(event=0x%x stat=0x%x)!\n", evnt, stat);
 		complete(&hw->master_done);
 		return IRQ_HANDLED;
 	}
@@ -620,27 +624,50 @@ static irqreturn_t au1550_spi_pio_irq_callback(struct au1550_spi *hw)
 		stat = hw->regs->psc_spistat;
 		au_sync();
 
-		if ((stat & PSC_SPISTAT_RE) == 0 && hw->rx_count < hw->len) {
+		/*
+		 * Take care to not let the Rx FIFO overflow.
+		 *
+		 * We only write a byte if we have read one at least. Initially,
+		 * the write fifo is full, so we should read from the read fifo
+		 * first.
+		 * In case we miss a word from the read fifo, we should get a
+		 * RO event and should back out.
+		 */
+		if (!(stat & PSC_SPISTAT_RE) && hw->rx_count < hw->len) {
 			hw->rx_word(hw);
-			/* ack the receive request event */
-			hw->regs->psc_spievent = PSC_SPIEVNT_RR;
-			au_sync();
 			busy = 1;
-		}
 
-		if ((stat & PSC_SPISTAT_TF) == 0 && hw->tx_count < hw->len) {
-			hw->tx_word(hw);
-			/* ack the transmit request event */
-			hw->regs->psc_spievent = PSC_SPIEVNT_TR;
-			au_sync();
-			busy = 1;
+			if (!(stat & PSC_SPISTAT_TF) && hw->tx_count < hw->len)
+				hw->tx_word(hw);
 		}
 	} while (busy);
 
-	evnt = hw->regs->psc_spievent;
+	hw->regs->psc_spievent = PSC_SPIEVNT_RR | PSC_SPIEVNT_TR;
 	au_sync();
 
-	if (hw->rx_count >= hw->len || (evnt & PSC_SPIEVNT_MD) != 0) {
+	/*
+	 * Restart the SPI transmission in case of a transmit underflow.
+	 * This seems to work despite the notes in the Au1550 data book
+	 * of Figure 8-4 with flowchart for SPI master operation:
+	 *
+	 * """Note 1: An XFR Error Interrupt occurs, unless masked,
+	 * for any of the following events: Tx FIFO Underflow,
+	 * Rx FIFO Overflow, or Multiple-master Error
+	 *    Note 2: In case of a Tx Underflow Error, all zeroes are
+	 * transmitted."""
+	 *
+	 * By simply restarting the spi transfer on Tx Underflow Error,
+	 * we assume that spi transfer was paused instead of zeroes
+	 * transmittion mentioned in the Note 2 of Au1550 data book.
+	 */
+	if (evnt & PSC_SPIEVNT_TU) {
+		hw->regs->psc_spievent = PSC_SPIEVNT_TU | PSC_SPIEVNT_MD;
+		au_sync();
+		hw->regs->psc_spipcr = PSC_SPIPCR_MS;
+		au_sync();
+	}
+
+	if (hw->rx_count >= hw->len) {
 		/* transfer completed successfully */
 		au1550_spi_mask_ack_all(hw);
 		complete(&hw->master_done);
@@ -729,6 +756,8 @@ static void __init au1550_spi_setup_psc_as_spi(struct au1550_spi *hw)
 		stat = hw->regs->psc_spistat;
 		au_sync();
 	} while ((stat & PSC_SPISTAT_DR) == 0);
+
+	au1550_spi_reset_fifos(hw);
 }
 
 
-- 
GitLab


From e0426e6a09954d205da2d674a3d368d2715e3afd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Wed, 23 Jul 2008 21:29:58 -0700
Subject: [PATCH 308/853] vt: hold console_sem across sysfs operations

Hold console sem while creating/destroying sysfs files.  Serialisation is
so far done by BKL held in tty release_dev and chrdev_open, but no other
locks are held in open path.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Aristeu Rozanski <aris@ruivo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/vt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index e32a076d5f1..ab53a1d4d89 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -2749,8 +2749,8 @@ static int con_open(struct tty_struct *tty, struct file *filp)
 				tty->termios->c_iflag |= IUTF8;
 			else
 				tty->termios->c_iflag &= ~IUTF8;
-			release_console_sem();
 			vcs_make_sysfs(tty);
+			release_console_sem();
 			return ret;
 		}
 	}
@@ -2775,8 +2775,8 @@ static void con_close(struct tty_struct *tty, struct file *filp)
 		if (vc)
 			vc->vc_tty = NULL;
 		tty->driver_data = NULL;
-		release_console_sem();
 		vcs_remove_sysfs(tty);
+		release_console_sem();
 		mutex_unlock(&tty_mutex);
 		/*
 		 * tty_mutex is released, but we still hold BKL, so there is
-- 
GitLab


From f700d6e5e5549cb9349d22043f4bd153792c621f Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed, 23 Jul 2008 21:29:59 -0700
Subject: [PATCH 309/853] vt: do not update when the console is blanked

vt.c DO_UPDATE macro checks if the console is visible but doesn't check if
the console is blanked.

In fact updating fbcon while the console is blanked is not only
unnecessary but can even cause screen corruption.

Therefore I am adding a simple check on console_blanked in DO_UPDATE.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/vt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index ab53a1d4d89..cb8c90da393 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -261,7 +261,7 @@ static void notify_update(struct vc_data *vc)
 #ifdef VT_BUF_VRAM_ONLY
 #define DO_UPDATE(vc)	0
 #else
-#define DO_UPDATE(vc)	CON_IS_VISIBLE(vc)
+#define DO_UPDATE(vc)	(CON_IS_VISIBLE(vc) && !console_blanked)
 #endif
 
 static inline unsigned short *screenpos(struct vc_data *vc, int offset, int viewed)
-- 
GitLab


From 0293902a4d66fab27d0ddcc0766e05dae68f004e Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Wed, 23 Jul 2008 21:30:01 -0700
Subject: [PATCH 310/853] I2O: handle sysfs_create_link() failures

Compile warning:
ignoring return value of `sysfs_create_link', declared with attribute warn_unused_result.

If sysfs_create_link() fails, check the return value and do some
error handling after the failure.

Since sysfs_remove_link() will check whether a link exists, when removing the
link in error path, we don't need to care whether a link was created.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/message/i2o/device.c | 54 ++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c
index 489d7c5c496..8774c670e66 100644
--- a/drivers/message/i2o/device.c
+++ b/drivers/message/i2o/device.c
@@ -243,29 +243,41 @@ static int i2o_device_add(struct i2o_controller *c, i2o_lct_entry *entry)
 
 	/* create user entries for this device */
 	tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid);
-	if (tmp && (tmp != i2o_dev))
-		sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj,
-				  "user");
+	if (tmp && (tmp != i2o_dev)) {
+		rc = sysfs_create_link(&i2o_dev->device.kobj,
+				       &tmp->device.kobj, "user");
+		if (rc)
+			goto unreg_dev;
+	}
 
 	/* create user entries refering to this device */
 	list_for_each_entry(tmp, &c->devices, list)
 	    if ((tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
-		&& (tmp != i2o_dev))
-		sysfs_create_link(&tmp->device.kobj,
-				  &i2o_dev->device.kobj, "user");
+		&& (tmp != i2o_dev)) {
+		rc = sysfs_create_link(&tmp->device.kobj,
+				       &i2o_dev->device.kobj, "user");
+		if (rc)
+			goto rmlink1;
+	}
 
 	/* create parent entries for this device */
 	tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid);
-	if (tmp && (tmp != i2o_dev))
-		sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj,
-				  "parent");
+	if (tmp && (tmp != i2o_dev)) {
+		rc = sysfs_create_link(&i2o_dev->device.kobj,
+				       &tmp->device.kobj, "parent");
+		if (rc)
+			goto rmlink1;
+	}
 
 	/* create parent entries refering to this device */
 	list_for_each_entry(tmp, &c->devices, list)
 	    if ((tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
-		&& (tmp != i2o_dev))
-		sysfs_create_link(&tmp->device.kobj,
-				  &i2o_dev->device.kobj, "parent");
+		&& (tmp != i2o_dev)) {
+		rc = sysfs_create_link(&tmp->device.kobj,
+				       &i2o_dev->device.kobj, "parent");
+		if (rc)
+			goto rmlink2;
+	}
 
 	i2o_driver_notify_device_add_all(i2o_dev);
 
@@ -273,6 +285,24 @@ static int i2o_device_add(struct i2o_controller *c, i2o_lct_entry *entry)
 
 	return 0;
 
+rmlink2:
+	/* If link creating failed halfway, we loop whole list to cleanup.
+	 * And we don't care wrong removing of link, because sysfs_remove_link
+	 * will take care of it.
+	 */
+	list_for_each_entry(tmp, &c->devices, list) {
+		if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
+			sysfs_remove_link(&tmp->device.kobj, "parent");
+	}
+	sysfs_remove_link(&i2o_dev->device.kobj, "parent");
+rmlink1:
+	list_for_each_entry(tmp, &c->devices, list)
+		if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
+			sysfs_remove_link(&tmp->device.kobj, "user");
+	sysfs_remove_link(&i2o_dev->device.kobj, "user");
+unreg_dev:
+	list_del(&i2o_dev->list);
+	device_unregister(&i2o_dev->device);
 err:
 	kfree(i2o_dev);
 	return rc;
-- 
GitLab


From 746f1e558bc52b9693c1a1ecdab60f8392e5ff18 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Wed, 23 Jul 2008 21:30:02 -0700
Subject: [PATCH 311/853] eCryptfs: Privileged kthread for lower file opens

eCryptfs would really like to have read-write access to all files in the
lower filesystem.  Right now, the persistent lower file may be opened
read-only if the attempt to open it read-write fails.  One way to keep
from having to do that is to have a privileged kthread that can open the
lower persistent file on behalf of the user opening the eCryptfs file;
this patch implements this functionality.

This patch will properly allow a less-privileged user to open the eCryptfs
file, followed by a more-privileged user opening the eCryptfs file, with
the first user only being able to read and the second user being able to
both read and write.  eCryptfs currently does this wrong; it will wind up
calling vfs_write() on a file that was opened read-only.  This is fixed in
this patch.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/Makefile          |   2 +-
 fs/ecryptfs/ecryptfs_kernel.h |  19 ++++
 fs/ecryptfs/file.c            |   7 ++
 fs/ecryptfs/kthread.c         | 203 ++++++++++++++++++++++++++++++++++
 fs/ecryptfs/main.c            |  42 +++----
 5 files changed, 251 insertions(+), 22 deletions(-)
 create mode 100644 fs/ecryptfs/kthread.c

diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1e34a7fd488..b4755a85996 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
 
-ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o
+ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c15c25745e0..b4a0cccfdd7 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -559,6 +559,20 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
 extern struct kmem_cache *ecryptfs_key_sig_cache;
 extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
 extern struct kmem_cache *ecryptfs_key_tfm_cache;
+extern struct kmem_cache *ecryptfs_open_req_cache;
+
+struct ecryptfs_open_req {
+#define ECRYPTFS_REQ_PROCESSED 0x00000001
+#define ECRYPTFS_REQ_DROPPED   0x00000002
+#define ECRYPTFS_REQ_ZOMBIE    0x00000004
+	u32 flags;
+	struct file **lower_file;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	wait_queue_head_t wait;
+	struct mutex mux;
+	struct list_head kthread_ctl_list;
+};
 
 int ecryptfs_interpose(struct dentry *hidden_dentry,
 		       struct dentry *this_dentry, struct super_block *sb,
@@ -690,5 +704,10 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
 int
 ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
 		      struct user_namespace *user_ns, struct pid *pid);
+int ecryptfs_init_kthread(void);
+void ecryptfs_destroy_kthread(void);
+int ecryptfs_privileged_open(struct file **lower_file,
+			     struct dentry *lower_dentry,
+			     struct vfsmount *lower_mnt);
 
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 24749bf0668..f0be2905152 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -192,6 +192,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 				      | ECRYPTFS_ENCRYPTED);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
+	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
+	    && !(file->f_flags & O_RDONLY)) {
+		rc = -EPERM;
+		printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
+		       "file must hence be opened RO\n", __func__);
+		goto out;
+	}
 	ecryptfs_set_file_lower(
 		file, ecryptfs_inode_to_private(inode)->lower_file);
 	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
new file mode 100644
index 00000000000..c440c6b58b2
--- /dev/null
+++ b/fs/ecryptfs/kthread.c
@@ -0,0 +1,203 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 2008 International Business Machines Corp.
+ *   Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/wait.h>
+#include <linux/mount.h>
+#include "ecryptfs_kernel.h"
+
+struct kmem_cache *ecryptfs_open_req_cache;
+
+static struct ecryptfs_kthread_ctl {
+#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
+	u32 flags;
+	struct mutex mux;
+	struct list_head req_list;
+	wait_queue_head_t wait;
+} ecryptfs_kthread_ctl;
+
+static struct task_struct *ecryptfs_kthread;
+
+/**
+ * ecryptfs_threadfn
+ * @ignored: ignored
+ *
+ * The eCryptfs kernel thread that has the responsibility of getting
+ * the lower persistent file with RW permissions.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_threadfn(void *ignored)
+{
+	set_freezable();
+	while (1)  {
+		struct ecryptfs_open_req *req;
+
+		wait_event_freezable(
+			ecryptfs_kthread_ctl.wait,
+			(!list_empty(&ecryptfs_kthread_ctl.req_list)
+			 || kthread_should_stop()));
+		mutex_lock(&ecryptfs_kthread_ctl.mux);
+		if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
+			mutex_unlock(&ecryptfs_kthread_ctl.mux);
+			goto out;
+		}
+		while (!list_empty(&ecryptfs_kthread_ctl.req_list)) {
+			req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
+					       struct ecryptfs_open_req,
+					       kthread_ctl_list);
+			mutex_lock(&req->mux);
+			list_del(&req->kthread_ctl_list);
+			if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) {
+				dget(req->lower_dentry);
+				mntget(req->lower_mnt);
+				(*req->lower_file) = dentry_open(
+					req->lower_dentry, req->lower_mnt,
+					(O_RDWR | O_LARGEFILE));
+				req->flags |= ECRYPTFS_REQ_PROCESSED;
+			}
+			wake_up(&req->wait);
+			mutex_unlock(&req->mux);
+		}
+		mutex_unlock(&ecryptfs_kthread_ctl.mux);
+	}
+out:
+	return 0;
+}
+
+int ecryptfs_init_kthread(void)
+{
+	int rc = 0;
+
+	mutex_init(&ecryptfs_kthread_ctl.mux);
+	init_waitqueue_head(&ecryptfs_kthread_ctl.wait);
+	INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list);
+	ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL,
+				       "ecryptfs-kthread");
+	if (IS_ERR(ecryptfs_kthread)) {
+		rc = PTR_ERR(ecryptfs_kthread);
+		printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]"
+		       "\n", __func__, rc);
+	}
+	return rc;
+}
+
+void ecryptfs_destroy_kthread(void)
+{
+	struct ecryptfs_open_req *req;
+
+	mutex_lock(&ecryptfs_kthread_ctl.mux);
+	ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
+	list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
+			    kthread_ctl_list) {
+		mutex_lock(&req->mux);
+		req->flags |= ECRYPTFS_REQ_ZOMBIE;
+		wake_up(&req->wait);
+		mutex_unlock(&req->mux);
+	}
+	mutex_unlock(&ecryptfs_kthread_ctl.mux);
+	kthread_stop(ecryptfs_kthread);
+	wake_up(&ecryptfs_kthread_ctl.wait);
+}
+
+/**
+ * ecryptfs_privileged_open
+ * @lower_file: Result of dentry_open by root on lower dentry
+ * @lower_dentry: Lower dentry for file to open
+ * @lower_mnt: Lower vfsmount for file to open
+ *
+ * This function gets a r/w file opened against the lower dentry.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_privileged_open(struct file **lower_file,
+			     struct dentry *lower_dentry,
+			     struct vfsmount *lower_mnt)
+{
+	struct ecryptfs_open_req *req;
+	int rc = 0;
+
+	/* Corresponding dput() and mntput() are done when the
+	 * persistent file is fput() when the eCryptfs inode is
+	 * destroyed. */
+	dget(lower_dentry);
+	mntget(lower_mnt);
+	(*lower_file) = dentry_open(lower_dentry, lower_mnt,
+				    (O_RDWR | O_LARGEFILE));
+	if (!IS_ERR(*lower_file))
+		goto out;
+	req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
+	if (!req) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	mutex_init(&req->mux);
+	req->lower_file = lower_file;
+	req->lower_dentry = lower_dentry;
+	req->lower_mnt = lower_mnt;
+	init_waitqueue_head(&req->wait);
+	req->flags = 0;
+	mutex_lock(&ecryptfs_kthread_ctl.mux);
+	if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
+		rc = -EIO;
+		mutex_unlock(&ecryptfs_kthread_ctl.mux);
+		printk(KERN_ERR "%s: We are in the middle of shutting down; "
+		       "aborting privileged request to open lower file\n",
+			__func__);
+		goto out_free;
+	}
+	list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
+	mutex_unlock(&ecryptfs_kthread_ctl.mux);
+	wake_up(&ecryptfs_kthread_ctl.wait);
+	wait_event(req->wait, (req->flags != 0));
+	mutex_lock(&req->mux);
+	BUG_ON(req->flags == 0);
+	if (req->flags & ECRYPTFS_REQ_DROPPED
+	    || req->flags & ECRYPTFS_REQ_ZOMBIE) {
+		rc = -EIO;
+		printk(KERN_WARNING "%s: Privileged open request dropped\n",
+		       __func__);
+		goto out_unlock;
+	}
+	if (IS_ERR(*req->lower_file)) {
+		rc = PTR_ERR(*req->lower_file);
+		dget(lower_dentry);
+		mntget(lower_mnt);
+		(*lower_file) = dentry_open(lower_dentry, lower_mnt,
+					    (O_RDONLY | O_LARGEFILE));
+		if (IS_ERR(*lower_file)) {
+			rc = PTR_ERR(*req->lower_file);
+			(*lower_file) = NULL;
+			printk(KERN_WARNING "%s: Error attempting privileged "
+			       "open of lower file with either RW or RO "
+			       "perms; rc = [%d]. Giving up.\n",
+			       __func__, rc);
+		}
+	}
+out_unlock:
+	mutex_unlock(&req->mux);
+out_free:
+	kmem_cache_free(ecryptfs_open_req_cache, req);
+out:
+	return rc;
+}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d603631601e..f36ab2feea2 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -130,26 +130,12 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
 			ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
 
 		lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
-		/* Corresponding dput() and mntput() are done when the
-		 * persistent file is fput() when the eCryptfs inode
-		 * is destroyed. */
-		dget(lower_dentry);
-		mntget(lower_mnt);
-		inode_info->lower_file = dentry_open(lower_dentry,
-						     lower_mnt,
-						     (O_RDWR | O_LARGEFILE));
-		if (IS_ERR(inode_info->lower_file)) {
-			dget(lower_dentry);
-			mntget(lower_mnt);
-			inode_info->lower_file = dentry_open(lower_dentry,
-							     lower_mnt,
-							     (O_RDONLY
-							      | O_LARGEFILE));
-		}
-		if (IS_ERR(inode_info->lower_file)) {
+		rc = ecryptfs_privileged_open(&inode_info->lower_file,
+						     lower_dentry, lower_mnt);
+		if (rc || IS_ERR(inode_info->lower_file)) {
 			printk(KERN_ERR "Error opening lower persistent file "
-			       "for lower_dentry [0x%p] and lower_mnt [0x%p]\n",
-			       lower_dentry, lower_mnt);
+			       "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
+			       "rc = [%d]\n", lower_dentry, lower_mnt, rc);
 			rc = PTR_ERR(inode_info->lower_file);
 			inode_info->lower_file = NULL;
 		}
@@ -679,6 +665,11 @@ static struct ecryptfs_cache_info {
 		.name = "ecryptfs_key_tfm_cache",
 		.size = sizeof(struct ecryptfs_key_tfm),
 	},
+	{
+		.cache = &ecryptfs_open_req_cache,
+		.name = "ecryptfs_open_req_cache",
+		.size = sizeof(struct ecryptfs_open_req),
+	},
 };
 
 static void ecryptfs_free_kmem_caches(void)
@@ -795,11 +786,17 @@ static int __init ecryptfs_init(void)
 		printk(KERN_ERR "sysfs registration failed\n");
 		goto out_unregister_filesystem;
 	}
+	rc = ecryptfs_init_kthread();
+	if (rc) {
+		printk(KERN_ERR "%s: kthread initialization failed; "
+		       "rc = [%d]\n", __func__, rc);
+		goto out_do_sysfs_unregistration;
+	}
 	rc = ecryptfs_init_messaging(ecryptfs_transport);
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
+		printk(KERN_ERR "Failure occured while attempting to "
 				"initialize the eCryptfs netlink socket\n");
-		goto out_do_sysfs_unregistration;
+		goto out_destroy_kthread;
 	}
 	rc = ecryptfs_init_crypto();
 	if (rc) {
@@ -814,6 +811,8 @@ static int __init ecryptfs_init(void)
 	goto out;
 out_release_messaging:
 	ecryptfs_release_messaging(ecryptfs_transport);
+out_destroy_kthread:
+	ecryptfs_destroy_kthread();
 out_do_sysfs_unregistration:
 	do_sysfs_unregistration();
 out_unregister_filesystem:
@@ -833,6 +832,7 @@ static void __exit ecryptfs_exit(void)
 		printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
 		       "rc = [%d]\n", rc);
 	ecryptfs_release_messaging(ecryptfs_transport);
+	ecryptfs_destroy_kthread();
 	do_sysfs_unregistration();
 	unregister_filesystem(&ecryptfs_fs_type);
 	ecryptfs_free_kmem_caches();
-- 
GitLab


From 6c4c17b073cd4a5a61bc04329561632870bb21fc Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 21:30:04 -0700
Subject: [PATCH 312/853] ecryptfs: discard ecryptfsd registration messages in
 miscdev

The userspace eCryptfs daemon sends HELO and QUIT messages to the kernel
for per-user daemon (un)registration.  These messages are required when
netlink is used as the transport, but (un)registration is handled by
opening and closing the device file when miscdev is the transport.  These
messages should be discarded in the miscdev transport so that a daemon
isn't registered twice.

Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/miscdev.c | 59 -------------------------------------------
 1 file changed, 59 deletions(-)

diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 09a4522f65e..b484792a099 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -357,46 +357,6 @@ out_unlock_daemon:
 	return rc;
 }
 
-/**
- * ecryptfs_miscdev_helo
- * @euid: effective user id of miscdevess sending helo packet
- * @user_ns: The namespace in which @euid applies
- * @pid: miscdevess id of miscdevess sending helo packet
- *
- * Returns zero on success; non-zero otherwise
- */
-static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
-				 struct pid *pid)
-{
-	int rc;
-
-	rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
-				   pid);
-	if (rc)
-		printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
-	return rc;
-}
-
-/**
- * ecryptfs_miscdev_quit
- * @euid: effective user id of miscdevess sending quit packet
- * @user_ns: The namespace in which @euid applies
- * @pid: miscdevess id of miscdevess sending quit packet
- *
- * Returns zero on success; non-zero otherwise
- */
-static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
-				 struct pid *pid)
-{
-	int rc;
-
-	rc = ecryptfs_process_quit(euid, user_ns, pid);
-	if (rc)
-		printk(KERN_WARNING
-		       "Error processing QUIT message; rc = [%d]\n", rc);
-	return rc;
-}
-
 /**
  * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
  * @data: Bytes comprising struct ecryptfs_message
@@ -512,26 +472,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
 			       __func__, rc);
 		break;
 	case ECRYPTFS_MSG_HELO:
-		rc = ecryptfs_miscdev_helo(current->euid,
-					   current->nsproxy->user_ns,
-					   task_pid(current));
-		if (rc) {
-			printk(KERN_ERR "%s: Error attempting to process "
-			       "helo from pid [0x%p]; rc = [%d]\n", __func__,
-			       task_pid(current), rc);
-			goto out_free;
-		}
-		break;
 	case ECRYPTFS_MSG_QUIT:
-		rc = ecryptfs_miscdev_quit(current->euid,
-					   current->nsproxy->user_ns,
-					   task_pid(current));
-		if (rc) {
-			printk(KERN_ERR "%s: Error attempting to process "
-			       "quit from pid [0x%p]; rc = [%d]\n", __func__,
-			       task_pid(current), rc);
-			goto out_free;
-		}
 		break;
 	default:
 		ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
-- 
GitLab


From 982363c97f8cad7aea4c3d2cfebffc1cc2d2f166 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 23 Jul 2008 21:30:04 -0700
Subject: [PATCH 313/853] ecryptfs: propagate key errors up at mount time

Mounting with invalid key signatures should probably fail, if they were
specifically requested but not available.

Also fix case checks in process_request_key_err() for the right sign of
the errnos, as spotted by Jan Tluka.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Jan Tluka <jtluka@redhat.com>
Acked-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/keystore.c | 9 ++++-----
 fs/ecryptfs/main.c     | 4 ++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e82b457180b..f5b76a331b9 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -44,15 +44,15 @@ static int process_request_key_err(long err_code)
 	int rc = 0;
 
 	switch (err_code) {
-	case ENOKEY:
+	case -ENOKEY:
 		ecryptfs_printk(KERN_WARNING, "No key\n");
 		rc = -ENOENT;
 		break;
-	case EKEYEXPIRED:
+	case -EKEYEXPIRED:
 		ecryptfs_printk(KERN_WARNING, "Key expired\n");
 		rc = -ETIME;
 		break;
-	case EKEYREVOKED:
+	case -EKEYREVOKED:
 		ecryptfs_printk(KERN_WARNING, "Key revoked\n");
 		rc = -EINVAL;
 		break;
@@ -963,8 +963,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
 	if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
 		printk(KERN_ERR "Could not find key with description: [%s]\n",
 		       sig);
-		process_request_key_err(PTR_ERR(*auth_tok_key));
-		rc = -EINVAL;
+		rc = process_request_key_err(PTR_ERR(*auth_tok_key));
 		goto out;
 	}
 	(*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index f36ab2feea2..8876fe7c76e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -248,10 +248,11 @@ static int ecryptfs_init_global_auth_toks(
 			       "session keyring for sig specified in mount "
 			       "option: [%s]\n", global_auth_tok->sig);
 			global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
-			rc = 0;
+			goto out;
 		} else
 			global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
 	}
+out:
 	return rc;
 }
 
@@ -416,7 +417,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 		printk(KERN_WARNING "One or more global auth toks could not "
 		       "properly register; rc = [%d]\n", rc);
 	}
-	rc = 0;
 out:
 	return rc;
 }
-- 
GitLab


From 8f2368095e25018838e1bf145041f58270ccd32e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 23 Jul 2008 21:30:05 -0700
Subject: [PATCH 314/853] ecryptfs: string copy cleanup

Clean up overcomplicated string copy, which also gets rid of this
bogus warning:

fs/ecryptfs/main.c: In function 'ecryptfs_parse_options':
include/asm/arch/string_32.h:75: warning: array subscript is above array bounds

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/main.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 8876fe7c76e..10475d93ff5 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -301,7 +301,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 	char *cipher_name_dst;
 	char *cipher_name_src;
 	char *cipher_key_bytes_src;
-	int cipher_name_len;
 
 	if (!options) {
 		rc = -EINVAL;
@@ -382,17 +381,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 		goto out;
 	}
 	if (!cipher_name_set) {
-		cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
-		if (unlikely(cipher_name_len
-			     >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) {
-			rc = -EINVAL;
-			BUG();
-			goto out;
-		}
-		memcpy(mount_crypt_stat->global_default_cipher_name,
-		       ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
-		mount_crypt_stat->global_default_cipher_name[cipher_name_len]
-		    = '\0';
+		int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
+
+		BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+
+		strcpy(mount_crypt_stat->global_default_cipher_name,
+		       ECRYPTFS_DEFAULT_CIPHER);
 	}
 	if (!cipher_key_bytes_set) {
 		mount_crypt_stat->global_default_cipher_key_size = 0;
-- 
GitLab


From 29335c6a41568d4708d4ec3b9187f9b6d302e5ea Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 23 Jul 2008 21:30:06 -0700
Subject: [PATCH 315/853] ecryptfs: crypto.c use unaligned byteorder helpers

Fixes the following sparse warnings:
fs/ecryptfs/crypto.c:1036:8: warning: cast to restricted __be32
fs/ecryptfs/crypto.c:1038:8: warning: cast to restricted __be32
fs/ecryptfs/crypto.c:1077:10: warning: cast to restricted __be32
fs/ecryptfs/crypto.c:1103:6: warning: incorrect type in assignment (different base types)
fs/ecryptfs/crypto.c:1105:6: warning: incorrect type in assignment (different base types)
fs/ecryptfs/crypto.c:1124:8: warning: incorrect type in assignment (different base types)
fs/ecryptfs/crypto.c:1241:21: warning: incorrect type in assignment (different base types)
fs/ecryptfs/crypto.c:1244:30: warning: incorrect type in assignment (different base types)
fs/ecryptfs/crypto.c:1414:23: warning: cast to restricted __be32
fs/ecryptfs/crypto.c:1417:32: warning: cast to restricted __be16

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/crypto.c | 37 ++++++++++++++-----------------------
 1 file changed, 14 insertions(+), 23 deletions(-)

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e2832bc7869..7b99917ffad 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -33,6 +33,7 @@
 #include <linux/crypto.h>
 #include <linux/file.h>
 #include <linux/scatterlist.h>
+#include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 
 static int
@@ -1032,10 +1033,8 @@ static int contains_ecryptfs_marker(char *data)
 {
 	u32 m_1, m_2;
 
-	memcpy(&m_1, data, 4);
-	m_1 = be32_to_cpu(m_1);
-	memcpy(&m_2, (data + 4), 4);
-	m_2 = be32_to_cpu(m_2);
+	m_1 = get_unaligned_be32(data);
+	m_2 = get_unaligned_be32(data + 4);
 	if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
 		return 1;
 	ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
@@ -1073,8 +1072,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
 	int i;
 	u32 flags;
 
-	memcpy(&flags, page_virt, 4);
-	flags = be32_to_cpu(flags);
+	flags = get_unaligned_be32(page_virt);
 	for (i = 0; i < ((sizeof(ecryptfs_flag_map)
 			  / sizeof(struct ecryptfs_flag_map_elem))); i++)
 		if (flags & ecryptfs_flag_map[i].file_flag) {
@@ -1100,11 +1098,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
 
 	get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
 	m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
-	m_1 = cpu_to_be32(m_1);
-	memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
-	m_2 = cpu_to_be32(m_2);
-	memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
-	       (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+	put_unaligned_be32(m_1, page_virt);
+	page_virt += (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2);
+	put_unaligned_be32(m_2, page_virt);
 	(*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
 }
 
@@ -1121,8 +1117,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
 			flags |= ecryptfs_flag_map[i].file_flag;
 	/* Version is in top 8 bits of the 32-bit flag vector */
 	flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
-	flags = cpu_to_be32(flags);
-	memcpy(page_virt, &flags, 4);
+	put_unaligned_be32(flags, page_virt);
 	(*written) = 4;
 }
 
@@ -1238,11 +1233,9 @@ ecryptfs_write_header_metadata(char *virt,
 	num_header_extents_at_front =
 		(u16)(crypt_stat->num_header_bytes_at_front
 		      / crypt_stat->extent_size);
-	header_extent_size = cpu_to_be32(header_extent_size);
-	memcpy(virt, &header_extent_size, 4);
+	put_unaligned_be32(header_extent_size, virt);
 	virt += 4;
-	num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front);
-	memcpy(virt, &num_header_extents_at_front, 2);
+	put_unaligned_be16(num_header_extents_at_front, virt);
 	(*written) = 6;
 }
 
@@ -1410,15 +1403,13 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
 	u32 header_extent_size;
 	u16 num_header_extents_at_front;
 
-	memcpy(&header_extent_size, virt, sizeof(u32));
-	header_extent_size = be32_to_cpu(header_extent_size);
-	virt += sizeof(u32);
-	memcpy(&num_header_extents_at_front, virt, sizeof(u16));
-	num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
+	header_extent_size = get_unaligned_be32(virt);
+	virt += sizeof(__be32);
+	num_header_extents_at_front = get_unaligned_be16(virt);
 	crypt_stat->num_header_bytes_at_front =
 		(((size_t)num_header_extents_at_front
 		  * (size_t)header_extent_size));
-	(*bytes_read) = (sizeof(u32) + sizeof(u16));
+	(*bytes_read) = (sizeof(__be32) + sizeof(__be16));
 	if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
 	    && (crypt_stat->num_header_bytes_at_front
 		< ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
-- 
GitLab


From 0a688ad713949643e201431d3f4a4ceddfeb70ca Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 23 Jul 2008 21:30:07 -0700
Subject: [PATCH 316/853] ecryptfs: inode.c mmap.c use unaligned byteorder
 helpers

Fixes sparse warnings:
fs/ecryptfs/inode.c:368:15: warning: cast to restricted __be64
fs/ecryptfs/mmap.c:385:12: warning: incorrect type in assignment (different base types)
fs/ecryptfs/mmap.c:385:12:    expected unsigned long long [unsigned] [assigned] [usertype] file_size
fs/ecryptfs/mmap.c:385:12:    got restricted __be64 [usertype] <noident>
fs/ecryptfs/mmap.c:428:12: warning: incorrect type in assignment (different base types)
fs/ecryptfs/mmap.c:428:12:    expected unsigned long long [unsigned] [assigned] [usertype] file_size
fs/ecryptfs/mmap.c:428:12:    got restricted __be64 [usertype] <noident>

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/inode.c |  4 ++--
 fs/ecryptfs/mmap.c  | 11 +++--------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c92cc1c00aa..7315547193e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -31,6 +31,7 @@
 #include <linux/mount.h>
 #include <linux/crypto.h>
 #include <linux/fs_stack.h>
+#include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 
 static struct dentry *lock_parent(struct dentry *dentry)
@@ -364,8 +365,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		else
 			file_size = i_size_read(lower_dentry->d_inode);
 	} else {
-		memcpy(&file_size, page_virt, sizeof(file_size));
-		file_size = be64_to_cpu(file_size);
+		file_size = get_unaligned_be64(page_virt);
 	}
 	i_size_write(dentry->d_inode, (loff_t)file_size);
 	kmem_cache_free(ecryptfs_header_cache_2, page_virt);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 2b6fe1e6e8b..245c2dc02d5 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -32,6 +32,7 @@
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
+#include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -372,7 +373,6 @@ out:
  */
 static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
 {
-	u64 file_size;
 	char *file_size_virt;
 	int rc;
 
@@ -381,9 +381,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
 		rc = -ENOMEM;
 		goto out;
 	}
-	file_size = (u64)i_size_read(ecryptfs_inode);
-	file_size = cpu_to_be64(file_size);
-	memcpy(file_size_virt, &file_size, sizeof(u64));
+	put_unaligned_be64(i_size_read(ecryptfs_inode), file_size_virt);
 	rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
 				  sizeof(u64));
 	kfree(file_size_virt);
@@ -403,7 +401,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
 	struct dentry *lower_dentry =
 		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
 	struct inode *lower_inode = lower_dentry->d_inode;
-	u64 file_size;
 	int rc;
 
 	if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
@@ -424,9 +421,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
 					   xattr_virt, PAGE_CACHE_SIZE);
 	if (size < 0)
 		size = 8;
-	file_size = (u64)i_size_read(ecryptfs_inode);
-	file_size = cpu_to_be64(file_size);
-	memcpy(xattr_virt, &file_size, sizeof(u64));
+	put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
 	rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
 					 xattr_virt, size, 0);
 	mutex_unlock(&lower_inode->i_mutex);
-- 
GitLab


From 72b55fffd631a89e5be6fe1b4f2565bc4cd90deb Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Wed, 23 Jul 2008 21:30:07 -0700
Subject: [PATCH 317/853] eCryptfs: do not try to open device files on mknod

When creating device nodes, eCryptfs needs to delay actually opening the lower
persistent file until an application tries to open.  Device handles may not be
backed by anything when they first come into existence.

[Valdis.Kletnieks@vt.edu: build fix]
Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Cc: <Valdis.Kletnieks@vt.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/ecryptfs_kernel.h |  6 +++++-
 fs/ecryptfs/file.c            | 14 ++++++++++++++
 fs/ecryptfs/inode.c           |  6 ++++--
 fs/ecryptfs/main.c            | 29 +++++++++++++++++++----------
 4 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index b4a0cccfdd7..b0727f91454 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -235,6 +235,7 @@ struct ecryptfs_crypt_stat {
 #define ECRYPTFS_METADATA_IN_XATTR  0x00000100
 #define ECRYPTFS_VIEW_AS_ENCRYPTED  0x00000200
 #define ECRYPTFS_KEY_SET            0x00000400
+#define ECRYPTFS_DELAY_PERSISTENT   0x00000800
 	u32 flags;
 	unsigned int file_version;
 	size_t iv_bytes;
@@ -574,9 +575,11 @@ struct ecryptfs_open_req {
 	struct list_head kthread_ctl_list;
 };
 
+#define ECRYPTFS_INTERPOSE_FLAG_D_ADD                 0x00000001
+#define ECRYPTFS_INTERPOSE_FLAG_DELAY_PERSISTENT_FILE 0x00000002
 int ecryptfs_interpose(struct dentry *hidden_dentry,
 		       struct dentry *this_dentry, struct super_block *sb,
-		       int flag);
+		       u32 flags);
 int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
 int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
 			     const char *name, int length,
@@ -709,5 +712,6 @@ void ecryptfs_destroy_kthread(void);
 int ecryptfs_privileged_open(struct file **lower_file,
 			     struct dentry *lower_dentry,
 			     struct vfsmount *lower_mnt);
+int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
 
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index f0be2905152..2c2d60df3f6 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -199,6 +199,20 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 		       "file must hence be opened RO\n", __func__);
 		goto out;
 	}
+	if (!ecryptfs_inode_to_private(inode)->lower_file) {
+		BUG_ON(!(crypt_stat->flags & ECRYPTFS_DELAY_PERSISTENT));
+		mutex_lock(&crypt_stat->cs_mutex);
+		crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+		mutex_unlock(&crypt_stat->cs_mutex);
+		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to initialize "
+			       "the persistent file for the dentry with name "
+			       "[%s]; rc = [%d]\n", __func__,
+			       ecryptfs_dentry->d_name.name, rc);
+			goto out;
+		}
+	}
 	ecryptfs_set_file_lower(
 		file, ecryptfs_inode_to_private(inode)->lower_file);
 	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 7315547193e..26090878c93 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -308,7 +308,8 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		d_add(dentry, NULL);
 		goto out;
 	}
-	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1);
+	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
+				ECRYPTFS_INTERPOSE_FLAG_D_ADD);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error interposing\n");
 		goto out_dput;
@@ -537,7 +538,8 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
 	if (rc || !lower_dentry->d_inode)
 		goto out;
-	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
+	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
+				ECRYPTFS_INTERPOSE_FLAG_DELAY_PERSISTENT_FILE);
 	if (rc)
 		goto out;
 	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 10475d93ff5..ee4f84b2041 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...)
  *
  * Returns zero on success; non-zero otherwise
  */
-static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
+int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
 {
 	struct ecryptfs_inode_info *inode_info =
 		ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
@@ -149,14 +149,14 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
  * @lower_dentry: Existing dentry in the lower filesystem
  * @dentry: ecryptfs' dentry
  * @sb: ecryptfs's super_block
- * @flag: If set to true, then d_add is called, else d_instantiate is called
+ * @flags: flags to govern behavior of interpose procedure
  *
  * Interposes upper and lower dentries.
  *
  * Returns zero on success; non-zero otherwise
  */
 int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
-		       struct super_block *sb, int flag)
+		       struct super_block *sb, u32 flags)
 {
 	struct inode *lower_inode;
 	struct inode *inode;
@@ -193,7 +193,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 		init_special_inode(inode, lower_inode->i_mode,
 				   lower_inode->i_rdev);
 	dentry->d_op = &ecryptfs_dops;
-	if (flag)
+	if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
 		d_add(dentry, inode);
 	else
 		d_instantiate(dentry, inode);
@@ -201,12 +201,21 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
 	fsstack_copy_inode_size(inode, lower_inode);
-	rc = ecryptfs_init_persistent_file(dentry);
-	if (rc) {
-		printk(KERN_ERR "%s: Error attempting to initialize the "
-		       "persistent file for the dentry with name [%s]; "
-		       "rc = [%d]\n", __func__, dentry->d_name.name, rc);
-		goto out;
+	if (!(flags & ECRYPTFS_INTERPOSE_FLAG_DELAY_PERSISTENT_FILE)) {
+		rc = ecryptfs_init_persistent_file(dentry);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to initialize "
+			       "the persistent file for the dentry with name "
+			       "[%s]; rc = [%d]\n", __func__,
+			       dentry->d_name.name, rc);
+			goto out;
+		}
+	} else {
+		struct ecryptfs_inode_info *inode_info =
+			ecryptfs_inode_to_private(dentry->d_inode);
+
+		inode_info->lower_file = NULL;
+		inode_info->crypt_stat.flags |= ECRYPTFS_DELAY_PERSISTENT;
 	}
 out:
 	return rc;
-- 
GitLab


From 391b52f98cf2e9bff227dad8bf9ea206fec43fa4 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Wed, 23 Jul 2008 21:30:08 -0700
Subject: [PATCH 318/853] eCryptfs: Make all persistent file opens delayed

There is no good reason to immediately open the lower file, and that can
cause problems with files that the user does not intend to immediately
open, such as device nodes.

This patch removes the persistent file open from the interpose step and
pushes that to the locations where eCryptfs really does need the lower
persistent file, such as just before reading or writing the metadata
stored in the lower file header.

Two functions are jumping to out_dput when they should just be jumping to
out on error paths.  This patch also fixes these.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/ecryptfs_kernel.h |  2 --
 fs/ecryptfs/file.c            |  4 ----
 fs/ecryptfs/inode.c           | 27 +++++++++++++++++++++++----
 fs/ecryptfs/main.c            | 16 ----------------
 4 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index b0727f91454..b73fb752c5f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -235,7 +235,6 @@ struct ecryptfs_crypt_stat {
 #define ECRYPTFS_METADATA_IN_XATTR  0x00000100
 #define ECRYPTFS_VIEW_AS_ENCRYPTED  0x00000200
 #define ECRYPTFS_KEY_SET            0x00000400
-#define ECRYPTFS_DELAY_PERSISTENT   0x00000800
 	u32 flags;
 	unsigned int file_version;
 	size_t iv_bytes;
@@ -576,7 +575,6 @@ struct ecryptfs_open_req {
 };
 
 #define ECRYPTFS_INTERPOSE_FLAG_D_ADD                 0x00000001
-#define ECRYPTFS_INTERPOSE_FLAG_DELAY_PERSISTENT_FILE 0x00000002
 int ecryptfs_interpose(struct dentry *hidden_dentry,
 		       struct dentry *this_dentry, struct super_block *sb,
 		       u32 flags);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2c2d60df3f6..9244d653743 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -200,10 +200,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 		goto out;
 	}
 	if (!ecryptfs_inode_to_private(inode)->lower_file) {
-		BUG_ON(!(crypt_stat->flags & ECRYPTFS_DELAY_PERSISTENT));
-		mutex_lock(&crypt_stat->cs_mutex);
-		crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
-		mutex_unlock(&crypt_stat->cs_mutex);
 		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
 		if (rc) {
 			printk(KERN_ERR "%s: Error attempting to initialize "
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 26090878c93..d755455e3bf 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -189,6 +189,16 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 				"context; rc = [%d]\n", rc);
 		goto out;
 	}
+	if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
+		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to initialize "
+			       "the persistent file for the dentry with name "
+			       "[%s]; rc = [%d]\n", __func__,
+			       ecryptfs_dentry->d_name.name, rc);
+			goto out;
+		}
+	}
 	rc = ecryptfs_write_metadata(ecryptfs_dentry);
 	if (rc) {
 		printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
@@ -312,7 +322,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 				ECRYPTFS_INTERPOSE_FLAG_D_ADD);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error interposing\n");
-		goto out_dput;
+		goto out;
 	}
 	if (S_ISDIR(lower_inode->i_mode)) {
 		ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
@@ -338,11 +348,21 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR,
 				"Cannot ecryptfs_kmalloc a page\n");
-		goto out_dput;
+		goto out;
 	}
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
 	if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
 		ecryptfs_set_default_sizes(crypt_stat);
+	if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
+		rc = ecryptfs_init_persistent_file(dentry);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to initialize "
+			       "the persistent file for the dentry with name "
+			       "[%s]; rc = [%d]\n", __func__,
+			       dentry->d_name.name, rc);
+			goto out;
+		}
+	}
 	rc = ecryptfs_read_and_validate_header_region(page_virt,
 						      dentry->d_inode);
 	if (rc) {
@@ -538,8 +558,7 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
 	if (rc || !lower_dentry->d_inode)
 		goto out;
-	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
-				ECRYPTFS_INTERPOSE_FLAG_DELAY_PERSISTENT_FILE);
+	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
 	if (rc)
 		goto out;
 	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index ee4f84b2041..6f403cfba14 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -201,22 +201,6 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
 	fsstack_copy_inode_size(inode, lower_inode);
-	if (!(flags & ECRYPTFS_INTERPOSE_FLAG_DELAY_PERSISTENT_FILE)) {
-		rc = ecryptfs_init_persistent_file(dentry);
-		if (rc) {
-			printk(KERN_ERR "%s: Error attempting to initialize "
-			       "the persistent file for the dentry with name "
-			       "[%s]; rc = [%d]\n", __func__,
-			       dentry->d_name.name, rc);
-			goto out;
-		}
-	} else {
-		struct ecryptfs_inode_info *inode_info =
-			ecryptfs_inode_to_private(dentry->d_inode);
-
-		inode_info->lower_file = NULL;
-		inode_info->crypt_stat.flags |= ECRYPTFS_DELAY_PERSISTENT;
-	}
 out:
 	return rc;
 }
-- 
GitLab


From 5f6f4f28b6ba543beef8bad91aa6f69c7ffeee51 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:09 -0700
Subject: [PATCH 319/853] autofs4: don't make expiring dentry negative

Correct the error of making a positive dentry negative after it has been
instantiated.

The code that makes this error attempts to re-use the dentry from a
concurrent expire and mount to resolve a race and the dentry used for the
lookup must be negative for mounts to trigger in the required cases.  The
fact is that the dentry doesn't need to be re-used because all that is
needed is to preserve the flag that indicates an expire is still
incomplete at the time of the mount request.

This change uses the dentry to check the flag and wait for the expire
to complete then discards it instead of attempting to re-use it.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h |   6 +--
 fs/autofs4/inode.c    |   6 +--
 fs/autofs4/root.c     | 118 ++++++++++++++++--------------------------
 3 files changed, 52 insertions(+), 78 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c3d352d7fa9..69b1497b002 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,7 +52,7 @@ struct autofs_info {
 
 	int		flags;
 
-	struct list_head rehash;
+	struct list_head expiring;
 
 	struct autofs_sb_info *sbi;
 	unsigned long last_used;
@@ -112,8 +112,8 @@ struct autofs_sb_info {
 	struct mutex wq_mutex;
 	spinlock_t fs_lock;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
-	spinlock_t rehash_lock;
-	struct list_head rehash_list;
+	spinlock_t lookup_lock;
+	struct list_head expiring_list;
 };
 
 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2fdcf5e1d23..94bfc154d7a 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -47,7 +47,7 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
 	ino->dentry = NULL;
 	ino->size = 0;
 
-	INIT_LIST_HEAD(&ino->rehash);
+	INIT_LIST_HEAD(&ino->expiring);
 
 	ino->last_used = jiffies;
 	atomic_set(&ino->count, 0);
@@ -338,8 +338,8 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	mutex_init(&sbi->wq_mutex);
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
-	spin_lock_init(&sbi->rehash_lock);
-	INIT_LIST_HEAD(&sbi->rehash_list);
+	spin_lock_init(&sbi->lookup_lock);
+	INIT_LIST_HEAD(&sbi->expiring_list);
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
 	s->s_magic = AUTOFS_SUPER_MAGIC;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index edf5b6bddb5..9ead2279df4 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -493,10 +493,10 @@ void autofs4_dentry_release(struct dentry *de)
 		struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
 
 		if (sbi) {
-			spin_lock(&sbi->rehash_lock);
-			if (!list_empty(&inf->rehash))
-				list_del(&inf->rehash);
-			spin_unlock(&sbi->rehash_lock);
+			spin_lock(&sbi->lookup_lock);
+			if (!list_empty(&inf->expiring))
+				list_del(&inf->expiring);
+			spin_unlock(&sbi->lookup_lock);
 		}
 
 		inf->dentry = NULL;
@@ -518,7 +518,7 @@ static struct dentry_operations autofs4_dentry_operations = {
 	.d_release	= autofs4_dentry_release,
 };
 
-static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
 {
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
@@ -526,14 +526,14 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
 	struct list_head *p, *head;
 
 	spin_lock(&dcache_lock);
-	spin_lock(&sbi->rehash_lock);
-	head = &sbi->rehash_list;
+	spin_lock(&sbi->lookup_lock);
+	head = &sbi->expiring_list;
 	list_for_each(p, head) {
 		struct autofs_info *ino;
 		struct dentry *dentry;
 		struct qstr *qstr;
 
-		ino = list_entry(p, struct autofs_info, rehash);
+		ino = list_entry(p, struct autofs_info, expiring);
 		dentry = ino->dentry;
 
 		spin_lock(&dentry->d_lock);
@@ -555,33 +555,16 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
 			goto next;
 
 		if (d_unhashed(dentry)) {
-			struct inode *inode = dentry->d_inode;
-
-			ino = autofs4_dentry_ino(dentry);
-			list_del_init(&ino->rehash);
 			dget(dentry);
-			/*
-			 * Make the rehashed dentry negative so the VFS
-			 * behaves as it should.
-			 */
-			if (inode) {
-				dentry->d_inode = NULL;
-				list_del_init(&dentry->d_alias);
-				spin_unlock(&dentry->d_lock);
-				spin_unlock(&sbi->rehash_lock);
-				spin_unlock(&dcache_lock);
-				iput(inode);
-				return dentry;
-			}
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&sbi->rehash_lock);
+			spin_unlock(&sbi->lookup_lock);
 			spin_unlock(&dcache_lock);
 			return dentry;
 		}
 next:
 		spin_unlock(&dentry->d_lock);
 	}
-	spin_unlock(&sbi->rehash_lock);
+	spin_unlock(&sbi->lookup_lock);
 	spin_unlock(&dcache_lock);
 
 	return NULL;
@@ -591,7 +574,7 @@ next:
 static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct autofs_sb_info *sbi;
-	struct dentry *unhashed;
+	struct dentry *expiring;
 	int oz_mode;
 
 	DPRINTK("name = %.*s",
@@ -607,44 +590,44 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
 		 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
 
-	unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
-	if (!unhashed) {
-		/*
-		 * Mark the dentry incomplete but don't hash it. We do this
-		 * to serialize our inode creation operations (symlink and
-		 * mkdir) which prevents deadlock during the callback to
-		 * the daemon. Subsequent user space lookups for the same
-		 * dentry are placed on the wait queue while the daemon
-		 * itself is allowed passage unresticted so the create
-		 * operation itself can then hash the dentry. Finally,
-		 * we check for the hashed dentry and return the newly
-		 * hashed dentry.
-		 */
-		dentry->d_op = &autofs4_root_dentry_operations;
-
-		dentry->d_fsdata = NULL;
-		d_instantiate(dentry, NULL);
-	} else {
-		struct autofs_info *ino = autofs4_dentry_ino(unhashed);
-		DPRINTK("rehash %p with %p", dentry, unhashed);
+	expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
+	if (expiring) {
+		struct autofs_info *ino = autofs4_dentry_ino(expiring);
 		/*
 		 * If we are racing with expire the request might not
 		 * be quite complete but the directory has been removed
 		 * so it must have been successful, so just wait for it.
-		 * We need to ensure the AUTOFS_INF_EXPIRING flag is clear
-		 * before continuing as revalidate may fail when calling
-		 * try_to_fill_dentry (returning EAGAIN) if we don't.
 		 */
 		while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
 			DPRINTK("wait for incomplete expire %p name=%.*s",
-				unhashed, unhashed->d_name.len,
-				unhashed->d_name.name);
-			autofs4_wait(sbi, unhashed, NFY_NONE);
+				expiring, expiring->d_name.len,
+				expiring->d_name.name);
+			autofs4_wait(sbi, expiring, NFY_NONE);
 			DPRINTK("request completed");
 		}
-		dentry = unhashed;
+		spin_lock(&sbi->lookup_lock);
+		if (!list_empty(&ino->expiring))
+			list_del_init(&ino->expiring);
+		spin_unlock(&sbi->lookup_lock);
+		dput(expiring);
 	}
 
+	/*
+	 * Mark the dentry incomplete but don't hash it. We do this
+	 * to serialize our inode creation operations (symlink and
+	 * mkdir) which prevents deadlock during the callback to
+	 * the daemon. Subsequent user space lookups for the same
+	 * dentry are placed on the wait queue while the daemon
+	 * itself is allowed passage unresticted so the create
+	 * operation itself can then hash the dentry. Finally,
+	 * we check for the hashed dentry and return the newly
+	 * hashed dentry.
+	 */
+	dentry->d_op = &autofs4_root_dentry_operations;
+
+	dentry->d_fsdata = NULL;
+	d_instantiate(dentry, NULL);
+
 	if (!oz_mode) {
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags |= DCACHE_AUTOFS_PENDING;
@@ -668,8 +651,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 			if (sigismember (sigset, SIGKILL) ||
 			    sigismember (sigset, SIGQUIT) ||
 			    sigismember (sigset, SIGINT)) {
-			    if (unhashed)
-				dput(unhashed);
 			    return ERR_PTR(-ERESTARTNOINTR);
 			}
 		}
@@ -699,15 +680,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		else
 			dentry = ERR_PTR(-ENOENT);
 
-		if (unhashed)
-			dput(unhashed);
-
 		return dentry;
 	}
 
-	if (unhashed)
-		return dentry;
-
 	return NULL;
 }
 
@@ -769,9 +744,8 @@ static int autofs4_dir_symlink(struct inode *dir,
  * that the file no longer exists. However, doing that means that the
  * VFS layer can turn the dentry into a negative dentry.  We don't want
  * this, because the unlink is probably the result of an expire.
- * We simply d_drop it and add it to a rehash candidates list in the
- * super block, which allows the dentry lookup to reuse it retaining
- * the flags, such as expire in progress, in case we're racing with expire.
+ * We simply d_drop it and add it to a expiring list in the super block,
+ * which allows the dentry lookup to check for an incomplete expire.
  *
  * If a process is blocked on the dentry waiting for the expire to finish,
  * it will invalidate the dentry and try to mount with a new one.
@@ -801,9 +775,9 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 	dir->i_mtime = CURRENT_TIME;
 
 	spin_lock(&dcache_lock);
-	spin_lock(&sbi->rehash_lock);
-	list_add(&ino->rehash, &sbi->rehash_list);
-	spin_unlock(&sbi->rehash_lock);
+	spin_lock(&sbi->lookup_lock);
+	list_add(&ino->expiring, &sbi->expiring_list);
+	spin_unlock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -829,9 +803,9 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 		spin_unlock(&dcache_lock);
 		return -ENOTEMPTY;
 	}
-	spin_lock(&sbi->rehash_lock);
-	list_add(&ino->rehash, &sbi->rehash_list);
-	spin_unlock(&sbi->rehash_lock);
+	spin_lock(&sbi->lookup_lock);
+	list_add(&ino->expiring, &sbi->expiring_list);
+	spin_unlock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-- 
GitLab


From caf7da3d5d4d9dd873eb52d025d8cc63b89f1fdb Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:11 -0700
Subject: [PATCH 320/853] autofs4: revert - redo lookup in ttfd

This patch series enables the use of a single dentry for lookups prior to
the dentry being hashed and so we no longer need to redo the lookup.  This
patch reverts the patch of commit
033790449ba9c4dcf8478a87693d33df625c23b5.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 9ead2279df4..53dabe8d5b8 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -242,7 +242,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
-	struct dentry *new;
 	int status;
 
 	/* Block on any pending expiry here; invalidate the dentry
@@ -320,26 +319,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 	dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 	spin_unlock(&dentry->d_lock);
 
-	/*
-	 * The dentry that is passed in from lookup may not be the one
-	 * we end up using, as mkdir can create a new one.  If this
-	 * happens, and another process tries the lookup at the same time,
-	 * it will set the PENDING flag on this new dentry, but add itself
-	 * to our waitq.  Then, if after the lookup succeeds, the first
-	 * process that requested the mount performs another lookup of the
-	 * same directory, it will show up as still pending!  So, we need
-	 * to redo the lookup here and clear pending on that dentry.
-	 */
-	if (d_unhashed(dentry)) {
-		new = d_lookup(dentry->d_parent, &dentry->d_name);
-		if (new) {
-			spin_lock(&new->d_lock);
-			new->d_flags &= ~DCACHE_AUTOFS_PENDING;
-			spin_unlock(&new->d_lock);
-			dput(new);
-		}
-	}
-
 	return 0;
 }
 
-- 
GitLab


From 2576737873dc1d9ea461a5955a5f6779b569a350 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:12 -0700
Subject: [PATCH 321/853] autofs4: use look aside list for lookups

A while ago a patch to resolve a deadlock during directory creation was
merged.  This delayed the hashing of lookup dentrys until the ->mkdir()
(or ->symlink()) operation completed to ensure we always went through
->lookup() instead of also having processes go through ->revalidate() so
our VFS locking remained consistent.

Now we are seeing a couple of side effects of that change in situations
with heavy mount activity.

Two cases have been identified:

1) When a mount request is triggered, due to the delayed hashing, the
   directory created by user space for the mount point doesn't have the
   DCACHE_AUTOFS_PENDING flag set.  In the case of an autofs multi-mount
   where a tree of mount point directories are created this can lead to
   the path walk continuing rather than the dentry being sent to the wait
   queue to wait for request completion.  This is because, if the pending
   flag isn't set, the criteria for deciding this is a mount in progress
   fails to hold, namely that the dentry is not a mount point and has no
   subdirectories.

2) A mount request dentry is initially created negative and unhashed.
   It remains this way until the ->mkdir() callback completes.  Since it
   is unhashed a fresh dentry is used when the user space mount request
   creates the mount point directory.  This leaves the original dentry
   negative and unhashed.  But revalidate has no way to tell the VFS that
   the dentry has changed, other than to force another ->lookup() by
   returning false, which is at best wasteful and at worst not possible.
   This results in an -ENOENT return from the original path walk when in
   fact the mount succeeded.

To resolve this we need to ensure that the same dentry is used in all
calls to ->lookup() during the course of a mount request.  This patch
achieves that by adding the initial dentry to a look aside list and
removes it at ->mkdir() or ->symlink() completion (or when the dentry is
released), since these are the only create operations autofs4 supports.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h |   2 +
 fs/autofs4/inode.c    |  25 ++++---
 fs/autofs4/root.c     | 169 ++++++++++++++++++++++++++++++++++--------
 3 files changed, 156 insertions(+), 40 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 69b1497b002..2dce2334737 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,6 +52,7 @@ struct autofs_info {
 
 	int		flags;
 
+	struct list_head active;
 	struct list_head expiring;
 
 	struct autofs_sb_info *sbi;
@@ -113,6 +114,7 @@ struct autofs_sb_info {
 	spinlock_t fs_lock;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
 	spinlock_t lookup_lock;
+	struct list_head active_list;
 	struct list_head expiring_list;
 };
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 94bfc154d7a..e3e70994ab4 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -24,8 +24,10 @@
 
 static void ino_lnkfree(struct autofs_info *ino)
 {
-	kfree(ino->u.symlink);
-	ino->u.symlink = NULL;
+	if (ino->u.symlink) {
+		kfree(ino->u.symlink);
+		ino->u.symlink = NULL;
+	}
 }
 
 struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
@@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
 	if (ino == NULL)
 		return NULL;
 
-	ino->flags = 0;
-	ino->mode = mode;
-	ino->inode = NULL;
-	ino->dentry = NULL;
-	ino->size = 0;
-
-	INIT_LIST_HEAD(&ino->expiring);
+	if (!reinit) {
+		ino->flags = 0;
+		ino->inode = NULL;
+		ino->dentry = NULL;
+		ino->size = 0;
+		INIT_LIST_HEAD(&ino->active);
+		INIT_LIST_HEAD(&ino->expiring);
+		atomic_set(&ino->count, 0);
+	}
 
+	ino->mode = mode;
 	ino->last_used = jiffies;
-	atomic_set(&ino->count, 0);
 
 	ino->sbi = sbi;
 
@@ -339,6 +343,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
 	spin_lock_init(&sbi->lookup_lock);
+	INIT_LIST_HEAD(&sbi->active_list);
 	INIT_LIST_HEAD(&sbi->expiring_list);
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 53dabe8d5b8..dbb70d5a488 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -473,6 +473,8 @@ void autofs4_dentry_release(struct dentry *de)
 
 		if (sbi) {
 			spin_lock(&sbi->lookup_lock);
+			if (!list_empty(&inf->active))
+				list_del(&inf->active);
 			if (!list_empty(&inf->expiring))
 				list_del(&inf->expiring);
 			spin_unlock(&sbi->lookup_lock);
@@ -497,6 +499,58 @@ static struct dentry_operations autofs4_dentry_operations = {
 	.d_release	= autofs4_dentry_release,
 };
 
+static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+{
+	unsigned int len = name->len;
+	unsigned int hash = name->hash;
+	const unsigned char *str = name->name;
+	struct list_head *p, *head;
+
+	spin_lock(&dcache_lock);
+	spin_lock(&sbi->lookup_lock);
+	head = &sbi->active_list;
+	list_for_each(p, head) {
+		struct autofs_info *ino;
+		struct dentry *dentry;
+		struct qstr *qstr;
+
+		ino = list_entry(p, struct autofs_info, active);
+		dentry = ino->dentry;
+
+		spin_lock(&dentry->d_lock);
+
+		/* Already gone? */
+		if (atomic_read(&dentry->d_count) == 0)
+			goto next;
+
+		qstr = &dentry->d_name;
+
+		if (dentry->d_name.hash != hash)
+			goto next;
+		if (dentry->d_parent != parent)
+			goto next;
+
+		if (qstr->len != len)
+			goto next;
+		if (memcmp(qstr->name, str, len))
+			goto next;
+
+		if (d_unhashed(dentry)) {
+			dget(dentry);
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&sbi->lookup_lock);
+			spin_unlock(&dcache_lock);
+			return dentry;
+		}
+next:
+		spin_unlock(&dentry->d_lock);
+	}
+	spin_unlock(&sbi->lookup_lock);
+	spin_unlock(&dcache_lock);
+
+	return NULL;
+}
+
 static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
 {
 	unsigned int len = name->len;
@@ -553,7 +607,8 @@ next:
 static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct autofs_sb_info *sbi;
-	struct dentry *expiring;
+	struct autofs_info *ino;
+	struct dentry *expiring, *unhashed;
 	int oz_mode;
 
 	DPRINTK("name = %.*s",
@@ -571,12 +626,12 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 
 	expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
 	if (expiring) {
-		struct autofs_info *ino = autofs4_dentry_ino(expiring);
 		/*
 		 * If we are racing with expire the request might not
 		 * be quite complete but the directory has been removed
 		 * so it must have been successful, so just wait for it.
 		 */
+		ino = autofs4_dentry_ino(expiring);
 		while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
 			DPRINTK("wait for incomplete expire %p name=%.*s",
 				expiring, expiring->d_name.len,
@@ -591,21 +646,41 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		dput(expiring);
 	}
 
-	/*
-	 * Mark the dentry incomplete but don't hash it. We do this
-	 * to serialize our inode creation operations (symlink and
-	 * mkdir) which prevents deadlock during the callback to
-	 * the daemon. Subsequent user space lookups for the same
-	 * dentry are placed on the wait queue while the daemon
-	 * itself is allowed passage unresticted so the create
-	 * operation itself can then hash the dentry. Finally,
-	 * we check for the hashed dentry and return the newly
-	 * hashed dentry.
-	 */
-	dentry->d_op = &autofs4_root_dentry_operations;
+	unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
+	if (unhashed)
+		dentry = unhashed;
+	else {
+		/*
+		 * Mark the dentry incomplete but don't hash it. We do this
+		 * to serialize our inode creation operations (symlink and
+		 * mkdir) which prevents deadlock during the callback to
+		 * the daemon. Subsequent user space lookups for the same
+		 * dentry are placed on the wait queue while the daemon
+		 * itself is allowed passage unresticted so the create
+		 * operation itself can then hash the dentry. Finally,
+		 * we check for the hashed dentry and return the newly
+		 * hashed dentry.
+		 */
+		dentry->d_op = &autofs4_root_dentry_operations;
+
+		/*
+		 * And we need to ensure that the same dentry is used for
+		 * all following lookup calls until it is hashed so that
+		 * the dentry flags are persistent throughout the request.
+		 */
+		ino = autofs4_init_ino(NULL, sbi, 0555);
+		if (!ino)
+			return ERR_PTR(-ENOMEM);
+
+		dentry->d_fsdata = ino;
+		ino->dentry = dentry;
+
+		spin_lock(&sbi->lookup_lock);
+		list_add(&ino->active, &sbi->active_list);
+		spin_unlock(&sbi->lookup_lock);
 
-	dentry->d_fsdata = NULL;
-	d_instantiate(dentry, NULL);
+		d_instantiate(dentry, NULL);
+	}
 
 	if (!oz_mode) {
 		spin_lock(&dentry->d_lock);
@@ -630,12 +705,16 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 			if (sigismember (sigset, SIGKILL) ||
 			    sigismember (sigset, SIGQUIT) ||
 			    sigismember (sigset, SIGINT)) {
+			    if (unhashed)
+				dput(unhashed);
 			    return ERR_PTR(-ERESTARTNOINTR);
 			}
 		}
-		spin_lock(&dentry->d_lock);
-		dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
-		spin_unlock(&dentry->d_lock);
+		if (!oz_mode) {
+			spin_lock(&dentry->d_lock);
+			dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
+			spin_unlock(&dentry->d_lock);
+		}
 	}
 
 	/*
@@ -659,9 +738,15 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		else
 			dentry = ERR_PTR(-ENOENT);
 
+		if (unhashed)
+			dput(unhashed);
+
 		return dentry;
 	}
 
+	if (unhashed)
+		return unhashed;
+
 	return NULL;
 }
 
@@ -682,20 +767,30 @@ static int autofs4_dir_symlink(struct inode *dir,
 		return -EACCES;
 
 	ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
-	if (ino == NULL)
-		return -ENOSPC;
+	if (!ino)
+		return -ENOMEM;
 
-	ino->size = strlen(symname);
-	ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL);
+	spin_lock(&sbi->lookup_lock);
+	if (!list_empty(&ino->active))
+		list_del_init(&ino->active);
+	spin_unlock(&sbi->lookup_lock);
 
-	if (cp == NULL) {
-		kfree(ino);
-		return -ENOSPC;
+	cp = kmalloc(ino->size + 1, GFP_KERNEL);
+	if (!cp) {
+		if (!dentry->d_fsdata)
+			kfree(ino);
+		return -ENOMEM;
 	}
 
 	strcpy(cp, symname);
 
 	inode = autofs4_get_inode(dir->i_sb, ino);
+	if (!inode) {
+		kfree(cp);
+		if (!dentry->d_fsdata)
+			kfree(ino);
+		return -ENOMEM;
+	}
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)
@@ -711,6 +806,8 @@ static int autofs4_dir_symlink(struct inode *dir,
 		atomic_inc(&p_ino->count);
 	ino->inode = inode;
 
+	ino->size = strlen(symname);
+	ino->u.symlink = cp;
 	dir->i_mtime = CURRENT_TIME;
 
 	return 0;
@@ -755,7 +852,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
 	spin_lock(&dcache_lock);
 	spin_lock(&sbi->lookup_lock);
-	list_add(&ino->expiring, &sbi->expiring_list);
+	if (list_empty(&ino->expiring))
+		list_add(&ino->expiring, &sbi->expiring_list);
 	spin_unlock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
@@ -783,7 +881,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 		return -ENOTEMPTY;
 	}
 	spin_lock(&sbi->lookup_lock);
-	list_add(&ino->expiring, &sbi->expiring_list);
+	if (list_empty(&ino->expiring))
+		list_add(&ino->expiring, &sbi->expiring_list);
 	spin_unlock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
@@ -819,10 +918,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		dentry, dentry->d_name.len, dentry->d_name.name);
 
 	ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
-	if (ino == NULL)
-		return -ENOSPC;
+	if (!ino)
+		return -ENOMEM;
+
+	spin_lock(&sbi->lookup_lock);
+	if (!list_empty(&ino->active))
+		list_del_init(&ino->active);
+	spin_unlock(&sbi->lookup_lock);
 
 	inode = autofs4_get_inode(dir->i_sb, ino);
+	if (!inode) {
+		if (!dentry->d_fsdata)
+			kfree(ino);
+		return -ENOMEM;
+	}
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)
-- 
GitLab


From ef581a742874ebc4c28d24b374c78b762144ebdc Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:13 -0700
Subject: [PATCH 322/853] autofs4: fix symlink name allocation

The length of the symlink name has been moved but it needs to be set
before allocating space for it in the dentry info struct.  This corrects a
mistake in a recent patch.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index dbb70d5a488..324290c6827 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -775,6 +775,7 @@ static int autofs4_dir_symlink(struct inode *dir,
 		list_del_init(&ino->active);
 	spin_unlock(&sbi->lookup_lock);
 
+	ino->size = strlen(symname);
 	cp = kmalloc(ino->size + 1, GFP_KERNEL);
 	if (!cp) {
 		if (!dentry->d_fsdata)
@@ -806,7 +807,6 @@ static int autofs4_dir_symlink(struct inode *dir,
 		atomic_inc(&p_ino->count);
 	ino->inode = inode;
 
-	ino->size = strlen(symname);
 	ino->u.symlink = cp;
 	dir->i_mtime = CURRENT_TIME;
 
-- 
GitLab


From c432c2586a0811c7d0030d78f0993568bc889a6f Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:14 -0700
Subject: [PATCH 323/853] autofs4: don't release directory mutex if called in
 oz_mode

Since we now delay hashing of dentrys until the ->mkdir() call, dropping
and re-taking the directory mutex within the ->lookup() function when we
are being called by user space is not needed.  This can lead to a race
when other processes are attempting to access the same directory during
mount point directory creation.

In this case we need to hang onto the mutex to ensure we don't get user
processes trying to create a mount request for a newly created dentry
after the mount point entry has already been created.  This ensures that
when we need to check a dentry passed to autofs4_wait(), if it is hashed,
it is always the mount point dentry and not a new dentry created by
another lookup during directory creation.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 324290c6827..1e901e5ea01 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -686,12 +686,11 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags |= DCACHE_AUTOFS_PENDING;
 		spin_unlock(&dentry->d_lock);
-	}
-
-	if (dentry->d_op && dentry->d_op->d_revalidate) {
-		mutex_unlock(&dir->i_mutex);
-		(dentry->d_op->d_revalidate)(dentry, nd);
-		mutex_lock(&dir->i_mutex);
+		if (dentry->d_op && dentry->d_op->d_revalidate) {
+			mutex_unlock(&dir->i_mutex);
+			(dentry->d_op->d_revalidate)(dentry, nd);
+			mutex_lock(&dir->i_mutex);
+		}
 	}
 
 	/*
-- 
GitLab


From 6d5cb926fa0162b1e62f37c117cc7ce763cfcbb9 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:15 -0700
Subject: [PATCH 324/853] autofs4: use lookup intent flags to trigger mounts

When an open(2) call is made on an autofs mount point directory that
already exists and the O_DIRECTORY flag is not used the needed mount
callback to the daemon is not done. This leads to the path walk
continuing resulting in a callback to the daemon with an incorrect
key. open(2) is called without O_DIRECTORY by the "find" utility but
this should be handled properly anyway.

This happens because autofs needs to use the lookup flags to decide
when to callback to the daemon to perform a mount to prevent mount
storms. For example, an autofs indirect mount map that has the "browse"
option will have the mount point directories pre-created and the
stat(2) call made by a color ls against each directory will cause all
these directories to be mounted. It is unfortunate we need to resort
to this but mount maps can be quite large. Additionally, if a user
manually umounts an autofs indirect mount the directory isn't removed
which also leads to this situation.

To resolve this autofs needs to use the lookup intent flags to enable
it to make this decision. This patch adds this check and triggers a
call back if any of the lookup intent flags are set as all these calls
warrant a mount attempt be requested.

I know that external VFS code which uses the lookup flags is something
that the VFS would like to eliminate but I have no choice as I can't
see any other way to do this. A VFS dentry or inode operation callback
which returns the lookup "type" (requires a definition) would be
sufficient. But this change is needed now and I'm not aware of the form
that coming VFS changes will take so I'm not willing to propose anything
along these lines.

If anyone can provide an alternate method I would be happy to use it.

[akpm@linux-foundation.org: fix build for concurrent VFS changes]
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 1e901e5ea01..87352654ff4 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -31,6 +31,9 @@ static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t fil
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
 
+#define TRIGGER_FLAGS   (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
+#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
+
 const struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
 	.release	= dcache_dir_close,
@@ -291,7 +294,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 			return status;
 		}
 	/* Trigger mount for path component or follow link */
-	} else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) ||
+	} else if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
 			current->link_count) {
 		DPRINTK("waiting for mount name=%.*s",
 			dentry->d_name.len, dentry->d_name.name);
@@ -336,7 +339,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		nd->flags);
 
 	/* If it's our master or we shouldn't trigger a mount we're done */
-	lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY);
+	lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
 	if (oz_mode || !lookup_type)
 		goto done;
 
-- 
GitLab


From 70b52a0a5005ce6a0ceec56e97222437a0ba7506 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 23 Jul 2008 21:30:16 -0700
Subject: [PATCH 325/853] autofs4: use struct qstr in waitq.c

The autofs_wait_queue already contains all of the fields of the
struct qstr, so change it into a qstr.

This patch, from Jeff Moyer, has been modified a little by myself.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h |  4 +-
 fs/autofs4/waitq.c    | 86 ++++++++++++++++++++++---------------------
 2 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 2dce2334737..da8882ff31e 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -75,9 +75,7 @@ struct autofs_wait_queue {
 	struct autofs_wait_queue *next;
 	autofs_wqt_t wait_queue_token;
 	/* We use the following to see what we are waiting for */
-	unsigned int hash;
-	unsigned int len;
-	char *name;
+	struct qstr name;
 	u32 dev;
 	u64 ino;
 	uid_t uid;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 75e5955c3f6..5208cfb1df4 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -36,8 +36,10 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 	while (wq) {
 		nwq = wq->next;
 		wq->status = -ENOENT; /* Magic is gone - report failure */
-		kfree(wq->name);
-		wq->name = NULL;
+		if (wq->name.name) {
+			kfree(wq->name.name);
+			wq->name.name = NULL;
+		}
 		wake_up_interruptible(&wq->queue);
 		wq = nwq;
 	}
@@ -92,7 +94,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
-		wq->wait_queue_token, wq->len, wq->name, type);
+		wq->wait_queue_token, wq->name.len, wq->name.name, type);
 
 	memset(&pkt,0,sizeof pkt); /* For security reasons */
 
@@ -107,9 +109,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		pktsz = sizeof(*mp);
 
 		mp->wait_queue_token = wq->wait_queue_token;
-		mp->len = wq->len;
-		memcpy(mp->name, wq->name, wq->len);
-		mp->name[wq->len] = '\0';
+		mp->len = wq->name.len;
+		memcpy(mp->name, wq->name.name, wq->name.len);
+		mp->name[wq->name.len] = '\0';
 		break;
 	}
 	case autofs_ptype_expire_multi:
@@ -119,9 +121,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		pktsz = sizeof(*ep);
 
 		ep->wait_queue_token = wq->wait_queue_token;
-		ep->len = wq->len;
-		memcpy(ep->name, wq->name, wq->len);
-		ep->name[wq->len] = '\0';
+		ep->len = wq->name.len;
+		memcpy(ep->name, wq->name.name, wq->name.len);
+		ep->name[wq->name.len] = '\0';
 		break;
 	}
 	/*
@@ -138,9 +140,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		pktsz = sizeof(*packet);
 
 		packet->wait_queue_token = wq->wait_queue_token;
-		packet->len = wq->len;
-		memcpy(packet->name, wq->name, wq->len);
-		packet->name[wq->len] = '\0';
+		packet->len = wq->name.len;
+		memcpy(packet->name, wq->name.name, wq->name.len);
+		packet->name[wq->name.len] = '\0';
 		packet->dev = wq->dev;
 		packet->ino = wq->ino;
 		packet->uid = wq->uid;
@@ -191,15 +193,15 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
 }
 
 static struct autofs_wait_queue *
-autofs4_find_wait(struct autofs_sb_info *sbi,
-		  char *name, unsigned int hash, unsigned int len)
+autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
 {
 	struct autofs_wait_queue *wq;
 
 	for (wq = sbi->queues; wq; wq = wq->next) {
-		if (wq->hash == hash &&
-		    wq->len == len &&
-		    wq->name && !memcmp(wq->name, name, len))
+		if (wq->name.hash == qstr->hash &&
+		    wq->name.len == qstr->len &&
+		    wq->name.name &&
+			 !memcmp(wq->name.name, qstr->name, qstr->len))
 			break;
 	}
 	return wq;
@@ -210,9 +212,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 {
 	struct autofs_info *ino;
 	struct autofs_wait_queue *wq;
+	struct qstr qstr;
 	char *name;
-	unsigned int len = 0;
-	unsigned int hash = 0;
 	int status, type;
 
 	/* In catatonic mode, we don't wait for nobody */
@@ -225,22 +226,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 
 	/* If this is a direct mount request create a dummy name */
 	if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
-		len = sprintf(name, "%p", dentry);
+		qstr.len = sprintf(name, "%p", dentry);
 	else {
-		len = autofs4_getpath(sbi, dentry, &name);
-		if (!len) {
+		qstr.len = autofs4_getpath(sbi, dentry, &name);
+		if (!qstr.len) {
 			kfree(name);
 			return -ENOENT;
 		}
 	}
-	hash = full_name_hash(name, len);
+	qstr.name = name;
+	qstr.hash = full_name_hash(name, qstr.len);
 
 	if (mutex_lock_interruptible(&sbi->wq_mutex)) {
-		kfree(name);
+		kfree(qstr.name);
 		return -EINTR;
 	}
 
-	wq = autofs4_find_wait(sbi, name, hash, len);
+	wq = autofs4_find_wait(sbi, &qstr);
 	ino = autofs4_dentry_ino(dentry);
 	if (!wq && ino && notify == NFY_NONE) {
 		/*
@@ -254,10 +256,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 			mutex_unlock(&sbi->wq_mutex);
 			schedule_timeout_interruptible(HZ/10);
 			if (mutex_lock_interruptible(&sbi->wq_mutex)) {
-				kfree(name);
+				kfree(qstr.name);
 				return -EINTR;
 			}
-			wq = autofs4_find_wait(sbi, name, hash, len);
+			wq = autofs4_find_wait(sbi, &qstr);
 			if (wq)
 				break;
 		}
@@ -268,7 +270,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		 * return status of the wait.
 		 */
 		if (!wq) {
-			kfree(name);
+			kfree(qstr.name);
 			mutex_unlock(&sbi->wq_mutex);
 			return 0;
 		}
@@ -278,7 +280,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		/* Create a new wait queue */
 		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
 		if (!wq) {
-			kfree(name);
+			kfree(qstr.name);
 			mutex_unlock(&sbi->wq_mutex);
 			return -ENOMEM;
 		}
@@ -289,9 +291,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->next = sbi->queues;
 		sbi->queues = wq;
 		init_waitqueue_head(&wq->queue);
-		wq->hash = hash;
-		wq->name = name;
-		wq->len = len;
+		memcpy(&wq->name, &qstr, sizeof(struct qstr));
 		wq->dev = autofs4_get_dev(sbi);
 		wq->ino = autofs4_get_ino(sbi);
 		wq->uid = current->uid;
@@ -319,16 +319,18 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		}
 
 		DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
+			(unsigned long) wq->wait_queue_token, wq->name.len,
+			wq->name.name, notify);
 
 		/* autofs4_notify_daemon() may block */
 		autofs4_notify_daemon(sbi, wq, type);
 	} else {
 		atomic_inc(&wq->wait_ctr);
 		mutex_unlock(&sbi->wq_mutex);
-		kfree(name);
+		kfree(qstr.name);
 		DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
-			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
+			(unsigned long) wq->wait_queue_token, wq->name.len,
+			wq->name.name, notify);
 	}
 
 	/* wq->name is NULL if and only if the lock is already released */
@@ -336,11 +338,13 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	if (sbi->catatonic) {
 		/* We might have slept, so check again for catatonic mode */
 		wq->status = -ENOENT;
-		kfree(wq->name);
-		wq->name = NULL;
+		if (wq->name.name) {
+			kfree(wq->name.name);
+			wq->name.name = NULL;
+		}
 	}
 
-	if (wq->name) {
+	if (wq->name.name) {
 		/* Block all but "shutdown" signals while waiting */
 		sigset_t oldset;
 		unsigned long irqflags;
@@ -351,7 +355,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		recalc_sigpending();
 		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 
-		wait_event_interruptible(wq->queue, wq->name == NULL);
+		wait_event_interruptible(wq->queue, wq->name.name == NULL);
 
 		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		current->blocked = oldset;
@@ -388,8 +392,8 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
 
 	*wql = wq->next;	/* Unlink from chain */
 	mutex_unlock(&sbi->wq_mutex);
-	kfree(wq->name);
-	wq->name = NULL;	/* Do not wait on this queue */
+	kfree(wq->name.name);
+	wq->name.name = NULL;	/* Do not wait on this queue */
 
 	wq->status = status;
 
-- 
GitLab


From 5a11d4d0ee1ff284271f7265929d07ea4a1168a6 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:17 -0700
Subject: [PATCH 326/853] autofs4: fix waitq locking

The autofs4_catatonic_mode() function accesses the wait queue without any
locking but can be called at any time.  This could lead to a possible
double free of the name field of the wait and a double fput of the daemon
communication pipe or an fput of a NULL file pointer.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/inode.c |  4 ++--
 fs/autofs4/waitq.c | 23 ++++++++++++-----------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e3e70994ab4..7bb3e5ba053 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -163,8 +163,8 @@ void autofs4_kill_sb(struct super_block *sb)
 	if (!sbi)
 		goto out_kill_sb;
 
-	if (!sbi->catatonic)
-		autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
+	/* Free wait queues, close pipe */
+	autofs4_catatonic_mode(sbi);
 
 	/* Clean up and release dangling references */
 	autofs4_force_release(sbi);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 5208cfb1df4..55aac10cf32 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -28,6 +28,12 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 {
 	struct autofs_wait_queue *wq, *nwq;
 
+	mutex_lock(&sbi->wq_mutex);
+	if (sbi->catatonic) {
+		mutex_unlock(&sbi->wq_mutex);
+		return;
+	}
+
 	DPRINTK("entering catatonic mode");
 
 	sbi->catatonic = 1;
@@ -45,6 +51,8 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 	}
 	fput(sbi->pipe);	/* Close the pipe */
 	sbi->pipe = NULL;
+	sbi->pipefd = -1;
+	mutex_unlock(&sbi->wq_mutex);
 }
 
 static int autofs4_write(struct file *file, const void *addr, int bytes)
@@ -333,17 +341,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 			wq->name.name, notify);
 	}
 
-	/* wq->name is NULL if and only if the lock is already released */
-
-	if (sbi->catatonic) {
-		/* We might have slept, so check again for catatonic mode */
-		wq->status = -ENOENT;
-		if (wq->name.name) {
-			kfree(wq->name.name);
-			wq->name.name = NULL;
-		}
-	}
-
+	/*
+	 * wq->name.name is NULL iff the lock is already released
+	 * or the mount has been made catatonic.
+	 */
 	if (wq->name.name) {
 		/* Block all but "shutdown" signals while waiting */
 		sigset_t oldset;
-- 
GitLab


From a1362fe92f1bde687b3a9e93d6b8d105d0a84f74 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:19 -0700
Subject: [PATCH 327/853] autofs4: fix pending mount race

Close a race between a pending mount that is about to finish and a new
lookup for the same directory.

Process P1 triggers a mount of directory foo.  It sets
DCACHE_AUTOFS_PENDING in the ->lookup routine, creates a waitq entry for
'foo', and calls out to the daemon to perform the mount.  The autofs
daemon will then create the directory 'foo', using a new dentry that will
be hashed in the dcache.

Before the mount completes, another process, P2, tries to walk into the
'foo' directory.  The vfs path walking code finds an entry for 'foo' and
calls the revalidate method.  Revalidate finds that the entry is not
PENDING (because PENDING was never set on the dentry created by the
mkdir), but it does find the directory is empty.  Revalidate calls
try_to_fill_dentry, which sets the PENDING flag and then calls into the
autofs4 wait code to trigger or wait for a mount of 'foo'.  The wait code
finds the entry for 'foo' and goes to sleep waiting for the completion of
the mount.

Yet another process, P3, tries to walk into the 'foo' directory.  This
process again finds a dentry in the dcache for 'foo', and calls into the
autofs revalidate code.

The revalidate code finds that the PENDING flag is set, and so calls
try_to_fill_dentry.

a) try_to_fill_dentry sets the PENDING flag redundantly for this
   dentry, then calls into the autofs4 wait code.

b) the autofs4 wait code takes the waitq mutex and searches for an
   entry for 'foo'

Between a and b, P1 is woken up because the mount completed.  P1 takes the
wait queue mutex, clears the PENDING flag from the dentry, and removes the
waitqueue entry for 'foo' from the list.

When it releases the waitq mutex, P3 (eventually) acquires it.  At this
time, it looks for an existing waitq for 'foo', finds none, and so creates
a new one and calls out to the daemon to mount the 'foo' directory.

Now, the reason that three processes are required to trigger this race is
that, because the PENDING flag is not set on the dentry created by mkdir,
the window for the race would be way too slim for it to ever occur.
Basically, between the testing of d_mountpoint(dentry) and the taking of
the waitq mutex, the mount would have to complete and the daemon would
have to be woken up, and that in turn would have to wake up P1.  This is
simply impossible.  Add the third process, though, and it becomes slightly
more likely.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/waitq.c | 135 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 97 insertions(+), 38 deletions(-)

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 55aac10cf32..cd3b2a67169 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -215,19 +215,106 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
 	return wq;
 }
 
+/*
+ * Check if we have a valid request.
+ * Returns
+ * 1 if the request should continue.
+ *   In this case we can return an autofs_wait_queue entry if one is
+ *   found or NULL to idicate a new wait needs to be created.
+ * 0 or a negative errno if the request shouldn't continue.
+ */
+static int validate_request(struct autofs_wait_queue **wait,
+			    struct autofs_sb_info *sbi,
+			    struct qstr *qstr,
+			    struct dentry*dentry, enum autofs_notify notify)
+{
+	struct autofs_wait_queue *wq;
+	struct autofs_info *ino;
+
+	/* Wait in progress, continue; */
+	wq = autofs4_find_wait(sbi, qstr);
+	if (wq) {
+		*wait = wq;
+		return 1;
+	}
+
+	*wait = NULL;
+
+	/* If we don't yet have any info this is a new request */
+	ino = autofs4_dentry_ino(dentry);
+	if (!ino)
+		return 1;
+
+	/*
+	 * If we've been asked to wait on an existing expire (NFY_NONE)
+	 * but there is no wait in the queue ...
+	 */
+	if (notify == NFY_NONE) {
+		/*
+		 * Either we've betean the pending expire to post it's
+		 * wait or it finished while we waited on the mutex.
+		 * So we need to wait till either, the wait appears
+		 * or the expire finishes.
+		 */
+
+		while (ino->flags & AUTOFS_INF_EXPIRING) {
+			mutex_unlock(&sbi->wq_mutex);
+			schedule_timeout_interruptible(HZ/10);
+			if (mutex_lock_interruptible(&sbi->wq_mutex))
+				return -EINTR;
+
+			wq = autofs4_find_wait(sbi, qstr);
+			if (wq) {
+				*wait = wq;
+				return 1;
+			}
+		}
+
+		/*
+		 * Not ideal but the status has already gone. Of the two
+		 * cases where we wait on NFY_NONE neither depend on the
+		 * return status of the wait.
+		 */
+		return 0;
+	}
+
+	/*
+	 * If we've been asked to trigger a mount and the request
+	 * completed while we waited on the mutex ...
+	 */
+	if (notify == NFY_MOUNT) {
+		/*
+		 * If the dentry isn't hashed just go ahead and try the
+		 * mount again with a new wait (not much else we can do).
+		*/
+		if (!d_unhashed(dentry)) {
+			/*
+			 * But if the dentry is hashed, that means that we
+			 * got here through the revalidate path.  Thus, we
+			 * need to check if the dentry has been mounted
+			 * while we waited on the wq_mutex. If it has,
+			 * simply return success.
+			 */
+			if (d_mountpoint(dentry))
+				return 0;
+		}
+	}
+
+	return 1;
+}
+
 int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		enum autofs_notify notify)
 {
-	struct autofs_info *ino;
 	struct autofs_wait_queue *wq;
 	struct qstr qstr;
 	char *name;
-	int status, type;
+	int status, ret, type;
 
 	/* In catatonic mode, we don't wait for nobody */
 	if (sbi->catatonic)
 		return -ENOENT;
-	
+
 	name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
 	if (!name)
 		return -ENOMEM;
@@ -245,43 +332,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	qstr.name = name;
 	qstr.hash = full_name_hash(name, qstr.len);
 
-	if (mutex_lock_interruptible(&sbi->wq_mutex)) {
-		kfree(qstr.name);
+	if (mutex_lock_interruptible(&sbi->wq_mutex))
 		return -EINTR;
-	}
-
-	wq = autofs4_find_wait(sbi, &qstr);
-	ino = autofs4_dentry_ino(dentry);
-	if (!wq && ino && notify == NFY_NONE) {
-		/*
-		 * Either we've betean the pending expire to post it's
-		 * wait or it finished while we waited on the mutex.
-		 * So we need to wait till either, the wait appears
-		 * or the expire finishes.
-		 */
 
-		while (ino->flags & AUTOFS_INF_EXPIRING) {
-			mutex_unlock(&sbi->wq_mutex);
-			schedule_timeout_interruptible(HZ/10);
-			if (mutex_lock_interruptible(&sbi->wq_mutex)) {
-				kfree(qstr.name);
-				return -EINTR;
-			}
-			wq = autofs4_find_wait(sbi, &qstr);
-			if (wq)
-				break;
-		}
-
-		/*
-		 * Not ideal but the status has already gone. Of the two
-		 * cases where we wait on NFY_NONE neither depend on the
-		 * return status of the wait.
-		 */
-		if (!wq) {
-			kfree(qstr.name);
+	ret = validate_request(&wq, sbi, &qstr, dentry, notify);
+	if (ret <= 0) {
+		if (ret == 0)
 			mutex_unlock(&sbi->wq_mutex);
-			return 0;
-		}
+		kfree(qstr.name);
+		return ret;
 	}
 
 	if (!wq) {
@@ -392,9 +451,9 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
 	}
 
 	*wql = wq->next;	/* Unlink from chain */
-	mutex_unlock(&sbi->wq_mutex);
 	kfree(wq->name.name);
 	wq->name.name = NULL;	/* Do not wait on this queue */
+	mutex_unlock(&sbi->wq_mutex);
 
 	wq->status = status;
 
-- 
GitLab


From f4c7da02615bebcaf89f15a8d055922f515160b8 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:19 -0700
Subject: [PATCH 328/853] autofs4: add missing kfree

It seems that the patch titled "autofs4 - fix pending mount race" is
missing a change that I had recently made.

It's missing a kfree for the case where mutex_lock_interruptible() fails
to acquire the wait queue mutex.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/waitq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index cd3b2a67169..1132cc2a031 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -332,8 +332,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	qstr.name = name;
 	qstr.hash = full_name_hash(name, qstr.len);
 
-	if (mutex_lock_interruptible(&sbi->wq_mutex))
+	if (mutex_lock_interruptible(&sbi->wq_mutex)) {
+		kfree(qstr.name);
 		return -EINTR;
+	}
 
 	ret = validate_request(&wq, sbi, &qstr, dentry, notify);
 	if (ret <= 0) {
-- 
GitLab


From e64be33ccaceaca67c84237dff8805b861398eab Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:20 -0700
Subject: [PATCH 329/853] autofs4: check kernel communication pipe is valid for
 write

It is possible for an autofs mount to become catatonic (and for the daemon
communication pipe to become NULL) after a wait has been initialized but
before the request has been sent to the daemon.  We need to check for this
before sending the request packet.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/waitq.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1132cc2a031..dd2914d7ad7 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -99,6 +99,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		union autofs_packet_union v4_pkt;
 		union autofs_v5_packet_union v5_pkt;
 	} pkt;
+	struct file *pipe = NULL;
 	size_t pktsz;
 
 	DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
@@ -164,8 +165,19 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		return;
 	}
 
-	if (autofs4_write(sbi->pipe, &pkt, pktsz))
-		autofs4_catatonic_mode(sbi);
+	/* Check if we have become catatonic */
+	mutex_lock(&sbi->wq_mutex);
+	if (!sbi->catatonic) {
+		pipe = sbi->pipe;
+		get_file(pipe);
+	}
+	mutex_unlock(&sbi->wq_mutex);
+
+	if (pipe) {
+		if (autofs4_write(pipe, &pkt, pktsz))
+			autofs4_catatonic_mode(sbi);
+		fput(pipe);
+	}
 }
 
 static int autofs4_getpath(struct autofs_sb_info *sbi,
-- 
GitLab


From 296f7bf78bc5c7a4d772aea580ce800d14040d1a Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:21 -0700
Subject: [PATCH 330/853] autofs4: fix waitq memory leak

If an autofs mount becomes catatonic before autofs4_wait_release() is
called the wait queue counter will not be decremented down to zero and the
entry will never be freed.  There are also races decrementing the wait
counter in the wait release function.  To deal with this the counter needs
to be updated while holding the wait queue mutex and waiters need to be
woken up unconditionally when the wait is removed from the queue to ensure
we eventually free the wait.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h |  2 +-
 fs/autofs4/waitq.c    | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index da8882ff31e..058e1800cae 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -84,7 +84,7 @@ struct autofs_wait_queue {
 	pid_t tgid;
 	/* This is for status reporting upon return */
 	int status;
-	atomic_t wait_ctr;
+	unsigned int wait_ctr;
 };
 
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index dd2914d7ad7..3458dbc8fff 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -46,6 +46,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 			kfree(wq->name.name);
 			wq->name.name = NULL;
 		}
+		wq->wait_ctr--;
 		wake_up_interruptible(&wq->queue);
 		wq = nwq;
 	}
@@ -380,7 +381,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->pid = current->pid;
 		wq->tgid = current->tgid;
 		wq->status = -EINTR; /* Status return if interrupted */
-		atomic_set(&wq->wait_ctr, 2);
+		wq->wait_ctr = 2;
 		mutex_unlock(&sbi->wq_mutex);
 
 		if (sbi->version < 5) {
@@ -406,7 +407,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		/* autofs4_notify_daemon() may block */
 		autofs4_notify_daemon(sbi, wq, type);
 	} else {
-		atomic_inc(&wq->wait_ctr);
+		wq->wait_ctr++;
 		mutex_unlock(&sbi->wq_mutex);
 		kfree(qstr.name);
 		DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
@@ -442,8 +443,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	status = wq->status;
 
 	/* Are we the last process to need status? */
-	if (atomic_dec_and_test(&wq->wait_ctr))
+	mutex_lock(&sbi->wq_mutex);
+	if (!--wq->wait_ctr)
 		kfree(wq);
+	mutex_unlock(&sbi->wq_mutex);
 
 	return status;
 }
@@ -467,14 +470,11 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
 	*wql = wq->next;	/* Unlink from chain */
 	kfree(wq->name.name);
 	wq->name.name = NULL;	/* Do not wait on this queue */
-	mutex_unlock(&sbi->wq_mutex);
-
 	wq->status = status;
-
-	if (atomic_dec_and_test(&wq->wait_ctr))	/* Is anyone still waiting for this guy? */
+	wake_up_interruptible(&wq->queue);
+	if (!--wq->wait_ctr)
 		kfree(wq);
-	else
-		wake_up_interruptible(&wq->queue);
+	mutex_unlock(&sbi->wq_mutex);
 
 	return 0;
 }
-- 
GitLab


From eb3b176796b0e53fd26fce86847231542eb0d198 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:22 -0700
Subject: [PATCH 331/853] autofs4: detect invalid direct mount requests

autofs v5 direct and offset mounts within an autofs filesystem are
triggered by existing autofs trigger mounts so the mount point dentry must
be positive.  If the mount point dentry is negative then the trigger
doesn't exist so we can return fail immediately.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/waitq.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 3458dbc8fff..bcb6c526546 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -328,6 +328,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	if (sbi->catatonic)
 		return -ENOENT;
 
+	if (!dentry->d_inode &&
+	    (sbi->type & (AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET)))
+		return -ENOENT;
+
 	name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
 	if (!name)
 		return -ENOMEM;
-- 
GitLab


From c72305b5472522299bb6f45b736080128eb1c822 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:23 -0700
Subject: [PATCH 332/853] autofs4: indirect dentry must almost always be
 positive

We have been seeing mount requests coming to the automount daemon for
keys of the form "<map key>/<non key directory>" which are lookups for
invalid map keys.  But we can check for this in the kernel module and
return a fail immediately, without having to send a request to the daemon.

It is possible to recognise these requests are invalid based on whether
the request dentry is negative and its relation to the autofs file system
root.

For example, given the indirect multi-mount map entry:

idm1  \
    /mm1  <server>:/<path1>
    /mm2  <server>:/<path2>

For a request to mount idm1, IS_ROOT((idm1)->d_parent) will always be
true and the dentry may be negative.  But directories idm1/mm1 and
idm1/mm2 will always be created as part of the mount request for idm1.  So
any mount request within idm1 itself must have a positive dentry otherwise
the map key is invalid.

In version 4 these multi-mount entries are all mounted and umounted as a
single request and in version 5 the directories idm1/mm1 and idm1/mm2 are
created and an autofs fs mounted on them to act as a mount trigger so the
above is also true.

This also holds true for the autofs version 4 pseudo direct mount feature.
 When this feature is used without the "--ghost" option automount(8) will
create internal submounts as we go down the map key paths which are
essentially normal indirect mounts for which the above holds.  If the
"--ghost" option is given the directories for map keys are created at
daemon startup so valid map entries correspond to positive dentries in the
autofs fs.

autofs version 5 direct mount maps are similar except that the IS_ROOT
check is not needed.  This has been addressed in a previous patch titled
"autofs4 - detect invalid direct mount requests".

For example, given the direct multi-mount map entry:

/test/dm1  \
    /mm1  <server>:/<path1>
    /mm2  <server>:/<path2>

An autofs fs is mounted on /test/dm1 as a trigger mount and when a mount
is triggered for /test/dm1, the multi-mount offset directories
/test/dm1/mm1 and /test/dm1/mm2 are created and an autofs fs is mounted on
them to act as mount triggers.  So valid direct mount requests must always
have a positive dentry if they correspond to a valid map entry.

Signed-off-by: Ian Kent <raven@themaw.net>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/waitq.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index bcb6c526546..35216d18d8b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -328,9 +328,20 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	if (sbi->catatonic)
 		return -ENOENT;
 
-	if (!dentry->d_inode &&
-	    (sbi->type & (AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET)))
-		return -ENOENT;
+	if (!dentry->d_inode) {
+		/*
+		 * A wait for a negative dentry is invalid for certain
+		 * cases. A direct or offset mount "always" has its mount
+		 * point directory created and so the request dentry must
+		 * be positive or the map key doesn't exist. The situation
+		 * is very similar for indirect mounts except only dentrys
+		 * in the root of the autofs file system may be negative.
+		 */
+		if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
+			return -ENOENT;
+		else if (!IS_ROOT(dentry->d_parent))
+			return -ENOENT;
+	}
 
 	name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
 	if (!name)
-- 
GitLab


From ff9cd499d6258952385cb2f12e9a3c0908fd5786 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:24 -0700
Subject: [PATCH 333/853] autofs4: cleanup redundant readir code

The mount triggering functionality of readdir and related functions is no
longer used (and is quite broken as well).  The unused portions have been
removed.

Signed-off-by: Ian Kent <raven@themaw.net>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c | 149 +++++-----------------------------------------
 1 file changed, 16 insertions(+), 133 deletions(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 87352654ff4..51c873ca8e8 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,8 +25,6 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
 static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
 static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
 static int autofs4_dir_open(struct inode *inode, struct file *file);
-static int autofs4_dir_close(struct inode *inode, struct file *file);
-static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
@@ -44,9 +42,9 @@ const struct file_operations autofs4_root_operations = {
 
 const struct file_operations autofs4_dir_operations = {
 	.open		= autofs4_dir_open,
-	.release	= autofs4_dir_close,
+	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
-	.readdir	= autofs4_dir_readdir,
+	.readdir	= dcache_readdir,
 };
 
 const struct inode_operations autofs4_indirect_root_inode_operations = {
@@ -98,17 +96,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_path.dentry;
-	struct vfsmount *mnt = file->f_path.mnt;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
-	struct dentry *cursor;
-	int status;
-
-	status = dcache_dir_open(inode, file);
-	if (status)
-		goto out;
-
-	cursor = file->private_data;
-	cursor->d_fsdata = NULL;
 
 	DPRINTK("file=%p dentry=%p %.*s",
 		file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -116,129 +104,24 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	if (autofs4_oz_mode(sbi))
 		goto out;
 
-	if (autofs4_ispending(dentry)) {
-		DPRINTK("dentry busy");
-		dcache_dir_close(inode, file);
-		status = -EBUSY;
-		goto out;
-	}
-
-	status = -ENOENT;
-	if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) {
-		struct nameidata nd;
-		int empty, ret;
-
-		/* In case there are stale directory dentrys from a failed mount */
-		spin_lock(&dcache_lock);
-		empty = list_empty(&dentry->d_subdirs);
+	/*
+	 * An empty directory in an autofs file system is always a
+	 * mount point. The daemon must have failed to mount this
+	 * during lookup so it doesn't exist. This can happen, for
+	 * example, if user space returns an incorrect status for a
+	 * mount request. Otherwise we're doing a readdir on the
+	 * autofs file system so just let the libfs routines handle
+	 * it.
+	 */
+	spin_lock(&dcache_lock);
+	if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
 		spin_unlock(&dcache_lock);
-
-		if (!empty)
-			d_invalidate(dentry);
-
-		nd.flags = LOOKUP_DIRECTORY;
-		ret = (dentry->d_op->d_revalidate)(dentry, &nd);
-
-		if (ret <= 0) {
-			if (ret < 0)
-				status = ret;
-			dcache_dir_close(inode, file);
-			goto out;
-		}
+		return -ENOENT;
 	}
+	spin_unlock(&dcache_lock);
 
-	if (d_mountpoint(dentry)) {
-		struct file *fp = NULL;
-		struct path fp_path = { .dentry = dentry, .mnt = mnt };
-
-		path_get(&fp_path);
-
-		if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
-			path_put(&fp_path);
-			dcache_dir_close(inode, file);
-			goto out;
-		}
-
-		fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
-		status = PTR_ERR(fp);
-		if (IS_ERR(fp)) {
-			dcache_dir_close(inode, file);
-			goto out;
-		}
-		cursor->d_fsdata = fp;
-	}
-	return 0;
-out:
-	return status;
-}
-
-static int autofs4_dir_close(struct inode *inode, struct file *file)
-{
-	struct dentry *dentry = file->f_path.dentry;
-	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
-	struct dentry *cursor = file->private_data;
-	int status = 0;
-
-	DPRINTK("file=%p dentry=%p %.*s",
-		file, dentry, dentry->d_name.len, dentry->d_name.name);
-
-	if (autofs4_oz_mode(sbi))
-		goto out;
-
-	if (autofs4_ispending(dentry)) {
-		DPRINTK("dentry busy");
-		status = -EBUSY;
-		goto out;
-	}
-
-	if (d_mountpoint(dentry)) {
-		struct file *fp = cursor->d_fsdata;
-		if (!fp) {
-			status = -ENOENT;
-			goto out;
-		}
-		filp_close(fp, current->files);
-	}
-out:
-	dcache_dir_close(inode, file);
-	return status;
-}
-
-static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
-{
-	struct dentry *dentry = file->f_path.dentry;
-	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
-	struct dentry *cursor = file->private_data;
-	int status;
-
-	DPRINTK("file=%p dentry=%p %.*s",
-		file, dentry, dentry->d_name.len, dentry->d_name.name);
-
-	if (autofs4_oz_mode(sbi))
-		goto out;
-
-	if (autofs4_ispending(dentry)) {
-		DPRINTK("dentry busy");
-		return -EBUSY;
-	}
-
-	if (d_mountpoint(dentry)) {
-		struct file *fp = cursor->d_fsdata;
-
-		if (!fp)
-			return -ENOENT;
-
-		if (!fp->f_op || !fp->f_op->readdir)
-			goto out;
-
-		status = vfs_readdir(fp, filldir, dirent);
-		file->f_pos = fp->f_pos;
-		if (status)
-			autofs4_copy_atime(file, fp);
-		return status;
-	}
 out:
-	return dcache_readdir(file, dirent, filldir);
+	return dcache_dir_open(inode, file);
 }
 
 static int try_to_fill_dentry(struct dentry *dentry, int flags)
-- 
GitLab


From 26e81b3142f1ba497d4cd0365c13661684b784ce Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:25 -0700
Subject: [PATCH 334/853] autofs4: fix pending checks

There are two cases for which a dentry that has a pending mount request
does not wait for completion.  One is via autofs4_revalidate() and the
other via autofs4_follow_link().

In revalidate, after the mount point directory is created, but before the
mount is done, the check in try_to_fill_dentry() can fail to send the
dentry to the wait queue since the dentry is positive and the lookup flags
may contain only LOOKUP_FOLLOW.  Although we don't trigger a mount for the
LOOKUP_FOLLOW flag, if there's one pending we might as well wait and use
the mounted dentry for the lookup.

In autofs4_follow_link() the dentry is not checked to see if it is pending
so it may fail to call try_to_fill_dentry() and not wait for mount
completion.

A dentry that is pending must always be sent to the wait queue.

Signed-off-by: Ian Kent <raven@themaw.net>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 51c873ca8e8..61d1dca1688 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,7 +177,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 			return status;
 		}
 	/* Trigger mount for path component or follow link */
-	} else if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
+	} else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
+			flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
 			current->link_count) {
 		DPRINTK("waiting for mount name=%.*s",
 			dentry->d_name.len, dentry->d_name.name);
@@ -223,7 +224,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 
 	/* If it's our master or we shouldn't trigger a mount we're done */
 	lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
-	if (oz_mode || !lookup_type)
+	if (oz_mode ||
+	    !(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
 		goto done;
 
 	/* If an expire request is pending wait for it. */
@@ -242,7 +244,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	 * don't try to mount it again.
 	 */
 	spin_lock(&dcache_lock);
-	if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
+	if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
+	    (!d_mountpoint(dentry) && __simple_empty(dentry))) {
 		spin_unlock(&dcache_lock);
 
 		status = try_to_fill_dentry(dentry, 0);
-- 
GitLab


From 97e7449a7ad883bf9f516fc970778d75999c7843 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:26 -0700
Subject: [PATCH 335/853] autofs4: fix indirect mount pending expire race

The selection of a dentry for expiration and the setting of the
AUTOFS_INF_EXPIRING flag isn't done atomically which can lead to lookups
walking into an expiring mount.

What happens is that an expire is initiated by the daemon and a dentry is
selected for expire but, since there is no lock held between the selection
and setting of the expiring flag, a process may find the flag clear and
continue walking into the mount tree at the same time the daemon attempts
to expire it.

Signed-off-by: Ian Kent <raven@themaw.net>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h | 10 +++-------
 fs/autofs4/expire.c   | 46 ++++++++++++++++++++++++++++++++-----------
 fs/autofs4/root.c     | 32 +++++++++++++++++++++++++-----
 3 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 058e1800cae..5d90ed3b4b4 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -138,18 +138,14 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
 static inline int autofs4_ispending(struct dentry *dentry)
 {
 	struct autofs_info *inf = autofs4_dentry_ino(dentry);
-	int pending = 0;
 
 	if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
 		return 1;
 
-	if (inf) {
-		spin_lock(&inf->sbi->fs_lock);
-		pending = inf->flags & AUTOFS_INF_EXPIRING;
-		spin_unlock(&inf->sbi->fs_lock);
-	}
+	if (inf->flags & AUTOFS_INF_EXPIRING)
+		return 1;
 
-	return pending;
+	return 0;
 }
 
 static inline void autofs4_copy_atime(struct file *src, struct file *dst)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 894fee54d4d..19f5bea2704 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -292,6 +292,8 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	struct list_head *next;
 	int do_now = how & AUTOFS_EXP_IMMEDIATE;
 	int exp_leaves = how & AUTOFS_EXP_LEAVES;
+	struct autofs_info *ino;
+	unsigned int ino_count;
 
 	if (!root)
 		return NULL;
@@ -316,6 +318,9 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		dentry = dget(dentry);
 		spin_unlock(&dcache_lock);
 
+		spin_lock(&sbi->fs_lock);
+		ino = autofs4_dentry_ino(dentry);
+
 		/*
 		 * Case 1: (i) indirect mount or top level pseudo direct mount
 		 *	   (autofs-4.1).
@@ -326,6 +331,11 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			DPRINTK("checking mountpoint %p %.*s",
 				dentry, (int)dentry->d_name.len, dentry->d_name.name);
 
+			/* Path walk currently on this dentry? */
+			ino_count = atomic_read(&ino->count) + 2;
+			if (atomic_read(&dentry->d_count) > ino_count)
+				goto next;
+
 			/* Can we umount this guy */
 			if (autofs4_mount_busy(mnt, dentry))
 				goto next;
@@ -343,23 +353,25 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
 
 		/* Case 2: tree mount, expire iff entire tree is not busy */
 		if (!exp_leaves) {
-			/* Lock the tree as we must expire as a whole */
-			spin_lock(&sbi->fs_lock);
-			if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
-				struct autofs_info *inf = autofs4_dentry_ino(dentry);
+			/* Path walk currently on this dentry? */
+			ino_count = atomic_read(&ino->count) + 1;
+			if (atomic_read(&dentry->d_count) > ino_count)
+				goto next;
 
-				/* Set this flag early to catch sys_chdir and the like */
-				inf->flags |= AUTOFS_INF_EXPIRING;
-				spin_unlock(&sbi->fs_lock);
+			if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
 				expired = dentry;
 				goto found;
 			}
-			spin_unlock(&sbi->fs_lock);
 		/*
 		 * Case 3: pseudo direct mount, expire individual leaves
 		 *	   (autofs-4.1).
 		 */
 		} else {
+			/* Path walk currently on this dentry? */
+			ino_count = atomic_read(&ino->count) + 1;
+			if (atomic_read(&dentry->d_count) > ino_count)
+				goto next;
+
 			expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
 			if (expired) {
 				dput(dentry);
@@ -367,6 +379,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			}
 		}
 next:
+		spin_unlock(&sbi->fs_lock);
 		dput(dentry);
 		spin_lock(&dcache_lock);
 		next = next->next;
@@ -377,6 +390,9 @@ next:
 found:
 	DPRINTK("returning %p %.*s",
 		expired, (int)expired->d_name.len, expired->d_name.name);
+	ino = autofs4_dentry_ino(expired);
+	ino->flags |= AUTOFS_INF_EXPIRING;
+	spin_unlock(&sbi->fs_lock);
 	spin_lock(&dcache_lock);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 	spin_unlock(&dcache_lock);
@@ -390,7 +406,9 @@ int autofs4_expire_run(struct super_block *sb,
 		      struct autofs_packet_expire __user *pkt_p)
 {
 	struct autofs_packet_expire pkt;
+	struct autofs_info *ino;
 	struct dentry *dentry;
+	int ret = 0;
 
 	memset(&pkt,0,sizeof pkt);
 
@@ -406,9 +424,14 @@ int autofs4_expire_run(struct super_block *sb,
 	dput(dentry);
 
 	if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
-		return -EFAULT;
+		ret = -EFAULT;
 
-	return 0;
+	spin_lock(&sbi->fs_lock);
+	ino = autofs4_dentry_ino(dentry);
+	ino->flags &= ~AUTOFS_INF_EXPIRING;
+	spin_unlock(&sbi->fs_lock);
+
+	return ret;
 }
 
 /* Call repeatedly until it returns -EAGAIN, meaning there's nothing
@@ -433,9 +456,10 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 
 		/* This is synchronous because it makes the daemon a
                    little easier */
-		ino->flags |= AUTOFS_INF_EXPIRING;
 		ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
+		spin_lock(&sbi->fs_lock);
 		ino->flags &= ~AUTOFS_INF_EXPIRING;
+		spin_unlock(&sbi->fs_lock);
 		dput(dentry);
 	}
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 61d1dca1688..1c2579de1f2 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -133,7 +133,10 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 	/* Block on any pending expiry here; invalidate the dentry
            when expiration is done to trigger mount request with a new
            dentry */
-	if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
+	spin_lock(&sbi->fs_lock);
+	if (ino->flags & AUTOFS_INF_EXPIRING) {
+		spin_unlock(&sbi->fs_lock);
+
 		DPRINTK("waiting for expire %p name=%.*s",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 
@@ -149,8 +152,11 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 		status = d_invalidate(dentry);
 		if (status != -EBUSY)
 			return -EAGAIN;
-	}
 
+		goto cont;
+	}
+	spin_unlock(&sbi->fs_lock);
+cont:
 	DPRINTK("dentry=%p %.*s ino=%p",
 		 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
 
@@ -229,15 +235,21 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		goto done;
 
 	/* If an expire request is pending wait for it. */
-	if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
+	spin_lock(&sbi->fs_lock);
+	if (ino->flags & AUTOFS_INF_EXPIRING) {
+		spin_unlock(&sbi->fs_lock);
+
 		DPRINTK("waiting for active request %p name=%.*s",
 			dentry, dentry->d_name.len, dentry->d_name.name);
 
 		status = autofs4_wait(sbi, dentry, NFY_NONE);
 
 		DPRINTK("request done status=%d", status);
-	}
 
+		goto cont;
+	}
+	spin_unlock(&sbi->fs_lock);
+cont:
 	/*
 	 * If the dentry contains directories then it is an
 	 * autofs multi-mount with no root mount offset. So
@@ -292,8 +304,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 	int status = 1;
 
 	/* Pending dentry */
+	spin_lock(&sbi->fs_lock);
 	if (autofs4_ispending(dentry)) {
 		/* The daemon never causes a mount to trigger */
+		spin_unlock(&sbi->fs_lock);
+
 		if (oz_mode)
 			return 1;
 
@@ -316,6 +331,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 		return status;
 	}
+	spin_unlock(&sbi->fs_lock);
 
 	/* Negative dentry.. invalidate if "old" */
 	if (dentry->d_inode == NULL)
@@ -329,6 +345,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dcache_lock);
+
 		/* The daemon never causes a mount to trigger */
 		if (oz_mode)
 			return 1;
@@ -521,13 +538,18 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		 * so it must have been successful, so just wait for it.
 		 */
 		ino = autofs4_dentry_ino(expiring);
-		while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
+		spin_lock(&sbi->fs_lock);
+		if (ino->flags & AUTOFS_INF_EXPIRING) {
+			spin_unlock(&sbi->fs_lock);
 			DPRINTK("wait for incomplete expire %p name=%.*s",
 				expiring, expiring->d_name.len,
 				expiring->d_name.name);
 			autofs4_wait(sbi, expiring, NFY_NONE);
 			DPRINTK("request completed");
+			goto cont;
 		}
+		spin_unlock(&sbi->fs_lock);
+cont:
 		spin_lock(&sbi->lookup_lock);
 		if (!list_empty(&ino->expiring))
 			list_del_init(&ino->expiring);
-- 
GitLab


From 6e60a9ab5f5d314735467752f623072f5b75157a Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:27 -0700
Subject: [PATCH 336/853] autofs4: fix direct mount pending expire race

For direct and offset type mounts that are covered by another mount we
cannot check the AUTOFS_INF_EXPIRING flag during a path walk which leads
to lookups walking into an expiring mount while it is being expired.

For example, for the direct multi-mount map entry with a couple of
offsets:

/race/mm1  /      <server1>:/<path1>
           /om1   <server2>:/<path2>
           /om2   <server1>:/<path3>

an autofs trigger mount is mounted on /race/mm1 and when accessed it is
over mounted and trigger mounts made for /race/mm1/om1 and /race/mm1/om2.
So it isn't possible for path walks to see the expiring flag at all and
they happily walk into the file system while it is expiring.

When expiring these mounts follow_down() must stop at the autofs mount and
all processes must block in the ->follow_link() method (except the daemon)
until the expire is complete.  This is done by decrementing the d_mounted
field of the autofs trigger mount root dentry until the expire is
completed.  In ->follow_link() all processes wait on the expire and the
mount following is completed for the daemon until the expire is complete.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h |  3 ++
 fs/autofs4/expire.c   | 16 ++++++++--
 fs/autofs4/root.c     | 72 +++++++++++++++++++++++++++++--------------
 3 files changed, 65 insertions(+), 26 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 5d90ed3b4b4..4b40cbc71e9 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,6 +52,8 @@ struct autofs_info {
 
 	int		flags;
 
+	struct completion expire_complete;
+
 	struct list_head active;
 	struct list_head expiring;
 
@@ -69,6 +71,7 @@ struct autofs_info {
 };
 
 #define AUTOFS_INF_EXPIRING	(1<<0) /* dentry is in the process of expiring */
+#define AUTOFS_INF_MOUNTPOINT	(1<<1) /* mountpoint status for direct expire */
 
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 19f5bea2704..705b9f057fb 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -259,13 +259,15 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
 	now = jiffies;
 	timeout = sbi->exp_timeout;
 
-	/* Lock the tree as we must expire as a whole */
 	spin_lock(&sbi->fs_lock);
 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
 		struct autofs_info *ino = autofs4_dentry_ino(root);
-
-		/* Set this flag early to catch sys_chdir and the like */
+		if (d_mountpoint(root)) {
+			ino->flags |= AUTOFS_INF_MOUNTPOINT;
+			root->d_mounted--;
+		}
 		ino->flags |= AUTOFS_INF_EXPIRING;
+		init_completion(&ino->expire_complete);
 		spin_unlock(&sbi->fs_lock);
 		return root;
 	}
@@ -392,6 +394,7 @@ found:
 		expired, (int)expired->d_name.len, expired->d_name.name);
 	ino = autofs4_dentry_ino(expired);
 	ino->flags |= AUTOFS_INF_EXPIRING;
+	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 	spin_lock(&dcache_lock);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
@@ -429,6 +432,7 @@ int autofs4_expire_run(struct super_block *sb,
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(dentry);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
+	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
 	return ret;
@@ -457,8 +461,14 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 		/* This is synchronous because it makes the daemon a
                    little easier */
 		ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
+
 		spin_lock(&sbi->fs_lock);
+		if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
+			sb->s_root->d_mounted++;
+			ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
+		}
 		ino->flags &= ~AUTOFS_INF_EXPIRING;
+		complete_all(&ino->expire_complete);
 		spin_unlock(&sbi->fs_lock);
 		dput(dentry);
 	}
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 1c2579de1f2..adbd8559e87 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -141,6 +141,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 
 		status = autofs4_wait(sbi, dentry, NFY_NONE);
+		wait_for_completion(&ino->expire_complete);
 
 		DPRINTK("expire done status=%d", status);
 
@@ -227,14 +228,32 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
 		dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
 		nd->flags);
-
-	/* If it's our master or we shouldn't trigger a mount we're done */
-	lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
-	if (oz_mode ||
-	    !(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
+	/*
+	 * For an expire of a covered direct or offset mount we need
+	 * to beeak out of follow_down() at the autofs mount trigger
+	 * (d_mounted--), so we can see the expiring flag, and manage
+	 * the blocking and following here until the expire is completed.
+	 */
+	if (oz_mode) {
+		spin_lock(&sbi->fs_lock);
+		if (ino->flags & AUTOFS_INF_EXPIRING) {
+			spin_unlock(&sbi->fs_lock);
+			/* Follow down to our covering mount. */
+			if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+				goto done;
+			/*
+			 * We shouldn't need to do this but we have no way
+			 * of knowing what may have been done so try a follow
+			 * just in case.
+			 */
+			autofs4_follow_mount(&nd->path.mnt, &nd->path.dentry);
+			goto done;
+		}
+		spin_unlock(&sbi->fs_lock);
 		goto done;
+	}
 
-	/* If an expire request is pending wait for it. */
+	/* If an expire request is pending everyone must wait. */
 	spin_lock(&sbi->fs_lock);
 	if (ino->flags & AUTOFS_INF_EXPIRING) {
 		spin_unlock(&sbi->fs_lock);
@@ -243,6 +262,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 			dentry, dentry->d_name.len, dentry->d_name.name);
 
 		status = autofs4_wait(sbi, dentry, NFY_NONE);
+		wait_for_completion(&ino->expire_complete);
 
 		DPRINTK("request done status=%d", status);
 
@@ -250,10 +270,15 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	}
 	spin_unlock(&sbi->fs_lock);
 cont:
+	/* We trigger a mount for almost all flags */
+	lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
+	if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
+		goto done;
+
 	/*
-	 * If the dentry contains directories then it is an
-	 * autofs multi-mount with no root mount offset. So
-	 * don't try to mount it again.
+	 * If the dentry contains directories then it is an autofs
+	 * multi-mount with no root mount offset. So don't try to
+	 * mount it again.
 	 */
 	spin_lock(&dcache_lock);
 	if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
@@ -264,22 +289,22 @@ cont:
 		if (status)
 			goto out_error;
 
-		/*
-		 * The mount succeeded but if there is no root mount
-		 * it must be an autofs multi-mount with no root offset
-		 * so we don't need to follow the mount.
-		 */
-		if (d_mountpoint(dentry)) {
-			if (!autofs4_follow_mount(&nd->path.mnt,
-						  &nd->path.dentry)) {
-				status = -ENOENT;
-				goto out_error;
-			}
-		}
-
-		goto done;
+		goto follow;
 	}
 	spin_unlock(&dcache_lock);
+follow:
+	/*
+	 * If there is no root mount it must be an autofs
+	 * multi-mount with no root offset so we don't need
+	 * to follow it.
+	 */
+	if (d_mountpoint(dentry)) {
+		if (!autofs4_follow_mount(&nd->path.mnt,
+					  &nd->path.dentry)) {
+			status = -ENOENT;
+			goto out_error;
+		}
+	}
 
 done:
 	return NULL;
@@ -545,6 +570,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 				expiring, expiring->d_name.len,
 				expiring->d_name.name);
 			autofs4_wait(sbi, expiring, NFY_NONE);
+			wait_for_completion(&ino->expire_complete);
 			DPRINTK("request completed");
 			goto cont;
 		}
-- 
GitLab


From ec6e8c7d3f9073336ec7b2eed3fcda6f922087c3 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:28 -0700
Subject: [PATCH 337/853] autofs4: fix direct mount pending expire race -
 correction

Apologies, somehow I seem to have sent an outdated version of this
patch. Here is an additional patch that brings the patch up to date.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index adbd8559e87..e062ee5a3ed 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -241,13 +241,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 			/* Follow down to our covering mount. */
 			if (!follow_down(&nd->path.mnt, &nd->path.dentry))
 				goto done;
-			/*
-			 * We shouldn't need to do this but we have no way
-			 * of knowing what may have been done so try a follow
-			 * just in case.
-			 */
-			autofs4_follow_mount(&nd->path.mnt, &nd->path.dentry);
-			goto done;
+			goto follow;
 		}
 		spin_unlock(&sbi->fs_lock);
 		goto done;
@@ -273,7 +267,7 @@ cont:
 	/* We trigger a mount for almost all flags */
 	lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
 	if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
-		goto done;
+		goto follow;
 
 	/*
 	 * If the dentry contains directories then it is an autofs
-- 
GitLab


From 06a3598552dc3b2b30eb18bd53bbac2a901489d7 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:28 -0700
Subject: [PATCH 338/853] autofs4: reorganize expire pending wait function
 calls

This patch reorganizes the checking for and waiting on active expires and
eliminates redundant checks.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h |  1 +
 fs/autofs4/expire.c   | 29 +++++++++++++++++
 fs/autofs4/root.c     | 75 ++++++-------------------------------------
 3 files changed, 40 insertions(+), 65 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 4b40cbc71e9..69a2f5c9231 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -163,6 +163,7 @@ void autofs4_free_ino(struct autofs_info *);
 
 /* Expiration */
 int is_autofs4_dentry(struct dentry *);
+int autofs4_expire_wait(struct dentry *dentry);
 int autofs4_expire_run(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *,
 			struct autofs_packet_expire __user *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 705b9f057fb..cdabb796ff0 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -402,6 +402,35 @@ found:
 	return expired;
 }
 
+int autofs4_expire_wait(struct dentry *dentry)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	int status;
+
+	/* Block on any pending expire */
+	spin_lock(&sbi->fs_lock);
+	if (ino->flags & AUTOFS_INF_EXPIRING) {
+		spin_unlock(&sbi->fs_lock);
+
+		DPRINTK("waiting for expire %p name=%.*s",
+			 dentry, dentry->d_name.len, dentry->d_name.name);
+
+		status = autofs4_wait(sbi, dentry, NFY_NONE);
+		wait_for_completion(&ino->expire_complete);
+
+		DPRINTK("expire done status=%d", status);
+
+		if (d_unhashed(dentry))
+			return -EAGAIN;
+
+		return status;
+	}
+	spin_unlock(&sbi->fs_lock);
+
+	return 0;
+}
+
 /* Perform an expiry operation */
 int autofs4_expire_run(struct super_block *sb,
 		      struct vfsmount *mnt,
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e062ee5a3ed..ae22bde0bbd 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -130,34 +130,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status;
 
-	/* Block on any pending expiry here; invalidate the dentry
-           when expiration is done to trigger mount request with a new
-           dentry */
-	spin_lock(&sbi->fs_lock);
-	if (ino->flags & AUTOFS_INF_EXPIRING) {
-		spin_unlock(&sbi->fs_lock);
-
-		DPRINTK("waiting for expire %p name=%.*s",
-			 dentry, dentry->d_name.len, dentry->d_name.name);
-
-		status = autofs4_wait(sbi, dentry, NFY_NONE);
-		wait_for_completion(&ino->expire_complete);
-
-		DPRINTK("expire done status=%d", status);
-
-		/*
-		 * If the directory still exists the mount request must
-		 * continue otherwise it can't be followed at the right
-		 * time during the walk.
-		 */
-		status = d_invalidate(dentry);
-		if (status != -EBUSY)
-			return -EAGAIN;
-
-		goto cont;
-	}
-	spin_unlock(&sbi->fs_lock);
-cont:
 	DPRINTK("dentry=%p %.*s ino=%p",
 		 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
 
@@ -248,22 +220,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	}
 
 	/* If an expire request is pending everyone must wait. */
-	spin_lock(&sbi->fs_lock);
-	if (ino->flags & AUTOFS_INF_EXPIRING) {
-		spin_unlock(&sbi->fs_lock);
-
-		DPRINTK("waiting for active request %p name=%.*s",
-			dentry, dentry->d_name.len, dentry->d_name.name);
-
-		status = autofs4_wait(sbi, dentry, NFY_NONE);
-		wait_for_completion(&ino->expire_complete);
+	autofs4_expire_wait(dentry);
 
-		DPRINTK("request done status=%d", status);
-
-		goto cont;
-	}
-	spin_unlock(&sbi->fs_lock);
-cont:
 	/* We trigger a mount for almost all flags */
 	lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
 	if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
@@ -331,6 +289,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 		if (oz_mode)
 			return 1;
 
+		/*
+		 * If the directory has gone away due to an expire
+		 * we have been called as ->d_revalidate() and so
+		 * we need to return false and proceed to ->lookup().
+		 */
+		if (autofs4_expire_wait(dentry) == -EAGAIN)
+			return 0;
+
 		/*
 		 * A zero status is success otherwise we have a
 		 * negative error code.
@@ -339,15 +305,6 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 		if (status == 0)
 			return 1;
 
-		/*
-		 * A status of EAGAIN here means that the dentry has gone
-		 * away while waiting for an expire to complete. If we are
-		 * racing with expire lookup will wait for it so this must
-		 * be a revalidate and we need to send it to lookup.
-		 */
-		if (status == -EAGAIN)
-			return 0;
-
 		return status;
 	}
 	spin_unlock(&sbi->fs_lock);
@@ -557,19 +514,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		 * so it must have been successful, so just wait for it.
 		 */
 		ino = autofs4_dentry_ino(expiring);
-		spin_lock(&sbi->fs_lock);
-		if (ino->flags & AUTOFS_INF_EXPIRING) {
-			spin_unlock(&sbi->fs_lock);
-			DPRINTK("wait for incomplete expire %p name=%.*s",
-				expiring, expiring->d_name.len,
-				expiring->d_name.name);
-			autofs4_wait(sbi, expiring, NFY_NONE);
-			wait_for_completion(&ino->expire_complete);
-			DPRINTK("request completed");
-			goto cont;
-		}
-		spin_unlock(&sbi->fs_lock);
-cont:
+		autofs4_expire_wait(expiring);
 		spin_lock(&sbi->lookup_lock);
 		if (!list_empty(&ino->expiring))
 			list_del_init(&ino->expiring);
-- 
GitLab


From aa55ddf340c9fa3f303ee16bbf35887e42c50304 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:29 -0700
Subject: [PATCH 339/853] autofs4: remove unused ioctls

The ioctls AUTOFS_IOC_TOGGLEREGHOST and AUTOFS_IOC_ASKREGHOST were added
several years ago but what they were intended for has never been
implemented (as far as I'm aware no one uses them), so remove them.

Signed-off-by: Ian Kent <raven@themaw.net>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c        | 68 +---------------------------------------
 fs/compat_ioctl.c        |  2 --
 include/linux/auto_fs4.h |  2 --
 3 files changed, 1 insertion(+), 71 deletions(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index ae22bde0bbd..bcfb2dc0a61 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,7 +25,6 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
 static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
 static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
 static int autofs4_dir_open(struct inode *inode, struct file *file);
-static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
 
@@ -36,7 +35,7 @@ const struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
 	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
-	.readdir	= autofs4_root_readdir,
+	.readdir	= dcache_readdir,
 	.ioctl		= autofs4_root_ioctl,
 };
 
@@ -71,28 +70,6 @@ const struct inode_operations autofs4_dir_inode_operations = {
 	.rmdir		= autofs4_dir_rmdir,
 };
 
-static int autofs4_root_readdir(struct file *file, void *dirent,
-				filldir_t filldir)
-{
-	struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
-	int oz_mode = autofs4_oz_mode(sbi);
-
-	DPRINTK("called, filp->f_pos = %lld", file->f_pos);
-
-	/*
-	 * Don't set reghost flag if:
-	 * 1) f_pos is larger than zero -- we've already been here.
-	 * 2) we haven't even enabled reghosting in the 1st place.
-	 * 3) this is the daemon doing a readdir
-	 */
-	if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled)
-		sbi->needs_reghost = 1;
-
-	DPRINTK("needs_reghost = %d", sbi->needs_reghost);
-
-	return dcache_readdir(file, dirent, filldir);
-}
-
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_path.dentry;
@@ -858,44 +835,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user
 	return put_user(sbi->sub_version, p);
 }
 
-/*
- * Tells the daemon whether we need to reghost or not. Also, clears
- * the reghost_needed flag.
- */
-static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p)
-{
-	int status;
-
-	DPRINTK("returning %d", sbi->needs_reghost);
-
-	status = put_user(sbi->needs_reghost, p);
-	if (status)
-		return status;
-
-	sbi->needs_reghost = 0;
-	return 0;
-}
-
-/*
- * Enable / Disable reghosting ioctl() operation
- */
-static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p)
-{
-	int status;
-	int val;
-
-	status = get_user(val, p);
-
-	DPRINTK("reghost = %d", val);
-
-	if (status)
-		return status;
-
-	/* turn on/off reghosting, with the val */
-	sbi->reghost_enabled = val;
-	return 0;
-}
-
 /*
 * Tells the daemon whether it can umount the autofs mount.
 */
@@ -960,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
 	case AUTOFS_IOC_SETTIMEOUT:
 		return autofs4_get_set_timeout(sbi, p);
 
-	case AUTOFS_IOC_TOGGLEREGHOST:
-		return autofs4_toggle_reghost(sbi, p);
-	case AUTOFS_IOC_ASKREGHOST:
-		return autofs4_ask_reghost(sbi, p);
-
 	case AUTOFS_IOC_ASKUMOUNT:
 		return autofs4_ask_umount(filp->f_path.mnt, p);
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 7b3a03c7c6a..18e2c548161 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2297,8 +2297,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
 COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
 COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
 COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
-COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
 COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
 /* Raw devices */
 COMPATIBLE_IOCTL(RAW_SETBIND)
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index 31a29541b50..b785c6f8644 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -98,8 +98,6 @@ union autofs_v5_packet_union {
 #define AUTOFS_IOC_EXPIRE_INDIRECT	AUTOFS_IOC_EXPIRE_MULTI
 #define AUTOFS_IOC_EXPIRE_DIRECT	AUTOFS_IOC_EXPIRE_MULTI
 #define AUTOFS_IOC_PROTOSUBVER		_IOR(0x93,0x67,int)
-#define AUTOFS_IOC_ASKREGHOST           _IOR(0x93,0x68,int)
-#define AUTOFS_IOC_TOGGLEREGHOST        _IOR(0x93,0x69,int)
 #define AUTOFS_IOC_ASKUMOUNT		_IOR(0x93,0x70,int)
 
 
-- 
GitLab


From 35aa64f3a117a16c466f688f52ac3847b3b572e8 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 23 Jul 2008 21:30:29 -0700
Subject: [PATCH 340/853] rtc: m41t80: sort header inclusions for readability

Sort the header inclusions for readability.  No functional changes.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexander Bigga <ab@mycable.de>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-m41t80.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 0a19c06019b..4b260664547 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -13,21 +13,21 @@
  *
  */
 
-#include <linux/module.h>
+#include <linux/bcd.h>
+#include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rtc.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/string.h>
-#include <linux/i2c.h>
-#include <linux/rtc.h>
-#include <linux/bcd.h>
 #ifdef CONFIG_RTC_DRV_M41T80_WDT
-#include <linux/miscdevice.h>
-#include <linux/watchdog.h>
-#include <linux/reboot.h>
 #include <linux/fs.h>
 #include <linux/ioctl.h>
+#include <linux/miscdevice.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
 #endif
 
 #define M41T80_REG_SSEC	0
-- 
GitLab


From 4c228db0b30fa12d65ae7461ce29ed1f4da12c5b Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 23 Jul 2008 21:30:32 -0700
Subject: [PATCH 341/853] rtc: m41t80: use pr_info() as appropriate

Replace printk(KERN_INFO ...) calls with appropriate pr_info(...)
equivalents.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexander Bigga <ab@mycable.de>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-m41t80.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 4b260664547..24bc1689fc7 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -631,14 +631,12 @@ static int wdt_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			return -EFAULT;
 
 		if (rv & WDIOS_DISABLECARD) {
-			printk(KERN_INFO
-			       "rtc-m41t80: disable watchdog\n");
+			pr_info("rtc-m41t80: disable watchdog\n");
 			wdt_disable();
 		}
 
 		if (rv & WDIOS_ENABLECARD) {
-			printk(KERN_INFO
-			       "rtc-m41t80: enable watchdog\n");
+			pr_info("rtc-m41t80: enable watchdog\n");
 			wdt_ping();
 		}
 
-- 
GitLab


From 53f1b1433da7eac2607a4a0898a221a4485fd732 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Wed, 23 Jul 2008 21:30:32 -0700
Subject: [PATCH 342/853] rtc: push the BKL down into the driver ioctl method

For now just wrap the main logic, but this driver is a prime candidate for
someone wanting to eliminate the lock entirely.

[lizf@cn.fujitsu.com: fix build failure]
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/rtc.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index fa92a8af5a5..d1569a0d050 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -78,9 +78,10 @@
 #include <linux/wait.h>
 #include <linux/bcd.h>
 #include <linux/delay.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
 
 #include <asm/current.h>
-#include <asm/uaccess.h>
 #include <asm/system.h>
 
 #ifdef CONFIG_X86
@@ -144,8 +145,7 @@ static DEFINE_TIMER(rtc_irq_timer, rtc_dropped_irq, 0, 0);
 static ssize_t rtc_read(struct file *file, char __user *buf,
 			size_t count, loff_t *ppos);
 
-static int rtc_ioctl(struct inode *inode, struct file *file,
-		     unsigned int cmd, unsigned long arg);
+static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 
 #ifdef RTC_IRQ
 static unsigned int rtc_poll(struct file *file, poll_table *wait);
@@ -719,10 +719,13 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 			    &wtime, sizeof wtime) ? -EFAULT : 0;
 }
 
-static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-		     unsigned long arg)
+static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	return rtc_do_ioctl(cmd, arg, 0);
+	long ret;
+	lock_kernel();
+	ret = rtc_do_ioctl(cmd, arg, 0);
+	unlock_kernel();
+	return ret;
 }
 
 /*
@@ -915,7 +918,7 @@ static const struct file_operations rtc_fops = {
 #ifdef RTC_IRQ
 	.poll		= rtc_poll,
 #endif
-	.ioctl		= rtc_ioctl,
+	.unlocked_ioctl	= rtc_ioctl,
 	.open		= rtc_open,
 	.release	= rtc_release,
 	.fasync		= rtc_fasync,
-- 
GitLab


From 5ad31a575157147b43fa84ef1e21471661653878 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 23 Jul 2008 21:30:33 -0700
Subject: [PATCH 343/853] rtc: remove BKL for ioctl()

Remove implicit use of BKL in ioctl() from the RTC framework.

Instead, the rtc->ops_lock is used.  That's the same lock that already
protects the RTC operations when they're issued through the exported
rtc_*() calls in drivers/rtc/interface.c ...  making this a bugfix, not
just a cleanup, since both ioctl calls and set_alarm() need to update IRQ
enable flags and that implies a common lock (which RTC drivers as a rule
do not provide on their own).

A new comment at the declaration of "struct rtc_class_ops" summarizes
current locking rules.  It's not clear to me that the exceptions listed
there should exist ...  if not, those are pre-existing problems which can
be fixed in a patch that doesn't relate to BKL removal.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Jonathan Corbet <corbet@lwn.net>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-dev.c | 58 ++++++++++++++++++++++++++++---------------
 include/linux/rtc.h   | 17 +++++++++++++
 2 files changed, 55 insertions(+), 20 deletions(-)

diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 0114a78b7cb..0a870b7e5c3 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -209,7 +209,7 @@ static unsigned int rtc_dev_poll(struct file *file, poll_table *wait)
 	return (data != 0) ? (POLLIN | POLLRDNORM) : 0;
 }
 
-static int rtc_dev_ioctl(struct inode *inode, struct file *file,
+static long rtc_dev_ioctl(struct file *file,
 		unsigned int cmd, unsigned long arg)
 {
 	int err = 0;
@@ -219,6 +219,10 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 	struct rtc_wkalrm alarm;
 	void __user *uarg = (void __user *) arg;
 
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return err;	/* propagate the lock's own error code */
+
 	/* check that the calling task has appropriate permissions
 	 * for certain ioctls. doing this check here is useful
 	 * to avoid duplicate code in each driver.
@@ -227,26 +231,31 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 	case RTC_EPOCH_SET:
 	case RTC_SET_TIME:
 		if (!capable(CAP_SYS_TIME))
-			return -EACCES;
+			err = -EACCES;
 		break;
 
 	case RTC_IRQP_SET:
 		if (arg > rtc->max_user_freq && !capable(CAP_SYS_RESOURCE))
-			return -EACCES;
+			err = -EACCES;
 		break;
 
 	case RTC_PIE_ON:
 		if (rtc->irq_freq > rtc->max_user_freq &&
 				!capable(CAP_SYS_RESOURCE))
-			return -EACCES;
+			err = -EACCES;
 		break;
 	}
 
+	if (err)
+		goto done;
+
 	/* try the driver's ioctl interface */
 	if (ops->ioctl) {
 		err = ops->ioctl(rtc->dev.parent, cmd, arg);
-		if (err != -ENOIOCTLCMD)
+		if (err != -ENOIOCTLCMD) {
+			mutex_unlock(&rtc->ops_lock);
 			return err;
+		}
 	}
 
 	/* if the driver does not provide the ioctl interface
@@ -265,15 +274,19 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 
 	switch (cmd) {
 	case RTC_ALM_READ:
+		mutex_unlock(&rtc->ops_lock);
+
 		err = rtc_read_alarm(rtc, &alarm);
 		if (err < 0)
 			return err;
 
 		if (copy_to_user(uarg, &alarm.time, sizeof(tm)))
-			return -EFAULT;
-		break;
+			err = -EFAULT;
+		return err;
 
 	case RTC_ALM_SET:
+		mutex_unlock(&rtc->ops_lock);
+
 		if (copy_from_user(&alarm.time, uarg, sizeof(tm)))
 			return -EFAULT;
 
@@ -321,24 +334,26 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 			}
 		}
 
-		err = rtc_set_alarm(rtc, &alarm);
-		break;
+		return rtc_set_alarm(rtc, &alarm);
 
 	case RTC_RD_TIME:
+		mutex_unlock(&rtc->ops_lock);
+
 		err = rtc_read_time(rtc, &tm);
 		if (err < 0)
 			return err;
 
 		if (copy_to_user(uarg, &tm, sizeof(tm)))
-			return -EFAULT;
-		break;
+			err = -EFAULT;
+		return err;
 
 	case RTC_SET_TIME:
+		mutex_unlock(&rtc->ops_lock);
+
 		if (copy_from_user(&tm, uarg, sizeof(tm)))
 			return -EFAULT;
 
-		err = rtc_set_time(rtc, &tm);
-		break;
+		return rtc_set_time(rtc, &tm);
 
 	case RTC_PIE_ON:
 		err = rtc_irq_set_state(rtc, NULL, 1);
@@ -376,34 +391,38 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 		break;
 #endif
 	case RTC_WKALM_SET:
+		mutex_unlock(&rtc->ops_lock);
 		if (copy_from_user(&alarm, uarg, sizeof(alarm)))
 			return -EFAULT;
 
-		err = rtc_set_alarm(rtc, &alarm);
-		break;
+		return rtc_set_alarm(rtc, &alarm);
 
 	case RTC_WKALM_RD:
+		mutex_unlock(&rtc->ops_lock);
 		err = rtc_read_alarm(rtc, &alarm);
 		if (err < 0)
 			return err;
 
 		if (copy_to_user(uarg, &alarm, sizeof(alarm)))
-			return -EFAULT;
-		break;
+			err = -EFAULT;
+		return err;
 
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	case RTC_UIE_OFF:
 		clear_uie(rtc);
-		return 0;
+		break;
 
 	case RTC_UIE_ON:
-		return set_uie(rtc);
+		err = set_uie(rtc);
+		break;
 #endif
 	default:
 		err = -ENOTTY;
 		break;
 	}
 
+done:
+	mutex_unlock(&rtc->ops_lock);
 	return err;
 }
 
@@ -432,7 +450,7 @@ static const struct file_operations rtc_dev_fops = {
 	.llseek		= no_llseek,
 	.read		= rtc_dev_read,
 	.poll		= rtc_dev_poll,
-	.ioctl		= rtc_dev_ioctl,
+	.unlocked_ioctl	= rtc_dev_ioctl,
 	.open		= rtc_dev_open,
 	.release	= rtc_dev_release,
 	.fasync		= rtc_dev_fasync,
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index f2d0d152772..b01fe004cb5 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -115,6 +115,23 @@ extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm);
 
 extern struct class *rtc_class;
 
+/*
+ * For these RTC methods the device parameter is the physical device
+ * on whatever bus holds the hardware (I2C, Platform, SPI, etc), which
+ * was passed to rtc_device_register().  Its driver_data normally holds
+ * device state, including the rtc_device pointer for the RTC.
+ *
+ * Most of these methods are called with rtc_device.ops_lock held,
+ * through the rtc_*(struct rtc_device *, ...) calls.
+ *
+ * The (current) exceptions are mostly filesystem hooks:
+ *   - the proc() hook for procfs
+ *   - non-ioctl() chardev hooks:  open(), release(), read_callback()
+ *   - periodic irq calls:  irq_set_state(), irq_set_freq()
+ *
+ * REVISIT those periodic irq calls *do* have ops_lock when they're
+ * issued through ioctl() ...
+ */
 struct rtc_class_ops {
 	int (*open)(struct device *);
 	void (*release)(struct device *);
-- 
GitLab


From 8fc2c767b06067b417c565c4e75731e68ed41fd8 Mon Sep 17 00:00:00 2001
From: "Kim B. Heino" <Kim.Heino@bluegiga.com>
Date: Wed, 23 Jul 2008 21:30:34 -0700
Subject: [PATCH 344/853] rtc: add support for ST M41T94 SPI RTC

This patch adds a kernel driver for the M41T94 RTC chip connected via SPI.
I've tested it on two different AT91-based hardware platforms.

This is the third revision of the patch: some comments made by
Alessandro Zummo have been addressed.

Revision two added support for the century bit and fixes.

Signed-off-by: Kim B. Heino <Kim.Heino@bluegiga.com>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/Kconfig      |   9 ++
 drivers/rtc/Makefile     |   1 +
 drivers/rtc/rtc-m41t94.c | 173 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+)
 create mode 100644 drivers/rtc/rtc-m41t94.c

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index fc85bf2e4a9..beffb834c44 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -273,6 +273,15 @@ comment "SPI RTC drivers"
 
 if SPI_MASTER
 
+config RTC_DRV_M41T94
+	tristate "ST M41T94"
+	help
+	  If you say yes here you will get support for the
+	  ST M41T94 SPI RTC chip.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-m41t94.
+
 config RTC_DRV_MAX6902
 	tristate "Maxim MAX6902"
 	help
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index b5d9d67df88..b0e1af54f80 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_RTC_DRV_EP93XX)	+= rtc-ep93xx.o
 obj-$(CONFIG_RTC_DRV_FM3130)	+= rtc-fm3130.o
 obj-$(CONFIG_RTC_DRV_ISL1208)	+= rtc-isl1208.o
 obj-$(CONFIG_RTC_DRV_M41T80)	+= rtc-m41t80.o
+obj-$(CONFIG_RTC_DRV_M41T94)	+= rtc-m41t94.o
 obj-$(CONFIG_RTC_DRV_M48T59)	+= rtc-m48t59.o
 obj-$(CONFIG_RTC_DRV_M48T86)	+= rtc-m48t86.o
 obj-$(CONFIG_RTC_DRV_MAX6900)	+= rtc-max6900.o
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
new file mode 100644
index 00000000000..9b19499c829
--- /dev/null
+++ b/drivers/rtc/rtc-m41t94.c
@@ -0,0 +1,173 @@
+/*
+ * Driver for ST M41T94 SPI RTC
+ *
+ * Copyright (C) 2008 Kim B. Heino
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+#include <linux/spi/spi.h>
+#include <linux/bcd.h>
+
+#define M41T94_REG_SECONDS	0x01
+#define M41T94_REG_MINUTES	0x02
+#define M41T94_REG_HOURS	0x03
+#define M41T94_REG_WDAY		0x04
+#define M41T94_REG_DAY		0x05
+#define M41T94_REG_MONTH	0x06
+#define M41T94_REG_YEAR		0x07
+#define M41T94_REG_HT		0x0c
+
+#define M41T94_BIT_HALT		0x40
+#define M41T94_BIT_STOP		0x80
+#define M41T94_BIT_CB		0x40
+#define M41T94_BIT_CEB		0x80
+
+static int m41t94_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	u8 buf[8]; /* write cmd + 7 registers */
+
+	dev_dbg(dev, "%s secs=%d, mins=%d, "
+		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
+		"write", tm->tm_sec, tm->tm_min,
+		tm->tm_hour, tm->tm_mday,
+		tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	buf[0] = 0x80 | M41T94_REG_SECONDS; /* write time + date */
+	buf[M41T94_REG_SECONDS] = BIN2BCD(tm->tm_sec);
+	buf[M41T94_REG_MINUTES] = BIN2BCD(tm->tm_min);
+	buf[M41T94_REG_HOURS]   = BIN2BCD(tm->tm_hour);
+	buf[M41T94_REG_WDAY]    = BIN2BCD(tm->tm_wday + 1);
+	buf[M41T94_REG_DAY]     = BIN2BCD(tm->tm_mday);
+	buf[M41T94_REG_MONTH]   = BIN2BCD(tm->tm_mon + 1);
+
+	buf[M41T94_REG_HOURS] |= M41T94_BIT_CEB;
+	if (tm->tm_year >= 100)
+		buf[M41T94_REG_HOURS] |= M41T94_BIT_CB;
+	buf[M41T94_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+
+	return spi_write(spi, buf, 8);
+}
+
+static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	u8 buf[2];
+	int ret, hour;
+
+	/* clear halt update bit */
+	ret = spi_w8r8(spi, M41T94_REG_HT);
+	if (ret < 0)
+		return ret;
+	if (ret & M41T94_BIT_HALT) {
+		buf[0] = 0x80 | M41T94_REG_HT;
+		buf[1] = ret & ~M41T94_BIT_HALT;
+		spi_write(spi, buf, 2);
+	}
+
+	/* clear stop bit */
+	ret = spi_w8r8(spi, M41T94_REG_SECONDS);
+	if (ret < 0)
+		return ret;
+	if (ret & M41T94_BIT_STOP) {
+		buf[0] = 0x80 | M41T94_REG_SECONDS;
+		buf[1] = ret & ~M41T94_BIT_STOP;
+		spi_write(spi, buf, 2);
+	}
+
+	tm->tm_sec  = BCD2BIN(spi_w8r8(spi, M41T94_REG_SECONDS));
+	tm->tm_min  = BCD2BIN(spi_w8r8(spi, M41T94_REG_MINUTES));
+	hour = spi_w8r8(spi, M41T94_REG_HOURS);
+	tm->tm_hour = BCD2BIN(hour & 0x3f);
+	tm->tm_wday = BCD2BIN(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
+	tm->tm_mday = BCD2BIN(spi_w8r8(spi, M41T94_REG_DAY));
+	tm->tm_mon  = BCD2BIN(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
+	tm->tm_year = BCD2BIN(spi_w8r8(spi, M41T94_REG_YEAR));
+	if ((hour & M41T94_BIT_CB) || !(hour & M41T94_BIT_CEB))
+		tm->tm_year += 100;
+
+	dev_dbg(dev, "%s secs=%d, mins=%d, "
+		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
+		"read", tm->tm_sec, tm->tm_min,
+		tm->tm_hour, tm->tm_mday,
+		tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	/* initial clock setting can be undefined */
+	return rtc_valid_tm(tm);
+}
+
+static const struct rtc_class_ops m41t94_rtc_ops = {
+	.read_time	= m41t94_read_time,
+	.set_time	= m41t94_set_time,
+};
+
+static struct spi_driver m41t94_driver;
+
+static int __devinit m41t94_probe(struct spi_device *spi)
+{
+	struct rtc_device *rtc;
+	int res;
+
+	spi->bits_per_word = 8;
+	spi_setup(spi);
+
+	res = spi_w8r8(spi, M41T94_REG_SECONDS);
+	if (res < 0) {
+		dev_err(&spi->dev, "not found.\n");
+		return res;
+	}
+
+	rtc = rtc_device_register(m41t94_driver.driver.name,
+		&spi->dev, &m41t94_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc))
+		return PTR_ERR(rtc);
+
+	dev_set_drvdata(&spi->dev, rtc);
+
+	return 0;
+}
+
+static int __devexit m41t94_remove(struct spi_device *spi)
+{
+	struct rtc_device *rtc = dev_get_drvdata(&spi->dev); /* set by probe */
+
+	if (rtc)
+		rtc_device_unregister(rtc);
+
+	return 0;
+}
+
+static struct spi_driver m41t94_driver = {
+	.driver = {
+		.name	= "rtc-m41t94",
+		.bus	= &spi_bus_type,
+		.owner	= THIS_MODULE,
+	},
+	.probe	= m41t94_probe,
+	.remove = __devexit_p(m41t94_remove),
+};
+
+static __init int m41t94_init(void)
+{
+	return spi_register_driver(&m41t94_driver);
+}
+
+module_init(m41t94_init);
+
+static __exit void m41t94_exit(void)
+{
+	spi_unregister_driver(&m41t94_driver);
+}
+
+module_exit(m41t94_exit);
+
+MODULE_AUTHOR("Kim B. Heino <Kim.Heino@bluegiga.com>");
+MODULE_DESCRIPTION("Driver for ST M41T94 SPI RTC");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 53e84b672c1a8190af2b376c35c7a39cf1214f59 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 23 Jul 2008 21:30:36 -0700
Subject: [PATCH 345/853] rtc: ds1305/ds1306 driver

Support the Dallas/Maxim DS1305 and DS1306 RTC chips.  These use SPI, and
support alarms, NVRAM, and a trickle charger for use when their backup
power supply is a supercap or rechargeable cell.

This basic driver doesn't yet support suspend/resume or wakealarms.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/Kconfig        |  10 +
 drivers/rtc/Makefile       |   1 +
 drivers/rtc/rtc-ds1305.c   | 847 +++++++++++++++++++++++++++++++++++++
 include/linux/spi/ds1305.h |  35 ++
 4 files changed, 893 insertions(+)
 create mode 100644 drivers/rtc/rtc-ds1305.c
 create mode 100644 include/linux/spi/ds1305.h

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index beffb834c44..90ab7382540 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -282,6 +282,16 @@ config RTC_DRV_M41T94
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-m41t94.
 
+config RTC_DRV_DS1305
+	tristate "Dallas/Maxim DS1305/DS1306"
+	help
+	  Select this driver to get support for the Dallas/Maxim DS1305
+	  and DS1306 real time clock chips.  These support a trickle
+	  charger, alarms, and NVRAM in addition to the clock.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-ds1305.
+
 config RTC_DRV_MAX6902
 	tristate "Maxim MAX6902"
 	help
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index b0e1af54f80..18622ef84ca 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_RTC_DRV_BFIN)	+= rtc-bfin.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_DS1216)	+= rtc-ds1216.o
 obj-$(CONFIG_RTC_DRV_DS1302)	+= rtc-ds1302.o
+obj-$(CONFIG_RTC_DRV_DS1305)	+= rtc-ds1305.o
 obj-$(CONFIG_RTC_DRV_DS1307)	+= rtc-ds1307.o
 obj-$(CONFIG_RTC_DRV_DS1374)	+= rtc-ds1374.o
 obj-$(CONFIG_RTC_DRV_DS1511)	+= rtc-ds1511.o
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
new file mode 100644
index 00000000000..b91d02a3ace
--- /dev/null
+++ b/drivers/rtc/rtc-ds1305.c
@@ -0,0 +1,847 @@
+/*
+ * rtc-ds1305.c -- driver for DS1305 and DS1306 SPI RTC chips
+ *
+ * Copyright (C) 2008 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/workqueue.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/ds1305.h>
+
+
+/*
+ * Registers ... mask DS1305_WRITE into register address to write,
+ * otherwise you're reading it.  All non-bitmask values are BCD.
+ */
+#define DS1305_WRITE		0x80
+
+
+/* RTC date/time ... the main special cases are that we:
+ *  - Need fancy "hours" encoding in 12hour mode
+ *  - Don't rely on the "day-of-week" field (or tm_wday)
+ *  - Are a 21st-century clock (2000 <= year < 2100)
+ */
+#define DS1305_RTC_LEN		7		/* bytes for RTC regs */
+
+#define DS1305_SEC		0x00		/* register addresses */
+#define DS1305_MIN		0x01
+#define DS1305_HOUR		0x02
+#	define DS1305_HR_12		0x40	/* set == 12 hr mode */
+#	define DS1305_HR_PM		0x20	/* set == PM (12hr mode) */
+#define DS1305_WDAY		0x03
+#define DS1305_MDAY		0x04
+#define DS1305_MON		0x05
+#define DS1305_YEAR		0x06
+
+
+/* The two alarms have only sec/min/hour/wday fields (ALM_LEN).
+ * DS1305_ALM_DISABLE disables a match field (some combos are bad).
+ *
+ * NOTE that since we don't use WDAY, we limit ourselves to alarms
+ * only one day into the future (vs potentially up to a week).
+ *
+ * NOTE ALSO that while we could generate once-a-second IRQs (UIE), we
+ * don't currently support them.  We'd either need to do it only when
+ * no alarm is pending (not the standard model), or to use the second
+ * alarm (implying that this is a DS1305 not DS1306, *and* that either
+ * it's wired up to a second IRQ we know, or that INTCN is set)
+ */
+#define DS1305_ALM_LEN		4		/* bytes for ALM regs */
+#define DS1305_ALM_DISABLE	0x80
+
+#define DS1305_ALM0(r)		(0x07 + (r))	/* register addresses */
+#define DS1305_ALM1(r)		(0x0b + (r))
+
+
+/* three control registers */
+#define DS1305_CONTROL_LEN	3		/* bytes of control regs */
+
+#define DS1305_CONTROL		0x0f		/* register addresses */
+#	define DS1305_nEOSC		0x80	/* low enables oscillator */
+#	define DS1305_WP		0x40	/* write protect */
+#	define DS1305_INTCN		0x04	/* clear == only int0 used */
+#	define DS1306_1HZ		0x04	/* enable 1Hz output */
+#	define DS1305_AEI1		0x02	/* enable ALM1 IRQ */
+#	define DS1305_AEI0		0x01	/* enable ALM0 IRQ */
+#define DS1305_STATUS		0x10
+/* status has just AEIx bits, mirrored as IRQFx */
+#define DS1305_TRICKLE		0x11
+/* trickle bits are defined in <linux/spi/ds1305.h> */
+
+/* a bunch of NVRAM */
+#define DS1305_NVRAM_LEN	96		/* bytes of NVRAM */
+
+#define DS1305_NVRAM		0x20		/* register addresses */
+
+
+struct ds1305 {
+	struct spi_device	*spi;
+	struct rtc_device	*rtc;
+
+	struct work_struct	work;
+
+	unsigned long		flags;
+#define FLAG_EXITING	0
+
+	bool			hr12;
+	u8			ctrl[DS1305_CONTROL_LEN];
+};
+
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Utilities ...  tolerate 12-hour AM/PM notation in case of non-Linux
+ * software (like a bootloader) which may require it.
+ */
+
+/* Decode HOUR; in 12hr mode "12" is the first hour of its half-day. */
+static unsigned bcd2hour(u8 bcd)
+{
+	if (bcd & DS1305_HR_12) {
+		unsigned	hour;
+
+		hour = BCD2BIN(bcd & ~(DS1305_HR_12 | DS1305_HR_PM)) % 12;
+		if (bcd & DS1305_HR_PM)
+			hour += 12;
+		return hour;
+	}
+	return BCD2BIN(bcd);
+}
+
+/* Encode tm_hour (0..23) for the HOUR register, honoring 12hr mode. */
+static u8 hour2bcd(bool hr12, int hour)
+{
+	if (hr12) {
+		u8	bcd = DS1305_HR_12;
+
+		if (hour >= 12)
+			bcd |= DS1305_HR_PM;
+		hour %= 12;
+		return bcd | BIN2BCD(hour ? hour : 12);
+	}
+	return BIN2BCD(hour);
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Interface to RTC framework
+ */
+
+#ifdef CONFIG_RTC_INTF_DEV
+
+/*
+ * Context: caller holds rtc->ops_lock (to protect ds1305->ctrl)
+ */
+static int ds1305_ioctl(struct device *dev, unsigned cmd, unsigned long arg)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	u8		buf[2];
+	int		status = -ENOIOCTLCMD;
+
+	buf[0] = DS1305_WRITE | DS1305_CONTROL;
+	buf[1] = ds1305->ctrl[0];
+
+	switch (cmd) {
+	case RTC_AIE_OFF:
+		status = 0;
+		if (!(buf[1] & DS1305_AEI0))
+			goto done;
+		buf[1] &= ~DS1305_AEI0;
+		break;
+
+	case RTC_AIE_ON:
+		status = 0;
+		if (ds1305->ctrl[0] & DS1305_AEI0)
+			goto done;
+		buf[1] |= DS1305_AEI0;
+		break;
+	}
+	if (status == 0) {
+		status = spi_write_then_read(ds1305->spi, buf, sizeof buf,
+				NULL, 0);
+		if (status >= 0)
+			ds1305->ctrl[0] = buf[1];
+	}
+
+done:
+	return status;
+}
+
+#else
+#define ds1305_ioctl	NULL
+#endif
+
+/*
+ * Get/set of date and time is pretty normal.
+ */
+
+static int ds1305_get_time(struct device *dev, struct rtc_time *time)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	u8		addr = DS1305_SEC;
+	u8		buf[DS1305_RTC_LEN];
+	int		status;
+
+	/* Use write-then-read to get all the date/time registers
+	 * since dma from stack is nonportable
+	 */
+	status = spi_write_then_read(ds1305->spi, &addr, sizeof addr,
+			buf, sizeof buf);
+	if (status < 0)
+		return status;
+
+	dev_vdbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
+		"read", buf[0], buf[1], buf[2], buf[3],
+		buf[4], buf[5], buf[6]);
+
+	/* Decode the registers */
+	time->tm_sec = BCD2BIN(buf[DS1305_SEC]);
+	time->tm_min = BCD2BIN(buf[DS1305_MIN]);
+	time->tm_hour = bcd2hour(buf[DS1305_HOUR]);
+	time->tm_wday = buf[DS1305_WDAY] - 1;
+	time->tm_mday = BCD2BIN(buf[DS1305_MDAY]);
+	time->tm_mon = BCD2BIN(buf[DS1305_MON]) - 1;
+	time->tm_year = BCD2BIN(buf[DS1305_YEAR]) + 100;
+
+	dev_vdbg(dev, "%s secs=%d, mins=%d, "
+		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
+		"read", time->tm_sec, time->tm_min,
+		time->tm_hour, time->tm_mday,
+		time->tm_mon, time->tm_year, time->tm_wday);
+
+	/* Time may not be set */
+	return rtc_valid_tm(time);
+}
+
+static int ds1305_set_time(struct device *dev, struct rtc_time *time)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	u8		buf[1 + DS1305_RTC_LEN];
+	u8		*bp = buf;
+
+	dev_vdbg(dev, "%s secs=%d, mins=%d, "
+		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
+		"write", time->tm_sec, time->tm_min,
+		time->tm_hour, time->tm_mday,
+		time->tm_mon, time->tm_year, time->tm_wday);
+
+	/* Write registers starting at the first time/date address. */
+	*bp++ = DS1305_WRITE | DS1305_SEC;
+
+	*bp++ = BIN2BCD(time->tm_sec);
+	*bp++ = BIN2BCD(time->tm_min);
+	*bp++ = hour2bcd(ds1305->hr12, time->tm_hour);
+	*bp++ = (time->tm_wday < 7) ? (time->tm_wday + 1) : 1;
+	*bp++ = BIN2BCD(time->tm_mday);
+	*bp++ = BIN2BCD(time->tm_mon + 1);
+	*bp++ = BIN2BCD(time->tm_year - 100);
+
+	dev_dbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
+		"write", buf[1], buf[2], buf[3],
+		buf[4], buf[5], buf[6], buf[7]);
+
+	/* use write-then-read since dma from stack is nonportable */
+	return spi_write_then_read(ds1305->spi, buf, sizeof buf,
+			NULL, 0);
+}
+
+/*
+ * Get/set of alarm is a bit funky:
+ *
+ * - First there's the inherent raciness of getting the (partitioned)
+ *   status of an alarm that could trigger while we're reading parts
+ *   of that status.
+ *
+ * - Second there's its limited range (we could increase it a bit by
+ *   relying on WDAY), which means it will easily roll over.
+ *
+ * - Third there's the choice of two alarms and alarm signals.
+ *   Here we use ALM0 and expect that nINT0 (open drain) is used;
+ *   that's the only real option for DS1306 runtime alarms, and is
+ *   natural on DS1305.
+ *
+ * - Fourth, there's also ALM1, and a second interrupt signal:
+ *     + On DS1305 ALM1 uses nINT1 (when INTCN=1) else nINT0;
+ *     + On DS1306 ALM1 only uses INT1 (an active high pulse)
+ *       and it won't work when VCC1 is active.
+ *
+ *   So to be most general, we should probably set both alarms to the
+ *   same value, letting ALM1 be the wakeup event source on DS1306
+ *   and handling several wiring options on DS1305.
+ *
+ * - Fifth, we support the polled mode (as well as possible; why not?)
+ *   even when no interrupt line is wired to an IRQ.
+ */
+
+/*
+ * Context: caller holds rtc->ops_lock (to protect ds1305->ctrl)
+ */
+static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	struct spi_device *spi = ds1305->spi;
+	u8		addr;
+	int		status;
+	u8		buf[DS1305_ALM_LEN];
+
+	/* Refresh control register cache BEFORE reading ALM0 registers,
+	 * since reading alarm registers acks any pending IRQ.  That
+	 * makes returning "pending" status a bit of a lie, but that bit
+	 * of EFI status is at best fragile anyway (given IRQ handlers).
+	 */
+	addr = DS1305_CONTROL;
+	status = spi_write_then_read(spi, &addr, sizeof addr,
+			ds1305->ctrl, sizeof ds1305->ctrl);
+	if (status < 0)
+		return status;
+
+	alm->enabled = !!(ds1305->ctrl[0] & DS1305_AEI0);
+	alm->pending = !!(ds1305->ctrl[1] & DS1305_AEI0);
+
+	/* get and check ALM0 registers */
+	addr = DS1305_ALM0(DS1305_SEC);
+	status = spi_write_then_read(spi, &addr, sizeof addr,
+			buf, sizeof buf);
+	if (status < 0)
+		return status;
+
+	dev_vdbg(dev, "%s: %02x %02x %02x %02x\n",
+		"alm0 read", buf[DS1305_SEC], buf[DS1305_MIN],
+		buf[DS1305_HOUR], buf[DS1305_WDAY]);
+
+	if ((DS1305_ALM_DISABLE & buf[DS1305_SEC])
+			|| (DS1305_ALM_DISABLE & buf[DS1305_MIN])
+			|| (DS1305_ALM_DISABLE & buf[DS1305_HOUR]))
+		return -EIO;
+
+	/* Stuff these values into alm->time and let RTC framework code
+	 * fill in the rest ... and also handle rollover to tomorrow when
+	 * that's needed.
+	 */
+	alm->time.tm_sec = BCD2BIN(buf[DS1305_SEC]);
+	alm->time.tm_min = BCD2BIN(buf[DS1305_MIN]);
+	alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]);
+	alm->time.tm_mday = -1;
+	alm->time.tm_mon = -1;
+	alm->time.tm_year = -1;
+	/* next three fields are unused by Linux */
+	alm->time.tm_wday = -1;
+	alm->time.tm_yday = -1;
+	alm->time.tm_isdst = -1;
+
+	return 0;
+}
+
+/*
+ * Context: caller holds rtc->ops_lock (to protect ds1305->ctrl)
+ */
+static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	struct spi_device *spi = ds1305->spi;
+	unsigned long	now, later;
+	struct rtc_time	tm;
+	int		status;
+	u8		buf[1 + DS1305_ALM_LEN];
+
+	/* convert desired alarm to time_t */
+	status = rtc_tm_to_time(&alm->time, &later);
+	if (status < 0)
+		return status;
+
+	/* Read current time as time_t */
+	status = ds1305_get_time(dev, &tm);
+	if (status < 0)
+		return status;
+	status = rtc_tm_to_time(&tm, &now);
+	if (status < 0)
+		return status;
+
+	/* make sure alarm fires within the next 24 hours */
+	if (later <= now)
+		return -EINVAL;
+	if ((later - now) > 24 * 60 * 60)
+		return -EDOM;
+
+	/* disable alarm if needed */
+	if (ds1305->ctrl[0] & DS1305_AEI0) {
+		ds1305->ctrl[0] &= ~DS1305_AEI0;
+
+		buf[0] = DS1305_WRITE | DS1305_CONTROL;
+		buf[1] = ds1305->ctrl[0];
+		status = spi_write_then_read(ds1305->spi, buf, 2, NULL, 0);
+		if (status < 0)
+			return status;
+	}
+
+	/* write alarm */
+	buf[0] = DS1305_WRITE | DS1305_ALM0(DS1305_SEC);
+	buf[1 + DS1305_SEC] = BIN2BCD(alm->time.tm_sec);
+	buf[1 + DS1305_MIN] = BIN2BCD(alm->time.tm_min);
+	buf[1 + DS1305_HOUR] = hour2bcd(ds1305->hr12, alm->time.tm_hour);
+	buf[1 + DS1305_WDAY] = DS1305_ALM_DISABLE;
+
+	dev_dbg(dev, "%s: %02x %02x %02x %02x\n",
+		"alm0 write", buf[1 + DS1305_SEC], buf[1 + DS1305_MIN],
+		buf[1 + DS1305_HOUR], buf[1 + DS1305_WDAY]);
+
+	status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0);
+	if (status < 0)
+		return status;
+
+	/* enable alarm if requested */
+	if (alm->enabled) {
+		ds1305->ctrl[0] |= DS1305_AEI0;
+
+		buf[0] = DS1305_WRITE | DS1305_CONTROL;
+		buf[1] = ds1305->ctrl[0];
+		status = spi_write_then_read(ds1305->spi, buf, 2, NULL, 0);
+	}
+
+	return status;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static int ds1305_proc(struct device *dev, struct seq_file *seq)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	char		*diodes = "no";
+	char		*resistors = "";
+
+	/* ctrl[2] is treated as read-only; no locking needed */
+	if ((ds1305->ctrl[2] & 0xf0) == DS1305_TRICKLE_MAGIC) {
+		switch (ds1305->ctrl[2] & 0x0c) {
+		case DS1305_TRICKLE_DS2:
+			diodes = "2 diodes, ";
+			break;
+		case DS1305_TRICKLE_DS1:
+			diodes = "1 diode, ";
+			break;
+		default:
+			goto done;
+		}
+		switch (ds1305->ctrl[2] & 0x03) {
+		case DS1305_TRICKLE_2K:
+			resistors = "2k Ohm";
+			break;
+		case DS1305_TRICKLE_4K:
+			resistors = "4k Ohm";
+			break;
+		case DS1305_TRICKLE_8K:
+			resistors = "8k Ohm";
+			break;
+		default:
+			diodes = "no";
+			break;
+		}
+	}
+
+done:
+	return seq_printf(seq,
+			"trickle_charge\t: %s%s\n",
+			diodes, resistors);
+}
+
+#else
+#define ds1305_proc	NULL
+#endif
+
+static const struct rtc_class_ops ds1305_ops = {
+	.ioctl		= ds1305_ioctl,
+	.read_time	= ds1305_get_time,
+	.set_time	= ds1305_set_time,
+	.read_alarm	= ds1305_get_alarm,
+	.set_alarm	= ds1305_set_alarm,
+	.proc		= ds1305_proc,
+};
+
+static void ds1305_work(struct work_struct *work)
+{
+	struct ds1305	*ds1305 = container_of(work, struct ds1305, work);
+	struct mutex	*lock = &ds1305->rtc->ops_lock;
+	struct spi_device *spi = ds1305->spi;
+	u8		buf[3];
+	int		status;
+
+	/* lock to protect ds1305->ctrl */
+	mutex_lock(lock);
+
+	/* Disable the IRQ, and clear its status ... for now, we "know"
+	 * that if more than one alarm is active, they're in sync.
+	 * Note that reading ALM data registers also clears IRQ status.
+	 */
+	ds1305->ctrl[0] &= ~(DS1305_AEI1 | DS1305_AEI0);
+	ds1305->ctrl[1] = 0;
+
+	buf[0] = DS1305_WRITE | DS1305_CONTROL;
+	buf[1] = ds1305->ctrl[0];
+	buf[2] = 0;
+
+	status = spi_write_then_read(spi, buf, sizeof buf,
+			NULL, 0);
+	if (status < 0)
+		dev_dbg(&spi->dev, "clear irq --> %d\n", status);
+
+	mutex_unlock(lock);
+
+	if (!test_bit(FLAG_EXITING, &ds1305->flags))
+		enable_irq(spi->irq);
+
+	/* rtc_update_irq() requires an IRQ-disabled context */
+	local_irq_disable();
+	rtc_update_irq(ds1305->rtc, 1, RTC_AF | RTC_IRQF);
+	local_irq_enable();
+}
+
+/*
+ * This "real" IRQ handler hands off to a workqueue mostly to allow
+ * mutex locking for ds1305->ctrl ... unlike I2C, we could issue async
+ * I/O requests in IRQ context (to clear the IRQ status).
+ */
+static irqreturn_t ds1305_irq(int irq, void *p)
+{
+	struct ds1305		*ds1305 = p;
+
+	disable_irq_nosync(irq);	/* don't wait on our own handler */
+	schedule_work(&ds1305->work);
+	return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Interface for NVRAM
+ */
+
+static void msg_init(struct spi_message *m, struct spi_transfer *x,
+		u8 *addr, size_t count, char *tx, char *rx)
+{
+	spi_message_init(m);
+	memset(x, 0, 2 * sizeof(*x));
+
+	x->tx_buf = addr;
+	x->len = 1;
+	spi_message_add_tail(x, m);
+
+	x++;
+
+	x->tx_buf = tx;
+	x->rx_buf = rx;
+	x->len = count;
+	spi_message_add_tail(x, m);
+}
+
+static ssize_t
+ds1305_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct spi_device	*spi;
+	u8			addr;
+	struct spi_message	m;
+	struct spi_transfer	x[2];
+	int			status;
+
+	spi = container_of(kobj, struct spi_device, dev.kobj);
+
+	if (unlikely(off >= DS1305_NVRAM_LEN))
+		return 0;
+	if (count >= DS1305_NVRAM_LEN)
+		count = DS1305_NVRAM_LEN;
+	if ((off + count) > DS1305_NVRAM_LEN)
+		count = DS1305_NVRAM_LEN - off;
+	if (unlikely(!count))
+		return count;
+
+	addr = DS1305_NVRAM + off;
+	msg_init(&m, x, &addr, count, NULL, buf);
+
+	status = spi_sync(spi, &m);
+	if (status < 0)
+		dev_err(&spi->dev, "nvram %s error %d\n", "read", status);
+	return (status < 0) ? status : count;
+}
+
+static ssize_t
+ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct spi_device	*spi;
+	u8			addr;
+	struct spi_message	m;
+	struct spi_transfer	x[2];
+	int			status;
+
+	spi = container_of(kobj, struct spi_device, dev.kobj);
+
+	if (unlikely(off >= DS1305_NVRAM_LEN))
+		return -EFBIG;
+	if (count >= DS1305_NVRAM_LEN)
+		count = DS1305_NVRAM_LEN;
+	if ((off + count) > DS1305_NVRAM_LEN)
+		count = DS1305_NVRAM_LEN - off;
+	if (unlikely(!count))
+		return count;
+
+	addr = (DS1305_WRITE | DS1305_NVRAM) + off;
+	msg_init(&m, x, &addr, count, buf, NULL);
+
+	status = spi_sync(spi, &m);
+	if (status < 0)
+		dev_err(&spi->dev, "nvram %s error %d\n", "write", status);
+	return (status < 0) ? status : count;
+}
+
+static struct bin_attribute nvram = {
+	.attr.name	= "nvram",
+	.attr.mode	= S_IRUGO | S_IWUSR,
+	.attr.owner	= THIS_MODULE,
+	.read		= ds1305_nvram_read,
+	.write		= ds1305_nvram_write,
+	.size		= DS1305_NVRAM_LEN,
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Interface to SPI stack
+ */
+
+static int __devinit ds1305_probe(struct spi_device *spi)
+{
+	struct ds1305			*ds1305;
+	struct rtc_device		*rtc;
+	int				status;
+	u8				addr, value;
+	struct ds1305_platform_data	*pdata = spi->dev.platform_data;
+	bool				write_ctrl = false;
+
+	/* Sanity check board setup data.  This may be hooked up
+	 * in 3wire mode, but we don't care.  Note that unless
+	 * there's an inverter in place, this needs SPI_CS_HIGH!
+	 */
+	if ((spi->bits_per_word && spi->bits_per_word != 8)
+			|| (spi->max_speed_hz > 2000000)
+			|| !(spi->mode & SPI_CPHA))
+		return -EINVAL;
+
+	/* set up driver data */
+	ds1305 = kzalloc(sizeof *ds1305, GFP_KERNEL);
+	if (!ds1305)
+		return -ENOMEM;
+	ds1305->spi = spi;
+	spi_set_drvdata(spi, ds1305);
+
+	/* read and cache control registers */
+	addr = DS1305_CONTROL;
+	status = spi_write_then_read(spi, &addr, sizeof addr,
+			ds1305->ctrl, sizeof ds1305->ctrl);
+	if (status < 0) {
+		dev_dbg(&spi->dev, "can't %s, %d\n", "read", status);
+		goto fail0;
+	}
+
+	dev_dbg(&spi->dev, "ctrl %s: %02x %02x %02x\n",
+			"read", ds1305->ctrl[0],
+			ds1305->ctrl[1], ds1305->ctrl[2]);
+
+	/* Sanity check register values ... partially compensating for the
+	 * fact that SPI has no device handshake.  A pullup on MISO would
+	 * make these tests fail; but not all systems will have one.  If
+	 * some register is neither 0x00 nor 0xff, a chip is likely there.
+	 */
+	if ((ds1305->ctrl[0] & 0x38) != 0 || (ds1305->ctrl[1] & 0xfc) != 0) {
+		dev_dbg(&spi->dev, "RTC chip is not present\n");
+		status = -ENODEV;
+		goto fail0;
+	}
+	if (ds1305->ctrl[2] == 0)
+		dev_dbg(&spi->dev, "chip may not be present\n");
+
+	/* enable writes if needed ... if we were paranoid it would
+	 * make sense to enable them only when absolutely necessary.
+	 */
+	if (ds1305->ctrl[0] & DS1305_WP) {
+		u8		buf[2];
+
+		ds1305->ctrl[0] &= ~DS1305_WP;
+
+		buf[0] = DS1305_WRITE | DS1305_CONTROL;
+		buf[1] = ds1305->ctrl[0];
+		status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0);
+
+		dev_dbg(&spi->dev, "clear WP --> %d\n", status);
+		if (status < 0)
+			goto fail0;
+	}
+
+	/* on DS1305, maybe start oscillator; like most low power
+	 * oscillators, it may take a second to stabilize
+	 */
+	if (ds1305->ctrl[0] & DS1305_nEOSC) {
+		ds1305->ctrl[0] &= ~DS1305_nEOSC;
+		write_ctrl = true;
+		dev_warn(&spi->dev, "SET TIME!\n");
+	}
+
+	/* ack any pending IRQs */
+	if (ds1305->ctrl[1]) {
+		ds1305->ctrl[1] = 0;
+		write_ctrl = true;
+	}
+
+	/* this may need one-time (re)init */
+	if (pdata) {
+		/* maybe enable trickle charge */
+		if (((ds1305->ctrl[2] & 0xf0) != DS1305_TRICKLE_MAGIC)) {
+			ds1305->ctrl[2] = DS1305_TRICKLE_MAGIC
+						| pdata->trickle;
+			write_ctrl = true;
+		}
+
+		/* on DS1306, configure 1 Hz signal */
+		if (pdata->is_ds1306) {
+			if (pdata->en_1hz) {
+				if (!(ds1305->ctrl[0] & DS1306_1HZ)) {
+					ds1305->ctrl[0] |= DS1306_1HZ;
+					write_ctrl = true;
+				}
+			} else {
+				if (ds1305->ctrl[0] & DS1306_1HZ) {
+					ds1305->ctrl[0] &= ~DS1306_1HZ;
+					write_ctrl = true;
+				}
+			}
+		}
+	}
+
+	if (write_ctrl) {
+		u8		buf[4];
+
+		buf[0] = DS1305_WRITE | DS1305_CONTROL;
+		buf[1] = ds1305->ctrl[0];
+		buf[2] = ds1305->ctrl[1];
+		buf[3] = ds1305->ctrl[2];
+		status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0);
+		if (status < 0) {
+			dev_dbg(&spi->dev, "can't %s, %d\n",
+					"write", status);
+			goto fail0;
+		}
+
+		dev_dbg(&spi->dev, "ctrl %s: %02x %02x %02x\n",
+				"write", ds1305->ctrl[0],
+				ds1305->ctrl[1], ds1305->ctrl[2]);
+	}
+
+	/* see if non-Linux software set up AM/PM mode */
+	addr = DS1305_HOUR;
+	status = spi_write_then_read(spi, &addr, sizeof addr,
+				&value, sizeof value);
+	if (status < 0) {
+		dev_dbg(&spi->dev, "read HOUR --> %d\n", status);
+		goto fail0;
+	}
+
+	ds1305->hr12 = (DS1305_HR_12 & value) != 0;
+	if (ds1305->hr12)
+		dev_dbg(&spi->dev, "AM/PM\n");
+
+	/* register RTC ... from here on, ds1305->ctrl needs locking */
+	rtc = rtc_device_register("ds1305", &spi->dev,
+			&ds1305_ops, THIS_MODULE);
+	if (IS_ERR(rtc)) {
+		status = PTR_ERR(rtc);
+		dev_dbg(&spi->dev, "register rtc --> %d\n", status);
+		goto fail0;
+	}
+	ds1305->rtc = rtc;
+
+	/* Maybe set up alarm IRQ; be ready to handle it triggering right
+	 * away.  NOTE that we don't share this.  The signal is active low,
+	 * and we can't ack it before a SPI message delay.  We temporarily
+	 * disable the IRQ until it's acked, which lets us work with more
+	 * IRQ trigger modes (not all IRQ controllers can do falling edge).
+	 */
+	if (spi->irq) {
+		INIT_WORK(&ds1305->work, ds1305_work);
+		status = request_irq(spi->irq, ds1305_irq,
+				0, dev_name(&rtc->dev), ds1305);
+		if (status < 0) {
+			dev_dbg(&spi->dev, "request_irq %d --> %d\n",
+					spi->irq, status);
+			goto fail1;
+		}
+	}
+
+	/* export NVRAM */
+	status = sysfs_create_bin_file(&spi->dev.kobj, &nvram);
+	if (status < 0) {
+		dev_dbg(&spi->dev, "register nvram --> %d\n", status);
+		goto fail2;
+	}
+
+	return 0;
+
+fail2:
+	if (spi->irq)		/* only release an IRQ we requested */
+		free_irq(spi->irq, ds1305);
+fail1:
+	rtc_device_unregister(rtc);
+fail0:
+	kfree(ds1305);
+	return status;
+}
+
+static int __devexit ds1305_remove(struct spi_device *spi)
+{
+	struct ds1305	*ds1305 = spi_get_drvdata(spi);
+
+	sysfs_remove_bin_file(&spi->dev.kobj, &nvram);
+
+	/* carefully shut down irq and workqueue, if present */
+	if (spi->irq) {
+		set_bit(FLAG_EXITING, &ds1305->flags);
+		free_irq(spi->irq, ds1305);
+		flush_scheduled_work();
+	}
+
+	rtc_device_unregister(ds1305->rtc);
+	spi_set_drvdata(spi, NULL);
+	kfree(ds1305);
+	return 0;
+}
+
+static struct spi_driver ds1305_driver = {
+	.driver.name	= "rtc-ds1305",
+	.driver.owner	= THIS_MODULE,
+	.probe		= ds1305_probe,
+	.remove		= __devexit_p(ds1305_remove),
+	/* REVISIT add suspend/resume */
+};
+
+static int __init ds1305_init(void)
+{
+	return spi_register_driver(&ds1305_driver);
+}
+module_init(ds1305_init);
+
+static void __exit ds1305_exit(void)
+{
+	spi_unregister_driver(&ds1305_driver);
+}
+module_exit(ds1305_exit);
+
+MODULE_DESCRIPTION("RTC driver for DS1305 and DS1306 chips");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/spi/ds1305.h b/include/linux/spi/ds1305.h
new file mode 100644
index 00000000000..287ec830eab
--- /dev/null
+++ b/include/linux/spi/ds1305.h
@@ -0,0 +1,35 @@
+#ifndef __LINUX_SPI_DS1305_H
+#define __LINUX_SPI_DS1305_H
+
+/*
+ * One-time configuration for ds1305 and ds1306 RTC chips.
+ *
+ * Put a pointer to this in spi_board_info.platform_data if you want to
+ * be sure that Linux (re)initializes this as needed ... after losing
+ * backup power, and potentially on the first boot.
+ */
+struct ds1305_platform_data {
+
+	/* Trickle charge configuration:  it's OK to leave out the MAGIC
+	 * bitmask; mask in either DS1 or DS2, and then one of 2K/4k/8K.
+	 */
+#define DS1305_TRICKLE_MAGIC	0xa0
+#define DS1305_TRICKLE_DS2	0x08	/* two diodes */
+#define DS1305_TRICKLE_DS1	0x04	/* one diode */
+#define DS1305_TRICKLE_2K	0x01	/* 2 KOhm resistance */
+#define DS1305_TRICKLE_4K	0x02	/* 4 KOhm resistance */
+#define DS1305_TRICKLE_8K	0x03	/* 8 KOhm resistance */
+	u8	trickle;
+
+	/* set only on ds1306 parts */
+	bool	is_ds1306;
+
+	/* ds1306 only:  enable 1 Hz output */
+	bool	en_1hz;
+
+	/* REVISIT:  the driver currently expects nINT0 to be wired
+	 * as the alarm IRQ.  ALM1 may also need to be set up ...
+	 */
+};
+
+#endif /* __LINUX_SPI_DS1305_H */
-- 
GitLab


From d3de851a445123f24ad8ece18662014b5e8a8b4e Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 23 Jul 2008 21:30:37 -0700
Subject: [PATCH 346/853] rtc: BCD codeshrink

This updates <linux/bcd.h> to define the key routines as constant
functions, which the macros will then call.  Newer code can now call
bcd2bin() instead of SCREAMING BCD2BIN() TO THE FOUR WINDS.

This lets each driver shrink their codespace by using N function calls to
a single (global) copy of those routines, instead of N inlined copies of
these functions per driver.

These routines aren't used in speed-critical code.  Almost all callers are
in the RTC framework.  Typical per-driver savings is near 300 bytes.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Acked-by: Adrian Bunk <bunk@kernel.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bcd.h |  9 +++++++--
 lib/Makefile        |  2 +-
 lib/bcd.c           | 14 ++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)
 create mode 100644 lib/bcd.c

diff --git a/include/linux/bcd.h b/include/linux/bcd.h
index c545308125b..7ac518e3c15 100644
--- a/include/linux/bcd.h
+++ b/include/linux/bcd.h
@@ -10,8 +10,13 @@
 #ifndef _BCD_H
 #define _BCD_H
 
-#define BCD2BIN(val)	(((val) & 0x0f) + ((val)>>4)*10)
-#define BIN2BCD(val)	((((val)/10)<<4) + (val)%10)
+#include <linux/compiler.h>
+
+unsigned bcd2bin(unsigned char val) __attribute_const__;
+unsigned char bin2bcd(unsigned val) __attribute_const__;
+
+#define BCD2BIN(val)	bcd2bin(val)
+#define BIN2BCD(val)	bin2bcd(val)
 
 /* backwards compat */
 #define BCD_TO_BIN(val) ((val)=BCD2BIN(val))
diff --git a/lib/Makefile b/lib/Makefile
index 818c4d45551..9085ad6fa53 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,7 +18,7 @@ lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o kref.o klist.o
 
-obj-y += div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
diff --git a/lib/bcd.c b/lib/bcd.c
new file mode 100644
index 00000000000..d74257fd0fe
--- /dev/null
+++ b/lib/bcd.c
@@ -0,0 +1,14 @@
+#include <linux/bcd.h>
+#include <linux/module.h>
+
+unsigned bcd2bin(unsigned char val)
+{
+	return (val & 0x0f) + (val >> 4) * 10;
+}
+EXPORT_SYMBOL(bcd2bin);
+
+unsigned char bin2bcd(unsigned val)
+{
+	return ((val / 10) << 4) + val % 10;
+}
+EXPORT_SYMBOL(bin2bcd);
-- 
GitLab


From 71fc822455ccb63a66be0b6e97a415aceb0062c6 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 23 Jul 2008 21:30:38 -0700
Subject: [PATCH 347/853] rtc: rtc-omap footprint shrinkage

Shrink the runtime footprint of the OMAP1 RTC driver a bunch by removing
some old hacks and switching to platform_driver_probe().

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-omap.c | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index eb23d8423f4..8876605d4d4 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -92,18 +92,6 @@
 #define rtc_write(val, addr)	omap_writeb(val, OMAP_RTC_BASE + (addr))
 
 
-/* platform_bus isn't hotpluggable, so for static linkage it'd be safe
- * to get rid of probe() and remove() code ... too bad the driver struct
- * remembers probe(), that's about 25% of the runtime footprint!!
- */
-#ifndef	MODULE
-#undef	__devexit
-#undef	__devexit_p
-#define	__devexit	__exit
-#define	__devexit_p	__exit_p
-#endif
-
-
 /* we rely on the rtc framework to handle locking (rtc->ops_lock),
  * so the only other requirement is that register accesses which
  * require BUSY to be clear are made with IRQs locally disabled
@@ -324,7 +312,7 @@ static struct rtc_class_ops omap_rtc_ops = {
 static int omap_rtc_alarm;
 static int omap_rtc_timer;
 
-static int __devinit omap_rtc_probe(struct platform_device *pdev)
+static int __init omap_rtc_probe(struct platform_device *pdev)
 {
 	struct resource		*res, *mem;
 	struct rtc_device	*rtc;
@@ -440,7 +428,7 @@ fail:
 	return -EIO;
 }
 
-static int __devexit omap_rtc_remove(struct platform_device *pdev)
+static int __exit omap_rtc_remove(struct platform_device *pdev)
 {
 	struct rtc_device	*rtc = platform_get_drvdata(pdev);;
 
@@ -498,8 +486,7 @@ static void omap_rtc_shutdown(struct platform_device *pdev)
 
 MODULE_ALIAS("platform:omap_rtc");
 static struct platform_driver omap_rtc_driver = {
-	.probe		= omap_rtc_probe,
-	.remove		= __devexit_p(omap_rtc_remove),
+	.remove		= __exit_p(omap_rtc_remove),
 	.suspend	= omap_rtc_suspend,
 	.resume		= omap_rtc_resume,
 	.shutdown	= omap_rtc_shutdown,
@@ -511,7 +498,7 @@ static struct platform_driver omap_rtc_driver = {
 
 static int __init rtc_init(void)
 {
-	return platform_driver_register(&omap_rtc_driver);
+	return platform_driver_probe(&omap_rtc_driver, omap_rtc_probe);
 }
 module_init(rtc_init);
 
-- 
GitLab


From 02bb584f3b1cfc8188522a4d2c8881b65073a4f1 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Wed, 23 Jul 2008 21:30:39 -0700
Subject: [PATCH 348/853] rtc: convert the PCF8583 driver to the new I2C style
 framework with device_ids

Convert the PCF8583 driver to the new I2C style framework with device_ids

Signed-off-by: Juergen Beisert <j.beisert@pengutronix.de>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-pcf8583.c | 129 ++++++++++++--------------------------
 1 file changed, 39 insertions(+), 90 deletions(-)

diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c
index 3d09d8f0b1f..d388c662bf4 100644
--- a/drivers/rtc/rtc-pcf8583.c
+++ b/drivers/rtc/rtc-pcf8583.c
@@ -2,6 +2,7 @@
  *  drivers/rtc/rtc-pcf8583.c
  *
  *  Copyright (C) 2000 Russell King
+ *  Copyright (C) 2008 Wolfram Sang & Juergen Beisert, Pengutronix
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -14,7 +15,6 @@
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/slab.h>
-#include <linux/string.h>
 #include <linux/rtc.h>
 #include <linux/init.h>
 #include <linux/errno.h>
@@ -27,7 +27,6 @@ struct rtc_mem {
 };
 
 struct pcf8583 {
-	struct i2c_client client;
 	struct rtc_device *rtc;
 	unsigned char ctrl;
 };
@@ -40,10 +39,6 @@ struct pcf8583 {
 #define CTRL_ALARM	0x02
 #define CTRL_TIMER	0x01
 
-static const unsigned short normal_i2c[] = { 0x50, I2C_CLIENT_END };
-
-/* Module parameters */
-I2C_CLIENT_INSMOD;
 
 static struct i2c_driver pcf8583_driver;
 
@@ -269,106 +264,60 @@ static const struct rtc_class_ops pcf8583_rtc_ops = {
 	.set_time	= pcf8583_rtc_set_time,
 };
 
-static int pcf8583_probe(struct i2c_adapter *adap, int addr, int kind);
-
-static int pcf8583_attach(struct i2c_adapter *adap)
-{
-	return i2c_probe(adap, &addr_data, pcf8583_probe);
-}
-
-static int pcf8583_detach(struct i2c_client *client)
-{
-	int err;
-	struct pcf8583 *pcf = i2c_get_clientdata(client);
-	struct rtc_device *rtc = pcf->rtc;
-
-	if (rtc)
-		rtc_device_unregister(rtc);
-
-	if ((err = i2c_detach_client(client)))
-		return err;
-
-	kfree(pcf);
-	return 0;
-}
-
-static struct i2c_driver pcf8583_driver = {
-	.driver = {
-		.name	= "pcf8583",
-	},
-	.id		= I2C_DRIVERID_PCF8583,
-	.attach_adapter	= pcf8583_attach,
-	.detach_client	= pcf8583_detach,
-};
-
-static int pcf8583_probe(struct i2c_adapter *adap, int addr, int kind)
+static int pcf8583_probe(struct i2c_client *client,
+				const struct i2c_device_id *id)
 {
-	struct pcf8583 *pcf;
-	struct i2c_client *client;
-	struct rtc_device *rtc;
-	unsigned char buf[1], ad[1] = { 0 };
+	struct pcf8583 *pcf8583;
 	int err;
-	struct i2c_msg msgs[2] = {
-		{
-			.addr = addr,
-			.flags = 0,
-			.len = 1,
-			.buf = ad,
-		}, {
-			.addr = addr,
-			.flags = I2C_M_RD,
-			.len = 1,
-			.buf = buf,
-		}
-	};
 
-	if (!i2c_check_functionality(adap, I2C_FUNC_I2C))
-		return 0;
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+		return -ENODEV;
 
-	pcf = kzalloc(sizeof(*pcf), GFP_KERNEL);
-	if (!pcf)
+	pcf8583 = kzalloc(sizeof(struct pcf8583), GFP_KERNEL);
+	if (!pcf8583)
 		return -ENOMEM;
 
-	client = &pcf->client;
+	pcf8583->rtc = rtc_device_register(pcf8583_driver.driver.name,
+			&client->dev, &pcf8583_rtc_ops, THIS_MODULE);
 
-	client->addr		= addr;
-	client->adapter	= adap;
-	client->driver	= &pcf8583_driver;
-
-	strlcpy(client->name, pcf8583_driver.driver.name, I2C_NAME_SIZE);
-
-	if (i2c_transfer(client->adapter, msgs, 2) != 2) {
-		err = -EIO;
+	if (IS_ERR(pcf8583->rtc)) {
+		err = PTR_ERR(pcf8583->rtc);
 		goto exit_kfree;
 	}
 
-	err = i2c_attach_client(client);
-
-	if (err)
-		goto exit_kfree;
-
-	rtc = rtc_device_register(pcf8583_driver.driver.name, &client->dev,
-				  &pcf8583_rtc_ops, THIS_MODULE);
+	i2c_set_clientdata(client, pcf8583);
+	return 0;
 
-	if (IS_ERR(rtc)) {
-		err = PTR_ERR(rtc);
-		goto exit_detach;
-	}
+exit_kfree:
+	kfree(pcf8583);
+	return err;
+}
 
-	pcf->rtc = rtc;
-	i2c_set_clientdata(client, pcf);
-	set_ctrl(client, buf[0]);
+static int __devexit pcf8583_remove(struct i2c_client *client)
+{
+	struct pcf8583 *pcf8583 = i2c_get_clientdata(client);
 
+	if (pcf8583->rtc)
+		rtc_device_unregister(pcf8583->rtc);
+	kfree(pcf8583);
 	return 0;
+}
 
-exit_detach:
-	i2c_detach_client(client);
-
-exit_kfree:
-	kfree(pcf);
+static const struct i2c_device_id pcf8583_id[] = {
+	{ "pcf8583", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, pcf8583_id);
 
-	return err;
-}
+static struct i2c_driver pcf8583_driver = {
+	.driver = {
+		.name	= "pcf8583",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pcf8583_probe,
+	.remove		= __devexit_p(pcf8583_remove),
+	.id_table	= pcf8583_id,
+};
 
 static __init int pcf8583_init(void)
 {
-- 
GitLab


From c68d07b2da54c941bb36c9d6d35fe8f263ee10ef Mon Sep 17 00:00:00 2001
From: "Carlos R. Mafra" <crmafra@ift.unesp.br>
Date: Wed, 23 Jul 2008 21:30:40 -0700
Subject: [PATCH 349/853] rtc: remove and clarify unneeded externs

When CONFIG_HPET_EMULATE_RTC is defined the external declaration of
hpet_rtc_interrupt is redundant due to the inclusion of hpet.h.

When !CONFIG_HPET_EMULATE_RTC we make it clear that hpet_rtc_interrupt is
not used by defining it to return zero.

Signed-off-by: Carlos R. Mafra <crmafra@ift.unesp.br>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/rtc.c     | 2 --
 drivers/rtc/rtc-cmos.c | 5 ++++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index d1569a0d050..dbefbb30ed4 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -121,8 +121,6 @@ static irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 	return 0;
 }
 #endif
-#else
-extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
 #endif
 
 /*
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index d7bb9bac71d..94b89a2d9c2 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -52,7 +52,10 @@
 #define hpet_rtc_timer_init() 			do { } while (0)
 #define hpet_register_irq_handler(h) 		0
 #define hpet_unregister_irq_handler(h)		do { } while (0)
-extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
+static irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
+{
+	return 0;
+}
 #endif
 
 struct cmos_rtc {
-- 
GitLab


From 35d3fdd5f304c06654c940921fc045c60df34693 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 23 Jul 2008 21:30:43 -0700
Subject: [PATCH 350/853] rtc-cmos: improve HPET IRQ glue

Resolve http://bugzilla.kernel.org/show_bug.cgi?id=11051 and other bugs
related to the way the HPET glue code in rtc-cmos was incomplete and
inconsistent:

 * Switch the approach so that the basic driver code flow isn't
   changed by having HPET ... instead, just have HPET shadow the
   RTC_CONTROL irq enables and RTC_FREQ_SELECT data.  It's only
   coping with IRQ thievery, after all.

 * Do that consistently (!!) to avoid problems when the HPET code
   is out of sync with the real RTC intent.  Examples include:

   - cmos_procfs(), which now reports correct data

   - cmos_irq_set_state() ... also removing the previous PIE_{ON,OFF}
     ioctl support so only one code path manages "periodic" IRQs

   - cmos_do_shutdown() ... currently a "just in case" change.

   - cmos_suspend() and cmos_resume() ... also handling a bug that
     was specific to HPET's IRQ thievery, where the alarm wasn't
     disabled after waking the system

 * Always call that HPET code under the RTC spinlock (it doesn't do
   its own locking)

Also clean up the HPET glue:

 * Add some comments explaining what's going on.

 * Switch to having just one #ifdef for the HPET glue, and inline
   functions (not #defines) to avoid some compiler warnings.

 * Have the probe message also report when HPET IRQs are involved

This still leaves various holes in the HPET glue, like the emulated update
IRQs being out of sync with the RTC, alarms never using day or month
matches, and many extra IRQs (at 64 Hz).

[akpm@linux-foundation.org: fix build]
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Tomas Janousek <tomi@nomi.cz>
Cc: Bernhard Walle <bwalle@suse.de>
Cc: Carlos R. Mafra <crmafra@ift.unesp.br>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-cmos.c | 193 ++++++++++++++++++++++++++++-------------
 1 file changed, 131 insertions(+), 62 deletions(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 94b89a2d9c2..e9984650ea9 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -36,28 +36,9 @@
 #include <linux/platform_device.h>
 #include <linux/mod_devicetable.h>
 
-#ifdef CONFIG_HPET_EMULATE_RTC
-#include <asm/hpet.h>
-#endif
-
 /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
 #include <asm-generic/rtc.h>
 
-#ifndef CONFIG_HPET_EMULATE_RTC
-#define is_hpet_enabled()			0
-#define hpet_set_alarm_time(hrs, min, sec) 	do { } while (0)
-#define hpet_set_periodic_freq(arg) 		0
-#define hpet_mask_rtc_irq_bit(arg) 		do { } while (0)
-#define hpet_set_rtc_irq_bit(arg) 		do { } while (0)
-#define hpet_rtc_timer_init() 			do { } while (0)
-#define hpet_register_irq_handler(h) 		0
-#define hpet_unregister_irq_handler(h)		do { } while (0)
-static irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
-{
-	return 0;
-}
-#endif
-
 struct cmos_rtc {
 	struct rtc_device	*rtc;
 	struct device		*dev;
@@ -96,6 +77,72 @@ static inline int is_intr(u8 rtc_intr)
 
 /*----------------------------------------------------------------*/
 
+/* Much modern x86 hardware has HPETs (10+ MHz timers) which, because
+ * many BIOS programmers don't set up "sane mode" IRQ routing, are mostly
+ * used in a broken "legacy replacement" mode.  The breakage includes
+ * HPET #1 hijacking the IRQ for this RTC, and being unavailable for
+ * other (better) use.
+ *
+ * When that broken mode is in use, platform glue provides a partial
+ * emulation of hardware RTC IRQ facilities using HPET #1.  We don't
+ * want to use HPET for anything except those IRQs though...
+ */
+#ifdef CONFIG_HPET_EMULATE_RTC
+#include <asm/hpet.h>
+#else
+
+static inline int is_hpet_enabled(void)
+{
+	return 0;
+}
+
+static inline int hpet_mask_rtc_irq_bit(unsigned long mask)
+{
+	return 0;
+}
+
+static inline int hpet_set_rtc_irq_bit(unsigned long mask)
+{
+	return 0;
+}
+
+static inline int
+hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
+{
+	return 0;
+}
+
+static inline int hpet_set_periodic_freq(unsigned long freq)
+{
+	return 0;
+}
+
+static inline int hpet_rtc_dropped_irq(void)
+{
+	return 0;
+}
+
+static inline int hpet_rtc_timer_init(void)
+{
+	return 0;
+}
+
+extern irq_handler_t hpet_rtc_interrupt;
+
+static inline int hpet_register_irq_handler(irq_handler_t handler)
+{
+	return 0;
+}
+
+static inline int hpet_unregister_irq_handler(irq_handler_t handler)
+{
+	return 0;
+}
+
+#endif
+
+/*----------------------------------------------------------------*/
+
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
 {
 	/* REVISIT:  if the clock has a "century" register, use
@@ -216,13 +263,14 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	sec = t->time.tm_sec;
 	sec = (sec < 60) ? BIN2BCD(sec) : 0xff;
 
-	hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min, t->time.tm_sec);
 	spin_lock_irq(&rtc_lock);
 
 	/* next rtc irq must not be from previous alarm setting */
 	rtc_control = CMOS_READ(RTC_CONTROL);
 	rtc_control &= ~RTC_AIE;
 	CMOS_WRITE(rtc_control, RTC_CONTROL);
+	hpet_mask_rtc_irq_bit(RTC_AIE);
+
 	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
 	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
 	if (is_intr(rtc_intr))
@@ -240,9 +288,16 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 			CMOS_WRITE(mon, cmos->mon_alrm);
 	}
 
+	/* FIXME the HPET alarm glue currently ignores day_alrm
+	 * and mon_alrm ...
+	 */
+	hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min, t->time.tm_sec);
+
 	if (t->enabled) {
 		rtc_control |= RTC_AIE;
 		CMOS_WRITE(rtc_control, RTC_CONTROL);
+		hpet_set_rtc_irq_bit(RTC_AIE);
+
 		rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
 		rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
 		if (is_intr(rtc_intr))
@@ -270,8 +325,8 @@ static int cmos_irq_set_freq(struct device *dev, int freq)
 	f = 16 - f;
 
 	spin_lock_irqsave(&rtc_lock, flags);
-	if (!hpet_set_periodic_freq(freq))
-		CMOS_WRITE(RTC_REF_CLCK_32KHZ | f, RTC_FREQ_SELECT);
+	hpet_set_periodic_freq(freq);
+	CMOS_WRITE(RTC_REF_CLCK_32KHZ | f, RTC_FREQ_SELECT);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	return 0;
@@ -289,11 +344,13 @@ static int cmos_irq_set_state(struct device *dev, int enabled)
 	spin_lock_irqsave(&rtc_lock, flags);
 	rtc_control = CMOS_READ(RTC_CONTROL);
 
-	if (enabled)
+	if (enabled) {
 		rtc_control |= RTC_PIE;
-	else
+		hpet_set_rtc_irq_bit(RTC_PIE);
+	} else {
 		rtc_control &= ~RTC_PIE;
-
+		hpet_mask_rtc_irq_bit(RTC_PIE);
+	}
 	CMOS_WRITE(rtc_control, RTC_CONTROL);
 
 	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
@@ -319,11 +376,10 @@ cmos_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 	case RTC_AIE_ON:
 	case RTC_UIE_OFF:
 	case RTC_UIE_ON:
-	case RTC_PIE_OFF:
-	case RTC_PIE_ON:
 		if (!is_valid_irq(cmos->irq))
 			return -EINVAL;
 		break;
+	/* PIE ON/OFF is handled by cmos_irq_set_state() */
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -347,17 +403,8 @@ cmos_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 		rtc_control |= RTC_UIE;
 		hpet_set_rtc_irq_bit(RTC_UIE);
 		break;
-	case RTC_PIE_OFF:	/* periodic off */
-		rtc_control &= ~RTC_PIE;
-		hpet_mask_rtc_irq_bit(RTC_PIE);
-		break;
-	case RTC_PIE_ON:	/* periodic on */
-		rtc_control |= RTC_PIE;
-		hpet_set_rtc_irq_bit(RTC_PIE);
-		break;
 	}
-	if (!is_hpet_enabled())
-		CMOS_WRITE(rtc_control, RTC_CONTROL);
+	CMOS_WRITE(rtc_control, RTC_CONTROL);
 
 	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
 	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
@@ -505,18 +552,19 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
 	u8		rtc_control;
 
 	spin_lock(&rtc_lock);
-	/*
-	 * In this case it is HPET RTC interrupt handler
-	 * calling us, with the interrupt information
-	 * passed as arg1, instead of irq.
+
+	/* When the HPET interrupt handler calls us, the interrupt
+	 * status is passed as arg1 instead of the irq number.  But
+	 * always clear irq status, even when HPET is in the way.
+	 *
+	 * Note that HPET and RTC are almost certainly out of phase,
+	 * giving different IRQ status ...
 	 */
+	irqstat = CMOS_READ(RTC_INTR_FLAGS);
+	rtc_control = CMOS_READ(RTC_CONTROL);
 	if (is_hpet_enabled())
 		irqstat = (unsigned long)irq & 0xF0;
-	else {
-		irqstat = CMOS_READ(RTC_INTR_FLAGS);
-		rtc_control = CMOS_READ(RTC_CONTROL);
-		irqstat &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
-	}
+	irqstat &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
 
 	/* All Linux RTC alarms should be treated as if they were oneshot.
 	 * Similar code may be needed in system wakeup paths, in case the
@@ -526,6 +574,8 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
 		rtc_control = CMOS_READ(RTC_CONTROL);
 		rtc_control &= ~RTC_AIE;
 		CMOS_WRITE(rtc_control, RTC_CONTROL);
+		hpet_mask_rtc_irq_bit(RTC_AIE);
+
 		CMOS_READ(RTC_INTR_FLAGS);
 	}
 	spin_unlock(&rtc_lock);
@@ -632,8 +682,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	 * do something about other clock frequencies.
 	 */
 	cmos_rtc.rtc->irq_freq = 1024;
-	if (!hpet_set_periodic_freq(cmos_rtc.rtc->irq_freq))
-		CMOS_WRITE(RTC_REF_CLCK_32KHZ | 0x06, RTC_FREQ_SELECT);
+	hpet_set_periodic_freq(cmos_rtc.rtc->irq_freq);
+	CMOS_WRITE(RTC_REF_CLCK_32KHZ | 0x06, RTC_FREQ_SELECT);
 
 	/* disable irqs.
 	 *
@@ -643,6 +693,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	rtc_control = CMOS_READ(RTC_CONTROL);
 	rtc_control &= ~(RTC_PIE | RTC_AIE | RTC_UIE);
 	CMOS_WRITE(rtc_control, RTC_CONTROL);
+	hpet_mask_rtc_irq_bit(RTC_PIE | RTC_AIE | RTC_UIE);
+
 	CMOS_READ(RTC_INTR_FLAGS);
 
 	spin_unlock_irq(&rtc_lock);
@@ -690,7 +742,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 		goto cleanup2;
 	}
 
-	pr_info("%s: alarms up to one %s%s\n",
+	pr_info("%s: alarms up to one %s%s%s\n",
 			cmos_rtc.rtc->dev.bus_id,
 			is_valid_irq(rtc_irq)
 				?  (cmos_rtc.mon_alrm
@@ -698,8 +750,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 					: (cmos_rtc.day_alrm
 						? "month" : "day"))
 				: "no",
-			cmos_rtc.century ? ", y3k" : ""
-			);
+			cmos_rtc.century ? ", y3k" : "",
+			is_hpet_enabled() ? ", hpet irqs" : "");
 
 	return 0;
 
@@ -720,8 +772,10 @@ static void cmos_do_shutdown(void)
 
 	spin_lock_irq(&rtc_lock);
 	rtc_control = CMOS_READ(RTC_CONTROL);
-	rtc_control &= ~(RTC_PIE|RTC_AIE|RTC_UIE);
+	rtc_control &= ~RTC_IRQMASK;
 	CMOS_WRITE(rtc_control, RTC_CONTROL);
+	hpet_mask_rtc_irq_bit(RTC_IRQMASK);
+
 	CMOS_READ(RTC_INTR_FLAGS);
 	spin_unlock_irq(&rtc_lock);
 }
@@ -764,12 +818,16 @@ static int cmos_suspend(struct device *dev, pm_message_t mesg)
 	cmos->suspend_ctrl = tmp = CMOS_READ(RTC_CONTROL);
 	if (tmp & (RTC_PIE|RTC_AIE|RTC_UIE)) {
 		unsigned char	irqstat;
+		unsigned char	mask;
 
 		if (do_wake)
-			tmp &= ~(RTC_PIE|RTC_UIE);
+			mask = RTC_IRQMASK & ~RTC_AIE;
 		else
-			tmp &= ~(RTC_PIE|RTC_AIE|RTC_UIE);
+			mask = RTC_IRQMASK;
+		tmp &= ~mask;
 		CMOS_WRITE(tmp, RTC_CONTROL);
+		hpet_mask_rtc_irq_bit(mask);
+
 		irqstat = CMOS_READ(RTC_INTR_FLAGS);
 		irqstat &= (tmp & RTC_IRQMASK) | RTC_IRQF;
 		if (is_intr(irqstat))
@@ -799,7 +857,8 @@ static int cmos_resume(struct device *dev)
 	unsigned char	tmp = cmos->suspend_ctrl;
 
 	/* re-enable any irqs previously active */
-	if (tmp & (RTC_PIE|RTC_AIE|RTC_UIE)) {
+	if (tmp & RTC_IRQMASK) {
+		unsigned char	mask;
 
 		if (cmos->enabled_wake) {
 			if (cmos->wake_off)
@@ -810,18 +869,28 @@ static int cmos_resume(struct device *dev)
 		}
 
 		spin_lock_irq(&rtc_lock);
-		CMOS_WRITE(tmp, RTC_CONTROL);
-		tmp = CMOS_READ(RTC_INTR_FLAGS);
-		tmp &= (cmos->suspend_ctrl & RTC_IRQMASK) | RTC_IRQF;
-		if (is_intr(tmp))
-			rtc_update_irq(cmos->rtc, 1, tmp);
+		do {
+			CMOS_WRITE(tmp, RTC_CONTROL);
+			hpet_set_rtc_irq_bit(tmp & RTC_IRQMASK);
+
+			mask = CMOS_READ(RTC_INTR_FLAGS);
+			mask &= (tmp & RTC_IRQMASK) | RTC_IRQF;
+			if (!is_intr(mask))
+				break;
+
+			/* force one-shot behavior if HPET blocked
+			 * the wake alarm's irq
+			 */
+			rtc_update_irq(cmos->rtc, 1, mask);
+			tmp &= ~RTC_AIE;
+			hpet_mask_rtc_irq_bit(RTC_AIE);
+		} while (mask & RTC_AIE);
 		spin_unlock_irq(&rtc_lock);
 	}
 
 	pr_debug("%s: resume, ctrl %02x\n",
 			cmos_rtc.rtc->dev.bus_id,
-			cmos->suspend_ctrl);
-
+			tmp);
 
 	return 0;
 }
-- 
GitLab


From 4cd0c5c40b64ef9fd94fb8382dade2fd28f2b935 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:30:44 -0700
Subject: [PATCH 351/853] rtc: rtc-s3c: add __devexit and __devinit markers

Add the relevant __devinit and __devexit attributes to the rtc-s3c driver.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-s3c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index fed86e507fd..b81ba7020d9 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -430,7 +430,7 @@ static void s3c_rtc_enable(struct platform_device *pdev, int en)
 	}
 }
 
-static int s3c_rtc_remove(struct platform_device *dev)
+static int __devexit s3c_rtc_remove(struct platform_device *dev)
 {
 	struct rtc_device *rtc = platform_get_drvdata(dev);
 
@@ -447,7 +447,7 @@ static int s3c_rtc_remove(struct platform_device *dev)
 	return 0;
 }
 
-static int s3c_rtc_probe(struct platform_device *pdev)
+static int __devinit s3c_rtc_probe(struct platform_device *pdev)
 {
 	struct rtc_device *rtc;
 	struct resource *res;
@@ -560,7 +560,7 @@ static int s3c_rtc_resume(struct platform_device *pdev)
 
 static struct platform_driver s3c2410_rtcdrv = {
 	.probe		= s3c_rtc_probe,
-	.remove		= s3c_rtc_remove,
+	.remove		= __devexit_p(s3c_rtc_remove),
 	.suspend	= s3c_rtc_suspend,
 	.resume		= s3c_rtc_resume,
 	.driver		= {
-- 
GitLab


From 773be7ee97c11fbb6b8a912a58b268dbe8a6a3fe Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:30:45 -0700
Subject: [PATCH 352/853] rtc: rtc-s3c: update IRQ handling

The rtc-s3c.c driver has been using its own ioctl() handling for alarm
and periodic interrupts, doing work that should now be done by the rtc
core code.

Change to using the .irq_set_freq and .irq_set_state driver entries and
remove the .ioctl handling.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-s3c.c | 83 +++++++++----------------------------------
 1 file changed, 16 insertions(+), 67 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index b81ba7020d9..54b1ebb0150 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -36,10 +36,8 @@ static struct resource *s3c_rtc_mem;
 static void __iomem *s3c_rtc_base;
 static int s3c_rtc_alarmno = NO_IRQ;
 static int s3c_rtc_tickno  = NO_IRQ;
-static int s3c_rtc_freq    = 1;
 
 static DEFINE_SPINLOCK(s3c_rtc_pie_lock);
-static unsigned int tick_count;
 
 /* IRQ Handlers */
 
@@ -55,7 +53,7 @@ static irqreturn_t s3c_rtc_tickirq(int irq, void *id)
 {
 	struct rtc_device *rdev = id;
 
-	rtc_update_irq(rdev, tick_count++, RTC_PF | RTC_IRQF);
+	rtc_update_irq(rdev, 1, RTC_PF | RTC_IRQF);
 	return IRQ_HANDLED;
 }
 
@@ -74,35 +72,37 @@ static void s3c_rtc_setaie(int to)
 	writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
 }
 
-static void s3c_rtc_setpie(int to)
+static int s3c_rtc_setpie(struct device *dev, int enabled)
 {
 	unsigned int tmp;
 
-	pr_debug("%s: pie=%d\n", __func__, to);
+	pr_debug("%s: pie=%d\n", __func__, enabled);
 
 	spin_lock_irq(&s3c_rtc_pie_lock);
 	tmp = readb(s3c_rtc_base + S3C2410_TICNT) & ~S3C2410_TICNT_ENABLE;
 
-	if (to)
+	if (enabled)
 		tmp |= S3C2410_TICNT_ENABLE;
 
 	writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
 	spin_unlock_irq(&s3c_rtc_pie_lock);
+
+	return 0;
 }
 
-static void s3c_rtc_setfreq(int freq)
+static int s3c_rtc_setfreq(struct device *dev, int freq)
 {
 	unsigned int tmp;
 
 	spin_lock_irq(&s3c_rtc_pie_lock);
-	tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
-
-	s3c_rtc_freq = freq;
 
+	tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
 	tmp |= (128 / freq)-1;
 
 	writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
 	spin_unlock_irq(&s3c_rtc_pie_lock);
+
+	return 0;
 }
 
 /* Time read/write */
@@ -267,12 +267,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	writeb(alrm_en, base + S3C2410_RTCALM);
 
-	if (0) {
-		alrm_en = readb(base + S3C2410_RTCALM);
-		alrm_en &= ~S3C2410_RTCALM_ALMEN;
-		writeb(alrm_en, base + S3C2410_RTCALM);
-		disable_irq_wake(s3c_rtc_alarmno);
-	}
+	s3c_rtc_setaie(alrm->enabled);
 
 	if (alrm->enabled)
 		enable_irq_wake(s3c_rtc_alarmno);
@@ -282,59 +277,12 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	return 0;
 }
 
-static int s3c_rtc_ioctl(struct device *dev,
-			 unsigned int cmd, unsigned long arg)
-{
-	unsigned int ret = -ENOIOCTLCMD;
-
-	switch (cmd) {
-	case RTC_AIE_OFF:
-	case RTC_AIE_ON:
-		s3c_rtc_setaie((cmd == RTC_AIE_ON) ? 1 : 0);
-		ret = 0;
-		break;
-
-	case RTC_PIE_OFF:
-	case RTC_PIE_ON:
-		tick_count = 0;
-		s3c_rtc_setpie((cmd == RTC_PIE_ON) ? 1 : 0);
-		ret = 0;
-		break;
-
-	case RTC_IRQP_READ:
-		ret = put_user(s3c_rtc_freq, (unsigned long __user *)arg);
-		break;
-
-	case RTC_IRQP_SET:
-		if (!is_power_of_2(arg)) {
-			ret = -EINVAL;
-			goto exit;
-		}
-
-		pr_debug("s3c2410_rtc: setting frequency %ld\n", arg);
-
-		s3c_rtc_setfreq(arg);
-		ret = 0;
-		break;
-
-	case RTC_UIE_ON:
-	case RTC_UIE_OFF:
-		ret = -EINVAL;
-	}
-
- exit:
-	return ret;
-}
-
 static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
 {
 	unsigned int ticnt = readb(s3c_rtc_base + S3C2410_TICNT);
 
 	seq_printf(seq, "periodic_IRQ\t: %s\n",
 		     (ticnt & S3C2410_TICNT_ENABLE) ? "yes" : "no" );
-
-	seq_printf(seq, "periodic_freq\t: %d\n", s3c_rtc_freq);
-
 	return 0;
 }
 
@@ -374,7 +322,7 @@ static void s3c_rtc_release(struct device *dev)
 
 	/* do not clear AIE here, it may be needed for wake */
 
-	s3c_rtc_setpie(0);
+	s3c_rtc_setpie(dev, 0);
 	free_irq(s3c_rtc_alarmno, rtc_dev);
 	free_irq(s3c_rtc_tickno, rtc_dev);
 }
@@ -382,11 +330,12 @@ static void s3c_rtc_release(struct device *dev)
 static const struct rtc_class_ops s3c_rtcops = {
 	.open		= s3c_rtc_open,
 	.release	= s3c_rtc_release,
-	.ioctl		= s3c_rtc_ioctl,
 	.read_time	= s3c_rtc_gettime,
 	.set_time	= s3c_rtc_settime,
 	.read_alarm	= s3c_rtc_getalarm,
 	.set_alarm	= s3c_rtc_setalarm,
+	.irq_set_freq	= s3c_rtc_setfreq,
+	.irq_set_state	= s3c_rtc_setpie,
 	.proc	        = s3c_rtc_proc,
 };
 
@@ -437,7 +386,7 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
 	platform_set_drvdata(dev, NULL);
 	rtc_device_unregister(rtc);
 
-	s3c_rtc_setpie(0);
+	s3c_rtc_setpie(&dev->dev, 0);
 	s3c_rtc_setaie(0);
 
 	iounmap(s3c_rtc_base);
@@ -504,7 +453,7 @@ static int __devinit s3c_rtc_probe(struct platform_device *pdev)
  	pr_debug("s3c2410_rtc: RTCCON=%02x\n",
 		 readb(s3c_rtc_base + S3C2410_RTCCON));
 
-	s3c_rtc_setfreq(s3c_rtc_freq);
+	s3c_rtc_setfreq(&pdev->dev, 1);
 
 	/* register RTC and exit */
 
-- 
GitLab


From 449321b39f6c6ebfa15d6da24f134240bd51db29 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 23 Jul 2008 21:30:46 -0700
Subject: [PATCH 353/853] rtc-at91rm9200: avoid spurious irqs

This fixes kernel bug http://bugzilla.kernel.org/show_bug.cgi?id=11112 (bogus
RTC update IRQs reported) for rtc-at91rm9200 by scrubbing old IRQ status
before enabling IRQs.

It also removes nonfunctional periodic IRQ support from this driver;
only update IRQs are reported, or provided by the hardware.

I suspect some other RTCs probably have versions of #11112; it's easy to
overlook, since most non-RTC drivers don't care about spurious IRQs:
they're not reported to userspace.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Report-by: W Unruh <unruh@physics.ubc.ca>
Cc: Andrew Victor <avictor.za@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-at91rm9200.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 9c3db934cc2..cd32d05db77 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -171,8 +171,10 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 		| BIN2BCD(tm.tm_mday) << 24
 		| AT91_RTC_DATEEN | AT91_RTC_MTHEN);
 
-	if (alrm->enabled)
+	if (alrm->enabled) {
+		at91_sys_write(AT91_RTC_SCCR, AT91_RTC_ALARM);
 		at91_sys_write(AT91_RTC_IER, AT91_RTC_ALARM);
+	}
 
 	pr_debug("%s(): %4d-%02d-%02d %02d:%02d:%02d\n", __func__,
 		at91_alarm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour,
@@ -191,28 +193,22 @@ static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
 
 	pr_debug("%s(): cmd=%08x, arg=%08lx.\n", __func__, cmd, arg);
 
+	/* important:  scrub old status before enabling IRQs */
 	switch (cmd) {
 	case RTC_AIE_OFF:	/* alarm off */
 		at91_sys_write(AT91_RTC_IDR, AT91_RTC_ALARM);
 		break;
 	case RTC_AIE_ON:	/* alarm on */
+		at91_sys_write(AT91_RTC_SCCR, AT91_RTC_ALARM);
 		at91_sys_write(AT91_RTC_IER, AT91_RTC_ALARM);
 		break;
 	case RTC_UIE_OFF:	/* update off */
-	case RTC_PIE_OFF:	/* periodic off */
 		at91_sys_write(AT91_RTC_IDR, AT91_RTC_SECEV);
 		break;
 	case RTC_UIE_ON:	/* update on */
-	case RTC_PIE_ON:	/* periodic on */
+		at91_sys_write(AT91_RTC_SCCR, AT91_RTC_SECEV);
 		at91_sys_write(AT91_RTC_IER, AT91_RTC_SECEV);
 		break;
-	case RTC_IRQP_READ:	/* read periodic alarm frequency */
-		ret = put_user(AT91_RTC_FREQ, (unsigned long *) arg);
-		break;
-	case RTC_IRQP_SET:	/* set periodic alarm frequency */
-		if (arg != AT91_RTC_FREQ)
-			ret = -EINVAL;
-		break;
 	default:
 		ret = -ENOIOCTLCMD;
 		break;
-- 
GitLab


From 7e2a31da854dcf8324012a83a31b40bc11e52589 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 23 Jul 2008 21:30:47 -0700
Subject: [PATCH 354/853] rtc-cmos: avoid spurious irqs

This fixes kernel bug http://bugzilla.kernel.org/show_bug.cgi?id=11112 (bogus
RTC update IRQs reported) for rtc-cmos, in two ways:

  - When HPET is stealing the IRQs, use the first IRQ to grab
    the seconds counter which will be monitored (instead of
    using whatever was previously in that memory);

  - In sane IRQ handling modes, scrub out old IRQ status before
    enabling IRQs.

The latter is done by tightening up IRQ handling for rtc-cmos everywhere,
also ensuring that when HPET is used it's the only thing triggering IRQ
reports to userspace; net object shrink.

Also fix a bogus HPET message related to its RTC emulation.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Report-by: W Unruh <unruh@physics.ubc.ca>
Cc: Andrew Victor <avictor.za@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/hpet.c |  10 ++-
 drivers/rtc/rtc-cmos.c | 140 +++++++++++++++++++----------------------
 2 files changed, 70 insertions(+), 80 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 0ea6a19bfdf..ad2b15a1334 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -468,7 +468,7 @@ void hpet_disable(void)
 #define RTC_NUM_INTS		1
 
 static unsigned long hpet_rtc_flags;
-static unsigned long hpet_prev_update_sec;
+static int hpet_prev_update_sec;
 static struct rtc_time hpet_alarm_time;
 static unsigned long hpet_pie_count;
 static unsigned long hpet_t1_cmp;
@@ -575,6 +575,9 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
 
 	hpet_rtc_flags |= bit_mask;
 
+	if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE))
+		hpet_prev_update_sec = -1;
+
 	if (!oldbits)
 		hpet_rtc_timer_init();
 
@@ -652,7 +655,7 @@ static void hpet_rtc_timer_reinit(void)
 		if (hpet_rtc_flags & RTC_PIE)
 			hpet_pie_count += lost_ints;
 		if (printk_ratelimit())
-			printk(KERN_WARNING "rtc: lost %d interrupts\n",
+			printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n",
 				lost_ints);
 	}
 }
@@ -670,7 +673,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 
 	if (hpet_rtc_flags & RTC_UIE &&
 	    curr_time.tm_sec != hpet_prev_update_sec) {
-		rtc_int_flag = RTC_UF;
+		if (hpet_prev_update_sec >= 0)
+			rtc_int_flag = RTC_UF;
 		hpet_prev_update_sec = curr_time.tm_sec;
 	}
 
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index e9984650ea9..6ea349aba3b 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -235,11 +235,56 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 	return 0;
 }
 
+static void cmos_checkintr(struct cmos_rtc *cmos, unsigned char rtc_control)
+{
+	unsigned char	rtc_intr;
+
+	/* NOTE after changing RTC_xIE bits we always read INTR_FLAGS;
+	 * allegedly some older rtcs need that to handle irqs properly
+	 */
+	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
+
+	if (is_hpet_enabled())
+		return;
+
+	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
+	if (is_intr(rtc_intr))
+		rtc_update_irq(cmos->rtc, 1, rtc_intr);
+}
+
+static void cmos_irq_enable(struct cmos_rtc *cmos, unsigned char mask)
+{
+	unsigned char	rtc_control;
+
+	/* flush any pending IRQ status, notably for update irqs,
+	 * before we enable new IRQs
+	 */
+	rtc_control = CMOS_READ(RTC_CONTROL);
+	cmos_checkintr(cmos, rtc_control);
+
+	rtc_control |= mask;
+	CMOS_WRITE(rtc_control, RTC_CONTROL);
+	hpet_set_rtc_irq_bit(mask);
+
+	cmos_checkintr(cmos, rtc_control);
+}
+
+static void cmos_irq_disable(struct cmos_rtc *cmos, unsigned char mask)
+{
+	unsigned char	rtc_control;
+
+	rtc_control = CMOS_READ(RTC_CONTROL);
+	rtc_control &= ~mask;
+	CMOS_WRITE(rtc_control, RTC_CONTROL);
+	hpet_mask_rtc_irq_bit(mask);
+
+	cmos_checkintr(cmos, rtc_control);
+}
+
 static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
 	unsigned char	mon, mday, hrs, min, sec;
-	unsigned char	rtc_control, rtc_intr;
 
 	if (!is_valid_irq(cmos->irq))
 		return -EIO;
@@ -266,15 +311,7 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	spin_lock_irq(&rtc_lock);
 
 	/* next rtc irq must not be from previous alarm setting */
-	rtc_control = CMOS_READ(RTC_CONTROL);
-	rtc_control &= ~RTC_AIE;
-	CMOS_WRITE(rtc_control, RTC_CONTROL);
-	hpet_mask_rtc_irq_bit(RTC_AIE);
-
-	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
-	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
-	if (is_intr(rtc_intr))
-		rtc_update_irq(cmos->rtc, 1, rtc_intr);
+	cmos_irq_disable(cmos, RTC_AIE);
 
 	/* update alarm */
 	CMOS_WRITE(hrs, RTC_HOURS_ALARM);
@@ -293,16 +330,8 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	 */
 	hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min, t->time.tm_sec);
 
-	if (t->enabled) {
-		rtc_control |= RTC_AIE;
-		CMOS_WRITE(rtc_control, RTC_CONTROL);
-		hpet_set_rtc_irq_bit(RTC_AIE);
-
-		rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
-		rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
-		if (is_intr(rtc_intr))
-			rtc_update_irq(cmos->rtc, 1, rtc_intr);
-	}
+	if (t->enabled)
+		cmos_irq_enable(cmos, RTC_AIE);
 
 	spin_unlock_irq(&rtc_lock);
 
@@ -335,28 +364,17 @@ static int cmos_irq_set_freq(struct device *dev, int freq)
 static int cmos_irq_set_state(struct device *dev, int enabled)
 {
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
-	unsigned char	rtc_control, rtc_intr;
 	unsigned long	flags;
 
 	if (!is_valid_irq(cmos->irq))
 		return -ENXIO;
 
 	spin_lock_irqsave(&rtc_lock, flags);
-	rtc_control = CMOS_READ(RTC_CONTROL);
-
-	if (enabled) {
-		rtc_control |= RTC_PIE;
-		hpet_set_rtc_irq_bit(RTC_PIE);
-	} else {
-		rtc_control &= ~RTC_PIE;
-		hpet_mask_rtc_irq_bit(RTC_PIE);
-	}
-	CMOS_WRITE(rtc_control, RTC_CONTROL);
 
-	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
-	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
-	if (is_intr(rtc_intr))
-		rtc_update_irq(cmos->rtc, 1, rtc_intr);
+	if (enabled)
+		cmos_irq_enable(cmos, RTC_PIE);
+	else
+		cmos_irq_disable(cmos, RTC_PIE);
 
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	return 0;
@@ -368,7 +386,6 @@ static int
 cmos_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 {
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
-	unsigned char	rtc_control, rtc_intr;
 	unsigned long	flags;
 
 	switch (cmd) {
@@ -385,32 +402,20 @@ cmos_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 	}
 
 	spin_lock_irqsave(&rtc_lock, flags);
-	rtc_control = CMOS_READ(RTC_CONTROL);
 	switch (cmd) {
 	case RTC_AIE_OFF:	/* alarm off */
-		rtc_control &= ~RTC_AIE;
-		hpet_mask_rtc_irq_bit(RTC_AIE);
+		cmos_irq_disable(cmos, RTC_AIE);
 		break;
 	case RTC_AIE_ON:	/* alarm on */
-		rtc_control |= RTC_AIE;
-		hpet_set_rtc_irq_bit(RTC_AIE);
+		cmos_irq_enable(cmos, RTC_AIE);
 		break;
 	case RTC_UIE_OFF:	/* update off */
-		rtc_control &= ~RTC_UIE;
-		hpet_mask_rtc_irq_bit(RTC_UIE);
+		cmos_irq_disable(cmos, RTC_UIE);
 		break;
 	case RTC_UIE_ON:	/* update on */
-		rtc_control |= RTC_UIE;
-		hpet_set_rtc_irq_bit(RTC_UIE);
+		cmos_irq_enable(cmos, RTC_UIE);
 		break;
 	}
-	CMOS_WRITE(rtc_control, RTC_CONTROL);
-
-	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
-	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
-	if (is_intr(rtc_intr))
-		rtc_update_irq(cmos->rtc, 1, rtc_intr);
-
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	return 0;
 }
@@ -571,7 +576,6 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
 	 * alarm woke the system.
 	 */
 	if (irqstat & RTC_AIE) {
-		rtc_control = CMOS_READ(RTC_CONTROL);
 		rtc_control &= ~RTC_AIE;
 		CMOS_WRITE(rtc_control, RTC_CONTROL);
 		hpet_mask_rtc_irq_bit(RTC_AIE);
@@ -685,17 +689,10 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	hpet_set_periodic_freq(cmos_rtc.rtc->irq_freq);
 	CMOS_WRITE(RTC_REF_CLCK_32KHZ | 0x06, RTC_FREQ_SELECT);
 
-	/* disable irqs.
-	 *
-	 * NOTE after changing RTC_xIE bits we always read INTR_FLAGS;
-	 * allegedly some older rtcs need that to handle irqs properly
-	 */
-	rtc_control = CMOS_READ(RTC_CONTROL);
-	rtc_control &= ~(RTC_PIE | RTC_AIE | RTC_UIE);
-	CMOS_WRITE(rtc_control, RTC_CONTROL);
-	hpet_mask_rtc_irq_bit(RTC_PIE | RTC_AIE | RTC_UIE);
+	/* disable irqs */
+	cmos_irq_disable(&cmos_rtc, RTC_PIE | RTC_AIE | RTC_UIE);
 
-	CMOS_READ(RTC_INTR_FLAGS);
+	rtc_control = CMOS_READ(RTC_CONTROL);
 
 	spin_unlock_irq(&rtc_lock);
 
@@ -768,15 +765,8 @@ cleanup0:
 
 static void cmos_do_shutdown(void)
 {
-	unsigned char	rtc_control;
-
 	spin_lock_irq(&rtc_lock);
-	rtc_control = CMOS_READ(RTC_CONTROL);
-	rtc_control &= ~RTC_IRQMASK;
-	CMOS_WRITE(rtc_control, RTC_CONTROL);
-	hpet_mask_rtc_irq_bit(RTC_IRQMASK);
-
-	CMOS_READ(RTC_INTR_FLAGS);
+	cmos_irq_disable(&cmos_rtc, RTC_IRQMASK);
 	spin_unlock_irq(&rtc_lock);
 }
 
@@ -817,7 +807,6 @@ static int cmos_suspend(struct device *dev, pm_message_t mesg)
 	spin_lock_irq(&rtc_lock);
 	cmos->suspend_ctrl = tmp = CMOS_READ(RTC_CONTROL);
 	if (tmp & (RTC_PIE|RTC_AIE|RTC_UIE)) {
-		unsigned char	irqstat;
 		unsigned char	mask;
 
 		if (do_wake)
@@ -828,10 +817,7 @@ static int cmos_suspend(struct device *dev, pm_message_t mesg)
 		CMOS_WRITE(tmp, RTC_CONTROL);
 		hpet_mask_rtc_irq_bit(mask);
 
-		irqstat = CMOS_READ(RTC_INTR_FLAGS);
-		irqstat &= (tmp & RTC_IRQMASK) | RTC_IRQF;
-		if (is_intr(irqstat))
-			rtc_update_irq(cmos->rtc, 1, irqstat);
+		cmos_checkintr(cmos, tmp);
 	}
 	spin_unlock_irq(&rtc_lock);
 
@@ -875,7 +861,7 @@ static int cmos_resume(struct device *dev)
 
 			mask = CMOS_READ(RTC_INTR_FLAGS);
 			mask &= (tmp & RTC_IRQMASK) | RTC_IRQF;
-			if (!is_intr(mask))
+			if (!is_hpet_enabled() || !is_intr(mask))
 				break;
 
 			/* force one-shot behavior if HPET blocked
-- 
GitLab


From 4cad4431fcd872a1b2efc093b0db6df943f5a898 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Date: Wed, 23 Jul 2008 21:30:48 -0700
Subject: [PATCH 355/853] rtc-vr41xx: add irq_set_freq() and irq_set_state()

Implement the ioctls RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET and
RTC_IRQP_READ in the standard RTC way.

Thanks Dave for noticing it.

Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Cc: David Brownell <david-b@pacbell.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-vr41xx.c | 65 +++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index be9c70d0b19..884b635f028 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -1,7 +1,7 @@
 /*
  *  Driver for NEC VR4100 series Real Time Clock unit.
  *
- *  Copyright (C) 2003-2006  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ *  Copyright (C) 2003-2008  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -34,7 +34,7 @@
 
 MODULE_AUTHOR("Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>");
 MODULE_DESCRIPTION("NEC VR4100 series RTC driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 
 /* RTC 1 registers */
 #define ETIMELREG		0x00
@@ -82,7 +82,6 @@ static unsigned long epoch = 1970;	/* Jan 1 1970 00:00:00 */
 
 static DEFINE_SPINLOCK(rtc_lock);
 static char rtc_name[] = "RTC";
-static unsigned long periodic_frequency;
 static unsigned long periodic_count;
 static unsigned int alarm_enabled;
 static int aie_irq = -1;
@@ -207,10 +206,37 @@ static int vr41xx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
 	return 0;
 }
 
-static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+static int vr41xx_rtc_irq_set_freq(struct device *dev, int freq)
 {
 	unsigned long count;
 
+	count = RTC_FREQUENCY;
+	do_div(count, freq);
+
+	periodic_count = count;
+
+	spin_lock_irq(&rtc_lock);
+
+	rtc1_write(RTCL1LREG, count);
+	rtc1_write(RTCL1HREG, count >> 16);
+
+	spin_unlock_irq(&rtc_lock);
+
+	return 0;
+}
+
+static int vr41xx_rtc_irq_set_state(struct device *dev, int enabled)
+{
+	if (enabled)
+		enable_irq(pie_irq);
+	else
+		disable_irq(pie_irq);
+
+	return 0;
+}
+
+static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
 	switch (cmd) {
 	case RTC_AIE_ON:
 		spin_lock_irq(&rtc_lock);
@@ -230,33 +256,6 @@ static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long
 			alarm_enabled = 0;
 		}
 
-		spin_unlock_irq(&rtc_lock);
-		break;
-	case RTC_PIE_ON:
-		enable_irq(pie_irq);
-		break;
-	case RTC_PIE_OFF:
-		disable_irq(pie_irq);
-		break;
-	case RTC_IRQP_READ:
-		return put_user(periodic_frequency, (unsigned long __user *)arg);
-		break;
-	case RTC_IRQP_SET:
-		if (arg > MAX_PERIODIC_RATE)
-			return -EINVAL;
-
-		periodic_frequency = arg;
-
-		count = RTC_FREQUENCY;
-		do_div(count, arg);
-
-		periodic_count = count;
-
-		spin_lock_irq(&rtc_lock);
-
-		rtc1_write(RTCL1LREG, count);
-		rtc1_write(RTCL1HREG, count >> 16);
-
 		spin_unlock_irq(&rtc_lock);
 		break;
 	case RTC_EPOCH_READ:
@@ -309,6 +308,8 @@ static const struct rtc_class_ops vr41xx_rtc_ops = {
 	.set_time	= vr41xx_rtc_set_time,
 	.read_alarm	= vr41xx_rtc_read_alarm,
 	.set_alarm	= vr41xx_rtc_set_alarm,
+	.irq_set_freq	= vr41xx_rtc_irq_set_freq,
+	.irq_set_state	= vr41xx_rtc_irq_set_state,
 };
 
 static int __devinit rtc_probe(struct platform_device *pdev)
@@ -346,6 +347,8 @@ static int __devinit rtc_probe(struct platform_device *pdev)
 		goto err_iounmap_all;
 	}
 
+	rtc->max_user_freq = MAX_PERIODIC_RATE;
+
 	spin_lock_irq(&rtc_lock);
 
 	rtc1_write(ECMPLREG, 0);
-- 
GitLab


From 2ece5f43b041b96fa2a05107a10a6b0ea0c03a3b Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Wed, 23 Jul 2008 21:30:49 -0700
Subject: [PATCH 356/853] fbdev: add the carmine FB driver

Basic FB driver for the carmine chip.  The driver registers two FB devices for
the two possible screens.  The DRAM settings can be switched via Kconfig
(between eval board and custom).

Signed-off-by: Sebastian Siewior <bigeasy@linutronix.de>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/Kconfig          |  26 ++
 drivers/video/Makefile         |   1 +
 drivers/video/carminefb.c      | 790 +++++++++++++++++++++++++++++++++
 drivers/video/carminefb.h      |  64 +++
 drivers/video/carminefb_regs.h | 159 +++++++
 5 files changed, 1040 insertions(+)
 create mode 100644 drivers/video/carminefb.c
 create mode 100644 drivers/video/carminefb.h
 create mode 100644 drivers/video/carminefb_regs.h

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 9b887ef64ff..7072d2c5a04 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1658,6 +1658,32 @@ config FB_PM3
 	  similar boards, 3DLabs Permedia3 Create!, Appian Jeronimo 2000
 	  and maybe other boards.
 
+config FB_CARMINE
+	tristate "Fujitsu carmine frame buffer support"
+	depends on FB && PCI
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	help
+	  This is the frame buffer device driver for the Fujitsu Carmine chip.
+	  The driver provides two independent frame buffer devices.
+
+choice
+	depends on FB_CARMINE
+	prompt "DRAM timing"
+	default FB_CARMINE_DRAM_EVAL
+
+config FB_CARMINE_DRAM_EVAL
+	bool "Eval board timings"
+	help
+	  Use timings which work on the eval card.
+
+config CARMINE_DRAM_CUSTOM
+	bool "Custom board timings"
+	help
+	  Use custom board timings.
+endchoice
+
 config FB_AU1100
 	bool "Au1100 LCD Driver"
 	depends on (FB = y) && MIPS && SOC_AU1100
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 04bca35403f..7ee85c0d2e5 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -117,6 +117,7 @@ obj-$(CONFIG_FB_SM501)            += sm501fb.o
 obj-$(CONFIG_FB_XILINX)           += xilinxfb.o
 obj-$(CONFIG_FB_OMAP)             += omap/
 obj-$(CONFIG_XEN_FBDEV_FRONTEND)  += xen-fbfront.o
+obj-$(CONFIG_FB_CARMINE)          += carminefb.o
 
 # Platform or fallback drivers go here
 obj-$(CONFIG_FB_UVESA)            += uvesafb.o
diff --git a/drivers/video/carminefb.c b/drivers/video/carminefb.c
new file mode 100644
index 00000000000..e15bb447440
--- /dev/null
+++ b/drivers/video/carminefb.c
@@ -0,0 +1,790 @@
+/*
+ * Frame buffer driver for the Carmine GPU.
+ *
+ * The driver configures the GPU as follows
+ * - FB0 is display 0 with unique memory area
+ * - FB1 is display 1 with unique memory area
+ * - both display use 32 bit colors
+ */
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fb.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#include "carminefb.h"
+#include "carminefb_regs.h"
+
+#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
+#error  "The endianness of the target host has not been defined."
+#endif
+
+/*
+ * The initial video mode can be supplied via two different ways:
+ * - as a string that is passed to fb_find_mode() (module option fb_mode_str)
+ * - as an integer that picks the video mode from carmine_modedb[] (module
+ *   option fb_mode)
+ *
+ * If nothing is used then the initial video mode will be the
+ * CARMINEFB_DEFAULT_VIDEO_MODE member of the carmine_modedb[].
+ */
+#define CARMINEFB_DEFAULT_VIDEO_MODE	1
+
+static unsigned int fb_mode = CARMINEFB_DEFAULT_VIDEO_MODE;
+module_param(fb_mode, uint, 0444);	/* perm is an octal sysfs mode */
+MODULE_PARM_DESC(fb_mode, "Initial video mode as integer.");
+
+static char *fb_mode_str;
+module_param(fb_mode_str, charp, 0444);
+MODULE_PARM_DESC(fb_mode_str, "Initial video mode in characters.");
+
+/*
+ * Displays to drive, as a bit mask (default: both displays):
+ * 0b000 None
+ * 0b001 Display 0
+ * 0b010 Display 1
+ */
+static int fb_displays = CARMINE_USE_DISPLAY0 | CARMINE_USE_DISPLAY1;
+module_param(fb_displays, int, 0444);
+MODULE_PARM_DESC(fb_displays, "Bit mode, which displays are used");
+
+struct carmine_hw {
+	void __iomem *v_regs;
+	void __iomem *screen_mem;
+	struct fb_info *fb[MAX_DISPLAY];
+};
+
+struct carmine_resolution {
+	u32 htp;
+	u32 hsp;
+	u32 hsw;
+	u32 hdp;
+	u32 vtr;
+	u32 vsp;
+	u32 vsw;
+	u32 vdp;
+	u32 disp_mode;
+};
+
+struct carmine_fb {
+	void __iomem *display_reg;
+	void __iomem *screen_base;
+	u32 smem_offset;
+	u32 cur_mode;
+	u32 new_mode;
+	struct carmine_resolution *res;
+	u32 pseudo_palette[16];
+};
+
+static struct fb_fix_screeninfo carminefb_fix __devinitdata = {
+	.id = "Carmine",
+	.type = FB_TYPE_PACKED_PIXELS,
+	.visual = FB_VISUAL_TRUECOLOR,
+	.accel = FB_ACCEL_NONE,
+};
+
+static const struct fb_videomode carmine_modedb[] = {
+	{
+		.name		= "640x480",
+		.xres		= 640,
+		.yres		= 480,
+	}, {
+		.name		= "800x600",
+		.xres		= 800,
+		.yres		= 600,
+	},
+};
+
+static struct carmine_resolution car_modes[] = {
+	{
+		/* 640x480 */
+		.htp = 800,
+		.hsp = 672,
+		.hsw = 96,
+		.hdp = 640,
+		.vtr = 525,
+		.vsp = 490,
+		.vsw = 2,
+		.vdp = 480,
+		.disp_mode = 0x1400,
+	},
+	{
+		/* 800x600 */
+		.htp = 1060,
+		.hsp = 864,
+		.hsw = 72,
+		.hdp = 800,
+		.vtr = 628,
+		.vsp = 601,
+		.vsw = 2,
+		.vdp = 600,
+		.disp_mode = 0x0d00,
+	}
+};
+
+static int carmine_find_mode(const struct fb_var_screeninfo *var)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(car_modes); i++)
+		if (car_modes[i].hdp == var->xres &&
+		    car_modes[i].vdp == var->yres)
+			return i;
+	return -EINVAL;
+}
+
+static void c_set_disp_reg(const struct carmine_fb *par,
+		u32 offset, u32 val)
+{
+	writel(val, par->display_reg + offset);
+}
+
+static u32 c_get_disp_reg(const struct carmine_fb *par,
+		u32 offset)
+{
+	return readl(par->display_reg + offset);
+}
+
+static void c_set_hw_reg(const struct carmine_hw *hw,
+		u32 offset, u32 val)
+{
+	writel(val, hw->v_regs + offset);
+}
+
+static u32 c_get_hw_reg(const struct carmine_hw *hw,
+		u32 offset)
+{
+	return readl(hw->v_regs + offset);
+}
+
+static int carmine_setcolreg(unsigned regno, unsigned red, unsigned green,
+		unsigned blue, unsigned transp, struct fb_info *info)
+{
+	if (regno >= 16)	/* pseudo_palette[] only has 16 entries */
+		return 1;
+
+	/* drop the low 8 bits of every component, then store big-endian */
+	red >>= 8;
+	green >>= 8;
+	blue >>= 8;
+	transp >>= 8;
+	((__be32 *)info->pseudo_palette)[regno] = cpu_to_be32(transp << 24 |
+		red << 0 | green << 8 | blue << 16);
+	return 0;
+}
+
+static int carmine_check_var(struct fb_var_screeninfo *var,
+		struct fb_info *info)
+{
+	int ret;
+
+	ret = carmine_find_mode(var);
+	if (ret < 0)
+		return ret;
+
+	if (var->grayscale || var->rotate || var->nonstd)
+		return -EINVAL;
+
+	var->xres_virtual = var->xres;
+	var->yres_virtual = var->yres;
+
+	var->bits_per_pixel = 32;
+
+#ifdef __BIG_ENDIAN
+	var->transp.offset = 24;
+	var->red.offset = 0;
+	var->green.offset = 8;
+	var->blue.offset = 16;
+#else
+	var->transp.offset = 24;
+	var->red.offset = 16;
+	var->green.offset = 8;
+	var->blue.offset = 0;
+#endif
+
+	var->red.length = 8;
+	var->green.length = 8;
+	var->blue.length = 8;
+	var->transp.length = 8;
+
+	var->red.msb_right = 0;
+	var->green.msb_right = 0;
+	var->blue.msb_right = 0;
+	var->transp.msb_right = 0;
+	return 0;
+}
+
+static void carmine_init_display_param(struct carmine_fb *par)
+{
+	u32 width;
+	u32 height;
+	u32 param;
+	u32 window_size;
+	u32 soffset = par->smem_offset;
+
+	c_set_disp_reg(par, CARMINE_DISP_REG_C_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_MLMR_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_CURSOR_MODE,
+			CARMINE_CURSOR0_PRIORITY_MASK |
+			CARMINE_CURSOR1_PRIORITY_MASK |
+			CARMINE_CURSOR_CUTZ_MASK);
+
+	/* Set default cursor position */
+	c_set_disp_reg(par, CARMINE_DISP_REG_CUR1_POS, 0 << 16 | 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_CUR2_POS, 0 << 16 | 0);
+
+	/* Set default display mode */
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0_EXT_MODE, CARMINE_WINDOW_MODE |
+			CARMINE_EXT_CMODE_DIRECT24_RGBA);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L1_EXT_MODE,
+			CARMINE_EXT_CMODE_DIRECT24_RGBA);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2_EXT_MODE, CARMINE_EXTEND_MODE |
+			CARMINE_EXT_CMODE_DIRECT24_RGBA);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3_EXT_MODE, CARMINE_EXTEND_MODE |
+			CARMINE_EXT_CMODE_DIRECT24_RGBA);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4_EXT_MODE, CARMINE_EXTEND_MODE |
+			CARMINE_EXT_CMODE_DIRECT24_RGBA);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5_EXT_MODE, CARMINE_EXTEND_MODE |
+			CARMINE_EXT_CMODE_DIRECT24_RGBA);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6_EXT_MODE, CARMINE_EXTEND_MODE |
+			CARMINE_EXT_CMODE_DIRECT24_RGBA);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7_EXT_MODE, CARMINE_EXTEND_MODE |
+			CARMINE_EXT_CMODE_DIRECT24_RGBA);
+
+	/* Set default frame size to layer mode register */
+	width = par->res->hdp * 4 / CARMINE_DISP_WIDTH_UNIT;
+	width = width << CARMINE_DISP_WIDTH_SHIFT;
+
+	height = par->res->vdp - 1;
+	param = width | height;
+
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0_MODE_W_H, param);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L1_WIDTH, width);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2_MODE_W_H, param);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3_MODE_W_H, param);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4_MODE_W_H, param);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5_MODE_W_H, param);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6_MODE_W_H, param);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7_MODE_W_H, param);
+
+	/* Set default pos and size */
+	window_size = (par->res->vdp - 1) << CARMINE_DISP_WIN_H_SHIFT;
+	window_size |= par->res->hdp;
+
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0_WIN_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0_WIN_SIZE, window_size);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L1_WIN_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L1_WIN_SIZE, window_size);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2_WIN_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2_WIN_SIZE, window_size);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3_WIN_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3_WIN_SIZE, window_size);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4_WIN_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4_WIN_SIZE, window_size);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5_WIN_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5_WIN_SIZE, window_size);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6_WIN_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6_WIN_SIZE, window_size);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7_WIN_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7_WIN_SIZE, window_size);
+
+	/* Set default origin address */
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0_ORG_ADR, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L1_ORG_ADR, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2_ORG_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3_ORG_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4_ORG_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5_ORG_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6_ORG_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7_ORG_ADR1, soffset);
+
+	/* Set default display address */
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0_DISP_ADR, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2_DISP_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3_DISP_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4_DISP_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5_DISP_ADR1, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6_DISP_ADR0, soffset);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7_DISP_ADR0, soffset);
+
+	/* Set default display position */
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0_DISP_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2_DISP_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3_DISP_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4_DISP_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5_DISP_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6_DISP_POS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7_DISP_POS, 0);
+
+	/* Set default blend mode */
+	c_set_disp_reg(par, CARMINE_DISP_REG_BLEND_MODE_L0, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_BLEND_MODE_L1, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_BLEND_MODE_L2, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_BLEND_MODE_L3, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_BLEND_MODE_L4, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_BLEND_MODE_L5, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_BLEND_MODE_L6, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_BLEND_MODE_L7, 0);
+
+	/* default transparency mode */
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L1_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6_TRANS, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7_TRANS, 0);
+
+	/* Set default read skip parameter */
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0RM, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2RM, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3RM, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4RM, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5RM, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6RM, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7RM, 0);
+
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0PX, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2PX, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3PX, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4PX, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5PX, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6PX, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7PX, 0);
+
+	c_set_disp_reg(par, CARMINE_DISP_REG_L0PY, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L2PY, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L3PY, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L4PY, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L5PY, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L6PY, 0);
+	c_set_disp_reg(par, CARMINE_DISP_REG_L7PY, 0);
+}
+
+static void set_display_parameters(struct carmine_fb *par)
+{
+	u32 mode;
+	u32 hdp, vdp, htp, hsp, hsw, vtr, vsp, vsw;
+
+	/*
+	 * display timing. Parameters are decreased by one because hardware
+	 * spec is 0 to (n - 1)
+	 * */
+	hdp = par->res->hdp - 1;
+	vdp = par->res->vdp - 1;
+	htp = par->res->htp - 1;
+	hsp = par->res->hsp - 1;
+	hsw = par->res->hsw - 1;
+	vtr = par->res->vtr - 1;
+	vsp = par->res->vsp - 1;
+	vsw = par->res->vsw - 1;
+
+	c_set_disp_reg(par, CARMINE_DISP_REG_H_TOTAL,
+			htp << CARMINE_DISP_HTP_SHIFT);
+	c_set_disp_reg(par, CARMINE_DISP_REG_H_PERIOD,
+			(hdp << CARMINE_DISP_HDB_SHIFT)	| hdp);
+	c_set_disp_reg(par, CARMINE_DISP_REG_V_H_W_H_POS,
+			(vsw << CARMINE_DISP_VSW_SHIFT) |
+			(hsw << CARMINE_DISP_HSW_SHIFT) |
+			(hsp));
+	c_set_disp_reg(par, CARMINE_DISP_REG_V_TOTAL,
+			vtr << CARMINE_DISP_VTR_SHIFT);
+	c_set_disp_reg(par, CARMINE_DISP_REG_V_PERIOD_POS,
+			(vdp << CARMINE_DISP_VDP_SHIFT) | vsp);
+
+	/* clock */
+	mode = c_get_disp_reg(par, CARMINE_DISP_REG_DCM1);
+	mode = (mode & ~CARMINE_DISP_DCM_MASK) |
+		(par->res->disp_mode & CARMINE_DISP_DCM_MASK);
+	/* enable video output and layer 0 */
+	mode |= CARMINE_DEN | CARMINE_L0E;
+	c_set_disp_reg(par, CARMINE_DISP_REG_DCM1, mode);
+}
+
+static int carmine_set_par(struct fb_info *info)
+{
+	struct carmine_fb *par = info->par;
+	int ret;
+
+	ret = carmine_find_mode(&info->var);
+	if (ret < 0)
+		return ret;
+
+	par->new_mode = ret;
+	if (par->cur_mode != par->new_mode) {
+
+		par->cur_mode = par->new_mode;
+		par->res = &car_modes[par->new_mode];
+
+		carmine_init_display_param(par);
+		set_display_parameters(par);
+	}
+
+	info->fix.line_length = info->var.xres * info->var.bits_per_pixel / 8;
+	return 0;
+}
+
+static int init_hardware(struct carmine_hw *hw)
+{
+	u32 flags;
+	u32 loops;
+	u32 ret;
+
+	/* Initialize Carmine */
+	/* Sets internal clock */
+	c_set_hw_reg(hw, CARMINE_CTL_REG + CARMINE_CTL_REG_CLOCK_ENABLE,
+			CARMINE_DFLT_IP_CLOCK_ENABLE);
+
+	/* Video signal output is turned off */
+	c_set_hw_reg(hw, CARMINE_DISP0_REG + CARMINE_DISP_REG_DCM1, 0);
+	c_set_hw_reg(hw, CARMINE_DISP1_REG + CARMINE_DISP_REG_DCM1, 0);
+
+	/* Software reset */
+	c_set_hw_reg(hw, CARMINE_CTL_REG + CARMINE_CTL_REG_SOFTWARE_RESET, 1);
+	c_set_hw_reg(hw, CARMINE_CTL_REG + CARMINE_CTL_REG_SOFTWARE_RESET, 0);
+
+	/* I/O mode settings */
+	flags = CARMINE_DFLT_IP_DCTL_IO_CONT1 << 16 |
+		CARMINE_DFLT_IP_DCTL_IO_CONT0;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_IOCONT1_IOCONT0,
+			flags);
+
+	/* DRAM initial sequence */
+	flags = CARMINE_DFLT_IP_DCTL_MODE << 16 | CARMINE_DFLT_IP_DCTL_ADD;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_MODE_ADD,
+			flags);
+
+	flags = CARMINE_DFLT_IP_DCTL_SET_TIME1 << 16 |
+		CARMINE_DFLT_IP_DCTL_EMODE;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_SETTIME1_EMODE,
+			flags);
+
+	flags = CARMINE_DFLT_IP_DCTL_REFRESH << 16 |
+		CARMINE_DFLT_IP_DCTL_SET_TIME2;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_REFRESH_SETTIME2,
+			flags);
+
+	flags = CARMINE_DFLT_IP_DCTL_RESERVE2 << 16 |
+		CARMINE_DFLT_IP_DCTL_FIFO_DEPTH;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_RSV2_RSV1, flags);
+
+	flags = CARMINE_DFLT_IP_DCTL_DDRIF2 << 16 | CARMINE_DFLT_IP_DCTL_DDRIF1;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_DDRIF2_DDRIF1,
+			flags);
+
+	flags = CARMINE_DFLT_IP_DCTL_RESERVE0 << 16 |
+		CARMINE_DFLT_IP_DCTL_STATES;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_RSV0_STATES,
+			flags);
+
+	/* Executes DLL reset */
+	if (CARMINE_DCTL_DLL_RESET) {
+		for (loops = 0; loops < CARMINE_DCTL_INIT_WAIT_LIMIT; loops++) {
+
+			ret = c_get_hw_reg(hw, CARMINE_DCTL_REG +
+					CARMINE_DCTL_REG_RSV0_STATES);
+			ret &= CARMINE_DCTL_REG_STATES_MASK;
+			if (!ret)
+				break;
+
+			mdelay(CARMINE_DCTL_INIT_WAIT_INTERVAL);
+		}
+
+		if (loops >= CARMINE_DCTL_INIT_WAIT_LIMIT) {
+			printk(KERN_ERR "DRAM init failed\n");
+			return -EIO;
+		}
+	}
+
+	flags = CARMINE_DFLT_IP_DCTL_MODE_AFT_RST << 16 |
+		CARMINE_DFLT_IP_DCTL_ADD;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_MODE_ADD, flags);
+
+	flags = CARMINE_DFLT_IP_DCTL_RESERVE0 << 16 |
+		CARMINE_DFLT_IP_DCTL_STATES_AFT_RST;
+	c_set_hw_reg(hw, CARMINE_DCTL_REG + CARMINE_DCTL_REG_RSV0_STATES,
+			flags);
+
+	/* Initialize the write back register */
+	c_set_hw_reg(hw, CARMINE_WB_REG + CARMINE_WB_REG_WBM,
+			CARMINE_WB_REG_WBM_DEFAULT);
+
+	/* Initialize the Kottos registers */
+	c_set_hw_reg(hw, CARMINE_GRAPH_REG + CARMINE_GRAPH_REG_VRINTM, 0);
+	c_set_hw_reg(hw, CARMINE_GRAPH_REG + CARMINE_GRAPH_REG_VRERRM, 0);
+
+	/* Set DC offsets */
+	c_set_hw_reg(hw, CARMINE_GRAPH_REG + CARMINE_GRAPH_REG_DC_OFFSET_PX, 0);
+	c_set_hw_reg(hw, CARMINE_GRAPH_REG + CARMINE_GRAPH_REG_DC_OFFSET_PY, 0);
+	c_set_hw_reg(hw, CARMINE_GRAPH_REG + CARMINE_GRAPH_REG_DC_OFFSET_LX, 0);
+	c_set_hw_reg(hw, CARMINE_GRAPH_REG + CARMINE_GRAPH_REG_DC_OFFSET_LY, 0);
+	c_set_hw_reg(hw, CARMINE_GRAPH_REG + CARMINE_GRAPH_REG_DC_OFFSET_TX, 0);
+	c_set_hw_reg(hw, CARMINE_GRAPH_REG + CARMINE_GRAPH_REG_DC_OFFSET_TY, 0);
+	return 0;
+}
+
+static struct fb_ops carminefb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+
+	.fb_check_var	= carmine_check_var,
+	.fb_set_par	= carmine_set_par,
+	.fb_setcolreg	= carmine_setcolreg,
+};
+
+static int alloc_carmine_fb(void __iomem *regs, void __iomem *smem_base,
+		int smem_offset, struct device *device, struct fb_info **rinfo)
+{
+	int ret;
+	struct fb_info *info;
+	struct carmine_fb *par;
+
+	info = framebuffer_alloc(sizeof *par, device);
+	if (!info)
+		return -ENOMEM;
+
+	par = info->par;
+	par->display_reg = regs;
+	par->smem_offset = smem_offset;
+
+	info->screen_base = smem_base + smem_offset;
+	info->screen_size = CARMINE_DISPLAY_MEM;
+	info->fbops = &carminefb_ops;
+
+	info->fix = carminefb_fix;
+	info->pseudo_palette = par->pseudo_palette;
+	info->flags = FBINFO_DEFAULT;
+
+	ret = fb_alloc_cmap(&info->cmap, 256, 1);
+	if (ret < 0)
+		goto err_free_fb;
+
+	/* fb_mode indexes carmine_modedb[], so it must stay below its size */
+	if (fb_mode >= ARRAY_SIZE(carmine_modedb))
+		fb_mode = CARMINEFB_DEFAULT_VIDEO_MODE;
+	par->cur_mode = par->new_mode = ~0;
+
+	ret = fb_find_mode(&info->var, info, fb_mode_str, carmine_modedb,
+			ARRAY_SIZE(carmine_modedb),
+			&carmine_modedb[fb_mode], 32);
+	if (!ret || ret == 4) {
+		ret = -EINVAL;
+		goto err_dealloc_cmap;
+	}
+
+	fb_videomode_to_modelist(carmine_modedb, ARRAY_SIZE(carmine_modedb),
+			&info->modelist);
+
+	ret = register_framebuffer(info);
+	if (ret < 0)
+		goto err_dealloc_cmap;
+
+	printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node,
+			info->fix.id);
+
+	*rinfo = info;
+	return 0;
+
+err_dealloc_cmap:
+	fb_dealloc_cmap(&info->cmap);
+err_free_fb:
+	framebuffer_release(info);
+	return ret;
+}
+
+static void cleanup_fb_device(struct fb_info *info)
+{
+	if (info) {
+		unregister_framebuffer(info);
+		fb_dealloc_cmap(&info->cmap);
+		framebuffer_release(info);
+	}
+}
+
+/* Probe: enable the device, map both BARs and register the frame buffers */
+static int __devinit carminefb_probe(struct pci_dev *dev,
+		const struct pci_device_id *ent)
+{
+	struct carmine_hw *hw;
+	struct device *device = &dev->dev;
+	struct fb_info *info;
+	int ret;
+
+	ret = pci_enable_device(dev);
+	if (ret)
+		return ret;
+
+	ret = -ENOMEM;
+	hw = kzalloc(sizeof *hw, GFP_KERNEL);
+	if (!hw)
+		goto err_enable_pci;
+
+	carminefb_fix.mmio_start = pci_resource_start(dev, CARMINE_CONFIG_BAR);
+	carminefb_fix.mmio_len = pci_resource_len(dev, CARMINE_CONFIG_BAR);
+
+	if (!request_mem_region(carminefb_fix.mmio_start,
+				carminefb_fix.mmio_len,
+				"carminefb regbase")) {
+		printk(KERN_ERR "carminefb: Can't reserve regbase.\n");
+		ret = -EBUSY;
+		goto err_free_hw;
+	}
+	hw->v_regs = ioremap_nocache(carminefb_fix.mmio_start,
+			carminefb_fix.mmio_len);
+	if (!hw->v_regs) {
+		printk(KERN_ERR "carminefb: Can't remap %s register.\n",
+				carminefb_fix.id);
+		goto err_free_reg_mmio;
+	}
+
+	carminefb_fix.smem_start = pci_resource_start(dev, CARMINE_MEMORY_BAR);
+	carminefb_fix.smem_len = pci_resource_len(dev, CARMINE_MEMORY_BAR);
+
+	/* The memory area tends to be very large (256 MiB). Remap only what
+	 * is required for the largest resolution to avoid remaps at run
+	 * time
+	 */
+	if (carminefb_fix.smem_len > CARMINE_TOTAL_DIPLAY_MEM)
+		carminefb_fix.smem_len = CARMINE_TOTAL_DIPLAY_MEM;
+
+	else if (carminefb_fix.smem_len < CARMINE_TOTAL_DIPLAY_MEM) {
+		printk(KERN_ERR "carminefb: Memory bar is only %d bytes, %d "
+				"are required.\n", carminefb_fix.smem_len,
+				CARMINE_TOTAL_DIPLAY_MEM);
+		goto err_unmap_vregs;
+	}
+
+	if (!request_mem_region(carminefb_fix.smem_start,
+				carminefb_fix.smem_len,	"carminefb smem")) {
+		printk(KERN_ERR "carminefb: Can't reserve smem.\n");
+		ret = -EBUSY;
+		goto err_unmap_vregs;
+	}
+
+	hw->screen_mem = ioremap_nocache(carminefb_fix.smem_start,
+			carminefb_fix.smem_len);
+	if (!hw->screen_mem) {
+		printk(KERN_ERR "carmine: Can't ioremap smem area.\n");
+		goto err_reg_smem;
+	}
+
+	ret = init_hardware(hw);
+	if (ret)
+		goto err_unmap_screen;
+
+	info = NULL;
+	if (fb_displays & CARMINE_USE_DISPLAY0) {
+		ret = alloc_carmine_fb(hw->v_regs + CARMINE_DISP0_REG,
+				hw->screen_mem, CARMINE_DISPLAY_MEM * 0,
+				device, &info);
+		if (ret)
+			goto err_deinit_hw;
+	}
+
+	hw->fb[0] = info;
+
+	info = NULL;
+	if (fb_displays & CARMINE_USE_DISPLAY1) {
+		ret = alloc_carmine_fb(hw->v_regs + CARMINE_DISP1_REG,
+				hw->screen_mem, CARMINE_DISPLAY_MEM * 1,
+				device, &info);
+		if (ret)
+			goto err_cleanup_fb0;
+	}
+
+	hw->fb[1] = info;
+	info = NULL;
+
+	pci_set_drvdata(dev, hw);
+	return 0;
+
+err_cleanup_fb0:
+	cleanup_fb_device(hw->fb[0]);
+err_deinit_hw:
+	/* disable clock, etc */
+	c_set_hw_reg(hw, CARMINE_CTL_REG + CARMINE_CTL_REG_CLOCK_ENABLE, 0);
+err_unmap_screen:
+	iounmap(hw->screen_mem);
+err_reg_smem:
+	release_mem_region(carminefb_fix.smem_start, carminefb_fix.smem_len);
+err_unmap_vregs:
+	iounmap(hw->v_regs);
+err_free_reg_mmio:
+	release_mem_region(carminefb_fix.mmio_start, carminefb_fix.mmio_len);
+err_free_hw:
+	kfree(hw);
+err_enable_pci:
+	pci_disable_device(dev);
+	return ret;
+}
+
+static void __devexit carminefb_remove(struct pci_dev *dev)
+{
+	struct carmine_hw *hw = pci_get_drvdata(dev);
+	struct fb_fix_screeninfo fix;
+	int i;
+
+	/* in case we use only fb1 and not fb0 */
+	if (hw->fb[0])
+		fix = hw->fb[0]->fix;
+	else
+		fix = hw->fb[1]->fix;
+
+	/* deactivate display(s) and switch off the clocks */
+	c_set_hw_reg(hw, CARMINE_DISP0_REG + CARMINE_DISP_REG_DCM1, 0);
+	c_set_hw_reg(hw, CARMINE_DISP1_REG + CARMINE_DISP_REG_DCM1, 0);
+	c_set_hw_reg(hw, CARMINE_CTL_REG + CARMINE_CTL_REG_CLOCK_ENABLE, 0);
+
+	for (i = 0; i < MAX_DISPLAY; i++)
+		cleanup_fb_device(hw->fb[i]);
+
+	iounmap(hw->screen_mem);
+	release_mem_region(fix.smem_start, fix.smem_len);
+	iounmap(hw->v_regs);
+	release_mem_region(fix.mmio_start, fix.mmio_len);
+
+	pci_set_drvdata(dev, NULL);
+	pci_disable_device(dev);
+	kfree(hw);
+}
+
+#define PCI_VENDOR_ID_FUJITU_LIMITED 0x10cf
+static struct pci_device_id carmine_devices[] __devinitdata = {
+{
+	PCI_DEVICE(PCI_VENDOR_ID_FUJITU_LIMITED, 0x202b)},
+	{0, 0, 0, 0, 0, 0, 0}
+};
+
+MODULE_DEVICE_TABLE(pci, carmine_devices);
+
+static struct pci_driver carmine_pci_driver = {
+	.name		= "carminefb",
+	.id_table	= carmine_devices,
+	.probe		= carminefb_probe,
+	.remove		= __devexit_p(carminefb_remove),
+};
+
+static int __init carminefb_init(void)
+{
+	if (!(fb_displays &
+		(CARMINE_USE_DISPLAY0 | CARMINE_USE_DISPLAY1))) {
+		printk(KERN_ERR "If you disable both displays than you don't "
+				"need the driver at all\n");
+		return -EINVAL;
+	}
+	return pci_register_driver(&carmine_pci_driver);
+}
+module_init(carminefb_init);
+
+static void __exit carminefb_cleanup(void)
+{
+	pci_unregister_driver(&carmine_pci_driver);
+}
+module_exit(carminefb_cleanup);
+
+MODULE_AUTHOR("Sebastian Siewior <bigeasy@linutronix.de>");
+MODULE_DESCRIPTION("Framebuffer driver for Fujitsu Carmine based devices");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/carminefb.h b/drivers/video/carminefb.h
new file mode 100644
index 00000000000..05306de0c6b
--- /dev/null
+++ b/drivers/video/carminefb.h
@@ -0,0 +1,64 @@
+#ifndef CARMINE_CARMINE_H
+#define CARMINE_CARMINE_H
+
+#define CARMINE_MEMORY_BAR	2
+#define CARMINE_CONFIG_BAR	3
+
+#define MAX_DISPLAY	2
+#define CARMINE_DISPLAY_MEM	(800 * 600 * 4)
+#define CARMINE_TOTAL_DIPLAY_MEM	(CARMINE_DISPLAY_MEM * MAX_DISPLAY)
+
+#define CARMINE_USE_DISPLAY0	(1 << 0)
+#define CARMINE_USE_DISPLAY1	(1 << 1)
+
+/*
+ * These values work on the eval card. Custom boards may use different timings;
+ * here is an example :)
+ */
+
+/* DRAM initialization values */
+#ifdef CONFIG_FB_CARMINE_DRAM_EVAL
+
+#define CARMINE_DFLT_IP_CLOCK_ENABLE		(0x03ff)
+#define CARMINE_DFLT_IP_DCTL_ADD		(0x05c3)
+#define CARMINE_DFLT_IP_DCTL_MODE		(0x0121)
+#define CARMINE_DFLT_IP_DCTL_EMODE		(0x8000)
+#define CARMINE_DFLT_IP_DCTL_SET_TIME1		(0x4749)
+#define CARMINE_DFLT_IP_DCTL_SET_TIME2		(0x2a22)
+#define CARMINE_DFLT_IP_DCTL_REFRESH		(0x0042)
+#define CARMINE_DFLT_IP_DCTL_STATES		(0x0003)
+#define CARMINE_DFLT_IP_DCTL_RESERVE0		(0x0020)
+#define CARMINE_DFLT_IP_DCTL_FIFO_DEPTH		(0x000f)
+#define CARMINE_DFLT_IP_DCTL_RESERVE2		(0x0000)
+#define CARMINE_DFLT_IP_DCTL_DDRIF1		(0x6646)
+#define CARMINE_DFLT_IP_DCTL_DDRIF2		(0x0055)
+#define CARMINE_DFLT_IP_DCTL_MODE_AFT_RST	(0x0021)
+#define CARMINE_DFLT_IP_DCTL_STATES_AFT_RST	(0x0002)
+#define CARMINE_DFLT_IP_DCTL_IO_CONT0		(0x0555)
+#define CARMINE_DFLT_IP_DCTL_IO_CONT1		(0x0555)
+#define CARMINE_DCTL_DLL_RESET			(1)
+#endif
+
+#ifdef CONFIG_CARMINE_DRAM_CUSTOM
+
+#define CARMINE_DFLT_IP_CLOCK_ENABLE		(0x03ff)
+#define CARMINE_DFLT_IP_DCTL_ADD		(0x03b2)
+#define CARMINE_DFLT_IP_DCTL_MODE		(0x0161)
+#define CARMINE_DFLT_IP_DCTL_EMODE		(0x8000)
+#define CARMINE_DFLT_IP_DCTL_SET_TIME1		(0x2628)
+#define CARMINE_DFLT_IP_DCTL_SET_TIME2		(0x1a09)
+#define CARMINE_DFLT_IP_DCTL_REFRESH		(0x00fe)
+#define CARMINE_DFLT_IP_DCTL_STATES		(0x0003)
+#define CARMINE_DFLT_IP_DCTL_RESERVE0		(0x0020)
+#define CARMINE_DFLT_IP_DCTL_FIFO_DEPTH		(0x000f)
+#define CARMINE_DFLT_IP_DCTL_RESERVE2		(0x0000)
+#define CARMINE_DFLT_IP_DCTL_DDRIF1		(0x0646)
+#define CARMINE_DFLT_IP_DCTL_DDRIF2		(0x55aa)
+#define CARMINE_DFLT_IP_DCTL_MODE_AFT_RST	(0x0061)
+#define CARMINE_DFLT_IP_DCTL_STATES_AFT_RST	(0x0002)
+#define CARMINE_DFLT_IP_DCTL_IO_CONT0		(0x0555)
+#define CARMINE_DFLT_IP_DCTL_IO_CONT1		(0x0555)
+#define CARMINE_DCTL_DLL_RESET			(1)
+#endif
+
+#endif
diff --git a/drivers/video/carminefb_regs.h b/drivers/video/carminefb_regs.h
new file mode 100644
index 00000000000..045215600b7
--- /dev/null
+++ b/drivers/video/carminefb_regs.h
@@ -0,0 +1,159 @@
+#ifndef _CARMINEFB_REGS_H
+#define _CARMINEFB_REGS_H
+
+#define CARMINE_OVERLAY_EXT_MODE	(0x00000002)
+#define CARMINE_GRAPH_REG		(0x00000000)
+#define CARMINE_DISP0_REG		(0x00100000)
+#define CARMINE_DISP1_REG		(0x00140000)
+#define CARMINE_WB_REG			(0x00180000)
+#define CARMINE_DCTL_REG		(0x00300000)
+#define CARMINE_CTL_REG			(0x00400000)
+#define CARMINE_WINDOW_MODE		(0x00000001)
+#define CARMINE_EXTEND_MODE		(CARMINE_WINDOW_MODE | \
+					CARMINE_OVERLAY_EXT_MODE)
+#define CARMINE_L0E			(1 << 16)
+#define CARMINE_L2E			(1 << 18)
+#define CARMINE_DEN			(1 << 31)
+
+#define CARMINE_EXT_CMODE_DIRECT24_RGBA		(0xC0000000)
+#define CARMINE_DCTL_REG_MODE_ADD		(0x00)
+#define CARMINE_DCTL_REG_SETTIME1_EMODE		(0x04)
+#define CARMINE_DCTL_REG_REFRESH_SETTIME2	(0x08)
+#define CARMINE_DCTL_REG_RSV0_STATES		(0x0C)
+#define CARMINE_DCTL_REG_RSV2_RSV1		(0x10)
+#define CARMINE_DCTL_REG_DDRIF2_DDRIF1		(0x14)
+#define CARMINE_DCTL_REG_IOCONT1_IOCONT0	(0x24)
+#define CARMINE_DCTL_REG_STATES_MASK		(0x000F)
+#define CARMINE_DCTL_INIT_WAIT_INTERVAL		(1)
+#define CARMINE_DCTL_INIT_WAIT_LIMIT		(5000)
+#define CARMINE_WB_REG_WBM_DEFAULT		(0x0001c020)
+#define CARMINE_DISP_REG_L0RM			(0x1880)
+#define CARMINE_DISP_REG_L0PX			(0x1884)
+#define CARMINE_DISP_REG_L0PY			(0x1888)
+#define CARMINE_DISP_REG_L2RM			(0x18A0)
+#define CARMINE_DISP_REG_L2PX			(0x18A4)
+#define CARMINE_DISP_REG_L2PY			(0x18A8)
+#define CARMINE_DISP_REG_L3RM			(0x18B0)
+#define CARMINE_DISP_REG_L3PX			(0x18B4)
+#define CARMINE_DISP_REG_L3PY			(0x18B8)
+#define CARMINE_DISP_REG_L4RM			(0x18C0)
+#define CARMINE_DISP_REG_L4PX			(0x18C4)
+#define CARMINE_DISP_REG_L4PY			(0x18C8)
+#define CARMINE_DISP_REG_L5RM			(0x18D0)
+#define CARMINE_DISP_REG_L5PX			(0x18D4)
+#define CARMINE_DISP_REG_L5PY			(0x18D8)
+#define CARMINE_DISP_REG_L6RM			(0x1924)
+#define CARMINE_DISP_REG_L6PX			(0x1928)
+#define CARMINE_DISP_REG_L6PY			(0x192C)
+#define CARMINE_DISP_REG_L7RM			(0x1964)
+#define CARMINE_DISP_REG_L7PX			(0x1968)
+#define CARMINE_DISP_REG_L7PY			(0x196C)
+#define CARMINE_WB_REG_WBM			(0x0004)
+#define CARMINE_DISP_HTP_SHIFT			(16)
+#define CARMINE_DISP_HDB_SHIFT			(16)
+#define CARMINE_DISP_HSW_SHIFT			(16)
+#define CARMINE_DISP_VSW_SHIFT			(24)
+#define CARMINE_DISP_VTR_SHIFT			(16)
+#define CARMINE_DISP_VDP_SHIFT			(16)
+#define CARMINE_CURSOR_CUTZ_MASK		(0x00000100)
+#define CARMINE_CURSOR0_PRIORITY_MASK		(0x00010000)
+#define CARMINE_CURSOR1_PRIORITY_MASK		(0x00020000)
+#define CARMINE_DISP_WIDTH_SHIFT		(16)
+#define CARMINE_DISP_WIN_H_SHIFT		(16)
+#define CARMINE_DISP_REG_H_TOTAL		(0x0004)
+#define CARMINE_DISP_REG_H_PERIOD		(0x0008)
+#define CARMINE_DISP_REG_V_H_W_H_POS		(0x000C)
+#define CARMINE_DISP_REG_V_TOTAL		(0x0010)
+#define CARMINE_DISP_REG_V_PERIOD_POS		(0x0014)
+#define CARMINE_DISP_REG_L0_MODE_W_H		(0x0020)
+#define CARMINE_DISP_REG_L0_ORG_ADR		(0x0024)
+#define CARMINE_DISP_REG_L0_DISP_ADR		(0x0028)
+#define CARMINE_DISP_REG_L0_DISP_POS		(0x002C)
+#define CARMINE_DISP_REG_L1_WIDTH		(0x0030)
+#define CARMINE_DISP_REG_L1_ORG_ADR		(0x0034)
+#define CARMINE_DISP_REG_L2_MODE_W_H		(0x0040)
+#define CARMINE_DISP_REG_L2_ORG_ADR1		(0x0044)
+#define CARMINE_DISP_REG_L2_DISP_ADR1		(0x0048)
+#define CARMINE_DISP_REG_L2_DISP_POS		(0x0054)
+#define CARMINE_DISP_REG_L3_MODE_W_H		(0x0058)
+#define CARMINE_DISP_REG_L3_ORG_ADR1		(0x005C)
+#define CARMINE_DISP_REG_L3_DISP_ADR1		(0x0060)
+#define CARMINE_DISP_REG_L3_DISP_POS		(0x006C)
+#define CARMINE_DISP_REG_L4_MODE_W_H		(0x0070)
+#define CARMINE_DISP_REG_L4_ORG_ADR1		(0x0074)
+#define CARMINE_DISP_REG_L4_DISP_ADR1		(0x0078)
+#define CARMINE_DISP_REG_L4_DISP_POS		(0x0084)
+#define CARMINE_DISP_REG_L5_MODE_W_H		(0x0088)
+#define CARMINE_DISP_REG_L5_ORG_ADR1		(0x008C)
+#define CARMINE_DISP_REG_L5_DISP_ADR1		(0x0090)
+#define CARMINE_DISP_REG_L5_DISP_POS		(0x009C)
+#define CARMINE_DISP_REG_CURSOR_MODE		(0x00A0)
+#define CARMINE_DISP_REG_CUR1_POS		(0x00A8)
+#define CARMINE_DISP_REG_CUR2_POS		(0x00B0)
+#define CARMINE_DISP_REG_C_TRANS		(0x00BC)
+#define CARMINE_DISP_REG_MLMR_TRANS		(0x00C0)
+#define CARMINE_DISP_REG_L0_EXT_MODE		(0x0110)
+#define CARMINE_DISP_REG_L0_WIN_POS		(0x0114)
+#define CARMINE_DISP_REG_L0_WIN_SIZE		(0x0118)
+#define CARMINE_DISP_REG_L1_EXT_MODE		(0x0120)
+#define CARMINE_DISP_REG_L1_WIN_POS		(0x0124)
+#define CARMINE_DISP_REG_L1_WIN_SIZE		(0x0128)
+#define CARMINE_DISP_REG_L2_EXT_MODE		(0x0130)
+#define CARMINE_DISP_REG_L2_WIN_POS		(0x0134)
+#define CARMINE_DISP_REG_L2_WIN_SIZE		(0x0138)
+#define CARMINE_DISP_REG_L3_EXT_MODE		(0x0140)
+#define CARMINE_DISP_REG_L3_WIN_POS		(0x0144)
+#define CARMINE_DISP_REG_L3_WIN_SIZE		(0x0148)
+#define CARMINE_DISP_REG_L4_EXT_MODE		(0x0150)
+#define CARMINE_DISP_REG_L4_WIN_POS		(0x0154)
+#define CARMINE_DISP_REG_L4_WIN_SIZE		(0x0158)
+#define CARMINE_DISP_REG_L5_EXT_MODE		(0x0160)
+#define CARMINE_DISP_REG_L5_WIN_POS		(0x0164)
+#define CARMINE_DISP_REG_L5_WIN_SIZE		(0x0168)
+#define CARMINE_DISP_REG_L6_EXT_MODE		(0x1918)
+#define CARMINE_DISP_REG_L6_WIN_POS		(0x191c)
+#define CARMINE_DISP_REG_L6_WIN_SIZE		(0x1920)
+#define CARMINE_DISP_REG_L7_EXT_MODE		(0x1958)
+#define CARMINE_DISP_REG_L7_WIN_POS		(0x195c)
+#define CARMINE_DISP_REG_L7_WIN_SIZE		(0x1960)
+#define CARMINE_DISP_REG_BLEND_MODE_L0		(0x00B4)
+#define CARMINE_DISP_REG_BLEND_MODE_L1		(0x0188)
+#define CARMINE_DISP_REG_BLEND_MODE_L2		(0x018C)
+#define CARMINE_DISP_REG_BLEND_MODE_L3		(0x0190)
+#define CARMINE_DISP_REG_BLEND_MODE_L4		(0x0194)
+#define CARMINE_DISP_REG_BLEND_MODE_L5		(0x0198)
+#define CARMINE_DISP_REG_BLEND_MODE_L6		(0x1990)
+#define CARMINE_DISP_REG_BLEND_MODE_L7		(0x1994)
+#define CARMINE_DISP_REG_L0_TRANS		(0x01A0)
+#define CARMINE_DISP_REG_L1_TRANS		(0x01A4)
+#define CARMINE_DISP_REG_L2_TRANS		(0x01A8)
+#define CARMINE_DISP_REG_L3_TRANS		(0x01AC)
+#define CARMINE_DISP_REG_L4_TRANS		(0x01B0)
+#define CARMINE_DISP_REG_L5_TRANS		(0x01B4)
+#define CARMINE_DISP_REG_L6_TRANS		(0x1998)
+#define CARMINE_DISP_REG_L7_TRANS		(0x199c)
+#define CARMINE_EXTEND_MODE_MASK		(0x00000003)
+#define CARMINE_DISP_DCM_MASK			(0x0000FFFF)
+#define CARMINE_DISP_REG_DCM1			(0x0100)
+#define CARMINE_DISP_WIDTH_UNIT			(64)
+#define CARMINE_DISP_REG_L6_MODE_W_H		(0x1900)
+#define CARMINE_DISP_REG_L6_ORG_ADR1		(0x1904)
+#define CARMINE_DISP_REG_L6_DISP_ADR0		(0x1908)
+#define CARMINE_DISP_REG_L6_DISP_POS		(0x1914)
+#define CARMINE_DISP_REG_L7_MODE_W_H		(0x1940)
+#define CARMINE_DISP_REG_L7_ORG_ADR1		(0x1944)
+#define CARMINE_DISP_REG_L7_DISP_ADR0		(0x1948)
+#define CARMINE_DISP_REG_L7_DISP_POS		(0x1954)
+#define CARMINE_CTL_REG_CLOCK_ENABLE		(0x000C)
+#define CARMINE_CTL_REG_SOFTWARE_RESET		(0x0010)
+#define CARMINE_CTL_REG_IST_MASK_ALL		(0x07FFFFFF)
+#define CARMINE_GRAPH_REG_VRINTM		(0x00028064)
+#define CARMINE_GRAPH_REG_VRERRM		(0x0002806C)
+#define CARMINE_GRAPH_REG_DC_OFFSET_PX		(0x0004005C)
+#define CARMINE_GRAPH_REG_DC_OFFSET_PY		(0x00040060)
+#define CARMINE_GRAPH_REG_DC_OFFSET_LX		(0x00040064)
+#define CARMINE_GRAPH_REG_DC_OFFSET_LY		(0x00040068)
+#define CARMINE_GRAPH_REG_DC_OFFSET_TX		(0x0004006C)
+#define CARMINE_GRAPH_REG_DC_OFFSET_TY		(0x00040070)
+
+#endif
-- 
GitLab


From 306fa6f60a2870b7a9827a64e1b45cd35a9549aa Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:50 -0700
Subject: [PATCH 357/853] tridentfb: replace macros with functions

This patch replaces macros with static functions and puts tridentfb_par
pointer as the first argument of these functions.  This is a step toward
multihead support.

Additionally, bogus TRIDENT_MMIO define is removed as the driver supports
graphics cards only through the mmio mode.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 526 ++++++++++++++++++++------------------
 1 file changed, 283 insertions(+), 243 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index beefab2992c..3e8a1ef892c 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -137,28 +137,34 @@ static int iscyber(int id)
 
 #define CRT 0x3D0		/* CRTC registers offset for color display */
 
-#ifndef TRIDENT_MMIO
-	#define TRIDENT_MMIO 1
-#endif
-
-#if TRIDENT_MMIO
-	#define t_outb(val, reg)	writeb(val,((struct tridentfb_par *)(fb_info.par))->io_virt + reg)
-	#define t_inb(reg)	readb(((struct tridentfb_par*)(fb_info.par))->io_virt + reg)
-#else
-	#define t_outb(val, reg) outb(val, reg)
-	#define t_inb(reg) inb(reg)
-#endif
+static inline void t_outb(struct tridentfb_par *p, u8 val, u16 reg)
+{
+	fb_writeb(val, p->io_virt + reg);
+}
 
+static inline u8 t_inb(struct tridentfb_par *p, u16 reg)
+{
+	return fb_readb(p->io_virt + reg);
+}
 
 static struct accel_switch {
-	void (*init_accel) (int, int);
-	void (*wait_engine) (void);
-	void (*fill_rect) (u32, u32, u32, u32, u32, u32);
-	void (*copy_rect) (u32, u32, u32, u32, u32, u32);
+	void (*init_accel) (struct tridentfb_par *, int, int);
+	void (*wait_engine) (struct tridentfb_par *);
+	void (*fill_rect)
+		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
+	void (*copy_rect)
+		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
 } *acc;
 
-#define writemmr(r, v)	writel(v, ((struct tridentfb_par *)fb_info.par)->io_virt + r)
-#define readmmr(r)	readl(((struct tridentfb_par *)fb_info.par)->io_virt + r)
+static inline void writemmr(struct tridentfb_par *par, u16 r, u32 v)
+{
+	fb_writel(v, par->io_virt + r);
+}
+
+static inline u32 readmmr(struct tridentfb_par *par, u16 r)
+{
+	return fb_readl(par->io_virt + r);
+}
 
 /*
  * Blade specific acceleration.
@@ -176,7 +182,7 @@ static struct accel_switch {
 
 #define ROP_S	0xCC
 
-static void blade_init_accel(int pitch, int bpp)
+static void blade_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 {
 	int v1 = (pitch >> 3) << 20;
 	int tmp = 0, v2;
@@ -196,33 +202,35 @@ static void blade_init_accel(int pitch, int bpp)
 		break;
 	}
 	v2 = v1 | (tmp << 29);
-	writemmr(0x21C0, v2);
-	writemmr(0x21C4, v2);
-	writemmr(0x21B8, v2);
-	writemmr(0x21BC, v2);
-	writemmr(0x21D0, v1);
-	writemmr(0x21D4, v1);
-	writemmr(0x21C8, v1);
-	writemmr(0x21CC, v1);
-	writemmr(0x216C, 0);
+	writemmr(par, 0x21C0, v2);
+	writemmr(par, 0x21C4, v2);
+	writemmr(par, 0x21B8, v2);
+	writemmr(par, 0x21BC, v2);
+	writemmr(par, 0x21D0, v1);
+	writemmr(par, 0x21D4, v1);
+	writemmr(par, 0x21C8, v1);
+	writemmr(par, 0x21CC, v1);
+	writemmr(par, 0x216C, 0);
 }
 
-static void blade_wait_engine(void)
+static void blade_wait_engine(struct tridentfb_par *par)
 {
-	while (readmmr(STA) & 0xFA800000) ;
+	while (readmmr(par, STA) & 0xFA800000) ;
 }
 
-static void blade_fill_rect(u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
+static void blade_fill_rect(struct tridentfb_par *par,
+			    u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
 {
-	writemmr(CLR, c);
-	writemmr(ROP, rop ? 0x66 : ROP_S);
-	writemmr(CMD, 0x20000000 | 1 << 19 | 1 << 4 | 2 << 2);
+	writemmr(par, CLR, c);
+	writemmr(par, ROP, rop ? 0x66 : ROP_S);
+	writemmr(par, CMD, 0x20000000 | 1 << 19 | 1 << 4 | 2 << 2);
 
-	writemmr(DR1, point(x, y));
-	writemmr(DR2, point(x + w - 1, y + h - 1));
+	writemmr(par, DR1, point(x, y));
+	writemmr(par, DR2, point(x + w - 1, y + h - 1));
 }
 
-static void blade_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
+static void blade_copy_rect(struct tridentfb_par *par,
+			    u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
 	u32 s1, s2, d1, d2;
 	int direction = 2;
@@ -234,13 +242,13 @@ static void blade_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 	if ((y1 > y2) || ((y1 == y2) && (x1 > x2)))
 		direction = 0;
 
-	writemmr(ROP, ROP_S);
-	writemmr(CMD, 0xE0000000 | 1 << 19 | 1 << 4 | 1 << 2 | direction);
+	writemmr(par, ROP, ROP_S);
+	writemmr(par, CMD, 0xE0000000 | 1 << 19 | 1 << 4 | 1 << 2 | direction);
 
-	writemmr(SR1, direction ? s2 : s1);
-	writemmr(SR2, direction ? s1 : s2);
-	writemmr(DR1, direction ? d2 : d1);
-	writemmr(DR2, direction ? d1 : d2);
+	writemmr(par, SR1, direction ? s2 : s1);
+	writemmr(par, SR2, direction ? s1 : s2);
+	writemmr(par, DR1, direction ? d2 : d1);
+	writemmr(par, DR2, direction ? d1 : d2);
 }
 
 static struct accel_switch accel_blade = {
@@ -257,7 +265,7 @@ static struct accel_switch accel_blade = {
 #define ROP_P 0xF0
 #define masked_point(x, y) ((y & 0xffff)<<16|(x & 0xffff))
 
-static void xp_init_accel(int pitch, int bpp)
+static void xp_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 {
 	int tmp = 0, v1;
 	unsigned char x = 0;
@@ -293,7 +301,7 @@ static void xp_init_accel(int pitch, int bpp)
 		break;
 	}
 
-	t_outb(x, 0x2125);
+	t_outb(par, x, 0x2125);
 
 	eng_oper = x | 0x40;
 
@@ -313,12 +321,12 @@ static void xp_init_accel(int pitch, int bpp)
 
 	v1 = pitch << tmp;
 
-	writemmr(0x2154, v1);
-	writemmr(0x2150, v1);
-	t_outb(3, 0x2126);
+	writemmr(par, 0x2154, v1);
+	writemmr(par, 0x2150, v1);
+	t_outb(par, 3, 0x2126);
 }
 
-static void xp_wait_engine(void)
+static void xp_wait_engine(struct tridentfb_par *par)
 {
 	int busy;
 	int count, timeout;
@@ -326,7 +334,7 @@ static void xp_wait_engine(void)
 	count = 0;
 	timeout = 0;
 	for (;;) {
-		busy = t_inb(STA) & 0x80;
+		busy = t_inb(par, STA) & 0x80;
 		if (busy != 0x80)
 			return;
 		count++;
@@ -336,25 +344,27 @@ static void xp_wait_engine(void)
 			timeout++;
 			if (timeout == 8) {
 				/* Reset engine */
-				t_outb(0x00, 0x2120);
+				t_outb(par, 0x00, 0x2120);
 				return;
 			}
 		}
 	}
 }
 
-static void xp_fill_rect(u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
+static void xp_fill_rect(struct tridentfb_par *par,
+			 u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
 {
-	writemmr(0x2127, ROP_P);
-	writemmr(0x2158, c);
-	writemmr(0x2128, 0x4000);
-	writemmr(0x2140, masked_point(h, w));
-	writemmr(0x2138, masked_point(y, x));
-	t_outb(0x01, 0x2124);
-	t_outb(eng_oper, 0x2125);
+	writemmr(par, 0x2127, ROP_P);
+	writemmr(par, 0x2158, c);
+	writemmr(par, 0x2128, 0x4000);
+	writemmr(par, 0x2140, masked_point(h, w));
+	writemmr(par, 0x2138, masked_point(y, x));
+	t_outb(par, 0x01, 0x2124);
+	t_outb(par, eng_oper, 0x2125);
 }
 
-static void xp_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
+static void xp_copy_rect(struct tridentfb_par *par,
+			 u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
 	int direction;
 	u32 x1_tmp, x2_tmp, y1_tmp, y2_tmp;
@@ -379,12 +389,12 @@ static void xp_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 		y2_tmp = y2;
 	}
 
-	writemmr(0x2128, direction);
-	t_outb(ROP_S, 0x2127);
-	writemmr(0x213C, masked_point(y1_tmp, x1_tmp));
-	writemmr(0x2138, masked_point(y2_tmp, x2_tmp));
-	writemmr(0x2140, masked_point(h, w));
-	t_outb(0x01, 0x2124);
+	writemmr(par, 0x2128, direction);
+	t_outb(par, ROP_S, 0x2127);
+	writemmr(par, 0x213C, masked_point(y1_tmp, x1_tmp));
+	writemmr(par, 0x2138, masked_point(y2_tmp, x2_tmp));
+	writemmr(par, 0x2140, masked_point(h, w));
+	t_outb(par, 0x01, 0x2124);
 }
 
 static struct accel_switch accel_xp = {
@@ -397,7 +407,7 @@ static struct accel_switch accel_xp = {
 /*
  * Image specific acceleration functions
  */
-static void image_init_accel(int pitch, int bpp)
+static void image_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 {
 	int tmp = 0;
 	switch (bpp) {
@@ -415,40 +425,42 @@ static void image_init_accel(int pitch, int bpp)
 		tmp = 2;
 		break;
 	}
-	writemmr(0x2120, 0xF0000000);
-	writemmr(0x2120, 0x40000000 | tmp);
-	writemmr(0x2120, 0x80000000);
-	writemmr(0x2144, 0x00000000);
-	writemmr(0x2148, 0x00000000);
-	writemmr(0x2150, 0x00000000);
-	writemmr(0x2154, 0x00000000);
-	writemmr(0x2120, 0x60000000 | (pitch << 16) | pitch);
-	writemmr(0x216C, 0x00000000);
-	writemmr(0x2170, 0x00000000);
-	writemmr(0x217C, 0x00000000);
-	writemmr(0x2120, 0x10000000);
-	writemmr(0x2130, (2047 << 16) | 2047);
+	writemmr(par, 0x2120, 0xF0000000);
+	writemmr(par, 0x2120, 0x40000000 | tmp);
+	writemmr(par, 0x2120, 0x80000000);
+	writemmr(par, 0x2144, 0x00000000);
+	writemmr(par, 0x2148, 0x00000000);
+	writemmr(par, 0x2150, 0x00000000);
+	writemmr(par, 0x2154, 0x00000000);
+	writemmr(par, 0x2120, 0x60000000 | (pitch << 16) | pitch);
+	writemmr(par, 0x216C, 0x00000000);
+	writemmr(par, 0x2170, 0x00000000);
+	writemmr(par, 0x217C, 0x00000000);
+	writemmr(par, 0x2120, 0x10000000);
+	writemmr(par, 0x2130, (2047 << 16) | 2047);
 }
 
-static void image_wait_engine(void)
+static void image_wait_engine(struct tridentfb_par *par)
 {
-	while (readmmr(0x2164) & 0xF0000000) ;
+	while (readmmr(par, 0x2164) & 0xF0000000) ;
 }
 
-static void image_fill_rect(u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
+static void image_fill_rect(struct tridentfb_par *par,
+			    u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
 {
-	writemmr(0x2120, 0x80000000);
-	writemmr(0x2120, 0x90000000 | ROP_S);
+	writemmr(par, 0x2120, 0x80000000);
+	writemmr(par, 0x2120, 0x90000000 | ROP_S);
 
-	writemmr(0x2144, c);
+	writemmr(par, 0x2144, c);
 
-	writemmr(DR1, point(x, y));
-	writemmr(DR2, point(x + w - 1, y + h - 1));
+	writemmr(par, DR1, point(x, y));
+	writemmr(par, DR2, point(x + w - 1, y + h - 1));
 
-	writemmr(0x2124, 0x80000000 | 3 << 22 | 1 << 10 | 1 << 9);
+	writemmr(par, 0x2124, 0x80000000 | 3 << 22 | 1 << 10 | 1 << 9);
 }
 
-static void image_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
+static void image_copy_rect(struct tridentfb_par *par,
+			    u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
 	u32 s1, s2, d1, d2;
 	int direction = 2;
@@ -460,14 +472,15 @@ static void image_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 	if ((y1 > y2) || ((y1 == y2) && (x1 > x2)))
 		direction = 0;
 
-	writemmr(0x2120, 0x80000000);
-	writemmr(0x2120, 0x90000000 | ROP_S);
+	writemmr(par, 0x2120, 0x80000000);
+	writemmr(par, 0x2120, 0x90000000 | ROP_S);
 
-	writemmr(SR1, direction ? s2 : s1);
-	writemmr(SR2, direction ? s1 : s2);
-	writemmr(DR1, direction ? d2 : d1);
-	writemmr(DR2, direction ? d1 : d2);
-	writemmr(0x2124, 0x80000000 | 1 << 22 | 1 << 10 | 1 << 7 | direction);
+	writemmr(par, SR1, direction ? s2 : s1);
+	writemmr(par, SR2, direction ? s1 : s2);
+	writemmr(par, DR1, direction ? d2 : d1);
+	writemmr(par, DR2, direction ? d1 : d2);
+	writemmr(par, 0x2124,
+		 0x80000000 | 1 << 22 | 1 << 10 | 1 << 7 | direction);
 }
 
 static struct accel_switch accel_image = {
@@ -484,6 +497,7 @@ static struct accel_switch accel_image = {
 static void tridentfb_fillrect(struct fb_info *info,
 			       const struct fb_fillrect *fr)
 {
+	struct tridentfb_par *par = info->par;
 	int bpp = info->var.bits_per_pixel;
 	int col = 0;
 
@@ -502,14 +516,18 @@ static void tridentfb_fillrect(struct fb_info *info,
 		break;
 	}
 
-	acc->fill_rect(fr->dx, fr->dy, fr->width, fr->height, col, fr->rop);
-	acc->wait_engine();
+	acc->fill_rect(par, fr->dx, fr->dy, fr->width,
+		       fr->height, col, fr->rop);
+	acc->wait_engine(par);
 }
 static void tridentfb_copyarea(struct fb_info *info,
 			       const struct fb_copyarea *ca)
 {
-	acc->copy_rect(ca->sx, ca->sy, ca->dx, ca->dy, ca->width, ca->height);
-	acc->wait_engine();
+	struct tridentfb_par *par = info->par;
+
+	acc->copy_rect(par, ca->sx, ca->sy, ca->dx, ca->dy,
+		       ca->width, ca->height);
+	acc->wait_engine(par);
 }
 #else /* !CONFIG_FB_TRIDENT_ACCEL */
 #define tridentfb_fillrect cfb_fillrect
@@ -521,49 +539,51 @@ static void tridentfb_copyarea(struct fb_info *info,
  * Hardware access functions
  */
 
-static inline unsigned char read3X4(int reg)
+static inline unsigned char read3X4(struct tridentfb_par *par, int reg)
 {
-	struct tridentfb_par *par = (struct tridentfb_par *)fb_info.par;
 	writeb(reg, par->io_virt + CRT + 4);
 	return readb(par->io_virt + CRT + 5);
 }
 
-static inline void write3X4(int reg, unsigned char val)
+static inline void write3X4(struct tridentfb_par *par, int reg,
+			    unsigned char val)
 {
-	struct tridentfb_par *par = (struct tridentfb_par *)fb_info.par;
 	writeb(reg, par->io_virt + CRT + 4);
 	writeb(val, par->io_virt + CRT + 5);
 }
 
-static inline unsigned char read3C4(int reg)
+static inline unsigned char read3C4(struct tridentfb_par *par, int reg)
 {
-	t_outb(reg, 0x3C4);
-	return t_inb(0x3C5);
+	t_outb(par, reg, 0x3C4);
+	return t_inb(par, 0x3C5);
 }
 
-static inline void write3C4(int reg, unsigned char val)
+static inline void write3C4(struct tridentfb_par *par, int reg,
+			    unsigned char val)
 {
-	t_outb(reg, 0x3C4);
-	t_outb(val, 0x3C5);
+	t_outb(par, reg, 0x3C4);
+	t_outb(par, val, 0x3C5);
 }
 
-static inline unsigned char read3CE(int reg)
+static inline unsigned char read3CE(struct tridentfb_par *par, int reg)
 {
-	t_outb(reg, 0x3CE);
-	return t_inb(0x3CF);
+	t_outb(par, reg, 0x3CE);
+	return t_inb(par, 0x3CF);
 }
 
-static inline void writeAttr(int reg, unsigned char val)
+static inline void writeAttr(struct tridentfb_par *par, int reg,
+			     unsigned char val)
 {
-	readb(((struct tridentfb_par *)fb_info.par)->io_virt + CRT + 0x0A);	/* flip-flop to index */
-	t_outb(reg, 0x3C0);
-	t_outb(val, 0x3C0);
+	fb_readb(par->io_virt + CRT + 0x0A);	/* flip-flop to index */
+	t_outb(par, reg, 0x3C0);
+	t_outb(par, val, 0x3C0);
 }
 
-static inline void write3CE(int reg, unsigned char val)
+static inline void write3CE(struct tridentfb_par *par, int reg,
+			    unsigned char val)
 {
-	t_outb(reg, 0x3CE);
-	t_outb(val, 0x3CF);
+	t_outb(par, reg, 0x3CE);
+	t_outb(par, val, 0x3CF);
 }
 
 static void enable_mmio(void)
@@ -581,32 +601,35 @@ static void enable_mmio(void)
 	outb(inb(0x3D5) | 0x01, 0x3D5);
 }
 
-static void disable_mmio(void)
+static void disable_mmio(struct tridentfb_par *par)
 {
 	/* Goto New Mode */
-	t_outb(0x0B, 0x3C4);
-	t_inb(0x3C5);
+	t_outb(par, 0x0B, 0x3C4);
+	t_inb(par, 0x3C5);
 
 	/* Unprotect registers */
-	t_outb(NewMode1, 0x3C4);
-	t_outb(0x80, 0x3C5);
+	t_outb(par, NewMode1, 0x3C4);
+	t_outb(par, 0x80, 0x3C5);
 
 	/* Disable MMIO */
-	t_outb(PCIReg, 0x3D4);
-	t_outb(t_inb(0x3D5) & ~0x01, 0x3D5);
+	t_outb(par, PCIReg, 0x3D4);
+	t_outb(par, t_inb(par, 0x3D5) & ~0x01, 0x3D5);
 }
 
-#define crtc_unlock()	write3X4(CRTVSyncEnd, read3X4(CRTVSyncEnd) & 0x7F)
+static void crtc_unlock(struct tridentfb_par *par)
+{
+	write3X4(par, CRTVSyncEnd, read3X4(par, CRTVSyncEnd) & 0x7F);
+}
 
 /*  Return flat panel's maximum x resolution */
-static int __devinit get_nativex(void)
+static int __devinit get_nativex(struct tridentfb_par *par)
 {
 	int x, y, tmp;
 
 	if (nativex)
 		return nativex;
 
-	tmp = (read3CE(VertStretch) >> 4) & 3;
+	tmp = (read3CE(par, VertStretch) >> 4) & 3;
 
 	switch (tmp) {
 	case 0:
@@ -632,44 +655,45 @@ static int __devinit get_nativex(void)
 }
 
 /* Set pitch */
-static void set_lwidth(int width)
+static void set_lwidth(struct tridentfb_par *par, int width)
 {
-	write3X4(Offset, width & 0xFF);
-	write3X4(AddColReg,
-		 (read3X4(AddColReg) & 0xCF) | ((width & 0x300) >> 4));
+	write3X4(par, Offset, width & 0xFF);
+	write3X4(par, AddColReg,
+		 (read3X4(par, AddColReg) & 0xCF) | ((width & 0x300) >> 4));
 }
 
 /* For resolutions smaller than FP resolution stretch */
-static void screen_stretch(void)
+static void screen_stretch(struct tridentfb_par *par)
 {
 	if (chip_id != CYBERBLADEXPAi1)
-		write3CE(BiosReg, 0);
+		write3CE(par, BiosReg, 0);
 	else
-		write3CE(BiosReg, 8);
-	write3CE(VertStretch, (read3CE(VertStretch) & 0x7C) | 1);
-	write3CE(HorStretch, (read3CE(HorStretch) & 0x7C) | 1);
+		write3CE(par, BiosReg, 8);
+	write3CE(par, VertStretch, (read3CE(par, VertStretch) & 0x7C) | 1);
+	write3CE(par, HorStretch, (read3CE(par, HorStretch) & 0x7C) | 1);
 }
 
 /* For resolutions smaller than FP resolution center */
-static void screen_center(void)
+static void screen_center(struct tridentfb_par *par)
 {
-	write3CE(VertStretch, (read3CE(VertStretch) & 0x7C) | 0x80);
-	write3CE(HorStretch, (read3CE(HorStretch) & 0x7C) | 0x80);
+	write3CE(par, VertStretch, (read3CE(par, VertStretch) & 0x7C) | 0x80);
+	write3CE(par, HorStretch, (read3CE(par, HorStretch) & 0x7C) | 0x80);
 }
 
 /* Address of first shown pixel in display memory */
-static void set_screen_start(int base)
+static void set_screen_start(struct tridentfb_par *par, int base)
 {
-	write3X4(StartAddrLow, base & 0xFF);
-	write3X4(StartAddrHigh, (base & 0xFF00) >> 8);
-	write3X4(CRTCModuleTest,
-		 (read3X4(CRTCModuleTest) & 0xDF) | ((base & 0x10000) >> 11));
-	write3X4(CRTHiOrd,
-		 (read3X4(CRTHiOrd) & 0xF8) | ((base & 0xE0000) >> 17));
+	u8 tmp;
+	write3X4(par, StartAddrLow, base & 0xFF);
+	write3X4(par, StartAddrHigh, (base & 0xFF00) >> 8);
+	tmp = read3X4(par, CRTCModuleTest) & 0xDF;
+	write3X4(par, CRTCModuleTest, tmp | ((base & 0x10000) >> 11));
+	tmp = read3X4(par, CRTHiOrd) & 0xF8;
+	write3X4(par, CRTHiOrd, tmp | ((base & 0xE0000) >> 17));
 }
 
 /* Set dotclock frequency */
-static void set_vclk(unsigned long freq)
+static void set_vclk(struct tridentfb_par *par, unsigned long freq)
 {
 	int m, n, k;
 	unsigned long f, fi, d, di;
@@ -690,8 +714,8 @@ static void set_vclk(unsigned long freq)
 					break;
 			}
 	if (chip3D) {
-		write3C4(ClockHigh, hi);
-		write3C4(ClockLow, lo);
+		write3C4(par, ClockHigh, hi);
+		write3C4(par, ClockLow, lo);
 	} else {
 		outb(lo, 0x43C8);
 		outb(hi, 0x43C9);
@@ -700,9 +724,9 @@ static void set_vclk(unsigned long freq)
 }
 
 /* Set number of lines for flat panels*/
-static void set_number_of_lines(int lines)
+static void set_number_of_lines(struct tridentfb_par *par, int lines)
 {
-	int tmp = read3CE(CyberEnhance) & 0x8F;
+	int tmp = read3CE(par, CyberEnhance) & 0x8F;
 	if (lines > 1024)
 		tmp |= 0x50;
 	else if (lines > 768)
@@ -711,24 +735,24 @@ static void set_number_of_lines(int lines)
 		tmp |= 0x20;
 	else if (lines > 480)
 		tmp |= 0x10;
-	write3CE(CyberEnhance, tmp);
+	write3CE(par, CyberEnhance, tmp);
 }
 
 /*
  * If we see that FP is active we assume we have one.
  * Otherwise we have a CRT display.User can override.
  */
-static unsigned int __devinit get_displaytype(void)
+static unsigned int __devinit get_displaytype(struct tridentfb_par *par)
 {
 	if (fp)
 		return DISPLAY_FP;
 	if (crt || !chipcyber)
 		return DISPLAY_CRT;
-	return (read3CE(FPConfig) & 0x10) ? DISPLAY_FP : DISPLAY_CRT;
+	return (read3CE(par, FPConfig) & 0x10) ? DISPLAY_FP : DISPLAY_CRT;
 }
 
 /* Try detecting the video memory size */
-static unsigned int __devinit get_memsize(void)
+static unsigned int __devinit get_memsize(struct tridentfb_par *par)
 {
 	unsigned char tmp, tmp2;
 	unsigned int k;
@@ -742,7 +766,7 @@ static unsigned int __devinit get_memsize(void)
 			k = 2560 * Kb;
 			break;
 		default:
-			tmp = read3X4(SPR) & 0x0F;
+			tmp = read3X4(par, SPR) & 0x0F;
 			switch (tmp) {
 
 			case 0x01:
@@ -774,7 +798,7 @@ static unsigned int __devinit get_memsize(void)
 				break;
 			case 0x0E:		/* XP */
 
-				tmp2 = read3C4(0xC1);
+				tmp2 = read3C4(par, 0xC1);
 				switch (tmp2) {
 				case 0x00:
 					k = 20 * Mb;
@@ -862,6 +886,7 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 static int tridentfb_pan_display(struct fb_var_screeninfo *var,
 				 struct fb_info *info)
 {
+	struct tridentfb_par *par = info->par;
 	unsigned int offset;
 
 	debug("enter\n");
@@ -869,13 +894,20 @@ static int tridentfb_pan_display(struct fb_var_screeninfo *var,
 		* var->bits_per_pixel / 32;
 	info->var.xoffset = var->xoffset;
 	info->var.yoffset = var->yoffset;
-	set_screen_start(offset);
+	set_screen_start(par, offset);
 	debug("exit\n");
 	return 0;
 }
 
-#define shadowmode_on()  write3CE(CyberControl, read3CE(CyberControl) | 0x81)
-#define shadowmode_off() write3CE(CyberControl, read3CE(CyberControl) & 0x7E)
+static void shadowmode_on(struct tridentfb_par *par)
+{
+	write3CE(par, CyberControl, read3CE(par, CyberControl) | 0x81);
+}
+
+static void shadowmode_off(struct tridentfb_par *par)
+{
+	write3CE(par, CyberControl, read3CE(par, CyberControl) & 0x7E);
+}
 
 /* Set the hardware to the requested video mode */
 static int tridentfb_set_par(struct fb_info *info)
@@ -905,8 +937,8 @@ static int tridentfb_set_par(struct fb_info *info)
 	vblankstart = var->yres;
 	vblankend = vtotal + 2;
 
-	crtc_unlock();
-	write3CE(CyberControl, 8);
+	crtc_unlock(par);
+	write3CE(par, CyberControl, 8);
 
 	if (flatpanel && var->xres < nativex) {
 		/*
@@ -914,35 +946,36 @@ static int tridentfb_set_par(struct fb_info *info)
 		 * than requested resolution decide whether
 		 * we stretch or center
 		 */
-		t_outb(0xEB, 0x3C2);
+		t_outb(par, 0xEB, 0x3C2);
 
-		shadowmode_on();
+		shadowmode_on(par);
 
 		if (center)
-			screen_center();
+			screen_center(par);
 		else if (stretch)
-			screen_stretch();
+			screen_stretch(par);
 
 	} else {
-		t_outb(0x2B, 0x3C2);
-		write3CE(CyberControl, 8);
+		t_outb(par, 0x2B, 0x3C2);
+		write3CE(par, CyberControl, 8);
 	}
 
 	/* vertical timing values */
-	write3X4(CRTVTotal, vtotal & 0xFF);
-	write3X4(CRTVDispEnd, vdispend & 0xFF);
-	write3X4(CRTVSyncStart, vsyncstart & 0xFF);
-	write3X4(CRTVSyncEnd, (vsyncend & 0x0F));
-	write3X4(CRTVBlankStart, vblankstart & 0xFF);
-	write3X4(CRTVBlankEnd, 0 /* p->vblankend & 0xFF */ );
+	write3X4(par, CRTVTotal, vtotal & 0xFF);
+	write3X4(par, CRTVDispEnd, vdispend & 0xFF);
+	write3X4(par, CRTVSyncStart, vsyncstart & 0xFF);
+	write3X4(par, CRTVSyncEnd, (vsyncend & 0x0F));
+	write3X4(par, CRTVBlankStart, vblankstart & 0xFF);
+	write3X4(par, CRTVBlankEnd, 0 /* p->vblankend & 0xFF */);
 
 	/* horizontal timing values */
-	write3X4(CRTHTotal, htotal & 0xFF);
-	write3X4(CRTHDispEnd, hdispend & 0xFF);
-	write3X4(CRTHSyncStart, hsyncstart & 0xFF);
-	write3X4(CRTHSyncEnd, (hsyncend & 0x1F) | ((hblankend & 0x20) << 2));
-	write3X4(CRTHBlankStart, hblankstart & 0xFF);
-	write3X4(CRTHBlankEnd, 0 /* (p->hblankend & 0x1F) */ );
+	write3X4(par, CRTHTotal, htotal & 0xFF);
+	write3X4(par, CRTHDispEnd, hdispend & 0xFF);
+	write3X4(par, CRTHSyncStart, hsyncstart & 0xFF);
+	write3X4(par, CRTHSyncEnd,
+		 (hsyncend & 0x1F) | ((hblankend & 0x20) << 2));
+	write3X4(par, CRTHBlankStart, hblankstart & 0xFF);
+	write3X4(par, CRTHBlankEnd, 0 /* (p->hblankend & 0x1F) */);
 
 	/* higher bits of vertical timing values */
 	tmp = 0x10;
@@ -954,38 +987,40 @@ static int tridentfb_set_par(struct fb_info *info)
 	if (vtotal & 0x200) tmp |= 0x20;
 	if (vdispend & 0x200) tmp |= 0x40;
 	if (vsyncstart & 0x200) tmp |= 0x80;
-	write3X4(CRTOverflow, tmp);
+	write3X4(par, CRTOverflow, tmp);
 
-	tmp = read3X4(CRTHiOrd) | 0x08;	/* line compare bit 10 */
+	tmp = read3X4(par, CRTHiOrd) | 0x08;	/* line compare bit 10 */
 	if (vtotal & 0x400) tmp |= 0x80;
 	if (vblankstart & 0x400) tmp |= 0x40;
 	if (vsyncstart & 0x400) tmp |= 0x20;
 	if (vdispend & 0x400) tmp |= 0x10;
-	write3X4(CRTHiOrd, tmp);
+	write3X4(par, CRTHiOrd, tmp);
 
 	tmp = 0;
 	if (htotal & 0x800) tmp |= 0x800 >> 11;
 	if (hblankstart & 0x800) tmp |= 0x800 >> 7;
-	write3X4(HorizOverflow, tmp);
+	write3X4(par, HorizOverflow, tmp);
 
 	tmp = 0x40;
 	if (vblankstart & 0x200) tmp |= 0x20;
 //FIXME	if (info->var.vmode & FB_VMODE_DOUBLE) tmp |= 0x80;  /* double scan for 200 line modes */
-	write3X4(CRTMaxScanLine, tmp);
+	write3X4(par, CRTMaxScanLine, tmp);
 
-	write3X4(CRTLineCompare, 0xFF);
-	write3X4(CRTPRowScan, 0);
-	write3X4(CRTModeControl, 0xC3);
+	write3X4(par, CRTLineCompare, 0xFF);
+	write3X4(par, CRTPRowScan, 0);
+	write3X4(par, CRTModeControl, 0xC3);
 
-	write3X4(LinearAddReg, 0x20);	/* enable linear addressing */
+	write3X4(par, LinearAddReg, 0x20);	/* enable linear addressing */
 
 	tmp = (info->var.vmode & FB_VMODE_INTERLACED) ? 0x84 : 0x80;
-	write3X4(CRTCModuleTest, tmp);	/* enable access extended memory */
+	/* enable access extended memory */
+	write3X4(par, CRTCModuleTest, tmp);
 
-	write3X4(GraphEngReg, 0x80);	/* enable GE for text acceleration */
+	/* enable GE for text acceleration */
+	write3X4(par, GraphEngReg, 0x80);
 
 #ifdef CONFIG_FB_TRIDENT_ACCEL
-	acc->init_accel(info->var.xres, bpp);
+	acc->init_accel(par, info->var.xres, bpp);
 #endif
 
 	switch (bpp) {
@@ -1003,49 +1038,52 @@ static int tridentfb_set_par(struct fb_info *info)
 		break;
 	}
 
-	write3X4(PixelBusReg, tmp);
+	write3X4(par, PixelBusReg, tmp);
 
 	tmp = 0x10;
 	if (chipcyber)
 		tmp |= 0x20;
-	write3X4(DRAMControl, tmp);	/* both IO, linear enable */
+	write3X4(par, DRAMControl, tmp);	/* both IO, linear enable */
 
-	write3X4(InterfaceSel, read3X4(InterfaceSel) | 0x40);
-	write3X4(Performance, 0x92);
-	write3X4(PCIReg, 0x07);		/* MMIO & PCI read and write burst enable */
+	write3X4(par, InterfaceSel, read3X4(par, InterfaceSel) | 0x40);
+	write3X4(par, Performance, 0x92);
+	/* MMIO & PCI read and write burst enable */
+	write3X4(par, PCIReg, 0x07);
 
 	/* convert from picoseconds to kHz */
 	vclk = PICOS2KHZ(info->var.pixclock);
 	if (bpp == 32)
 		vclk *= 2;
-	set_vclk(vclk);
+	set_vclk(par, vclk);
 
-	write3C4(0, 3);
-	write3C4(1, 1);		/* set char clock 8 dots wide */
-	write3C4(2, 0x0F);	/* enable 4 maps because needed in chain4 mode */
-	write3C4(3, 0);
-	write3C4(4, 0x0E);	/* memory mode enable bitmaps ?? */
+	write3C4(par, 0, 3);
+	write3C4(par, 1, 1);		/* set char clock 8 dots wide */
+	/* enable 4 maps because needed in chain4 mode */
+	write3C4(par, 2, 0x0F);
+	write3C4(par, 3, 0);
+	write3C4(par, 4, 0x0E);	/* memory mode enable bitmaps ?? */
 
-	write3CE(MiscExtFunc, (bpp == 32) ? 0x1A : 0x12);	/* divide clock by 2 if 32bpp */
-							/* chain4 mode display and CPU path */
-	write3CE(0x5, 0x40);	/* no CGA compat, allow 256 col */
-	write3CE(0x6, 0x05);	/* graphics mode */
-	write3CE(0x7, 0x0F);	/* planes? */
+	/* divide clock by 2 if 32bpp chain4 mode display and CPU path */
+	write3CE(par, MiscExtFunc, (bpp == 32) ? 0x1A : 0x12);
+	write3CE(par, 0x5, 0x40);	/* no CGA compat, allow 256 col */
+	write3CE(par, 0x6, 0x05);	/* graphics mode */
+	write3CE(par, 0x7, 0x0F);	/* planes? */
 
 	if (chip_id == CYBERBLADEXPAi1) {
 		/* This fixes snow-effect in 32 bpp */
-		write3X4(CRTHSyncStart, 0x84);
+		write3X4(par, CRTHSyncStart, 0x84);
 	}
 
-	writeAttr(0x10, 0x41);	/* graphics mode and support 256 color modes */
-	writeAttr(0x12, 0x0F);	/* planes */
-	writeAttr(0x13, 0);	/* horizontal pel panning */
+	/* graphics mode and support 256 color modes */
+	writeAttr(par, 0x10, 0x41);
+	writeAttr(par, 0x12, 0x0F);	/* planes */
+	writeAttr(par, 0x13, 0);	/* horizontal pel panning */
 
 	/* colors */
 	for (tmp = 0; tmp < 0x10; tmp++)
-		writeAttr(tmp, tmp);
-	readb(par->io_virt + CRT + 0x0A);	/* flip-flop to index */
-	t_outb(0x20, 0x3C0);			/* enable attr */
+		writeAttr(par, tmp, tmp);
+	fb_readb(par->io_virt + CRT + 0x0A);	/* flip-flop to index */
+	t_outb(par, 0x20, 0x3C0);		/* enable attr */
 
 	switch (bpp) {
 	case 8:
@@ -1063,17 +1101,17 @@ static int tridentfb_set_par(struct fb_info *info)
 		break;
 	}
 
-	t_inb(0x3C8);
-	t_inb(0x3C6);
-	t_inb(0x3C6);
-	t_inb(0x3C6);
-	t_inb(0x3C6);
-	t_outb(tmp, 0x3C6);
-	t_inb(0x3C8);
+	t_inb(par, 0x3C8);
+	t_inb(par, 0x3C6);
+	t_inb(par, 0x3C6);
+	t_inb(par, 0x3C6);
+	t_inb(par, 0x3C6);
+	t_outb(par, tmp, 0x3C6);
+	t_inb(par, 0x3C8);
 
 	if (flatpanel)
-		set_number_of_lines(info->var.yres);
-	set_lwidth(info->var.xres * bpp / (4 * 16));
+		set_number_of_lines(par, info->var.yres);
+	set_lwidth(par, info->var.xres * bpp / (4 * 16));
 	info->fix.visual = (bpp == 8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
 	info->fix.line_length = info->var.xres * (bpp >> 3);
 	info->cmap.len = (bpp == 8) ? 256 : 16;
@@ -1087,17 +1125,18 @@ static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 			       struct fb_info *info)
 {
 	int bpp = info->var.bits_per_pixel;
+	struct tridentfb_par *par = info->par;
 
 	if (regno >= info->cmap.len)
 		return 1;
 
 	if (bpp == 8) {
-		t_outb(0xFF, 0x3C6);
-		t_outb(regno, 0x3C8);
+		t_outb(par, 0xFF, 0x3C6);
+		t_outb(par, regno, 0x3C8);
 
-		t_outb(red >> 10, 0x3C9);
-		t_outb(green >> 10, 0x3C9);
-		t_outb(blue >> 10, 0x3C9);
+		t_outb(par, red >> 10, 0x3C9);
+		t_outb(par, green >> 10, 0x3C9);
+		t_outb(par, blue >> 10, 0x3C9);
 
 	} else if (regno < 16) {
 		if (bpp == 16) {	/* RGB 565 */
@@ -1123,13 +1162,14 @@ static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 static int tridentfb_blank(int blank_mode, struct fb_info *info)
 {
 	unsigned char PMCont, DPMSCont;
+	struct tridentfb_par *par = info->par;
 
 	debug("enter\n");
 	if (flatpanel)
 		return 0;
-	t_outb(0x04, 0x83C8); /* Read DPMS Control */
-	PMCont = t_inb(0x83C6) & 0xFC;
-	DPMSCont = read3CE(PowerStatus) & 0xFC;
+	t_outb(par, 0x04, 0x83C8); /* Read DPMS Control */
+	PMCont = t_inb(par, 0x83C6) & 0xFC;
+	DPMSCont = read3CE(par, PowerStatus) & 0xFC;
 	switch (blank_mode) {
 	case FB_BLANK_UNBLANK:
 		/* Screen: On, HSync: On, VSync: On */
@@ -1155,9 +1195,9 @@ static int tridentfb_blank(int blank_mode, struct fb_info *info)
 		break;
 	}
 
-	write3CE(PowerStatus, DPMSCont);
-	t_outb(4, 0x83C8);
-	t_outb(PMCont, 0x83C6);
+	write3CE(par, PowerStatus, DPMSCont);
+	t_outb(par, 4, 0x83C8);
+	t_outb(par, PMCont, 0x83C6);
 
 	debug("exit\n");
 
@@ -1265,11 +1305,11 @@ static int __devinit trident_pci_probe(struct pci_dev * dev,
 
 	/* setup framebuffer memory */
 	tridentfb_fix.smem_start = pci_resource_start(dev, 0);
-	tridentfb_fix.smem_len = get_memsize();
+	tridentfb_fix.smem_len = get_memsize(&default_par);
 
 	if (!request_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len, "tridentfb")) {
 		debug("request_mem_region failed!\n");
-		disable_mmio();
+		disable_mmio(fb_info.par);
 		err = -1;
 		goto out_unmap1;
 	}
@@ -1284,10 +1324,10 @@ static int __devinit trident_pci_probe(struct pci_dev * dev,
 	}
 
 	output("%s board found\n", pci_name(dev));
-	displaytype = get_displaytype();
+	displaytype = get_displaytype(&default_par);
 
 	if (flatpanel)
-		nativex = get_nativex();
+		nativex = get_nativex(&default_par);
 
 	fb_info.fix = tridentfb_fix;
 	fb_info.fbops = &tridentfb_ops;
@@ -1330,7 +1370,7 @@ out_unmap2:
 	if (fb_info.screen_base)
 		iounmap(fb_info.screen_base);
 	release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len);
-	disable_mmio();
+	disable_mmio(fb_info.par);
 out_unmap1:
 	if (default_par.io_virt)
 		iounmap(default_par.io_virt);
-- 
GitLab


From e09ed099d0169ac3a22b17cfeece0fa54a9e43eb Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:51 -0700
Subject: [PATCH 358/853] tridentfb: convert fb_info into allocated one

This patch converts the fb_info structure from a global variable into a
dynamically allocated one.

The global default_par is moved into a function-local variable.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 82 ++++++++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 3e8a1ef892c..cb37e10734b 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -33,10 +33,7 @@ struct tridentfb_par {
 static unsigned char eng_oper;	/* engine operation... */
 static struct fb_ops tridentfb_ops;
 
-static struct tridentfb_par default_par;
-
 /* FIXME:kmalloc these 3 instead */
-static struct fb_info fb_info;
 static u32 pseudo_pal[16];
 
 static struct fb_var_screeninfo default_var;
@@ -1217,16 +1214,23 @@ static struct fb_ops tridentfb_ops = {
 	.fb_imageblit = cfb_imageblit,
 };
 
-static int __devinit trident_pci_probe(struct pci_dev * dev,
-				       const struct pci_device_id * id)
+static int __devinit trident_pci_probe(struct pci_dev *dev,
+				       const struct pci_device_id *id)
 {
 	int err;
 	unsigned char revision;
+	struct fb_info *info;
+	struct tridentfb_par *default_par;
 
 	err = pci_enable_device(dev);
 	if (err)
 		return err;
 
+	info = framebuffer_alloc(sizeof(struct tridentfb_par), &dev->dev);
+	if (!info)
+		return -ENOMEM;
+	default_par = info->par;
+
 	chip_id = id->device;
 
 	if (chip_id == CYBERBLADEi1)
@@ -1282,8 +1286,6 @@ static int __devinit trident_pci_probe(struct pci_dev * dev,
 	/* acceleration is on by default for 3D chips */
 	defaultaccel = chip3D && !noaccel;
 
-	fb_info.par = &default_par;
-
 	/* setup MMIO region */
 	tridentfb_fix.mmio_start = pci_resource_start(dev, 1);
 	tridentfb_fix.mmio_len = chip3D ? 0x20000 : 0x10000;
@@ -1293,9 +1295,10 @@ static int __devinit trident_pci_probe(struct pci_dev * dev,
 		return -1;
 	}
 
-	default_par.io_virt = ioremap_nocache(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len);
+	default_par->io_virt = ioremap_nocache(tridentfb_fix.mmio_start,
+					       tridentfb_fix.mmio_len);
 
-	if (!default_par.io_virt) {
+	if (!default_par->io_virt) {
 		debug("ioremap failed\n");
 		err = -1;
 		goto out_unmap1;
@@ -1305,46 +1308,46 @@ static int __devinit trident_pci_probe(struct pci_dev * dev,
 
 	/* setup framebuffer memory */
 	tridentfb_fix.smem_start = pci_resource_start(dev, 0);
-	tridentfb_fix.smem_len = get_memsize(&default_par);
+	tridentfb_fix.smem_len = get_memsize(default_par);
 
 	if (!request_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len, "tridentfb")) {
 		debug("request_mem_region failed!\n");
-		disable_mmio(fb_info.par);
+		disable_mmio(info->par);
 		err = -1;
 		goto out_unmap1;
 	}
 
-	fb_info.screen_base = ioremap_nocache(tridentfb_fix.smem_start,
-					      tridentfb_fix.smem_len);
+	info->screen_base = ioremap_nocache(tridentfb_fix.smem_start,
+					    tridentfb_fix.smem_len);
 
-	if (!fb_info.screen_base) {
+	if (!info->screen_base) {
 		debug("ioremap failed\n");
 		err = -1;
 		goto out_unmap2;
 	}
 
 	output("%s board found\n", pci_name(dev));
-	displaytype = get_displaytype(&default_par);
+	displaytype = get_displaytype(default_par);
 
 	if (flatpanel)
-		nativex = get_nativex(&default_par);
+		nativex = get_nativex(default_par);
 
-	fb_info.fix = tridentfb_fix;
-	fb_info.fbops = &tridentfb_ops;
+	info->fix = tridentfb_fix;
+	info->fbops = &tridentfb_ops;
 
 
-	fb_info.flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
+	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
 #ifdef CONFIG_FB_TRIDENT_ACCEL
-	fb_info.flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
+	info->flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
 #endif
-	fb_info.pseudo_palette = pseudo_pal;
+	info->pseudo_palette = pseudo_pal;
 
-	if (!fb_find_mode(&default_var, &fb_info,
+	if (!fb_find_mode(&default_var, info,
 			  mode_option, NULL, 0, NULL, bpp)) {
 		err = -EINVAL;
 		goto out_unmap2;
 	}
-	err = fb_alloc_cmap(&fb_info.cmap, 256, 0);
+	err = fb_alloc_cmap(&info->cmap, 256, 0);
 	if (err < 0)
 		goto out_unmap2;
 
@@ -1353,39 +1356,46 @@ static int __devinit trident_pci_probe(struct pci_dev * dev,
 	else
 		default_var.accel_flags &= ~FB_ACCELF_TEXT;
 	default_var.activate |= FB_ACTIVATE_NOW;
-	fb_info.var = default_var;
-	fb_info.device = &dev->dev;
-	if (register_framebuffer(&fb_info) < 0) {
+	info->var = default_var;
+	info->device = &dev->dev;
+	if (register_framebuffer(info) < 0) {
 		printk(KERN_ERR "tridentfb: could not register Trident framebuffer\n");
-		fb_dealloc_cmap(&fb_info.cmap);
+		fb_dealloc_cmap(&info->cmap);
 		err = -EINVAL;
 		goto out_unmap2;
 	}
 	output("fb%d: %s frame buffer device %dx%d-%dbpp\n",
-	   fb_info.node, fb_info.fix.id, default_var.xres,
+	   info->node, info->fix.id, default_var.xres,
 	   default_var.yres, default_var.bits_per_pixel);
+
+	pci_set_drvdata(dev, info);
 	return 0;
 
 out_unmap2:
-	if (fb_info.screen_base)
-		iounmap(fb_info.screen_base);
+	if (info->screen_base)
+		iounmap(info->screen_base);
 	release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len);
-	disable_mmio(fb_info.par);
+	disable_mmio(info->par);
 out_unmap1:
-	if (default_par.io_virt)
-		iounmap(default_par.io_virt);
+	if (default_par->io_virt)
+		iounmap(default_par->io_virt);
 	release_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len);
+	framebuffer_release(info);
 	return err;
 }
 
 static void __devexit trident_pci_remove(struct pci_dev *dev)
 {
-	struct tridentfb_par *par = (struct tridentfb_par*)fb_info.par;
-	unregister_framebuffer(&fb_info);
+	struct fb_info *info = pci_get_drvdata(dev);
+	struct tridentfb_par *par = info->par;
+
+	unregister_framebuffer(info);
 	iounmap(par->io_virt);
-	iounmap(fb_info.screen_base);
+	iounmap(info->screen_base);
 	release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len);
 	release_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len);
+	pci_set_drvdata(dev, NULL);
+	framebuffer_release(info);
 }
 
 /* List of boards that we are trying to support */
-- 
GitLab


From ea8ee55c12f77cbbb6e067f91e0cd794baa692ab Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:51 -0700
Subject: [PATCH 359/853] tridentfb: move global pseudo palette into structure

This patch moves the pseudo palette into the tridentfb_par structure and
removes the global default_var.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index cb37e10734b..0f6e4054c99 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -28,16 +28,12 @@
 
 struct tridentfb_par {
 	void __iomem *io_virt;	/* iospace virtual memory address */
+	u32 pseudo_pal[16];
 };
 
 static unsigned char eng_oper;	/* engine operation... */
 static struct fb_ops tridentfb_ops;
 
-/* FIXME:kmalloc these 3 instead */
-static u32 pseudo_pal[16];
-
-static struct fb_var_screeninfo default_var;
-
 static struct fb_fix_screeninfo tridentfb_fix = {
 	.id = "Trident",
 	.type = FB_TYPE_PACKED_PIXELS,
@@ -1340,9 +1336,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 #ifdef CONFIG_FB_TRIDENT_ACCEL
 	info->flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
 #endif
-	info->pseudo_palette = pseudo_pal;
-
-	if (!fb_find_mode(&default_var, info,
+	if (!fb_find_mode(&info->var, info,
 			  mode_option, NULL, 0, NULL, bpp)) {
 		err = -EINVAL;
 		goto out_unmap2;
@@ -1352,11 +1346,10 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		goto out_unmap2;
 
 	if (defaultaccel && acc)
-		default_var.accel_flags |= FB_ACCELF_TEXT;
+		info->var.accel_flags |= FB_ACCELF_TEXT;
 	else
-		default_var.accel_flags &= ~FB_ACCELF_TEXT;
-	default_var.activate |= FB_ACTIVATE_NOW;
-	info->var = default_var;
+		info->var.accel_flags &= ~FB_ACCELF_TEXT;
+	info->var.activate |= FB_ACTIVATE_NOW;
 	info->device = &dev->dev;
 	if (register_framebuffer(info) < 0) {
 		printk(KERN_ERR "tridentfb: could not register Trident framebuffer\n");
@@ -1365,8 +1358,8 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		goto out_unmap2;
 	}
 	output("fb%d: %s frame buffer device %dx%d-%dbpp\n",
-	   info->node, info->fix.id, default_var.xres,
-	   default_var.yres, default_var.bits_per_pixel);
+	   info->node, info->fix.id, info->var.xres,
+	   info->var.yres, info->var.bits_per_pixel);
 
 	pci_set_drvdata(dev, info);
 	return 0;
-- 
GitLab


From 122e8ad3cbf172043ea93f2db8e107fa9f9b0192 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:52 -0700
Subject: [PATCH 360/853] tridentfb: move global chip_id into structure

This patch moves the chip_id into tridentfb_par structure and removes global
chip_id related constants.

It also bumps version of the driver to 0.7.9

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 0f6e4054c99..dfe52b424c9 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -24,11 +24,12 @@
 #include <linux/delay.h>
 #include <video/trident.h>
 
-#define VERSION		"0.7.8-NEWAPI"
+#define VERSION		"0.7.9-NEWAPI"
 
 struct tridentfb_par {
 	void __iomem *io_virt;	/* iospace virtual memory address */
 	u32 pseudo_pal[16];
+	int chip_id;
 };
 
 static unsigned char eng_oper;	/* engine operation... */
@@ -42,9 +43,6 @@ static struct fb_fix_screeninfo tridentfb_fix = {
 	.accel = FB_ACCEL_NONE,
 };
 
-static int chip_id;
-
-static int defaultaccel;
 static int displaytype;
 
 /* defaults which are normally overriden by user values */
@@ -79,9 +77,6 @@ module_param(nativex, int, 0);
 module_param(fp, int, 0);
 module_param(crt, int, 0);
 
-static int chip3D;
-static int chipcyber;
-
 static int is3Dchip(int id)
 {
 	return ((id == BLADE3D) || (id == CYBERBLADEE4) ||
@@ -658,7 +653,7 @@ static void set_lwidth(struct tridentfb_par *par, int width)
 /* For resolutions smaller than FP resolution stretch */
 static void screen_stretch(struct tridentfb_par *par)
 {
-	if (chip_id != CYBERBLADEXPAi1)
+	if (par->chip_id != CYBERBLADEXPAi1)
 		write3CE(par, BiosReg, 0);
 	else
 		write3CE(par, BiosReg, 8);
@@ -706,7 +701,7 @@ static void set_vclk(struct tridentfb_par *par, unsigned long freq)
 				if (fi > freq)
 					break;
 			}
-	if (chip3D) {
+	if (is3Dchip(par->chip_id)) {
 		write3C4(par, ClockHigh, hi);
 		write3C4(par, ClockLow, lo);
 	} else {
@@ -739,7 +734,7 @@ static unsigned int __devinit get_displaytype(struct tridentfb_par *par)
 {
 	if (fp)
 		return DISPLAY_FP;
-	if (crt || !chipcyber)
+	if (crt || !iscyber(par->chip_id))
 		return DISPLAY_CRT;
 	return (read3CE(par, FPConfig) & 0x10) ? DISPLAY_FP : DISPLAY_CRT;
 }
@@ -754,7 +749,7 @@ static unsigned int __devinit get_memsize(struct tridentfb_par *par)
 	if (memsize)
 		k = memsize * Kb;
 	else
-		switch (chip_id) {
+		switch (par->chip_id) {
 		case CYBER9525DVD:
 			k = 2560 * Kb;
 			break;
@@ -1034,7 +1029,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	write3X4(par, PixelBusReg, tmp);
 
 	tmp = 0x10;
-	if (chipcyber)
+	if (iscyber(par->chip_id))
 		tmp |= 0x20;
 	write3X4(par, DRAMControl, tmp);	/* both IO, linear enable */
 
@@ -1062,7 +1057,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	write3CE(par, 0x6, 0x05);	/* graphics mode */
 	write3CE(par, 0x7, 0x0F);	/* planes? */
 
-	if (chip_id == CYBERBLADEXPAi1) {
+	if (par->chip_id == CYBERBLADEXPAi1) {
 		/* This fixes snow-effect in 32 bpp */
 		write3X4(par, CRTHSyncStart, 0x84);
 	}
@@ -1217,6 +1212,9 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	unsigned char revision;
 	struct fb_info *info;
 	struct tridentfb_par *default_par;
+	int defaultaccel;
+	int chip3D;
+	int chip_id;
 
 	err = pci_enable_device(dev);
 	if (err)
@@ -1269,7 +1267,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	}
 
 	chip3D = is3Dchip(chip_id);
-	chipcyber = iscyber(chip_id);
 
 	if (is_xp(chip_id)) {
 		acc = &accel_xp;
@@ -1279,6 +1276,8 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		acc = &accel_image;
 	}
 
+	default_par->chip_id = chip_id;
+
 	/* acceleration is on by default for 3D chips */
 	defaultaccel = chip3D && !noaccel;
 
-- 
GitLab


From 6eed8e1ec8532a6cd10c8b27236bde023c52c56a Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:53 -0700
Subject: [PATCH 361/853] tridentfb: move global flat panel variable into
 structure

This patch moves flat panel indicator into tridentfb_par structure and removes
related global variables and macros.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 44 ++++++++++++++++++++-------------------
 include/video/trident.h   |  6 ------
 2 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index dfe52b424c9..604b10a590d 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -30,6 +30,7 @@ struct tridentfb_par {
 	void __iomem *io_virt;	/* iospace virtual memory address */
 	u32 pseudo_pal[16];
 	int chip_id;
+	int flatpanel;
 };
 
 static unsigned char eng_oper;	/* engine operation... */
@@ -43,24 +44,22 @@ static struct fb_fix_screeninfo tridentfb_fix = {
 	.accel = FB_ACCEL_NONE,
 };
 
-static int displaytype;
-
 /* defaults which are normally overriden by user values */
 
 /* video mode */
 static char *mode_option __devinitdata = "640x480";
-static int bpp = 8;
+static int bpp __devinitdata = 8;
 
-static int noaccel;
+static int noaccel __devinitdata;
 
 static int center;
 static int stretch;
 
-static int fp;
-static int crt;
+static int fp __devinitdata;
+static int crt __devinitdata;
 
-static int memsize;
-static int memdiff;
+static int memsize __devinitdata;
+static int memdiff __devinitdata;
 static int nativex;
 
 module_param(mode_option, charp, 0);
@@ -75,7 +74,9 @@ module_param(memsize, int, 0);
 module_param(memdiff, int, 0);
 module_param(nativex, int, 0);
 module_param(fp, int, 0);
+MODULE_PARM_DESC(fp, "Define if flatpanel is connected");
 module_param(crt, int, 0);
+MODULE_PARM_DESC(crt, "Define if CRT is connected");
 
 static int is3Dchip(int id)
 {
@@ -728,15 +729,15 @@ static void set_number_of_lines(struct tridentfb_par *par, int lines)
 
 /*
  * If we see that FP is active we assume we have one.
- * Otherwise we have a CRT display.User can override.
+ * Otherwise we have a CRT display. User can override.
  */
-static unsigned int __devinit get_displaytype(struct tridentfb_par *par)
+static int __devinit is_flatpanel(struct tridentfb_par *par)
 {
 	if (fp)
-		return DISPLAY_FP;
+		return 1;
 	if (crt || !iscyber(par->chip_id))
-		return DISPLAY_CRT;
-	return (read3CE(par, FPConfig) & 0x10) ? DISPLAY_FP : DISPLAY_CRT;
+		return 0;
+	return (read3CE(par, FPConfig) & 0x10) ? 1 : 0;
 }
 
 /* Try detecting the video memory size */
@@ -824,6 +825,7 @@ static unsigned int __devinit get_memsize(struct tridentfb_par *par)
 static int tridentfb_check_var(struct fb_var_screeninfo *var,
 			       struct fb_info *info)
 {
+	struct tridentfb_par *par = info->par;
 	int bpp = var->bits_per_pixel;
 	debug("enter\n");
 
@@ -831,7 +833,7 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	if (bpp == 24)
 		bpp = var->bits_per_pixel = 32;
 	/* check whether resolution fits on panel and in memory */
-	if (flatpanel && nativex && var->xres > nativex)
+	if (par->flatpanel && nativex && var->xres > nativex)
 		return -EINVAL;
 	if (var->xres * var->yres_virtual * bpp / 8 > info->fix.smem_len)
 		return -EINVAL;
@@ -928,7 +930,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	crtc_unlock(par);
 	write3CE(par, CyberControl, 8);
 
-	if (flatpanel && var->xres < nativex) {
+	if (par->flatpanel && var->xres < nativex) {
 		/*
 		 * on flat panels with native size larger
 		 * than requested resolution decide whether
@@ -1097,7 +1099,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	t_outb(par, tmp, 0x3C6);
 	t_inb(par, 0x3C8);
 
-	if (flatpanel)
+	if (par->flatpanel)
 		set_number_of_lines(par, info->var.yres);
 	set_lwidth(par, info->var.xres * bpp / (4 * 16));
 	info->fix.visual = (bpp == 8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
@@ -1153,7 +1155,7 @@ static int tridentfb_blank(int blank_mode, struct fb_info *info)
 	struct tridentfb_par *par = info->par;
 
 	debug("enter\n");
-	if (flatpanel)
+	if (par->flatpanel)
 		return 0;
 	t_outb(par, 0x04, 0x83C8); /* Read DPMS Control */
 	PMCont = t_inb(par, 0x83C6) & 0xFC;
@@ -1322,9 +1324,9 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	}
 
 	output("%s board found\n", pci_name(dev));
-	displaytype = get_displaytype(default_par);
+	default_par->flatpanel = is_flatpanel(default_par);
 
-	if (flatpanel)
+	if (default_par->flatpanel)
 		nativex = get_nativex(default_par);
 
 	info->fix = tridentfb_fix;
@@ -1441,9 +1443,9 @@ static int __init tridentfb_setup(char *options)
 		if (!strncmp(opt, "noaccel", 7))
 			noaccel = 1;
 		else if (!strncmp(opt, "fp", 2))
-			displaytype = DISPLAY_FP;
+			fp = 1;
 		else if (!strncmp(opt, "crt", 3))
-			displaytype = DISPLAY_CRT;
+			fp = 0;
 		else if (!strncmp(opt, "bpp=", 4))
 			bpp = simple_strtoul(opt + 4, NULL, 0);
 		else if (!strncmp(opt, "center", 6))
diff --git a/include/video/trident.h b/include/video/trident.h
index 200be255168..d3dd737a1ba 100644
--- a/include/video/trident.h
+++ b/include/video/trident.h
@@ -63,12 +63,6 @@
 #define LCD_CENTER	1
 #define LCD_BIOS	2
 
-/* display types */
-#define DISPLAY_CRT	0
-#define DISPLAY_FP	1
-
-#define flatpanel (displaytype == DISPLAY_FP)
-
 /* General Registers */
 #define SPR	0x1F		/* Software Programming Register (videoram) */
 
-- 
GitLab


From e0759a5fbba12e0f2c9149d85bea1ec7df0178fd Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:53 -0700
Subject: [PATCH 362/853] tridentfb: convert is_blade and is_xp macros into
 functions

This patch converts the is_blade() and is_xp() macros into local functions.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 19 +++++++++++++++++++
 include/video/trident.h   | 19 -------------------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 604b10a590d..e87f77093ec 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -78,6 +78,25 @@ MODULE_PARM_DESC(fp, "Define if flatpanel is connected");
 module_param(crt, int, 0);
 MODULE_PARM_DESC(crt, "Define if CRT is connected");
 
+static int is_blade(int id)
+{
+	return	(id == BLADE3D) ||
+		(id == CYBERBLADEE4) ||
+		(id == CYBERBLADEi7) ||
+		(id == CYBERBLADEi7D) ||
+		(id == CYBERBLADEi1) ||
+		(id == CYBERBLADEi1D) ||
+		(id == CYBERBLADEAi1) ||
+		(id == CYBERBLADEAi1D);
+}
+
+static int is_xp(int id)
+{
+	return	(id == CYBERBLADEXPAi1) ||
+		(id == CYBERBLADEXPm8) ||
+		(id == CYBERBLADEXPm16);
+}
+
 static int is3Dchip(int id)
 {
 	return ((id == BLADE3D) || (id == CYBERBLADEE4) ||
diff --git a/include/video/trident.h b/include/video/trident.h
index d3dd737a1ba..9c3670b2890 100644
--- a/include/video/trident.h
+++ b/include/video/trident.h
@@ -39,25 +39,6 @@
 #define CYBERBLADEXPm8  0x9910
 #define CYBERBLADEXPm16 0x9930
 
-/* acceleration families */
-#define IMAGE	0
-#define BLADE	1
-#define XP	2
-
-#define is_image(id)	
-#define is_xp(id)	((id == CYBERBLADEXPAi1) ||\
-			 (id == CYBERBLADEXPm8) ||\
-			 (id == CYBERBLADEXPm16)) 
-
-#define is_blade(id)	((id == BLADE3D) ||\
-			 (id == CYBERBLADEE4) ||\
-			 (id == CYBERBLADEi7) ||\
-			 (id == CYBERBLADEi7D) ||\
-			 (id == CYBERBLADEi1) ||\
-			 (id == CYBERBLADEi1D) ||\
-			 (id ==	CYBERBLADEAi1) ||\
-			 (id ==	CYBERBLADEAi1D))
-
 /* these defines are for 'lcd' variable */
 #define LCD_STRETCH	0
 #define LCD_CENTER	1
-- 
GitLab


From d9cad04bcde00411976402eda726199ac13b29ca Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:54 -0700
Subject: [PATCH 363/853] tridentfb: move global acceleration hooks into
 structure

This patch moves acceleration hooks into the tridentfb_par structure and
removes global hooks.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 63 +++++++++++++++------------------------
 1 file changed, 24 insertions(+), 39 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index e87f77093ec..d6796448c5a 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -31,6 +31,12 @@ struct tridentfb_par {
 	u32 pseudo_pal[16];
 	int chip_id;
 	int flatpanel;
+	void (*init_accel) (struct tridentfb_par *, int, int);
+	void (*wait_engine) (struct tridentfb_par *);
+	void (*fill_rect)
+		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
+	void (*copy_rect)
+		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
 };
 
 static unsigned char eng_oper;	/* engine operation... */
@@ -155,15 +161,6 @@ static inline u8 t_inb(struct tridentfb_par *p, u16 reg)
 	return fb_readb(p->io_virt + reg);
 }
 
-static struct accel_switch {
-	void (*init_accel) (struct tridentfb_par *, int, int);
-	void (*wait_engine) (struct tridentfb_par *);
-	void (*fill_rect)
-		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
-	void (*copy_rect)
-		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
-} *acc;
-
 static inline void writemmr(struct tridentfb_par *par, u16 r, u32 v)
 {
 	fb_writel(v, par->io_virt + r);
@@ -259,13 +256,6 @@ static void blade_copy_rect(struct tridentfb_par *par,
 	writemmr(par, DR2, direction ? d1 : d2);
 }
 
-static struct accel_switch accel_blade = {
-	blade_init_accel,
-	blade_wait_engine,
-	blade_fill_rect,
-	blade_copy_rect,
-};
-
 /*
  * BladeXP specific acceleration functions
  */
@@ -405,13 +395,6 @@ static void xp_copy_rect(struct tridentfb_par *par,
 	t_outb(par, 0x01, 0x2124);
 }
 
-static struct accel_switch accel_xp = {
-	xp_init_accel,
-	xp_wait_engine,
-	xp_fill_rect,
-	xp_copy_rect,
-};
-
 /*
  * Image specific acceleration functions
  */
@@ -491,13 +474,6 @@ static void image_copy_rect(struct tridentfb_par *par,
 		 0x80000000 | 1 << 22 | 1 << 10 | 1 << 7 | direction);
 }
 
-static struct accel_switch accel_image = {
-	image_init_accel,
-	image_wait_engine,
-	image_fill_rect,
-	image_copy_rect,
-};
-
 /*
  * Accel functions called by the upper layers
  */
@@ -524,18 +500,18 @@ static void tridentfb_fillrect(struct fb_info *info,
 		break;
 	}
 
-	acc->fill_rect(par, fr->dx, fr->dy, fr->width,
+	par->fill_rect(par, fr->dx, fr->dy, fr->width,
 		       fr->height, col, fr->rop);
-	acc->wait_engine(par);
+	par->wait_engine(par);
 }
 static void tridentfb_copyarea(struct fb_info *info,
 			       const struct fb_copyarea *ca)
 {
 	struct tridentfb_par *par = info->par;
 
-	acc->copy_rect(par, ca->sx, ca->sy, ca->dx, ca->dy,
+	par->copy_rect(par, ca->sx, ca->sy, ca->dx, ca->dy,
 		       ca->width, ca->height);
-	acc->wait_engine(par);
+	par->wait_engine(par);
 }
 #else /* !CONFIG_FB_TRIDENT_ACCEL */
 #define tridentfb_fillrect cfb_fillrect
@@ -1029,7 +1005,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	write3X4(par, GraphEngReg, 0x80);
 
 #ifdef CONFIG_FB_TRIDENT_ACCEL
-	acc->init_accel(par, info->var.xres, bpp);
+	par->init_accel(par, info->var.xres, bpp);
 #endif
 
 	switch (bpp) {
@@ -1290,11 +1266,20 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	chip3D = is3Dchip(chip_id);
 
 	if (is_xp(chip_id)) {
-		acc = &accel_xp;
+		default_par->init_accel = xp_init_accel;
+		default_par->wait_engine = xp_wait_engine;
+		default_par->fill_rect = xp_fill_rect;
+		default_par->copy_rect = xp_copy_rect;
 	} else if (is_blade(chip_id)) {
-		acc = &accel_blade;
+		default_par->init_accel = blade_init_accel;
+		default_par->wait_engine = blade_wait_engine;
+		default_par->fill_rect = blade_fill_rect;
+		default_par->copy_rect = blade_copy_rect;
 	} else {
-		acc = &accel_image;
+		default_par->init_accel = image_init_accel;
+		default_par->wait_engine = image_wait_engine;
+		default_par->fill_rect = image_fill_rect;
+		default_par->copy_rect = image_copy_rect;
 	}
 
 	default_par->chip_id = chip_id;
@@ -1365,7 +1350,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	if (err < 0)
 		goto out_unmap2;
 
-	if (defaultaccel && acc)
+	if (defaultaccel && default_par->init_accel)
 		info->var.accel_flags |= FB_ACCELF_TEXT;
 	else
 		info->var.accel_flags &= ~FB_ACCELF_TEXT;
-- 
GitLab


From 10172ed6dc4d40ff42bf5ce2dd2f65f401a93696 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:54 -0700
Subject: [PATCH 364/853] tridentfb: make use of functions and constants from
 the vga.h

Make use of functions and constants from the vga.h header to compact the code
and make it more readable.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 144 ++++++++++++++++----------------------
 include/video/trident.h   |  26 -------
 2 files changed, 61 insertions(+), 109 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index d6796448c5a..381e5853df6 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -22,6 +22,7 @@
 #include <linux/pci.h>
 
 #include <linux/delay.h>
+#include <video/vga.h>
 #include <video/trident.h>
 
 #define VERSION		"0.7.9-NEWAPI"
@@ -149,8 +150,6 @@ static int iscyber(int id)
 	}
 }
 
-#define CRT 0x3D0		/* CRTC registers offset for color display */
-
 static inline void t_outb(struct tridentfb_par *p, u8 val, u16 reg)
 {
 	fb_writeb(val, p->io_virt + reg);
@@ -525,60 +524,41 @@ static void tridentfb_copyarea(struct fb_info *info,
 
 static inline unsigned char read3X4(struct tridentfb_par *par, int reg)
 {
-	writeb(reg, par->io_virt + CRT + 4);
-	return readb(par->io_virt + CRT + 5);
+	return vga_mm_rcrt(par->io_virt, reg);
 }
 
 static inline void write3X4(struct tridentfb_par *par, int reg,
 			    unsigned char val)
 {
-	writeb(reg, par->io_virt + CRT + 4);
-	writeb(val, par->io_virt + CRT + 5);
-}
-
-static inline unsigned char read3C4(struct tridentfb_par *par, int reg)
-{
-	t_outb(par, reg, 0x3C4);
-	return t_inb(par, 0x3C5);
-}
-
-static inline void write3C4(struct tridentfb_par *par, int reg,
-			    unsigned char val)
-{
-	t_outb(par, reg, 0x3C4);
-	t_outb(par, val, 0x3C5);
+	vga_mm_wcrt(par->io_virt, reg, val);
 }
 
-static inline unsigned char read3CE(struct tridentfb_par *par, int reg)
+static inline unsigned char read3CE(struct tridentfb_par *par,
+				    unsigned char reg)
 {
-	t_outb(par, reg, 0x3CE);
-	return t_inb(par, 0x3CF);
+	return vga_mm_rgfx(par->io_virt, reg);
 }
 
 static inline void writeAttr(struct tridentfb_par *par, int reg,
 			     unsigned char val)
 {
-	fb_readb(par->io_virt + CRT + 0x0A);	/* flip-flop to index */
-	t_outb(par, reg, 0x3C0);
-	t_outb(par, val, 0x3C0);
+	fb_readb(par->io_virt + VGA_IS1_RC);	/* flip-flop to index */
+	vga_mm_wattr(par->io_virt, reg, val);
 }
 
 static inline void write3CE(struct tridentfb_par *par, int reg,
 			    unsigned char val)
 {
-	t_outb(par, reg, 0x3CE);
-	t_outb(par, val, 0x3CF);
+	vga_mm_wgfx(par->io_virt, reg, val);
 }
 
 static void enable_mmio(void)
 {
 	/* Goto New Mode */
-	outb(0x0B, 0x3C4);
-	inb(0x3C5);
+	vga_io_rseq(0x0B);
 
 	/* Unprotect registers */
-	outb(NewMode1, 0x3C4);
-	outb(0x80, 0x3C5);
+	vga_io_wseq(NewMode1, 0x80);
 
 	/* Enable MMIO */
 	outb(PCIReg, 0x3D4);
@@ -588,12 +568,10 @@ static void enable_mmio(void)
 static void disable_mmio(struct tridentfb_par *par)
 {
 	/* Goto New Mode */
-	t_outb(par, 0x0B, 0x3C4);
-	t_inb(par, 0x3C5);
+	vga_mm_rseq(par->io_virt, 0x0B);
 
 	/* Unprotect registers */
-	t_outb(par, NewMode1, 0x3C4);
-	t_outb(par, 0x80, 0x3C5);
+	vga_mm_wseq(par->io_virt, NewMode1, 0x80);
 
 	/* Disable MMIO */
 	t_outb(par, PCIReg, 0x3D4);
@@ -602,7 +580,8 @@ static void disable_mmio(struct tridentfb_par *par)
 
 static void crtc_unlock(struct tridentfb_par *par)
 {
-	write3X4(par, CRTVSyncEnd, read3X4(par, CRTVSyncEnd) & 0x7F);
+	write3X4(par, VGA_CRTC_V_SYNC_END,
+		 read3X4(par, VGA_CRTC_V_SYNC_END) & 0x7F);
 }
 
 /*  Return flat panel's maximum x resolution */
@@ -641,7 +620,7 @@ static int __devinit get_nativex(struct tridentfb_par *par)
 /* Set pitch */
 static void set_lwidth(struct tridentfb_par *par, int width)
 {
-	write3X4(par, Offset, width & 0xFF);
+	write3X4(par, VGA_CRTC_OFFSET, width & 0xFF);
 	write3X4(par, AddColReg,
 		 (read3X4(par, AddColReg) & 0xCF) | ((width & 0x300) >> 4));
 }
@@ -668,8 +647,8 @@ static void screen_center(struct tridentfb_par *par)
 static void set_screen_start(struct tridentfb_par *par, int base)
 {
 	u8 tmp;
-	write3X4(par, StartAddrLow, base & 0xFF);
-	write3X4(par, StartAddrHigh, (base & 0xFF00) >> 8);
+	write3X4(par, VGA_CRTC_START_LO, base & 0xFF);
+	write3X4(par, VGA_CRTC_START_HI, (base & 0xFF00) >> 8);
 	tmp = read3X4(par, CRTCModuleTest) & 0xDF;
 	write3X4(par, CRTCModuleTest, tmp | ((base & 0x10000) >> 11));
 	tmp = read3X4(par, CRTHiOrd) & 0xF8;
@@ -698,8 +677,8 @@ static void set_vclk(struct tridentfb_par *par, unsigned long freq)
 					break;
 			}
 	if (is3Dchip(par->chip_id)) {
-		write3C4(par, ClockHigh, hi);
-		write3C4(par, ClockLow, lo);
+		vga_mm_wseq(par->io_virt, ClockHigh, hi);
+		vga_mm_wseq(par->io_virt, ClockLow, lo);
 	} else {
 		outb(lo, 0x43C8);
 		outb(hi, 0x43C9);
@@ -782,7 +761,7 @@ static unsigned int __devinit get_memsize(struct tridentfb_par *par)
 				break;
 			case 0x0E:		/* XP */
 
-				tmp2 = read3C4(par, 0xC1);
+				tmp2 = vga_mm_rseq(par->io_virt, 0xC1);
 				switch (tmp2) {
 				case 0x00:
 					k = 20 * Mb;
@@ -931,7 +910,7 @@ static int tridentfb_set_par(struct fb_info *info)
 		 * than requested resolution decide whether
 		 * we stretch or center
 		 */
-		t_outb(par, 0xEB, 0x3C2);
+		t_outb(par, 0xEB, VGA_MIS_W);
 
 		shadowmode_on(par);
 
@@ -941,26 +920,26 @@ static int tridentfb_set_par(struct fb_info *info)
 			screen_stretch(par);
 
 	} else {
-		t_outb(par, 0x2B, 0x3C2);
+		t_outb(par, 0x2B, VGA_MIS_W);
 		write3CE(par, CyberControl, 8);
 	}
 
 	/* vertical timing values */
-	write3X4(par, CRTVTotal, vtotal & 0xFF);
-	write3X4(par, CRTVDispEnd, vdispend & 0xFF);
-	write3X4(par, CRTVSyncStart, vsyncstart & 0xFF);
-	write3X4(par, CRTVSyncEnd, (vsyncend & 0x0F));
-	write3X4(par, CRTVBlankStart, vblankstart & 0xFF);
-	write3X4(par, CRTVBlankEnd, 0 /* p->vblankend & 0xFF */);
+	write3X4(par, VGA_CRTC_V_TOTAL, vtotal & 0xFF);
+	write3X4(par, VGA_CRTC_V_DISP_END, vdispend & 0xFF);
+	write3X4(par, VGA_CRTC_V_SYNC_START, vsyncstart & 0xFF);
+	write3X4(par, VGA_CRTC_V_SYNC_END, (vsyncend & 0x0F));
+	write3X4(par, VGA_CRTC_V_BLANK_START, vblankstart & 0xFF);
+	write3X4(par, VGA_CRTC_V_BLANK_END, 0 /* p->vblankend & 0xFF */);
 
 	/* horizontal timing values */
-	write3X4(par, CRTHTotal, htotal & 0xFF);
-	write3X4(par, CRTHDispEnd, hdispend & 0xFF);
-	write3X4(par, CRTHSyncStart, hsyncstart & 0xFF);
-	write3X4(par, CRTHSyncEnd,
+	write3X4(par, VGA_CRTC_H_TOTAL, htotal & 0xFF);
+	write3X4(par, VGA_CRTC_H_DISP, hdispend & 0xFF);
+	write3X4(par, VGA_CRTC_H_SYNC_START, hsyncstart & 0xFF);
+	write3X4(par, VGA_CRTC_H_SYNC_END,
 		 (hsyncend & 0x1F) | ((hblankend & 0x20) << 2));
-	write3X4(par, CRTHBlankStart, hblankstart & 0xFF);
-	write3X4(par, CRTHBlankEnd, 0 /* (p->hblankend & 0x1F) */);
+	write3X4(par, VGA_CRTC_H_BLANK_START, hblankstart & 0xFF);
+	write3X4(par, VGA_CRTC_H_BLANK_END, 0 /* (p->hblankend & 0x1F) */);
 
 	/* higher bits of vertical timing values */
 	tmp = 0x10;
@@ -972,7 +951,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	if (vtotal & 0x200) tmp |= 0x20;
 	if (vdispend & 0x200) tmp |= 0x40;
 	if (vsyncstart & 0x200) tmp |= 0x80;
-	write3X4(par, CRTOverflow, tmp);
+	write3X4(par, VGA_CRTC_OVERFLOW, tmp);
 
 	tmp = read3X4(par, CRTHiOrd) | 0x08;	/* line compare bit 10 */
 	if (vtotal & 0x400) tmp |= 0x80;
@@ -989,11 +968,11 @@ static int tridentfb_set_par(struct fb_info *info)
 	tmp = 0x40;
 	if (vblankstart & 0x200) tmp |= 0x20;
 //FIXME	if (info->var.vmode & FB_VMODE_DOUBLE) tmp |= 0x80;  /* double scan for 200 line modes */
-	write3X4(par, CRTMaxScanLine, tmp);
+	write3X4(par, VGA_CRTC_MAX_SCAN, tmp);
 
-	write3X4(par, CRTLineCompare, 0xFF);
-	write3X4(par, CRTPRowScan, 0);
-	write3X4(par, CRTModeControl, 0xC3);
+	write3X4(par, VGA_CRTC_LINE_COMPARE, 0xFF);
+	write3X4(par, VGA_CRTC_PRESET_ROW, 0);
+	write3X4(par, VGA_CRTC_MODE, 0xC3);
 
 	write3X4(par, LinearAddReg, 0x20);	/* enable linear addressing */
 
@@ -1041,12 +1020,12 @@ static int tridentfb_set_par(struct fb_info *info)
 		vclk *= 2;
 	set_vclk(par, vclk);
 
-	write3C4(par, 0, 3);
-	write3C4(par, 1, 1);		/* set char clock 8 dots wide */
+	vga_mm_wseq(par->io_virt, 0, 3);
+	vga_mm_wseq(par->io_virt, 1, 1); /* set char clock 8 dots wide */
 	/* enable 4 maps because needed in chain4 mode */
-	write3C4(par, 2, 0x0F);
-	write3C4(par, 3, 0);
-	write3C4(par, 4, 0x0E);	/* memory mode enable bitmaps ?? */
+	vga_mm_wseq(par->io_virt, 2, 0x0F);
+	vga_mm_wseq(par->io_virt, 3, 0);
+	vga_mm_wseq(par->io_virt, 4, 0x0E); /* memory mode enable bitmaps ?? */
 
 	/* divide clock by 2 if 32bpp chain4 mode display and CPU path */
 	write3CE(par, MiscExtFunc, (bpp == 32) ? 0x1A : 0x12);
@@ -1056,7 +1035,7 @@ static int tridentfb_set_par(struct fb_info *info)
 
 	if (par->chip_id == CYBERBLADEXPAi1) {
 		/* This fixes snow-effect in 32 bpp */
-		write3X4(par, CRTHSyncStart, 0x84);
+		write3X4(par, VGA_CRTC_H_SYNC_START, 0x84);
 	}
 
 	/* graphics mode and support 256 color modes */
@@ -1067,8 +1046,8 @@ static int tridentfb_set_par(struct fb_info *info)
 	/* colors */
 	for (tmp = 0; tmp < 0x10; tmp++)
 		writeAttr(par, tmp, tmp);
-	fb_readb(par->io_virt + CRT + 0x0A);	/* flip-flop to index */
-	t_outb(par, 0x20, 0x3C0);		/* enable attr */
+	fb_readb(par->io_virt + VGA_IS1_RC);	/* flip-flop to index */
+	t_outb(par, 0x20, VGA_ATT_W);		/* enable attr */
 
 	switch (bpp) {
 	case 8:
@@ -1086,13 +1065,13 @@ static int tridentfb_set_par(struct fb_info *info)
 		break;
 	}
 
-	t_inb(par, 0x3C8);
-	t_inb(par, 0x3C6);
-	t_inb(par, 0x3C6);
-	t_inb(par, 0x3C6);
-	t_inb(par, 0x3C6);
-	t_outb(par, tmp, 0x3C6);
-	t_inb(par, 0x3C8);
+	t_inb(par, VGA_PEL_IW);
+	t_inb(par, VGA_PEL_MSK);
+	t_inb(par, VGA_PEL_MSK);
+	t_inb(par, VGA_PEL_MSK);
+	t_inb(par, VGA_PEL_MSK);
+	t_outb(par, tmp, VGA_PEL_MSK);
+	t_inb(par, VGA_PEL_IW);
 
 	if (par->flatpanel)
 		set_number_of_lines(par, info->var.yres);
@@ -1116,12 +1095,12 @@ static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 		return 1;
 
 	if (bpp == 8) {
-		t_outb(par, 0xFF, 0x3C6);
-		t_outb(par, regno, 0x3C8);
+		t_outb(par, 0xFF, VGA_PEL_MSK);
+		t_outb(par, regno, VGA_PEL_IW);
 
-		t_outb(par, red >> 10, 0x3C9);
-		t_outb(par, green >> 10, 0x3C9);
-		t_outb(par, blue >> 10, 0x3C9);
+		t_outb(par, red >> 10, VGA_PEL_D);
+		t_outb(par, green >> 10, VGA_PEL_D);
+		t_outb(par, blue >> 10, VGA_PEL_D);
 
 	} else if (regno < 16) {
 		if (bpp == 16) {	/* RGB 565 */
@@ -1232,8 +1211,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	/* If PCI id is 0x9660 then further detect chip type */
 
 	if (chip_id == TGUI9660) {
-		outb(RevisionID, 0x3C4);
-		revision = inb(0x3C5);
+		revision = vga_io_rseq(RevisionID);
 
 		switch (revision) {
 		case 0x22:
diff --git a/include/video/trident.h b/include/video/trident.h
index 9c3670b2890..51ec5a95d1a 100644
--- a/include/video/trident.h
+++ b/include/video/trident.h
@@ -63,33 +63,7 @@
 #define SKey 0x37
 #define SPKey 0x57
 
-/* 0x3x4 */
-#define CRTHTotal	0x00
-#define CRTHDispEnd	0x01
-#define CRTHBlankStart	0x02
-#define CRTHBlankEnd	0x03
-#define CRTHSyncStart	0x04
-#define CRTHSyncEnd	0x05
-
-#define CRTVTotal	0x06
-#define CRTVDispEnd	0x12
-#define CRTVBlankStart	0x15
-#define CRTVBlankEnd	0x16
-#define CRTVSyncStart	0x10
-#define CRTVSyncEnd	0x11
-
-#define CRTOverflow	0x07
-#define CRTPRowScan	0x08
-#define CRTMaxScanLine	0x09
-#define CRTModeControl	0x17
-#define CRTLineCompare	0x18
-
 /* 3x4 */
-#define StartAddrHigh 0x0C
-#define StartAddrLow 0x0D
-#define Offset 0x13
-#define Underline 0x14
-#define CRTCMode 0x17
 #define CRTCModuleTest 0x1E
 #define FIFOControl 0x20
 #define LinearAddReg 0x21
-- 
GitLab


From 7f762d23e607af786bba8ff4a18059f43950c0e8 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:55 -0700
Subject: [PATCH 365/853] tridentfb: fix timing calculations

Fix broken timing calculations. This patch helps with the following
problems:
 - no left part of screen visible (up to half of the screen)
 - monitor's frequencies are not the ones intended for selected modes
 - if mode with resolution y > 1024 is selected at least once then
   all modes with y < 1024 are "out of sync" (no display)

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 381e5853df6..af02af11353 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -886,20 +886,19 @@ static int tridentfb_set_par(struct fb_info *info)
 
 	debug("enter\n");
 	hdispend = var->xres / 8 - 1;
-	hsyncstart = (var->xres + var->right_margin) / 8;
-	hsyncend = var->hsync_len / 8;
-	htotal =
-		(var->xres + var->left_margin + var->right_margin +
-		 var->hsync_len) / 8 - 10;
-	hblankstart = hdispend + 1;
-	hblankend = htotal + 5;
+	hsyncstart = (var->xres + var->right_margin) / 8 - 1;
+	hsyncend = (var->xres + var->right_margin + var->hsync_len) / 8 - 1;
+	htotal = (var->xres + var->left_margin + var->right_margin +
+		  var->hsync_len) / 8 - 5;
+	hblankstart = hdispend + 2;
+	hblankend = htotal + 3;
 
 	vdispend = var->yres - 1;
 	vsyncstart = var->yres + var->lower_margin;
-	vsyncend = var->vsync_len;
-	vtotal = var->upper_margin + vsyncstart + vsyncend - 2;
-	vblankstart = var->yres;
-	vblankend = vtotal + 2;
+	vsyncend = vsyncstart + var->vsync_len;
+	vtotal = var->upper_margin + vsyncend - 2;
+	vblankstart = vdispend + 2;
+	vblankend = vtotal;
 
 	crtc_unlock(par);
 	write3CE(par, CyberControl, 8);
@@ -930,7 +929,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	write3X4(par, VGA_CRTC_V_SYNC_START, vsyncstart & 0xFF);
 	write3X4(par, VGA_CRTC_V_SYNC_END, (vsyncend & 0x0F));
 	write3X4(par, VGA_CRTC_V_BLANK_START, vblankstart & 0xFF);
-	write3X4(par, VGA_CRTC_V_BLANK_END, 0 /* p->vblankend & 0xFF */);
+	write3X4(par, VGA_CRTC_V_BLANK_END, vblankend & 0xFF);
 
 	/* horizontal timing values */
 	write3X4(par, VGA_CRTC_H_TOTAL, htotal & 0xFF);
@@ -939,7 +938,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	write3X4(par, VGA_CRTC_H_SYNC_END,
 		 (hsyncend & 0x1F) | ((hblankend & 0x20) << 2));
 	write3X4(par, VGA_CRTC_H_BLANK_START, hblankstart & 0xFF);
-	write3X4(par, VGA_CRTC_H_BLANK_END, 0 /* (p->hblankend & 0x1F) */);
+	write3X4(par, VGA_CRTC_H_BLANK_END, hblankend & 0x1F);
 
 	/* higher bits of vertical timing values */
 	tmp = 0x10;
@@ -953,16 +952,18 @@ static int tridentfb_set_par(struct fb_info *info)
 	if (vsyncstart & 0x200) tmp |= 0x80;
 	write3X4(par, VGA_CRTC_OVERFLOW, tmp);
 
-	tmp = read3X4(par, CRTHiOrd) | 0x08;	/* line compare bit 10 */
+	tmp = read3X4(par, CRTHiOrd) & 0x07;
+	tmp |= 0x08;	/* line compare bit 10 */
 	if (vtotal & 0x400) tmp |= 0x80;
 	if (vblankstart & 0x400) tmp |= 0x40;
 	if (vsyncstart & 0x400) tmp |= 0x20;
 	if (vdispend & 0x400) tmp |= 0x10;
 	write3X4(par, CRTHiOrd, tmp);
 
-	tmp = 0;
-	if (htotal & 0x800) tmp |= 0x800 >> 11;
-	if (hblankstart & 0x800) tmp |= 0x800 >> 7;
+	tmp = (htotal >> 8) & 0x01;
+	tmp |= (hdispend >> 7) & 0x02;
+	tmp |= (hsyncstart >> 5) & 0x08;
+	tmp |= (hblankstart >> 4) & 0x10;
 	write3X4(par, HorizOverflow, tmp);
 
 	tmp = 0x40;
-- 
GitLab


From c1724fecabfed504a4cfb87319ad3b9d3a8baa92 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:56 -0700
Subject: [PATCH 366/853] tridentfb: use mmio access for clock setting

Use the mmio outb function instead of direct one.  The mmio registers are
already mapped (in the probe function).

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index af02af11353..f3153a82f92 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -680,8 +680,8 @@ static void set_vclk(struct tridentfb_par *par, unsigned long freq)
 		vga_mm_wseq(par->io_virt, ClockHigh, hi);
 		vga_mm_wseq(par->io_virt, ClockLow, lo);
 	} else {
-		outb(lo, 0x43C8);
-		outb(hi, 0x43C9);
+		t_outb(par, lo, 0x43C8);
+		t_outb(par, hi, 0x43C9);
 	}
 	debug("VCLK = %X %X\n", hi, lo);
 }
-- 
GitLab


From 6bdf1035602abf0564d24a7447eea1c149c4bcb1 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:56 -0700
Subject: [PATCH 367/853] tridentfb: fix clock settings for older Trident 96XX
 chips

The Xorg code shows that Trident models 9660, 9680 and 9682 require a
different clock setting method.  Add the second clock setting method for older
models.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index f3153a82f92..8ee4261abf3 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -85,6 +85,11 @@ MODULE_PARM_DESC(fp, "Define if flatpanel is connected");
 module_param(crt, int, 0);
 MODULE_PARM_DESC(crt, "Define if CRT is connected");
 
+static int is_oldclock(int id)
+{
+	return (id == TGUI9660);
+}
+
 static int is_blade(int id)
 {
 	return	(id == BLADE3D) ||
@@ -659,23 +664,33 @@ static void set_screen_start(struct tridentfb_par *par, int base)
 static void set_vclk(struct tridentfb_par *par, unsigned long freq)
 {
 	int m, n, k;
-	unsigned long f, fi, d, di;
-	unsigned char lo = 0, hi = 0;
+	unsigned long fi, d, di;
+	unsigned char best_m = 0, best_n = 0, best_k = 0;
+	unsigned char hi, lo;
 
 	d = 20000;
-	for (k = 2; k >= 0; k--)
-		for (m = 0; m < 63; m++)
-			for (n = 0; n < 128; n++) {
+	for (k = 1; k >= 0; k--)
+		for (m = 0; m < 32; m++)
+			for (n = 0; n < 122; n++) {
 				fi = ((14318l * (n + 8)) / (m + 2)) >> k;
 				if ((di = abs(fi - freq)) < d) {
 					d = di;
-					f = fi;
-					lo = n;
-					hi = (k << 6) | m;
+					best_n = n;
+					best_m = m;
+					best_k = k;
 				}
 				if (fi > freq)
 					break;
 			}
+
+	if (is_oldclock(par->chip_id)) {
+		lo = best_n | (best_m << 7);
+		hi = (best_m >> 1) | (best_k << 4);
+	} else {
+		lo = best_n;
+		hi = best_m | (best_k << 6);
+	}
+
 	if (is3Dchip(par->chip_id)) {
 		vga_mm_wseq(par->io_virt, ClockHigh, hi);
 		vga_mm_wseq(par->io_virt, ClockLow, lo);
-- 
GitLab


From 3876ae8beb2c7c19e21279b9603b1244fcd744dd Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:57 -0700
Subject: [PATCH 368/853] tridentfb: improve probe function

Add missing release of allocated fb_info structure and move enable_mmio() to
fix error path.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 8ee4261abf3..e79788a778f 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -1287,6 +1287,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 
 	if (!request_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len, "tridentfb")) {
 		debug("request_region failed!\n");
+		framebuffer_release(info);
 		return -1;
 	}
 
@@ -1299,8 +1300,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		goto out_unmap1;
 	}
 
-	enable_mmio();
-
 	/* setup framebuffer memory */
 	tridentfb_fix.smem_start = pci_resource_start(dev, 0);
 	tridentfb_fix.smem_len = get_memsize(default_par);
@@ -1312,6 +1311,8 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		goto out_unmap1;
 	}
 
+	enable_mmio();
+
 	info->screen_base = ioremap_nocache(tridentfb_fix.smem_start,
 					    tridentfb_fix.smem_len);
 
-- 
GitLab


From 0e73a47f094a919e2edeaa88e840cd0400adc423 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:58 -0700
Subject: [PATCH 369/853] tridentfb: improved register values on TGUI 9680

Improved values for some registers after Xorg Trident driver.  The main
problem was that values set by BIOS have been ignored.

This patch completely removes random pixels ("snow") on the TGUI 9680 and
9440 (not supported yet by the driver).  It does not help with the "snow"
on 3DImage and Blade3D cards.

There is also a small improvement in timing calculations (hblank start and
vblank start)

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 29 +++++++++++++++++++++++------
 include/video/trident.h   |  1 +
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index e79788a778f..9668be881fe 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -87,7 +87,17 @@ MODULE_PARM_DESC(crt, "Define if CRT is connected");
 
 static int is_oldclock(int id)
 {
-	return (id == TGUI9660);
+	return	(id == TGUI9660) ||
+		(id == CYBER9320);
+}
+
+static int is_oldprotect(int id)
+{
+	return	(id == TGUI9660) ||
+		(id == PROVIDIA9685) ||
+		(id == CYBER9320) ||
+		(id == CYBER9382) ||
+		(id == CYBER9385);
 }
 
 static int is_blade(int id)
@@ -143,6 +153,7 @@ static int iscyber(int id)
 
 	case CYBER9320:
 	case TGUI9660:
+	case PROVIDIA9685:
 	case IMAGE975:
 	case IMAGE985:
 	case BLADE3D:
@@ -905,14 +916,14 @@ static int tridentfb_set_par(struct fb_info *info)
 	hsyncend = (var->xres + var->right_margin + var->hsync_len) / 8 - 1;
 	htotal = (var->xres + var->left_margin + var->right_margin +
 		  var->hsync_len) / 8 - 5;
-	hblankstart = hdispend + 2;
+	hblankstart = hdispend + 1;
 	hblankend = htotal + 3;
 
 	vdispend = var->yres - 1;
 	vsyncstart = var->yres + var->lower_margin;
 	vsyncend = vsyncstart + var->vsync_len;
 	vtotal = var->upper_margin + vsyncend - 2;
-	vblankstart = vdispend + 2;
+	vblankstart = vdispend + 1;
 	vblankend = vtotal;
 
 	crtc_unlock(par);
@@ -1020,15 +1031,18 @@ static int tridentfb_set_par(struct fb_info *info)
 
 	write3X4(par, PixelBusReg, tmp);
 
-	tmp = 0x10;
+	tmp = read3X4(par, DRAMControl);
+	if (!is_oldprotect(par->chip_id))
+		tmp |= 0x10;
 	if (iscyber(par->chip_id))
 		tmp |= 0x20;
 	write3X4(par, DRAMControl, tmp);	/* both IO, linear enable */
 
 	write3X4(par, InterfaceSel, read3X4(par, InterfaceSel) | 0x40);
-	write3X4(par, Performance, 0x92);
+	if (!is_xp(par->chip_id))
+		write3X4(par, Performance, read3X4(par, Performance) | 0x10);
 	/* MMIO & PCI read and write burst enable */
-	write3X4(par, PCIReg, 0x07);
+	write3X4(par, PCIReg, read3X4(par, PCIReg) | 0x06);
 
 	/* convert from picoseconds to kHz */
 	vclk = PICOS2KHZ(info->var.pixclock);
@@ -1230,6 +1244,9 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		revision = vga_io_rseq(RevisionID);
 
 		switch (revision) {
+		case 0x21:
+			chip_id = PROVIDIA9685;
+			break;
 		case 0x22:
 		case 0x23:
 			chip_id = CYBER9397;
diff --git a/include/video/trident.h b/include/video/trident.h
index 51ec5a95d1a..fa690b9fcc1 100644
--- a/include/video/trident.h
+++ b/include/video/trident.h
@@ -25,6 +25,7 @@
 #define CYBER9520	0x9520
 #define CYBER9525DVD	0x9525
 #define TGUI9660	0x9660
+#define PROVIDIA9685	0x9685
 #define IMAGE975	0x9750
 #define IMAGE985	0x9850
 #define BLADE3D		0x9880
-- 
GitLab


From a0d922562d56073f147a4de2983bee499dd2a10e Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:58 -0700
Subject: [PATCH 370/853] tridentfb: add TGUI 9440 support

Add support for TGUI 9440 chip.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 10 +++++++---
 include/video/trident.h   |  1 +
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 9668be881fe..9dedc1a5e42 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -87,13 +87,15 @@ MODULE_PARM_DESC(crt, "Define if CRT is connected");
 
 static int is_oldclock(int id)
 {
-	return	(id == TGUI9660) ||
+	return	(id == TGUI9440) ||
+		(id == TGUI9660) ||
 		(id == CYBER9320);
 }
 
 static int is_oldprotect(int id)
 {
-	return	(id == TGUI9660) ||
+	return	(id == TGUI9440) ||
+		(id == TGUI9660) ||
 		(id == PROVIDIA9685) ||
 		(id == CYBER9320) ||
 		(id == CYBER9382) ||
@@ -1042,7 +1044,8 @@ static int tridentfb_set_par(struct fb_info *info)
 	if (!is_xp(par->chip_id))
 		write3X4(par, Performance, read3X4(par, Performance) | 0x10);
 	/* MMIO & PCI read and write burst enable */
-	write3X4(par, PCIReg, read3X4(par, PCIReg) | 0x06);
+	if (par->chip_id != TGUI9440)
+		write3X4(par, PCIReg, read3X4(par, PCIReg) | 0x06);
 
 	/* convert from picoseconds to kHz */
 	vclk = PICOS2KHZ(info->var.pixclock);
@@ -1418,6 +1421,7 @@ static struct pci_device_id trident_devices[] = {
 	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEAi1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEAi1D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEE4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	TGUI9440, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_TRIDENT,	TGUI9660, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_TRIDENT,	IMAGE975, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_TRIDENT,	IMAGE985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
diff --git a/include/video/trident.h b/include/video/trident.h
index fa690b9fcc1..76d81b2198d 100644
--- a/include/video/trident.h
+++ b/include/video/trident.h
@@ -24,6 +24,7 @@
 #define CYBER9397DVD	0x939A
 #define CYBER9520	0x9520
 #define CYBER9525DVD	0x9525
+#define TGUI9440	0x9440
 #define TGUI9660	0x9660
 #define PROVIDIA9685	0x9685
 #define IMAGE975	0x9750
-- 
GitLab


From aa0aa8ab2f28d8985daa79ecab51970376e17157 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:30:59 -0700
Subject: [PATCH 371/853] tridentfb: fix uninitialized pseudo_palette

Initialize the pseudo_palette pointer properly.  This fixes crash when
16bpp or 32bpp mode is selected.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 9dedc1a5e42..6b3bb1e2073 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -1350,7 +1350,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 
 	info->fix = tridentfb_fix;
 	info->fbops = &tridentfb_ops;
-
+	info->pseudo_palette = default_par->pseudo_pal;
 
 	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
 #ifdef CONFIG_FB_TRIDENT_ACCEL
-- 
GitLab


From 74a933feaf13f705e6c798d87efe6a9d758b3ca0 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:00 -0700
Subject: [PATCH 372/853] tridentfb: improve check_var function

Do some additional checks (like pixelclock versus ramdac speed) to
eliminate modes which do not work.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 6b3bb1e2073..62701c5570f 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -829,6 +829,7 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 {
 	struct tridentfb_par *par = info->par;
 	int bpp = var->bits_per_pixel;
+	int ramdac = 230000; /* 230MHz for most 3D chips */
 	debug("enter\n");
 
 	/* check color depth */
@@ -837,6 +838,12 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	/* check whether resolution fits on panel and in memory */
 	if (par->flatpanel && nativex && var->xres > nativex)
 		return -EINVAL;
+	/* various resolution checks */
+	var->xres = (var->xres + 7) & ~0x7;
+	if (var->xres != var->xres_virtual)
+		var->xres_virtual = var->xres;
+	if (var->yres > var->yres_virtual)
+		var->yres_virtual = var->yres;
 	if (var->xres * var->yres_virtual * bpp / 8 > info->fix.smem_len)
 		return -EINVAL;
 
@@ -868,6 +875,33 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	default:
 		return -EINVAL;
 	}
+
+	if (is_xp(par->chip_id))
+		ramdac = 350000;
+
+	switch (par->chip_id) {
+	case TGUI9440:
+		ramdac = 90000;
+		break;
+	case CYBER9320:
+	case TGUI9660:
+		ramdac = 135000;
+		break;
+	case PROVIDIA9685:
+	case CYBER9388:
+	case CYBER9382:
+	case CYBER9385:
+		ramdac = 170000;
+		break;
+	}
+
+	/* The clock is doubled for 32 bpp */
+	if (bpp == 32)
+		ramdac /= 2;
+
+	if (PICOS2KHZ(var->pixclock) > ramdac)
+		return -EINVAL;
+
 	debug("exit\n");
 
 	return 0;
-- 
GitLab


From 65e93e038c8a6eb65b6907d6aed22a8ff1029d3a Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:00 -0700
Subject: [PATCH 373/853] tridentfb: preserve memory type settings

Do not overwrite bits which contain memory type settings.  It removes
noise pixels ("snow") on Blade3D and 3DImage chips.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 62701c5570f..26bc4d75d4f 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -1095,7 +1095,10 @@ static int tridentfb_set_par(struct fb_info *info)
 	vga_mm_wseq(par->io_virt, 4, 0x0E); /* memory mode enable bitmaps ?? */
 
 	/* divide clock by 2 if 32bpp chain4 mode display and CPU path */
-	write3CE(par, MiscExtFunc, (bpp == 32) ? 0x1A : 0x12);
+	tmp = read3CE(par, MiscExtFunc) & 0xF0;
+	if (bpp == 32)
+		tmp |= 8;
+	write3CE(par, MiscExtFunc, tmp | 0x12);
 	write3CE(par, 0x5, 0x40);	/* no CGA compat, allow 256 col */
 	write3CE(par, 0x6, 0x05);	/* graphics mode */
 	write3CE(par, 0x7, 0x0F);	/* planes? */
-- 
GitLab


From 54f019e54244fef0ad927ce5501927d9033492de Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:01 -0700
Subject: [PATCH 374/853] tridentfb: fix hi-color modes for TGUI 9440

The TGUI 9440 requires doubling clock for 16bpp (hi-color) modes.

The patch also moves back enable_mmio() call to the right position.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 26bc4d75d4f..ed1b32a1cef 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -835,6 +835,8 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	/* check color depth */
 	if (bpp == 24)
 		bpp = var->bits_per_pixel = 32;
+	if (par->chip_id == TGUI9440 && bpp == 32)
+		return -EINVAL;
 	/* check whether resolution fits on panel and in memory */
 	if (par->flatpanel && nativex && var->xres > nativex)
 		return -EINVAL;
@@ -881,7 +883,7 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 
 	switch (par->chip_id) {
 	case TGUI9440:
-		ramdac = 90000;
+		ramdac = (bpp >= 16) ? 45000 : 90000;
 		break;
 	case CYBER9320:
 	case TGUI9660:
@@ -1081,12 +1083,6 @@ static int tridentfb_set_par(struct fb_info *info)
 	if (par->chip_id != TGUI9440)
 		write3X4(par, PCIReg, read3X4(par, PCIReg) | 0x06);
 
-	/* convert from picoseconds to kHz */
-	vclk = PICOS2KHZ(info->var.pixclock);
-	if (bpp == 32)
-		vclk *= 2;
-	set_vclk(par, vclk);
-
 	vga_mm_wseq(par->io_virt, 0, 3);
 	vga_mm_wseq(par->io_virt, 1, 1); /* set char clock 8 dots wide */
 	/* enable 4 maps because needed in chain4 mode */
@@ -1094,10 +1090,16 @@ static int tridentfb_set_par(struct fb_info *info)
 	vga_mm_wseq(par->io_virt, 3, 0);
 	vga_mm_wseq(par->io_virt, 4, 0x0E); /* memory mode enable bitmaps ?? */
 
+	/* convert from picoseconds to kHz */
+	vclk = PICOS2KHZ(info->var.pixclock);
+
 	/* divide clock by 2 if 32bpp chain4 mode display and CPU path */
 	tmp = read3CE(par, MiscExtFunc) & 0xF0;
-	if (bpp == 32)
+	if (bpp == 32 || (par->chip_id == TGUI9440 && bpp == 16)) {
 		tmp |= 8;
+		vclk *= 2;
+	}
+	set_vclk(par, vclk);
 	write3CE(par, MiscExtFunc, tmp | 0x12);
 	write3CE(par, 0x5, 0x40);	/* no CGA compat, allow 256 col */
 	write3CE(par, 0x6, 0x05);	/* graphics mode */
@@ -1361,6 +1363,8 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	tridentfb_fix.smem_start = pci_resource_start(dev, 0);
 	tridentfb_fix.smem_len = get_memsize(default_par);
 
+	enable_mmio();
+
 	if (!request_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len, "tridentfb")) {
 		debug("request_mem_region failed!\n");
 		disable_mmio(info->par);
@@ -1368,8 +1372,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		goto out_unmap1;
 	}
 
-	enable_mmio();
-
 	info->screen_base = ioremap_nocache(tridentfb_fix.smem_start,
 					    tridentfb_fix.smem_len);
 
-- 
GitLab


From bcac2d5fe36238dcfc955b49f9db10ad3ae3e53c Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:01 -0700
Subject: [PATCH 375/853] tridentfb: add acceleration for TGUI families

This patch adds acceleration for TGUI 9440 and 96xx chips.  These chips
require the line length to be a power of 2, so this is also changed.

It also moves the troubling enable_mmio() function to its final
destination.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 135 ++++++++++++++++++++++++++++++++++----
 1 file changed, 123 insertions(+), 12 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index ed1b32a1cef..9b87c08e517 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -491,6 +491,95 @@ static void image_copy_rect(struct tridentfb_par *par,
 		 0x80000000 | 1 << 22 | 1 << 10 | 1 << 7 | direction);
 }
 
+/*
+ * TGUI 9440/96XX acceleration
+ */
+
+static void tgui_init_accel(struct tridentfb_par *par, int pitch, int bpp)
+{
+	unsigned char x = 0;
+
+	/* disable clipping */
+	writemmr(par, 0x2148, 0);
+	writemmr(par, 0x214C, point(4095, 2047));
+
+	switch (bpp) {
+	case 8:
+		x = 0;
+		break;
+	case 16:
+		x = 1;
+		break;
+	case 24:
+		x = 3;
+		break;
+	case 32:
+		x = 2;
+		break;
+	}
+
+	switch ((pitch * bpp) / 8) {
+	case 8192:
+	case 512:
+		x |= 0x00;
+		break;
+	case 1024:
+		x |= 0x04;
+		break;
+	case 2048:
+		x |= 0x08;
+		break;
+	case 4096:
+		x |= 0x0C;
+		break;
+	}
+
+	fb_writew(x, par->io_virt + 0x2122);
+}
+
+static void tgui_fill_rect(struct tridentfb_par *par,
+			   u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
+{
+	t_outb(par, ROP_P, 0x2127);
+	writemmr(par, 0x212c, c);
+	writemmr(par, 0x2128, 0x4020);
+	writemmr(par, 0x2140, point(w - 1, h - 1));
+	writemmr(par, 0x2138, point(x, y));
+	t_outb(par, 1, 0x2124);
+}
+
+static void tgui_copy_rect(struct tridentfb_par *par,
+			   u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
+{
+	int flags = 0;
+	u16 x1_tmp, x2_tmp, y1_tmp, y2_tmp;
+
+	if ((x1 < x2) && (y1 == y2)) {
+		flags |= 0x0200;
+		x1_tmp = x1 + w - 1;
+		x2_tmp = x2 + w - 1;
+	} else {
+		x1_tmp = x1;
+		x2_tmp = x2;
+	}
+
+	if (y1 < y2) {
+		flags |= 0x0100;
+		y1_tmp = y1 + h - 1;
+		y2_tmp = y2 + h - 1;
+	} else {
+		y1_tmp = y1;
+		y2_tmp = y2;
+	}
+
+	writemmr(par, 0x2128, 0x4 | flags);
+	t_outb(par, ROP_S, 0x2127);
+	writemmr(par, 0x213C, point(x1_tmp, y1_tmp));
+	writemmr(par, 0x2138, point(x2_tmp, y2_tmp));
+	writemmr(par, 0x2140, point(w - 1, h - 1));
+	t_outb(par, 1, 0x2124);
+}
+
 /*
  * Accel functions called by the upper layers
  */
@@ -530,12 +619,8 @@ static void tridentfb_copyarea(struct fb_info *info,
 		       ca->width, ca->height);
 	par->wait_engine(par);
 }
-#else /* !CONFIG_FB_TRIDENT_ACCEL */
-#define tridentfb_fillrect cfb_fillrect
-#define tridentfb_copyarea cfb_copyarea
 #endif /* CONFIG_FB_TRIDENT_ACCEL */
 
-
 /*
  * Hardware access functions
  */
@@ -829,6 +914,7 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 {
 	struct tridentfb_par *par = info->par;
 	int bpp = var->bits_per_pixel;
+	int line_length;
 	int ramdac = 230000; /* 230MHz for most 3D chips */
 	debug("enter\n");
 
@@ -844,9 +930,27 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	var->xres = (var->xres + 7) & ~0x7;
 	if (var->xres != var->xres_virtual)
 		var->xres_virtual = var->xres;
+	line_length = var->xres_virtual * bpp / 8;
+#ifdef CONFIG_FB_TRIDENT_ACCEL
+	if (!is3Dchip(par->chip_id)) {
+		/* acceleration requires line length to be power of 2 */
+		if (line_length <= 512)
+			var->xres_virtual = 512 * 8 / bpp;
+		else if (line_length <= 1024)
+			var->xres_virtual = 1024 * 8 / bpp;
+		else if (line_length <= 2048)
+			var->xres_virtual = 2048 * 8 / bpp;
+		else if (line_length <= 4096)
+			var->xres_virtual = 4096 * 8 / bpp;
+		else if (line_length <= 8192)
+			var->xres_virtual = 8192 * 8 / bpp;
+
+		line_length = var->xres_virtual * bpp / 8;
+	}
+#endif
 	if (var->yres > var->yres_virtual)
 		var->yres_virtual = var->yres;
-	if (var->xres * var->yres_virtual * bpp / 8 > info->fix.smem_len)
+	if (line_length * var->yres_virtual > info->fix.smem_len)
 		return -EINVAL;
 
 	switch (bpp) {
@@ -918,7 +1022,7 @@ static int tridentfb_pan_display(struct fb_var_screeninfo *var,
 	unsigned int offset;
 
 	debug("enter\n");
-	offset = (var->xoffset + (var->yoffset * var->xres))
+	offset = (var->xoffset + (var->yoffset * var->xres_virtual))
 		* var->bits_per_pixel / 32;
 	info->var.xoffset = var->xoffset;
 	info->var.yoffset = var->yoffset;
@@ -1049,7 +1153,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	write3X4(par, GraphEngReg, 0x80);
 
 #ifdef CONFIG_FB_TRIDENT_ACCEL
-	par->init_accel(par, info->var.xres, bpp);
+	par->init_accel(par, info->var.xres_virtual, bpp);
 #endif
 
 	switch (bpp) {
@@ -1147,9 +1251,9 @@ static int tridentfb_set_par(struct fb_info *info)
 
 	if (par->flatpanel)
 		set_number_of_lines(par, info->var.yres);
-	set_lwidth(par, info->var.xres * bpp / (4 * 16));
+	info->fix.line_length = info->var.xres_virtual * bpp / 8;
+	set_lwidth(par, info->fix.line_length / 8);
 	info->fix.visual = (bpp == 8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
-	info->fix.line_length = info->var.xres * (bpp >> 3);
 	info->cmap.len = (bpp == 8) ? 256 : 16;
 	debug("exit\n");
 	return 0;
@@ -1248,9 +1352,11 @@ static struct fb_ops tridentfb_ops = {
 	.fb_blank = tridentfb_blank,
 	.fb_check_var = tridentfb_check_var,
 	.fb_set_par = tridentfb_set_par,
+#ifdef CONFIG_FB_TRIDENT_ACCEL
 	.fb_fillrect = tridentfb_fillrect,
 	.fb_copyarea = tridentfb_copyarea,
 	.fb_imageblit = cfb_imageblit,
+#endif
 };
 
 static int __devinit trident_pci_probe(struct pci_dev *dev,
@@ -1328,11 +1434,16 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		default_par->wait_engine = blade_wait_engine;
 		default_par->fill_rect = blade_fill_rect;
 		default_par->copy_rect = blade_copy_rect;
-	} else {
+	} else if (chip3D) {			/* 3DImage family left */
 		default_par->init_accel = image_init_accel;
 		default_par->wait_engine = image_wait_engine;
 		default_par->fill_rect = image_fill_rect;
 		default_par->copy_rect = image_copy_rect;
+	} else { 				/* TGUI 9440/96XX family */
+		default_par->init_accel = tgui_init_accel;
+		default_par->wait_engine = xp_wait_engine;
+		default_par->fill_rect = tgui_fill_rect;
+		default_par->copy_rect = tgui_copy_rect;
 	}
 
 	default_par->chip_id = chip_id;
@@ -1359,12 +1470,12 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		goto out_unmap1;
 	}
 
+	enable_mmio();
+
 	/* setup framebuffer memory */
 	tridentfb_fix.smem_start = pci_resource_start(dev, 0);
 	tridentfb_fix.smem_len = get_memsize(default_par);
 
-	enable_mmio();
-
 	if (!request_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len, "tridentfb")) {
 		debug("request_mem_region failed!\n");
 		disable_mmio(info->par);
-- 
GitLab


From 49b1f4b44bcdc47a10d2b354b269305043ef2a32 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:02 -0700
Subject: [PATCH 376/853] tridentfb: acceleration code improvements

This patch brings various acceleration improvements:
- set copyarea/fillrect for non-accelerated framebuffer (fix)
- remove 15 bpp depth handling to simplify code as it hardly
  works (15 bpp handling was obviously missing in some switches)
- add fb_sync call and move waiting before the accelerated function
  to make acceleration more asynchronous to the CPU (a few % of speed
  improvement)
- add cpu_relax() call in waiting loops
- make longer register names and name more registers
- move registers' definition to header
- general code improvements (shortening, simplifying)

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 261 +++++++++++++-------------------------
 include/video/trident.h   |  21 ++-
 2 files changed, 109 insertions(+), 173 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 9b87c08e517..4a1258f9509 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -1,5 +1,5 @@
 /*
- * Frame buffer driver for Trident Blade and Image series
+ * Frame buffer driver for Trident TGUI, Blade and Image series
  *
  * Copyright 2001, 2002 - Jani Monoses   <jani@iv.ro>
  *
@@ -13,7 +13,6 @@
  *	code, suggestions
  * TODO:
  *	timing value tweaking so it looks good on every monitor in every mode
- *	TGUI acceleration
  */
 
 #include <linux/module.h>
@@ -193,37 +192,13 @@ static inline u32 readmmr(struct tridentfb_par *par, u16 r)
  */
 
 #define point(x, y) ((y) << 16 | (x))
-#define STA	0x2120
-#define CMD	0x2144
-#define ROP	0x2148
-#define CLR	0x2160
-#define SR1	0x2100
-#define SR2	0x2104
-#define DR1	0x2108
-#define DR2	0x210C
-
-#define ROP_S	0xCC
 
 static void blade_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 {
 	int v1 = (pitch >> 3) << 20;
-	int tmp = 0, v2;
-	switch (bpp) {
-	case 8:
-		tmp = 0;
-		break;
-	case 15:
-		tmp = 5;
-		break;
-	case 16:
-		tmp = 1;
-		break;
-	case 24:
-	case 32:
-		tmp = 2;
-		break;
-	}
-	v2 = v1 | (tmp << 29);
+	int tmp = bpp == 24 ? 2 : (bpp >> 4);
+	int v2 = v1 | (tmp << 29);
+
 	writemmr(par, 0x21C0, v2);
 	writemmr(par, 0x21C4, v2);
 	writemmr(par, 0x21B8, v2);
@@ -237,29 +212,29 @@ static void blade_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 
 static void blade_wait_engine(struct tridentfb_par *par)
 {
-	while (readmmr(par, STA) & 0xFA800000) ;
+	while (readmmr(par, STATUS) & 0xFA800000)
+		cpu_relax();
 }
 
 static void blade_fill_rect(struct tridentfb_par *par,
 			    u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
 {
-	writemmr(par, CLR, c);
-	writemmr(par, ROP, rop ? 0x66 : ROP_S);
+	writemmr(par, COLOR, c);
+	writemmr(par, ROP, rop ? ROP_X : ROP_S);
 	writemmr(par, CMD, 0x20000000 | 1 << 19 | 1 << 4 | 2 << 2);
 
-	writemmr(par, DR1, point(x, y));
-	writemmr(par, DR2, point(x + w - 1, y + h - 1));
+	writemmr(par, DST1, point(x, y));
+	writemmr(par, DST2, point(x + w - 1, y + h - 1));
 }
 
 static void blade_copy_rect(struct tridentfb_par *par,
 			    u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
-	u32 s1, s2, d1, d2;
 	int direction = 2;
-	s1 = point(x1, y1);
-	s2 = point(x1 + w - 1, y1 + h - 1);
-	d1 = point(x2, y2);
-	d2 = point(x2 + w - 1, y2 + h - 1);
+	u32 s1 = point(x1, y1);
+	u32 s2 = point(x1 + w - 1, y1 + h - 1);
+	u32 d1 = point(x2, y2);
+	u32 d2 = point(x2 + w - 1, y2 + h - 1);
 
 	if ((y1 > y2) || ((y1 == y2) && (x1 > x2)))
 		direction = 0;
@@ -267,38 +242,20 @@ static void blade_copy_rect(struct tridentfb_par *par,
 	writemmr(par, ROP, ROP_S);
 	writemmr(par, CMD, 0xE0000000 | 1 << 19 | 1 << 4 | 1 << 2 | direction);
 
-	writemmr(par, SR1, direction ? s2 : s1);
-	writemmr(par, SR2, direction ? s1 : s2);
-	writemmr(par, DR1, direction ? d2 : d1);
-	writemmr(par, DR2, direction ? d1 : d2);
+	writemmr(par, SRC1, direction ? s2 : s1);
+	writemmr(par, SRC2, direction ? s1 : s2);
+	writemmr(par, DST1, direction ? d2 : d1);
+	writemmr(par, DST2, direction ? d1 : d2);
 }
 
 /*
  * BladeXP specific acceleration functions
  */
 
-#define ROP_P 0xF0
-#define masked_point(x, y) ((y & 0xffff)<<16|(x & 0xffff))
-
 static void xp_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 {
-	int tmp = 0, v1;
-	unsigned char x = 0;
-
-	switch (bpp) {
-	case 8:
-		x = 0;
-		break;
-	case 16:
-		x = 1;
-		break;
-	case 24:
-		x = 3;
-		break;
-	case 32:
-		x = 2;
-		break;
-	}
+	unsigned char x = bpp == 24 ? 3 : (bpp >> 4);
+	int v1 = pitch << (bpp == 24 ? 20 : (18 + x));
 
 	switch (pitch << (bpp >> 3)) {
 	case 8192:
@@ -320,22 +277,6 @@ static void xp_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 
 	eng_oper = x | 0x40;
 
-	switch (bpp) {
-	case 8:
-		tmp = 18;
-		break;
-	case 15:
-	case 16:
-		tmp = 19;
-		break;
-	case 24:
-	case 32:
-		tmp = 20;
-		break;
-	}
-
-	v1 = pitch << tmp;
-
 	writemmr(par, 0x2154, v1);
 	writemmr(par, 0x2150, v1);
 	t_outb(par, 3, 0x2126);
@@ -343,15 +284,11 @@ static void xp_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 
 static void xp_wait_engine(struct tridentfb_par *par)
 {
-	int busy;
 	int count, timeout;
 
 	count = 0;
 	timeout = 0;
-	for (;;) {
-		busy = t_inb(par, STA) & 0x80;
-		if (busy != 0x80)
-			return;
+	while (t_inb(par, STATUS) & 0x80) {
 		count++;
 		if (count == 10000000) {
 			/* Timeout */
@@ -359,10 +296,11 @@ static void xp_wait_engine(struct tridentfb_par *par)
 			timeout++;
 			if (timeout == 8) {
 				/* Reset engine */
-				t_outb(par, 0x00, 0x2120);
+				t_outb(par, 0x00, STATUS);
 				return;
 			}
 		}
+		cpu_relax();
 	}
 }
 
@@ -371,10 +309,10 @@ static void xp_fill_rect(struct tridentfb_par *par,
 {
 	writemmr(par, 0x2127, ROP_P);
 	writemmr(par, 0x2158, c);
-	writemmr(par, 0x2128, 0x4000);
-	writemmr(par, 0x2140, masked_point(h, w));
-	writemmr(par, 0x2138, masked_point(y, x));
-	t_outb(par, 0x01, 0x2124);
+	writemmr(par, DRAWFL, 0x4000);
+	writemmr(par, OLDDIM, point(h, w));
+	writemmr(par, OLDDST, point(y, x));
+	t_outb(par, 0x01, OLDCMD);
 	t_outb(par, eng_oper, 0x2125);
 }
 
@@ -404,12 +342,12 @@ static void xp_copy_rect(struct tridentfb_par *par,
 		y2_tmp = y2;
 	}
 
-	writemmr(par, 0x2128, direction);
+	writemmr(par, DRAWFL, direction);
 	t_outb(par, ROP_S, 0x2127);
-	writemmr(par, 0x213C, masked_point(y1_tmp, x1_tmp));
-	writemmr(par, 0x2138, masked_point(y2_tmp, x2_tmp));
-	writemmr(par, 0x2140, masked_point(h, w));
-	t_outb(par, 0x01, 0x2124);
+	writemmr(par, OLDSRC, point(y1_tmp, x1_tmp));
+	writemmr(par, OLDDST, point(y2_tmp, x2_tmp));
+	writemmr(par, OLDDIM, point(h, w));
+	t_outb(par, 0x01, OLDCMD);
 }
 
 /*
@@ -417,22 +355,8 @@ static void xp_copy_rect(struct tridentfb_par *par,
  */
 static void image_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 {
-	int tmp = 0;
-	switch (bpp) {
-	case 8:
-		tmp = 0;
-		break;
-	case 15:
-		tmp = 5;
-		break;
-	case 16:
-		tmp = 1;
-		break;
-	case 24:
-	case 32:
-		tmp = 2;
-		break;
-	}
+	int tmp = bpp == 24 ? 2: (bpp >> 4);
+
 	writemmr(par, 0x2120, 0xF0000000);
 	writemmr(par, 0x2120, 0x40000000 | tmp);
 	writemmr(par, 0x2120, 0x80000000);
@@ -450,7 +374,8 @@ static void image_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 
 static void image_wait_engine(struct tridentfb_par *par)
 {
-	while (readmmr(par, 0x2164) & 0xF0000000) ;
+	while (readmmr(par, 0x2164) & 0xF0000000)
+		cpu_relax();
 }
 
 static void image_fill_rect(struct tridentfb_par *par,
@@ -461,8 +386,8 @@ static void image_fill_rect(struct tridentfb_par *par,
 
 	writemmr(par, 0x2144, c);
 
-	writemmr(par, DR1, point(x, y));
-	writemmr(par, DR2, point(x + w - 1, y + h - 1));
+	writemmr(par, DST1, point(x, y));
+	writemmr(par, DST2, point(x + w - 1, y + h - 1));
 
 	writemmr(par, 0x2124, 0x80000000 | 3 << 22 | 1 << 10 | 1 << 9);
 }
@@ -470,12 +395,11 @@ static void image_fill_rect(struct tridentfb_par *par,
 static void image_copy_rect(struct tridentfb_par *par,
 			    u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
-	u32 s1, s2, d1, d2;
 	int direction = 2;
-	s1 = point(x1, y1);
-	s2 = point(x1 + w - 1, y1 + h - 1);
-	d1 = point(x2, y2);
-	d2 = point(x2 + w - 1, y2 + h - 1);
+	u32 s1 = point(x1, y1);
+	u32 s2 = point(x1 + w - 1, y1 + h - 1);
+	u32 d1 = point(x2, y2);
+	u32 d2 = point(x2 + w - 1, y2 + h - 1);
 
 	if ((y1 > y2) || ((y1 == y2) && (x1 > x2)))
 		direction = 0;
@@ -483,10 +407,10 @@ static void image_copy_rect(struct tridentfb_par *par,
 	writemmr(par, 0x2120, 0x80000000);
 	writemmr(par, 0x2120, 0x90000000 | ROP_S);
 
-	writemmr(par, SR1, direction ? s2 : s1);
-	writemmr(par, SR2, direction ? s1 : s2);
-	writemmr(par, DR1, direction ? d2 : d1);
-	writemmr(par, DR2, direction ? d1 : d2);
+	writemmr(par, SRC1, direction ? s2 : s1);
+	writemmr(par, SRC2, direction ? s1 : s2);
+	writemmr(par, DST1, direction ? d2 : d1);
+	writemmr(par, DST2, direction ? d1 : d2);
 	writemmr(par, 0x2124,
 		 0x80000000 | 1 << 22 | 1 << 10 | 1 << 7 | direction);
 }
@@ -497,27 +421,12 @@ static void image_copy_rect(struct tridentfb_par *par,
 
 static void tgui_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 {
-	unsigned char x = 0;
+	unsigned char x = bpp == 24 ? 3 : (bpp >> 4);
 
 	/* disable clipping */
 	writemmr(par, 0x2148, 0);
 	writemmr(par, 0x214C, point(4095, 2047));
 
-	switch (bpp) {
-	case 8:
-		x = 0;
-		break;
-	case 16:
-		x = 1;
-		break;
-	case 24:
-		x = 3;
-		break;
-	case 32:
-		x = 2;
-		break;
-	}
-
 	switch ((pitch * bpp) / 8) {
 	case 8192:
 	case 512:
@@ -541,11 +450,11 @@ static void tgui_fill_rect(struct tridentfb_par *par,
 			   u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
 {
 	t_outb(par, ROP_P, 0x2127);
-	writemmr(par, 0x212c, c);
-	writemmr(par, 0x2128, 0x4020);
-	writemmr(par, 0x2140, point(w - 1, h - 1));
-	writemmr(par, 0x2138, point(x, y));
-	t_outb(par, 1, 0x2124);
+	writemmr(par, OLDCLR, c);
+	writemmr(par, DRAWFL, 0x4020);
+	writemmr(par, OLDDIM, point(w - 1, h - 1));
+	writemmr(par, OLDDST, point(x, y));
+	t_outb(par, 1, OLDCMD);
 }
 
 static void tgui_copy_rect(struct tridentfb_par *par,
@@ -572,12 +481,12 @@ static void tgui_copy_rect(struct tridentfb_par *par,
 		y2_tmp = y2;
 	}
 
-	writemmr(par, 0x2128, 0x4 | flags);
+	writemmr(par, DRAWFL, 0x4 | flags);
 	t_outb(par, ROP_S, 0x2127);
-	writemmr(par, 0x213C, point(x1_tmp, y1_tmp));
-	writemmr(par, 0x2138, point(x2_tmp, y2_tmp));
-	writemmr(par, 0x2140, point(w - 1, h - 1));
-	t_outb(par, 1, 0x2124);
+	writemmr(par, OLDSRC, point(x1_tmp, y1_tmp));
+	writemmr(par, OLDDST, point(x2_tmp, y2_tmp));
+	writemmr(par, OLDDIM, point(w - 1, h - 1));
+	t_outb(par, 1, OLDCMD);
 }
 
 /*
@@ -588,37 +497,40 @@ static void tridentfb_fillrect(struct fb_info *info,
 			       const struct fb_fillrect *fr)
 {
 	struct tridentfb_par *par = info->par;
-	int bpp = info->var.bits_per_pixel;
-	int col = 0;
+	int col;
 
-	switch (bpp) {
-	default:
-	case 8:
-		col |= fr->color;
+	if (info->var.bits_per_pixel == 8) {
+		col = fr->color;
 		col |= col << 8;
 		col |= col << 16;
-		break;
-	case 16:
-		col = ((u32 *)(info->pseudo_palette))[fr->color];
-		break;
-	case 32:
+	} else
 		col = ((u32 *)(info->pseudo_palette))[fr->color];
-		break;
-	}
 
+	par->wait_engine(par);
 	par->fill_rect(par, fr->dx, fr->dy, fr->width,
 		       fr->height, col, fr->rop);
-	par->wait_engine(par);
 }
+
 static void tridentfb_copyarea(struct fb_info *info,
 			       const struct fb_copyarea *ca)
 {
 	struct tridentfb_par *par = info->par;
 
+	par->wait_engine(par);
 	par->copy_rect(par, ca->sx, ca->sy, ca->dx, ca->dy,
 		       ca->width, ca->height);
+}
+
+static int tridentfb_sync(struct fb_info *info)
+{
+	struct tridentfb_par *par = info->par;
+
 	par->wait_engine(par);
+	return 0;
 }
+#else
+#define tridentfb_fillrect cfb_fillrect
+#define tridentfb_copyarea cfb_copyarea
 #endif /* CONFIG_FB_TRIDENT_ACCEL */
 
 /*
@@ -921,6 +833,8 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	/* check color depth */
 	if (bpp == 24)
 		bpp = var->bits_per_pixel = 32;
+	if (bpp != 8 && bpp != 16 && bpp != 32)
+		return -EINVAL;
 	if (par->chip_id == TGUI9440 && bpp == 32)
 		return -EINVAL;
 	/* check whether resolution fits on panel and in memory */
@@ -928,8 +842,15 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 		return -EINVAL;
 	/* various resolution checks */
 	var->xres = (var->xres + 7) & ~0x7;
-	if (var->xres != var->xres_virtual)
+	if (var->xres > var->xres_virtual)
 		var->xres_virtual = var->xres;
+	if (var->yres > var->yres_virtual)
+		var->yres_virtual = var->yres;
+	if (var->xres_virtual > 4095 || var->yres > 2048)
+		return -EINVAL;
+	/* prevent from position overflow for acceleration */
+	if (var->yres_virtual > 0xffff)
+		return -EINVAL;
 	line_length = var->xres_virtual * bpp / 8;
 #ifdef CONFIG_FB_TRIDENT_ACCEL
 	if (!is3Dchip(par->chip_id)) {
@@ -944,6 +865,8 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 			var->xres_virtual = 4096 * 8 / bpp;
 		else if (line_length <= 8192)
 			var->xres_virtual = 8192 * 8 / bpp;
+		else
+			return -EINVAL;
 
 		line_length = var->xres_virtual * bpp / 8;
 	}
@@ -1229,9 +1152,6 @@ static int tridentfb_set_par(struct fb_info *info)
 	case 8:
 		tmp = 0;
 		break;
-	case 15:
-		tmp = 0x10;
-		break;
 	case 16:
 		tmp = 0x30;
 		break;
@@ -1352,10 +1272,11 @@ static struct fb_ops tridentfb_ops = {
 	.fb_blank = tridentfb_blank,
 	.fb_check_var = tridentfb_check_var,
 	.fb_set_par = tridentfb_set_par,
-#ifdef CONFIG_FB_TRIDENT_ACCEL
 	.fb_fillrect = tridentfb_fillrect,
 	.fb_copyarea = tridentfb_copyarea,
 	.fb_imageblit = cfb_imageblit,
+#ifdef CONFIG_FB_TRIDENT_ACCEL
+	.fb_sync = tridentfb_sync,
 #endif
 };
 
@@ -1366,7 +1287,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	unsigned char revision;
 	struct fb_info *info;
 	struct tridentfb_par *default_par;
-	int defaultaccel;
 	int chip3D;
 	int chip_id;
 
@@ -1448,9 +1368,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 
 	default_par->chip_id = chip_id;
 
-	/* acceleration is on by default for 3D chips */
-	defaultaccel = chip3D && !noaccel;
-
 	/* setup MMIO region */
 	tridentfb_fix.mmio_start = pci_resource_start(dev, 1);
 	tridentfb_fix.mmio_len = chip3D ? 0x20000 : 0x10000;
@@ -1515,7 +1432,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	if (err < 0)
 		goto out_unmap2;
 
-	if (defaultaccel && default_par->init_accel)
+	if (!noaccel && default_par->init_accel)
 		info->var.accel_flags |= FB_ACCELF_TEXT;
 	else
 		info->var.accel_flags &= ~FB_ACCELF_TEXT;
diff --git a/include/video/trident.h b/include/video/trident.h
index 76d81b2198d..7540501bc04 100644
--- a/include/video/trident.h
+++ b/include/video/trident.h
@@ -4,7 +4,7 @@
 #endif
 
 #if TRIDENTFB_DEBUG
-#define debug(f,a...)	printk("%s:" f,  __FUNCTION__ , ## a);mdelay(1000);
+#define debug(f,a...)	printk("%s:" f,  __FUNCTION__ , ## a);mdelay(100);
 #else
 #define debug(f,a...)
 #endif
@@ -124,3 +124,22 @@
 #define BiosMode     0x5c
 #define BiosReg      0x5d
 
+/* Graphics Engine */
+#define STATUS	0x2120
+#define OLDCMD	0x2124
+#define DRAWFL	0x2128
+#define OLDCLR	0x212C
+#define OLDDST	0x2138
+#define OLDSRC	0x213C
+#define OLDDIM	0x2140
+#define CMD	0x2144
+#define ROP	0x2148
+#define COLOR	0x2160
+#define SRC1	0x2100
+#define SRC2	0x2104
+#define DST1	0x2108
+#define DST2	0x210C
+
+#define ROP_S	0xCC
+#define ROP_P	0xF0
+#define ROP_X	0x66
-- 
GitLab


From 2c86a0c26fbe8ea218f7a267645679fb78aba8a3 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:03 -0700
Subject: [PATCH 377/853] tridentfb: acceleration bug fixes

This patch fixes two problems when acceleration is enabled:

 - bit for bitblt direction is corrected
   so scrolling down works as expected on 3DImage chips

 - initialization of acceleration is done later
   this helps with initial console malfunction (on Blade3D
   chips) well documented here:
   http://marc.info/?l=linux-fbdev-users&m=111386953124478&w=2

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 4a1258f9509..d896dee7b48 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -395,7 +395,7 @@ static void image_fill_rect(struct tridentfb_par *par,
 static void image_copy_rect(struct tridentfb_par *par,
 			    u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
-	int direction = 2;
+	int direction = 0x4;
 	u32 s1 = point(x1, y1);
 	u32 s2 = point(x1 + w - 1, y1 + h - 1);
 	u32 d1 = point(x2, y2);
@@ -1075,10 +1075,6 @@ static int tridentfb_set_par(struct fb_info *info)
 	/* enable GE for text acceleration */
 	write3X4(par, GraphEngReg, 0x80);
 
-#ifdef CONFIG_FB_TRIDENT_ACCEL
-	par->init_accel(par, info->var.xres_virtual, bpp);
-#endif
-
 	switch (bpp) {
 	case 8:
 		tmp = 0x00;
@@ -1173,6 +1169,10 @@ static int tridentfb_set_par(struct fb_info *info)
 		set_number_of_lines(par, info->var.yres);
 	info->fix.line_length = info->var.xres_virtual * bpp / 8;
 	set_lwidth(par, info->fix.line_length / 8);
+#ifdef CONFIG_FB_TRIDENT_ACCEL
+	par->init_accel(par, info->var.xres_virtual, bpp);
+#endif
+
 	info->fix.visual = (bpp == 8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
 	info->cmap.len = (bpp == 8) ? 256 : 16;
 	debug("exit\n");
-- 
GitLab


From 34dec24317d6824b7db172bb0072b909a9c376f2 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:04 -0700
Subject: [PATCH 378/853] tridentfb: various pixclock and timing improvements

This patch fixes a few issues related to timings and pixclock generation:

 - disallow the pixclocks with numerator lower than
   double denominator. This fixes display instability
   for some modes.
 - choose the pixelclock with the highest
   numerator and denominator values. This improves
   image quality and fixes display instability
   for some modes.
 - make interlaced modes work.
 - set synchronization pulses polarization
   correctly.
 - horizontal synchronization timings are now
   the same as generated by X.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index d896dee7b48..ff82ec1e5e4 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -680,10 +680,12 @@ static void set_vclk(struct tridentfb_par *par, unsigned long freq)
 
 	d = 20000;
 	for (k = 1; k >= 0; k--)
-		for (m = 0; m < 32; m++)
-			for (n = 0; n < 122; n++) {
+		for (m = 0; m < 32; m++) {
+			n = 2 * (m + 2) - 8;
+			for (n = (n < 0 ? 0 : n); n < 122; n++) {
 				fi = ((14318l * (n + 8)) / (m + 2)) >> k;
-				if ((di = abs(fi - freq)) < d) {
+				di = abs(fi - freq);
+				if (di <= d) {
 					d = di;
 					best_n = n;
 					best_m = m;
@@ -692,6 +694,7 @@ static void set_vclk(struct tridentfb_par *par, unsigned long freq)
 				if (fi > freq)
 					break;
 			}
+		}
 
 	if (is_oldclock(par->chip_id)) {
 		lo = best_n | (best_m << 7);
@@ -977,8 +980,8 @@ static int tridentfb_set_par(struct fb_info *info)
 
 	debug("enter\n");
 	hdispend = var->xres / 8 - 1;
-	hsyncstart = (var->xres + var->right_margin) / 8 - 1;
-	hsyncend = (var->xres + var->right_margin + var->hsync_len) / 8 - 1;
+	hsyncstart = (var->xres + var->right_margin) / 8;
+	hsyncend = (var->xres + var->right_margin + var->hsync_len) / 8;
 	htotal = (var->xres + var->left_margin + var->right_margin +
 		  var->hsync_len) / 8 - 5;
 	hblankstart = hdispend + 1;
@@ -991,8 +994,22 @@ static int tridentfb_set_par(struct fb_info *info)
 	vblankstart = vdispend + 1;
 	vblankend = vtotal;
 
+	if (info->var.vmode & FB_VMODE_INTERLACED) {
+		vtotal /= 2;
+		vdispend /= 2;
+		vsyncstart /= 2;
+		vsyncend /= 2;
+		vblankstart /= 2;
+		vblankend /= 2;
+	}
+
 	crtc_unlock(par);
 	write3CE(par, CyberControl, 8);
+	tmp = 0xEB;
+	if (var->sync & FB_SYNC_HOR_HIGH_ACT)
+		tmp &= ~0x40;
+	if (var->sync & FB_SYNC_VERT_HIGH_ACT)
+		tmp &= ~0x80;
 
 	if (par->flatpanel && var->xres < nativex) {
 		/*
@@ -1000,7 +1017,7 @@ static int tridentfb_set_par(struct fb_info *info)
 		 * than requested resolution decide whether
 		 * we stretch or center
 		 */
-		t_outb(par, 0xEB, VGA_MIS_W);
+		t_outb(par, tmp | 0xC0, VGA_MIS_W);
 
 		shadowmode_on(par);
 
@@ -1010,7 +1027,7 @@ static int tridentfb_set_par(struct fb_info *info)
 			screen_stretch(par);
 
 	} else {
-		t_outb(par, 0x2B, VGA_MIS_W);
+		t_outb(par, tmp, VGA_MIS_W);
 		write3CE(par, CyberControl, 8);
 	}
 
@@ -1071,6 +1088,10 @@ static int tridentfb_set_par(struct fb_info *info)
 	tmp = (info->var.vmode & FB_VMODE_INTERLACED) ? 0x84 : 0x80;
 	/* enable access extended memory */
 	write3X4(par, CRTCModuleTest, tmp);
+	tmp = read3CE(par, MiscIntContReg) & ~0x4;
+	if (info->var.vmode & FB_VMODE_INTERLACED)
+		tmp |= 0x4;
+	write3CE(par, MiscIntContReg, tmp);
 
 	/* enable GE for text acceleration */
 	write3X4(par, GraphEngReg, 0x80);
-- 
GitLab


From 01a2d9ed85c945fc8a672622780533a1a0b7caf5 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:04 -0700
Subject: [PATCH 379/853] tridentfb: acceleration constants change

This patch replaces deprecated constant FB_ACCELF_TEXT with
FBINFO_HWACCEL_DISABLED and adds constants for Trident families of
accelerators.

The FBINFO_HWACCEL_DISABLED is correctly used so noaccel parameter works
now.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 45 +++++++++++++++++++++++++++------------
 include/linux/fb.h        |  4 ++++
 2 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index ff82ec1e5e4..279411523ed 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -499,6 +499,10 @@ static void tridentfb_fillrect(struct fb_info *info,
 	struct tridentfb_par *par = info->par;
 	int col;
 
+	if (info->flags & FBINFO_HWACCEL_DISABLED) {
+		cfb_fillrect(info, fr);
+		return;
+	}
 	if (info->var.bits_per_pixel == 8) {
 		col = fr->color;
 		col |= col << 8;
@@ -516,6 +520,10 @@ static void tridentfb_copyarea(struct fb_info *info,
 {
 	struct tridentfb_par *par = info->par;
 
+	if (info->flags & FBINFO_HWACCEL_DISABLED) {
+		cfb_copyarea(info, ca);
+		return;
+	}
 	par->wait_engine(par);
 	par->copy_rect(par, ca->sx, ca->sy, ca->dx, ca->dy,
 		       ca->width, ca->height);
@@ -525,7 +533,8 @@ static int tridentfb_sync(struct fb_info *info)
 {
 	struct tridentfb_par *par = info->par;
 
-	par->wait_engine(par);
+	if (!(info->flags & FBINFO_HWACCEL_DISABLED))
+		par->wait_engine(par);
 	return 0;
 }
 #else
@@ -855,8 +864,9 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	if (var->yres_virtual > 0xffff)
 		return -EINVAL;
 	line_length = var->xres_virtual * bpp / 8;
-#ifdef CONFIG_FB_TRIDENT_ACCEL
-	if (!is3Dchip(par->chip_id)) {
+
+	if (!is3Dchip(par->chip_id) &&
+	    !(info->flags & FBINFO_HWACCEL_DISABLED)) {
 		/* acceleration requires line length to be power of 2 */
 		if (line_length <= 512)
 			var->xres_virtual = 512 * 8 / bpp;
@@ -873,7 +883,7 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 
 		line_length = var->xres_virtual * bpp / 8;
 	}
-#endif
+
 	if (var->yres > var->yres_virtual)
 		var->yres_virtual = var->yres;
 	if (line_length * var->yres_virtual > info->fix.smem_len)
@@ -1190,9 +1200,9 @@ static int tridentfb_set_par(struct fb_info *info)
 		set_number_of_lines(par, info->var.yres);
 	info->fix.line_length = info->var.xres_virtual * bpp / 8;
 	set_lwidth(par, info->fix.line_length / 8);
-#ifdef CONFIG_FB_TRIDENT_ACCEL
-	par->init_accel(par, info->var.xres_virtual, bpp);
-#endif
+
+	if (!(info->flags & FBINFO_HWACCEL_DISABLED))
+		par->init_accel(par, info->var.xres_virtual, bpp);
 
 	info->fix.visual = (bpp == 8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
 	info->cmap.len = (bpp == 8) ? 256 : 16;
@@ -1326,6 +1336,9 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		output("*** Please do use cyblafb, Cyberblade/i1 support "
 		       "will soon be removed from tridentfb!\n");
 
+#ifndef CONFIG_FB_TRIDENT_ACCEL
+	noaccel = 1;
+#endif
 
 	/* If PCI id is 0x9660 then further detect chip type */
 
@@ -1370,21 +1383,25 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		default_par->wait_engine = xp_wait_engine;
 		default_par->fill_rect = xp_fill_rect;
 		default_par->copy_rect = xp_copy_rect;
+		tridentfb_fix.accel = FB_ACCEL_TRIDENT_BLADEXP;
 	} else if (is_blade(chip_id)) {
 		default_par->init_accel = blade_init_accel;
 		default_par->wait_engine = blade_wait_engine;
 		default_par->fill_rect = blade_fill_rect;
 		default_par->copy_rect = blade_copy_rect;
+		tridentfb_fix.accel = FB_ACCEL_TRIDENT_BLADE3D;
 	} else if (chip3D) {			/* 3DImage family left */
 		default_par->init_accel = image_init_accel;
 		default_par->wait_engine = image_wait_engine;
 		default_par->fill_rect = image_fill_rect;
 		default_par->copy_rect = image_copy_rect;
+		tridentfb_fix.accel = FB_ACCEL_TRIDENT_3DIMAGE;
 	} else { 				/* TGUI 9440/96XX family */
 		default_par->init_accel = tgui_init_accel;
 		default_par->wait_engine = xp_wait_engine;
 		default_par->fill_rect = tgui_fill_rect;
 		default_par->copy_rect = tgui_copy_rect;
+		tridentfb_fix.accel = FB_ACCEL_TRIDENT_TGUI;
 	}
 
 	default_par->chip_id = chip_id;
@@ -1441,9 +1458,13 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	info->pseudo_palette = default_par->pseudo_pal;
 
 	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
-#ifdef CONFIG_FB_TRIDENT_ACCEL
-	info->flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
-#endif
+	if (!noaccel && default_par->init_accel) {
+		info->flags &= ~FBINFO_HWACCEL_DISABLED;
+		info->flags |= FBINFO_HWACCEL_COPYAREA;
+		info->flags |= FBINFO_HWACCEL_FILLRECT;
+	} else
+		info->flags |= FBINFO_HWACCEL_DISABLED;
+
 	if (!fb_find_mode(&info->var, info,
 			  mode_option, NULL, 0, NULL, bpp)) {
 		err = -EINVAL;
@@ -1453,10 +1474,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	if (err < 0)
 		goto out_unmap2;
 
-	if (!noaccel && default_par->init_accel)
-		info->var.accel_flags |= FB_ACCELF_TEXT;
-	else
-		info->var.accel_flags &= ~FB_ACCELF_TEXT;
 	info->var.activate |= FB_ACTIVATE_NOW;
 	info->device = &dev->dev;
 	if (register_framebuffer(info) < 0) {
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 72295b09922..a084d133586 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -120,6 +120,10 @@ struct dentry;
 #define FB_ACCEL_XGI_VOLARI_V	47	/* XGI Volari V3XT, V5, V8      */
 #define FB_ACCEL_XGI_VOLARI_Z	48	/* XGI Volari Z7                */
 #define FB_ACCEL_OMAP1610	49	/* TI OMAP16xx                  */
+#define FB_ACCEL_TRIDENT_TGUI	50	/* Trident TGUI			*/
+#define FB_ACCEL_TRIDENT_3DIMAGE 51	/* Trident 3DImage		*/
+#define FB_ACCEL_TRIDENT_BLADE3D 52	/* Trident Blade3D		*/
+#define FB_ACCEL_TRIDENT_BLADEXP 53	/* Trident BladeXP		*/
 #define FB_ACCEL_NEOMAGIC_NM2070 90	/* NeoMagic NM2070              */
 #define FB_ACCEL_NEOMAGIC_NM2090 91	/* NeoMagic NM2090              */
 #define FB_ACCEL_NEOMAGIC_NM2093 92	/* NeoMagic NM2093              */
-- 
GitLab


From 5cf138457af20b0ef79d8c249381927718ca1417 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:05 -0700
Subject: [PATCH 380/853] tridentfb: source code improvements

This patch contains general source code improvements:
 - more simple functions are inline
 - removes some meaningless output and the VERSION
   string as it is of no use
 - eng_par is moved into the tridentfb_par
 - removed small section of code for CyberBladeXPAi1
   which is maybe right for only one resolution
   and refresh rate and is probably redundant now
 - other minor improvements

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 83 ++++++++++++++-------------------------
 include/video/trident.h   |  4 +-
 2 files changed, 32 insertions(+), 55 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 279411523ed..b6065effc5e 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -24,8 +24,6 @@
 #include <video/vga.h>
 #include <video/trident.h>
 
-#define VERSION		"0.7.9-NEWAPI"
-
 struct tridentfb_par {
 	void __iomem *io_virt;	/* iospace virtual memory address */
 	u32 pseudo_pal[16];
@@ -37,9 +35,9 @@ struct tridentfb_par {
 		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
 	void (*copy_rect)
 		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
+	unsigned char eng_oper;	/* engine operation... */
 };
 
-static unsigned char eng_oper;	/* engine operation... */
 static struct fb_ops tridentfb_ops;
 
 static struct fb_fix_screeninfo tridentfb_fix = {
@@ -53,7 +51,7 @@ static struct fb_fix_screeninfo tridentfb_fix = {
 /* defaults which are normally overriden by user values */
 
 /* video mode */
-static char *mode_option __devinitdata = "640x480";
+static char *mode_option __devinitdata = "640x480-8@60";
 static int bpp __devinitdata = 8;
 
 static int noaccel __devinitdata;
@@ -84,24 +82,22 @@ MODULE_PARM_DESC(fp, "Define if flatpanel is connected");
 module_param(crt, int, 0);
 MODULE_PARM_DESC(crt, "Define if CRT is connected");
 
-static int is_oldclock(int id)
+static inline int is_oldclock(int id)
 {
 	return	(id == TGUI9440) ||
 		(id == TGUI9660) ||
 		(id == CYBER9320);
 }
 
-static int is_oldprotect(int id)
+static inline int is_oldprotect(int id)
 {
-	return	(id == TGUI9440) ||
-		(id == TGUI9660) ||
+	return	is_oldclock(id) ||
 		(id == PROVIDIA9685) ||
-		(id == CYBER9320) ||
 		(id == CYBER9382) ||
 		(id == CYBER9385);
 }
 
-static int is_blade(int id)
+static inline int is_blade(int id)
 {
 	return	(id == BLADE3D) ||
 		(id == CYBERBLADEE4) ||
@@ -113,27 +109,22 @@ static int is_blade(int id)
 		(id == CYBERBLADEAi1D);
 }
 
-static int is_xp(int id)
+static inline int is_xp(int id)
 {
 	return	(id == CYBERBLADEXPAi1) ||
 		(id == CYBERBLADEXPm8) ||
 		(id == CYBERBLADEXPm16);
 }
 
-static int is3Dchip(int id)
+static inline int is3Dchip(int id)
 {
-	return ((id == BLADE3D) || (id == CYBERBLADEE4) ||
-		(id == CYBERBLADEi7) || (id == CYBERBLADEi7D) ||
+	return	is_blade(id) || is_xp(id) ||
 		(id == CYBER9397) || (id == CYBER9397DVD) ||
 		(id == CYBER9520) || (id == CYBER9525DVD) ||
-		(id == IMAGE975) || (id == IMAGE985) ||
-		(id == CYBERBLADEi1) || (id == CYBERBLADEi1D) ||
-		(id == CYBERBLADEAi1) || (id == CYBERBLADEAi1D) ||
-		(id == CYBERBLADEXPm8) || (id == CYBERBLADEXPm16) ||
-		(id == CYBERBLADEXPAi1));
+		(id == IMAGE975) || (id == IMAGE985);
 }
 
-static int iscyber(int id)
+static inline int iscyber(int id)
 {
 	switch (id) {
 	case CYBER9388:
@@ -153,13 +144,7 @@ static int iscyber(int id)
 		return 1;
 
 	case CYBER9320:
-	case TGUI9660:
-	case PROVIDIA9685:
-	case IMAGE975:
-	case IMAGE985:
-	case BLADE3D:
 	case CYBERBLADEi7:	/* VIA MPV4 integrated version */
-
 	default:
 		/* case CYBERBLDAEXPm8:  Strange */
 		/* case CYBERBLDAEXPm16: Strange */
@@ -275,7 +260,7 @@ static void xp_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 
 	t_outb(par, x, 0x2125);
 
-	eng_oper = x | 0x40;
+	par->eng_oper = x | 0x40;
 
 	writemmr(par, 0x2154, v1);
 	writemmr(par, 0x2150, v1);
@@ -284,10 +269,9 @@ static void xp_init_accel(struct tridentfb_par *par, int pitch, int bpp)
 
 static void xp_wait_engine(struct tridentfb_par *par)
 {
-	int count, timeout;
+	int count = 0;
+	int timeout = 0;
 
-	count = 0;
-	timeout = 0;
 	while (t_inb(par, STATUS) & 0x80) {
 		count++;
 		if (count == 10000000) {
@@ -313,16 +297,14 @@ static void xp_fill_rect(struct tridentfb_par *par,
 	writemmr(par, OLDDIM, point(h, w));
 	writemmr(par, OLDDST, point(y, x));
 	t_outb(par, 0x01, OLDCMD);
-	t_outb(par, eng_oper, 0x2125);
+	t_outb(par, par->eng_oper, 0x2125);
 }
 
 static void xp_copy_rect(struct tridentfb_par *par,
 			 u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
-	int direction;
 	u32 x1_tmp, x2_tmp, y1_tmp, y2_tmp;
-
-	direction = 0x0004;
+	int direction = 0x0004;
 
 	if ((x1 < x2) && (y1 == y2)) {
 		direction |= 0x0200;
@@ -602,7 +584,7 @@ static void disable_mmio(struct tridentfb_par *par)
 	t_outb(par, t_inb(par, 0x3D5) & ~0x01, 0x3D5);
 }
 
-static void crtc_unlock(struct tridentfb_par *par)
+static inline void crtc_unlock(struct tridentfb_par *par)
 {
 	write3X4(par, VGA_CRTC_V_SYNC_END,
 		 read3X4(par, VGA_CRTC_V_SYNC_END) & 0x7F);
@@ -642,7 +624,7 @@ static int __devinit get_nativex(struct tridentfb_par *par)
 }
 
 /* Set pitch */
-static void set_lwidth(struct tridentfb_par *par, int width)
+static inline void set_lwidth(struct tridentfb_par *par, int width)
 {
 	write3X4(par, VGA_CRTC_OFFSET, width & 0xFF);
 	write3X4(par, AddColReg,
@@ -661,7 +643,7 @@ static void screen_stretch(struct tridentfb_par *par)
 }
 
 /* For resolutions smaller than FP resolution center */
-static void screen_center(struct tridentfb_par *par)
+static inline void screen_center(struct tridentfb_par *par)
 {
 	write3CE(par, VertStretch, (read3CE(par, VertStretch) & 0x7C) | 0x80);
 	write3CE(par, HorStretch, (read3CE(par, HorStretch) & 0x7C) | 0x80);
@@ -967,12 +949,12 @@ static int tridentfb_pan_display(struct fb_var_screeninfo *var,
 	return 0;
 }
 
-static void shadowmode_on(struct tridentfb_par *par)
+static inline void shadowmode_on(struct tridentfb_par *par)
 {
 	write3CE(par, CyberControl, read3CE(par, CyberControl) | 0x81);
 }
 
-static void shadowmode_off(struct tridentfb_par *par)
+static inline void shadowmode_off(struct tridentfb_par *par)
 {
 	write3CE(par, CyberControl, read3CE(par, CyberControl) & 0x7E);
 }
@@ -980,7 +962,7 @@ static void shadowmode_off(struct tridentfb_par *par)
 /* Set the hardware to the requested video mode */
 static int tridentfb_set_par(struct fb_info *info)
 {
-	struct tridentfb_par *par = (struct tridentfb_par *)(info->par);
+	struct tridentfb_par *par = info->par;
 	u32 htotal, hdispend, hsyncstart, hsyncend, hblankstart, hblankend;
 	u32 vtotal, vdispend, vsyncstart, vsyncend, vblankstart, vblankend;
 	struct fb_var_screeninfo *var = &info->var;
@@ -1159,11 +1141,6 @@ static int tridentfb_set_par(struct fb_info *info)
 	write3CE(par, 0x6, 0x05);	/* graphics mode */
 	write3CE(par, 0x7, 0x0F);	/* planes? */
 
-	if (par->chip_id == CYBERBLADEXPAi1) {
-		/* This fixes snow-effect in 32 bpp */
-		write3X4(par, VGA_CRTC_H_SYNC_START, 0x84);
-	}
-
 	/* graphics mode and support 256 color modes */
 	writeAttr(par, 0x10, 0x41);
 	writeAttr(par, 0x12, 0x0F);	/* planes */
@@ -1238,7 +1215,7 @@ static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 			col |= col << 16;
 			((u32 *)(info->pseudo_palette))[regno] = col;
 		} else if (bpp == 32)		/* ARGB 8888 */
-			((u32*)info->pseudo_palette)[regno] =
+			((u32 *)info->pseudo_palette)[regno] =
 				((transp & 0xFF00) << 16)	|
 				((red & 0xFF00) << 8)		|
 				((green & 0xFF00))		|
@@ -1249,7 +1226,7 @@ static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	return 0;
 }
 
-/* Try blanking the screen.For flat panels it does nothing */
+/* Try blanking the screen. For flat panels it does nothing */
 static int tridentfb_blank(int blank_mode, struct fb_info *info)
 {
 	unsigned char PMCont, DPMSCont;
@@ -1408,9 +1385,10 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 
 	/* setup MMIO region */
 	tridentfb_fix.mmio_start = pci_resource_start(dev, 1);
-	tridentfb_fix.mmio_len = chip3D ? 0x20000 : 0x10000;
+	tridentfb_fix.mmio_len = pci_resource_len(dev, 1);
 
-	if (!request_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len, "tridentfb")) {
+	if (!request_mem_region(tridentfb_fix.mmio_start,
+				tridentfb_fix.mmio_len, "tridentfb")) {
 		debug("request_region failed!\n");
 		framebuffer_release(info);
 		return -1;
@@ -1431,7 +1409,8 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	tridentfb_fix.smem_start = pci_resource_start(dev, 0);
 	tridentfb_fix.smem_len = get_memsize(default_par);
 
-	if (!request_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len, "tridentfb")) {
+	if (!request_mem_region(tridentfb_fix.smem_start,
+				tridentfb_fix.smem_len, "tridentfb")) {
 		debug("request_mem_region failed!\n");
 		disable_mmio(info->par);
 		err = -1;
@@ -1447,7 +1426,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		goto out_unmap2;
 	}
 
-	output("%s board found\n", pci_name(dev));
 	default_par->flatpanel = is_flatpanel(default_par);
 
 	if (default_par->flatpanel)
@@ -1477,7 +1455,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	info->var.activate |= FB_ACTIVATE_NOW;
 	info->device = &dev->dev;
 	if (register_framebuffer(info) < 0) {
-		printk(KERN_ERR "tridentfb: could not register Trident framebuffer\n");
+		printk(KERN_ERR "tridentfb: could not register framebuffer\n");
 		fb_dealloc_cmap(&info->cmap);
 		err = -EINVAL;
 		goto out_unmap2;
@@ -1599,7 +1577,6 @@ static int __init tridentfb_init(void)
 		return -ENODEV;
 	tridentfb_setup(option);
 #endif
-	output("Trident framebuffer %s initializing\n", VERSION);
 	return pci_register_driver(&tridentfb_pci_driver);
 }
 
diff --git a/include/video/trident.h b/include/video/trident.h
index 7540501bc04..85ced9d9131 100644
--- a/include/video/trident.h
+++ b/include/video/trident.h
@@ -4,9 +4,9 @@
 #endif
 
 #if TRIDENTFB_DEBUG
-#define debug(f,a...)	printk("%s:" f,  __FUNCTION__ , ## a);mdelay(100);
+#define debug(f, a...)	printk("%s:" f,  __func__ , ## a);
 #else
-#define debug(f,a...)
+#define debug(f, a...)
 #endif
 
 #define output(f, a...) pr_info("tridentfb: " f, ## a)
-- 
GitLab


From 13b0de49f52ec8638b3e3e59192a959b35214d9e Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:06 -0700
Subject: [PATCH 381/853] tridentfb: fix console freeze when switching from X11

This patch fixes two problems when acceleration is enabled:

 - console switch from the Xorg locks up the computer
   because the Xorg code locks some registers and disables
   the mmio mode, so reenable these in the
   tridentfb_set_par() and enable_mmio()

 - blacklist the Image975 chipset from setting PCI burst
   mode. This helps with random lock ups of the
   framebuffer on this chip. The same fix is probably
   needed for the Xorg as well.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index b6065effc5e..da4b464cbdb 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -558,13 +558,15 @@ static inline void write3CE(struct tridentfb_par *par, int reg,
 	vga_mm_wgfx(par->io_virt, reg, val);
 }
 
-static void enable_mmio(void)
+static void enable_mmio(struct tridentfb_par *par)
 {
 	/* Goto New Mode */
 	vga_io_rseq(0x0B);
 
 	/* Unprotect registers */
 	vga_io_wseq(NewMode1, 0x80);
+	if (!is_oldprotect(par->chip_id))
+		vga_io_wseq(Protection, 0x92);
 
 	/* Enable MMIO */
 	outb(PCIReg, 0x3D4);
@@ -578,6 +580,8 @@ static void disable_mmio(struct tridentfb_par *par)
 
 	/* Unprotect registers */
 	vga_mm_wseq(par->io_virt, NewMode1, 0x80);
+	if (!is_oldprotect(par->chip_id))
+		vga_mm_wseq(par->io_virt, Protection, 0x92);
 
 	/* Disable MMIO */
 	t_outb(par, PCIReg, 0x3D4);
@@ -995,6 +999,7 @@ static int tridentfb_set_par(struct fb_info *info)
 		vblankend /= 2;
 	}
 
+	enable_mmio(par);
 	crtc_unlock(par);
 	write3CE(par, CyberControl, 8);
 	tmp = 0xEB;
@@ -1116,7 +1121,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	if (!is_xp(par->chip_id))
 		write3X4(par, Performance, read3X4(par, Performance) | 0x10);
 	/* MMIO & PCI read and write burst enable */
-	if (par->chip_id != TGUI9440)
+	if (par->chip_id != TGUI9440 && par->chip_id != IMAGE975)
 		write3X4(par, PCIReg, read3X4(par, PCIReg) | 0x06);
 
 	vga_mm_wseq(par->io_virt, 0, 3);
@@ -1403,7 +1408,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		goto out_unmap1;
 	}
 
-	enable_mmio();
+	enable_mmio(default_par);
 
 	/* setup framebuffer memory */
 	tridentfb_fix.smem_start = pci_resource_start(dev, 0);
-- 
GitLab


From a4af1798d768ab2f12ab623e21ad68dc8c248005 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:06 -0700
Subject: [PATCH 382/853] tridentfb: fix 224 color logo at 8 bpp

Fix depth setting for 8 bpp mode.  The nice 224 color logo is not
displayed in 8 bpp depth without this fix.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index da4b464cbdb..1c3f0ba4b33 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -878,11 +878,9 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	switch (bpp) {
 	case 8:
 		var->red.offset = 0;
-		var->green.offset = 0;
-		var->blue.offset = 0;
-		var->red.length = 6;
-		var->green.length = 6;
-		var->blue.length = 6;
+		var->red.length = 8;
+		var->green = var->red;
+		var->blue = var->red;
 		break;
 	case 16:
 		var->red.offset = 11;
-- 
GitLab


From f330c4b1961d730ef15ac184e4b7f1c25847d0ae Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:07 -0700
Subject: [PATCH 383/853] tridentfb: y-panning fixes

The Trident cards use only a 20-bit address of the screen start in double
words.  This allows addressing only 4MB of video memory, so check this.

Also remove some redundant checks and assignments.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 1c3f0ba4b33..138140bdb49 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -870,8 +870,10 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 		line_length = var->xres_virtual * bpp / 8;
 	}
 
-	if (var->yres > var->yres_virtual)
-		var->yres_virtual = var->yres;
+	/* datasheet specifies how to set panning only up to 4 MB */
+	if (line_length * (var->yres_virtual - var->yres) > (4 << 20))
+		var->yres_virtual = ((4 << 20) / line_length) + var->yres;
+
 	if (line_length * var->yres_virtual > info->fix.smem_len)
 		return -EINVAL;
 
@@ -944,8 +946,6 @@ static int tridentfb_pan_display(struct fb_var_screeninfo *var,
 	debug("enter\n");
 	offset = (var->xoffset + (var->yoffset * var->xres_virtual))
 		* var->bits_per_pixel / 32;
-	info->var.xoffset = var->xoffset;
-	info->var.yoffset = var->yoffset;
 	set_screen_start(par, offset);
 	debug("exit\n");
 	return 0;
@@ -1225,7 +1225,6 @@ static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 				((blue & 0xFF00) >> 8);
 	}
 
-/* 	debug("exit\n"); */
 	return 0;
 }
 
-- 
GitLab


From 6280fd4f9c2683a4d2f096320dd74ded4e5106ad Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:08 -0700
Subject: [PATCH 384/853] tridentfb: Blade3D clock fixes

This patch fixes the following problems:
- does not allow the m parameter to reach 0 as
  it locks the graphics core (power cycle needed)
- for the newer chips (with new clock registers)
  does not allow an n / m ratio below 4 as it gives
  an unstable image on the Blade3D core
- extend the shift parameter (k) range to 2 for the newer
  chips to cope with the n / m >= 4 limit at low resolution
  (bandwidth) modes
- prefer modes with higher n / m ratio (higher k values)

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 138140bdb49..854e2e5af58 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -672,15 +672,16 @@ static void set_vclk(struct tridentfb_par *par, unsigned long freq)
 	unsigned long fi, d, di;
 	unsigned char best_m = 0, best_n = 0, best_k = 0;
 	unsigned char hi, lo;
+	unsigned char shift = !is_oldclock(par->chip_id) ? 2 : 1;
 
 	d = 20000;
-	for (k = 1; k >= 0; k--)
-		for (m = 0; m < 32; m++) {
-			n = 2 * (m + 2) - 8;
+	for (k = shift; k >= 0; k--)
+		for (m = 1; m < 32; m++) {
+			n = ((m + 2) << shift) - 8;
 			for (n = (n < 0 ? 0 : n); n < 122; n++) {
 				fi = ((14318l * (n + 8)) / (m + 2)) >> k;
 				di = abs(fi - freq);
-				if (di <= d) {
+				if (di < d || (di == d && k == best_k)) {
 					d = di;
 					best_n = n;
 					best_m = m;
-- 
GitLab


From 0292be4a382957016e8b574dc292779cfb49e029 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:08 -0700
Subject: [PATCH 385/853] tridentfb: add imageblit acceleration for Blade3D
 family

Add imageblit acceleration for the Blade3D family of cores.  The code is
based on code from the cyblafb driver.

It is a step toward assimilating back the cyblafb driver into the
tridentfb driver.  The cyblafb driver handles a subfamily of the Trident
Blade3d cores.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 79 +++++++++++++++++++++++++++++++++++++--
 include/video/trident.h   |  1 +
 2 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index 854e2e5af58..b21f8423902 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -35,11 +35,12 @@ struct tridentfb_par {
 		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
 	void (*copy_rect)
 		(struct tridentfb_par *par, u32, u32, u32, u32, u32, u32);
+	void (*image_blit)
+		(struct tridentfb_par *par, const char*,
+		 u32, u32, u32, u32, u32, u32);
 	unsigned char eng_oper;	/* engine operation... */
 };
 
-static struct fb_ops tridentfb_ops;
-
 static struct fb_fix_screeninfo tridentfb_fix = {
 	.id = "Trident",
 	.type = FB_TYPE_PACKED_PIXELS,
@@ -212,6 +213,21 @@ static void blade_fill_rect(struct tridentfb_par *par,
 	writemmr(par, DST2, point(x + w - 1, y + h - 1));
 }
 
+static void blade_image_blit(struct tridentfb_par *par, const char *data,
+			     u32 x, u32 y, u32 w, u32 h, u32 c, u32 b)
+{
+	unsigned size = ((w + 31) >> 5) * h;
+
+	writemmr(par, COLOR, c);
+	writemmr(par, BGCOLOR, b);
+	writemmr(par, CMD, 0xa0000000 | 3 << 19);
+
+	writemmr(par, DST1, point(x, y));
+	writemmr(par, DST2, point(x + w - 1, y + h - 1));
+
+	memcpy(par->io_virt + 0x10000, data, 4 * size);
+}
+
 static void blade_copy_rect(struct tridentfb_par *par,
 			    u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
@@ -497,6 +513,36 @@ static void tridentfb_fillrect(struct fb_info *info,
 		       fr->height, col, fr->rop);
 }
 
+static void tridentfb_imageblit(struct fb_info *info,
+				const struct fb_image *img)
+{
+	struct tridentfb_par *par = info->par;
+	int col, bgcol;
+
+	if ((info->flags & FBINFO_HWACCEL_DISABLED) || img->depth != 1) {
+		cfb_imageblit(info, img);
+		return;
+	}
+	if (info->var.bits_per_pixel == 8) {
+		col = img->fg_color;
+		col |= col << 8;
+		col |= col << 16;
+		bgcol = img->bg_color;
+		bgcol |= bgcol << 8;
+		bgcol |= bgcol << 16;
+	} else {
+		col = ((u32 *)(info->pseudo_palette))[img->fg_color];
+		bgcol = ((u32 *)(info->pseudo_palette))[img->bg_color];
+	}
+
+	par->wait_engine(par);
+	if (par->image_blit)
+		par->image_blit(par, img->data, img->dx, img->dy,
+				img->width, img->height, col, bgcol);
+	else
+		cfb_imageblit(info, img);
+}
+
 static void tridentfb_copyarea(struct fb_info *info,
 			       const struct fb_copyarea *ca)
 {
@@ -522,6 +568,7 @@ static int tridentfb_sync(struct fb_info *info)
 #else
 #define tridentfb_fillrect cfb_fillrect
 #define tridentfb_copyarea cfb_copyarea
+#define tridentfb_imageblit cfb_imageblit
 #endif /* CONFIG_FB_TRIDENT_ACCEL */
 
 /*
@@ -1285,7 +1332,7 @@ static struct fb_ops tridentfb_ops = {
 	.fb_set_par = tridentfb_set_par,
 	.fb_fillrect = tridentfb_fillrect,
 	.fb_copyarea = tridentfb_copyarea,
-	.fb_imageblit = cfb_imageblit,
+	.fb_imageblit = tridentfb_imageblit,
 #ifdef CONFIG_FB_TRIDENT_ACCEL
 	.fb_sync = tridentfb_sync,
 #endif
@@ -1369,6 +1416,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		default_par->wait_engine = blade_wait_engine;
 		default_par->fill_rect = blade_fill_rect;
 		default_par->copy_rect = blade_copy_rect;
+		default_par->image_blit = blade_image_blit;
 		tridentfb_fix.accel = FB_ACCEL_TRIDENT_BLADE3D;
 	} else if (chip3D) {			/* 3DImage family left */
 		default_par->init_accel = image_init_accel;
@@ -1446,6 +1494,29 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	} else
 		info->flags |= FBINFO_HWACCEL_DISABLED;
 
+	info->pixmap.addr = kmalloc(4096, GFP_KERNEL);
+	if (!info->pixmap.addr) {
+		err = -ENOMEM;
+		goto out_unmap2;
+	}
+
+	info->pixmap.size = 4096;
+	info->pixmap.buf_align = 4;
+	info->pixmap.scan_align = 1;
+	info->pixmap.access_align = 32;
+	info->pixmap.flags = FB_PIXMAP_SYSTEM;
+
+	if (default_par->image_blit) {
+		info->flags |= FBINFO_HWACCEL_IMAGEBLIT;
+		info->pixmap.scan_align = 4;
+	}
+
+	if (noaccel) {
+		printk(KERN_DEBUG "disabling acceleration\n");
+		info->flags |= FBINFO_HWACCEL_DISABLED;
+		info->pixmap.scan_align = 1;
+	}
+
 	if (!fb_find_mode(&info->var, info,
 			  mode_option, NULL, 0, NULL, bpp)) {
 		err = -EINVAL;
@@ -1471,6 +1542,7 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	return 0;
 
 out_unmap2:
+	kfree(info->pixmap.addr);
 	if (info->screen_base)
 		iounmap(info->screen_base);
 	release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len);
@@ -1494,6 +1566,7 @@ static void __devexit trident_pci_remove(struct pci_dev *dev)
 	release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len);
 	release_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len);
 	pci_set_drvdata(dev, NULL);
+	kfree(info->pixmap.addr);
 	framebuffer_release(info);
 }
 
diff --git a/include/video/trident.h b/include/video/trident.h
index 85ced9d9131..b6ce19d1b61 100644
--- a/include/video/trident.h
+++ b/include/video/trident.h
@@ -135,6 +135,7 @@
 #define CMD	0x2144
 #define ROP	0x2148
 #define COLOR	0x2160
+#define BGCOLOR	0x2164
 #define SRC1	0x2100
 #define SRC2	0x2104
 #define DST1	0x2108
-- 
GitLab


From 663b0e15877293451bdfea619db45eafae9dec54 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:09 -0700
Subject: [PATCH 386/853] tridentfb: remove warning message that cyblafb driver
 should be used

The tridentfb driver should handle now all chipsets handled by the cyblafb
driver.  Remove the message which claims that support will be removed.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index b21f8423902..479b2e79ad6 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -1359,10 +1359,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 
 	chip_id = id->device;
 
-	if (chip_id == CYBERBLADEi1)
-		output("*** Please do use cyblafb, Cyberblade/i1 support "
-		       "will soon be removed from tridentfb!\n");
-
 #ifndef CONFIG_FB_TRIDENT_ACCEL
 	noaccel = 1;
 #endif
-- 
GitLab


From 0b9cf3aa6b1e934807b40b4d478d7e11f7c43f55 Mon Sep 17 00:00:00 2001
From: Roland Kletzing <devzero@web.de>
Date: Wed, 23 Jul 2008 21:31:10 -0700
Subject: [PATCH 387/853] mdacon messing up default vc's - set default to
 vc13-16 again

mdacon incorrectly detects MDA hardware on systems without such graphics card.

One may load this module by chance, for example when doing some systematic
module testing, and if there is no Monochrome Display Adapter attached,
module init renders vc1-16 completely unusable.

I and others have run into this more than once.  see [Bug 224522 - modprobe
mdacon freezes machine -> https://bugzilla.novell.com/show_bug.cgi?id=224522 ]
for example

Apparently proper MDA detection has been broken for a long time - this seems
to be related to those #ifdef TEST_MDA_B statements added by Edward Betts.

this commit back in 2002 made things even worse :
http://git.kernel.org/?p=linux/kernel/git/tglx/history.git;a=commit;h=c72757b49c88914433244757fb4967fc63546685

It changed default vc allocation from 13-16 to 1-16 for no apparent reason
(!?) , and with that (and without X), mdacon grabs the vc you`re currently
sitting on and locks you out.

this is from Kconfig :
>config MDA_CONSOLE
>        depends on !M68K && !PARISC && ISA
>        tristate "MDA text console (dual-headed) (EXPERIMENTAL)"
>        ---help---
>          Say Y here if you have an old MDA or monochrome Hercules graphics
>          adapter in your system acting as a second head ( = video card). You
>          will then be able to use two monitors with your Linux system. Do not
>          say Y here if your MDA card is the primary card in your system; the
>          normal VGA driver will handle it.

As we can see mdacon is just meant as an additional driver for dual-head
setup, and since kernel 2.4.36 still defaults to vc13-16 , setting the default
back to that value again shouldn`t do any harm.

Hereby I'm reverting that change, setting the default back to vc13-16 again.

Besides the fact that mdacon may rarely or never be used these days and
could perhaps be put to trash anyway (pre-dinosaur hardware!), this is indeed
not a real solution, but at least it removes the unfortunate side-effect of
messing up the vc you`re working on.

Signed-off-by: Roland Kletzing <devzero@web.de>
Cc: James Simmons <jsimmons@infradead.org>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Tim Schmielau <tim@physik3.uni-rostock.de>
Cc: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/console/mdacon.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c
index 38a296bbdfc..9901064199b 100644
--- a/drivers/video/console/mdacon.c
+++ b/drivers/video/console/mdacon.c
@@ -71,13 +71,15 @@ static char *mda_type_name;
 
 /* console information */
 
-static int	mda_first_vc = 1;
+static int	mda_first_vc = 13;
 static int	mda_last_vc  = 16;
 
 static struct vc_data	*mda_display_fg = NULL;
 
 module_param(mda_first_vc, int, 0);
+MODULE_PARM_DESC(mda_first_vc, "First virtual console. Default: 13");
 module_param(mda_last_vc, int, 0);
+MODULE_PARM_DESC(mda_last_vc, "Last virtual console. Default: 16");
 
 /* MDA register values
  */
-- 
GitLab


From 14aefd1b49ff3bd13caa37fb06bd53488d5d1486 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:31:12 -0700
Subject: [PATCH 388/853] video/sis/: remove compat code

This patch removes compat code for older kernel versions.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: <thomas@winischhofer.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sis/init.h       |  1 -
 drivers/video/sis/init301.h    |  1 -
 drivers/video/sis/initextlfb.c |  1 -
 drivers/video/sis/osdef.h      |  1 -
 drivers/video/sis/sis.h        | 22 ++++-------------
 drivers/video/sis/sis_accel.c  |  1 -
 drivers/video/sis/sis_main.c   | 44 ++++++++--------------------------
 drivers/video/sis/sis_main.h   |  4 ++--
 drivers/video/sis/vgatypes.h   |  4 ----
 9 files changed, 17 insertions(+), 62 deletions(-)

diff --git a/drivers/video/sis/init.h b/drivers/video/sis/init.h
index f40a680df86..b96005c39c6 100644
--- a/drivers/video/sis/init.h
+++ b/drivers/video/sis/init.h
@@ -73,7 +73,6 @@
 #ifdef SIS_CP
 #undef SIS_CP
 #endif
-#include <linux/version.h>
 #include <linux/types.h>
 #include <asm/io.h>
 #include <linux/fb.h>
diff --git a/drivers/video/sis/init301.h b/drivers/video/sis/init301.h
index 7708e1e1d99..51d99222375 100644
--- a/drivers/video/sis/init301.h
+++ b/drivers/video/sis/init301.h
@@ -67,7 +67,6 @@
 #ifdef SIS_CP
 #undef SIS_CP
 #endif
-#include <linux/version.h>
 #include <linux/types.h>
 #include <asm/io.h>
 #include <linux/fb.h>
diff --git a/drivers/video/sis/initextlfb.c b/drivers/video/sis/initextlfb.c
index 47a33501549..99c04a4855d 100644
--- a/drivers/video/sis/initextlfb.c
+++ b/drivers/video/sis/initextlfb.c
@@ -30,7 +30,6 @@
 #include "vgatypes.h"
 #include "vstruct.h"
 
-#include <linux/version.h>
 #include <linux/types.h>
 #include <linux/fb.h>
 
diff --git a/drivers/video/sis/osdef.h b/drivers/video/sis/osdef.h
index c1492782cb1..6ff8f988a1a 100644
--- a/drivers/video/sis/osdef.h
+++ b/drivers/video/sis/osdef.h
@@ -87,7 +87,6 @@
 /**********************************************************************/
 
 #ifdef SIS_LINUX_KERNEL
-#include <linux/version.h>
 
 #ifdef CONFIG_FB_SIS_300
 #define SIS300
diff --git a/drivers/video/sis/sis.h b/drivers/video/sis/sis.h
index a14e8221103..7c5710e3fb5 100644
--- a/drivers/video/sis/sis.h
+++ b/drivers/video/sis/sis.h
@@ -24,8 +24,6 @@
 #ifndef _SIS_H_
 #define _SIS_H_
 
-#include <linux/version.h>
-
 #include "osdef.h"
 #include <video/sisfb.h>
 
@@ -42,16 +40,6 @@
 #define SIS_NEW_CONFIG_COMPAT
 #endif	/* CONFIG_COMPAT */
 
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,8)
-#define SIS_IOTYPE1 void __iomem
-#define SIS_IOTYPE2 __iomem
-#define SISINITSTATIC static
-#else
-#define SIS_IOTYPE1 unsigned char
-#define SIS_IOTYPE2
-#define SISINITSTATIC
-#endif
-
 #undef SISFBDEBUG
 
 #ifdef SISFBDEBUG
@@ -505,8 +493,8 @@ struct sis_video_info {
 
 	unsigned long	UMAsize, LFBsize;
 
-	SIS_IOTYPE1	*video_vbase;
-	SIS_IOTYPE1	*mmio_vbase;
+	void __iomem	*video_vbase;
+	void __iomem	*mmio_vbase;
 
 	unsigned char	*bios_abase;
 
@@ -533,8 +521,8 @@ struct sis_video_info {
 	int		sisfb_nocrt2rate;
 
 	u32		heapstart;		/* offset  */
-	SIS_IOTYPE1	*sisfb_heap_start;	/* address */
-	SIS_IOTYPE1	*sisfb_heap_end;	/* address */
+	void __iomem	*sisfb_heap_start;	/* address */
+	void __iomem	*sisfb_heap_end;	/* address */
 	u32		sisfb_heap_size;
 	int		havenoheap;
 
@@ -612,7 +600,7 @@ struct sis_video_info {
 	u8		detectedpdca;
 	u8		detectedlcda;
 
-	SIS_IOTYPE1	*hwcursor_vbase;
+	void __iomem	*hwcursor_vbase;
 
 	int		chronteltype;
 	int		tvxpos, tvypos;
diff --git a/drivers/video/sis/sis_accel.c b/drivers/video/sis/sis_accel.c
index 7addf91d2fe..ceb434c95c0 100644
--- a/drivers/video/sis/sis_accel.c
+++ b/drivers/video/sis/sis_accel.c
@@ -28,7 +28,6 @@
  *			for more information and updates)
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/fb.h>
diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c
index b9343844cd1..346d6458cf7 100644
--- a/drivers/video/sis/sis_main.c
+++ b/drivers/video/sis/sis_main.c
@@ -33,7 +33,6 @@
  *
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kernel.h>
@@ -41,13 +40,7 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/mm.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
-#include <linux/tty.h>
-#else
 #include <linux/screen_info.h>
-#endif
-
 #include <linux/slab.h>
 #include <linux/fb.h>
 #include <linux/selection.h>
@@ -1167,11 +1160,7 @@ sisfb_set_mode(struct sis_video_info *ivideo, int clrscrn)
 	unsigned short modeno = ivideo->mode_no;
 
 	/* >=2.6.12's fbcon clears the screen anyway */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12)
-	if(!clrscrn) modeno |= 0x80;
-#else
 	modeno |= 0x80;
-#endif
 
 	outSISIDXREG(SISSR, IND_SIS_PASSWORD, SIS_PASSWORD);
 
@@ -1436,11 +1425,8 @@ sisfb_set_par(struct fb_info *info)
 	if((err = sisfb_do_set_var(&info->var, 1, info)))
 		return err;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
-	sisfb_get_fix(&info->fix, info->currcon, info);
-#else
 	sisfb_get_fix(&info->fix, -1, info);
-#endif
+
 	return 0;
 }
 
@@ -1676,14 +1662,8 @@ sisfb_blank(int blank, struct fb_info *info)
 
 /* ----------- FBDev related routines for all series ---------- */
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
 static int	sisfb_ioctl(struct fb_info *info, unsigned int cmd,
 			    unsigned long arg)
-#else
-static int	sisfb_ioctl(struct inode *inode, struct file *file,
-				unsigned int cmd, unsigned long arg,
-				struct fb_info *info)
-#endif
 {
 	struct sis_video_info	*ivideo = (struct sis_video_info *)info->par;
 	struct sis_memreq	sismemreq;
@@ -3986,8 +3966,7 @@ sisfb_handle_command(struct sis_video_info *ivideo, struct sisfb_cmd *sisfb_comm
 }
 
 #ifndef MODULE
-SISINITSTATIC int __init
-sisfb_setup(char *options)
+static int __init sisfb_setup(char *options)
 {
 	char *this_opt;
 
@@ -4086,9 +4065,9 @@ sisfb_setup(char *options)
 #endif
 
 static int __devinit
-sisfb_check_rom(SIS_IOTYPE1 *rom_base, struct sis_video_info *ivideo)
+sisfb_check_rom(void __iomem *rom_base, struct sis_video_info *ivideo)
 {
-	SIS_IOTYPE1 *rom;
+	void __iomem *rom;
 	int romptr;
 
 	if((readb(rom_base) != 0x55) || (readb(rom_base + 1) != 0xaa))
@@ -4117,10 +4096,9 @@ static unsigned char * __devinit
 sisfb_find_rom(struct pci_dev *pdev)
 {
 	struct sis_video_info *ivideo = pci_get_drvdata(pdev);
-	SIS_IOTYPE1 *rom_base;
+	void __iomem *rom_base;
 	unsigned char *myrombase = NULL;
 	u32 temp;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
 	size_t romsize;
 
 	/* First, try the official pci ROM functions (except
@@ -4151,7 +4129,6 @@ sisfb_find_rom(struct pci_dev *pdev)
 	}
 
 	if(myrombase) return myrombase;
-#endif
 
 	/* Otherwise do it the conventional way. */
 
@@ -4225,7 +4202,7 @@ sisfb_post_map_vram(struct sis_video_info *ivideo, unsigned int *mapsize,
 static int __devinit
 sisfb_post_300_buswidth(struct sis_video_info *ivideo)
 {
-	SIS_IOTYPE1 *FBAddress = ivideo->video_vbase;
+	void __iomem *FBAddress = ivideo->video_vbase;
 	unsigned short temp;
 	unsigned char reg;
 	int i, j;
@@ -4273,7 +4250,7 @@ sisfb_post_300_rwtest(struct sis_video_info *ivideo, int iteration, int buswidth
 			int PseudoRankCapacity, int PseudoAdrPinCount,
 			unsigned int mapsize)
 {
-	SIS_IOTYPE1 *FBAddr = ivideo->video_vbase;
+	void __iomem *FBAddr = ivideo->video_vbase;
 	unsigned short sr14;
 	unsigned int k, RankCapacity, PageCapacity, BankNumHigh, BankNumMid;
 	unsigned int PhysicalAdrOtherPage, PhysicalAdrHigh, PhysicalAdrHalfPage;
@@ -5829,7 +5806,7 @@ sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	ivideo->engineok = 0;
 
 	ivideo->sisfb_was_boot_device = 0;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12))
+
 	if(pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW) {
 		if(ivideo->sisvga_enabled)
 			ivideo->sisfb_was_boot_device = 1;
@@ -5840,7 +5817,6 @@ sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 				"as the primary VGA device\n");
 		}
 	}
-#endif
 
 	ivideo->sisfb_parm_mem = sisfb_parm_mem;
 	ivideo->sisfb_accel = sisfb_accel;
@@ -6010,7 +5986,7 @@ sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		ivideo->modeprechange = reg & 0x7f;
 	} else if(ivideo->sisvga_enabled) {
 #if defined(__i386__) || defined(__x86_64__)
-		unsigned char SIS_IOTYPE2 *tt = ioremap(0x400, 0x100);
+		unsigned char __iomem *tt = ioremap(0x400, 0x100);
 		if(tt) {
 			ivideo->modeprechange = readb(tt + 0x49);
 			iounmap(tt);
@@ -6503,7 +6479,7 @@ static struct pci_driver sisfb_driver = {
 	.remove 	= __devexit_p(sisfb_remove)
 };
 
-SISINITSTATIC int __init sisfb_init(void)
+static int __init sisfb_init(void)
 {
 #ifndef MODULE
 	char *options = NULL;
diff --git a/drivers/video/sis/sis_main.h b/drivers/video/sis/sis_main.h
index 3e3b7fa05d6..9540e977270 100644
--- a/drivers/video/sis/sis_main.h
+++ b/drivers/video/sis/sis_main.h
@@ -665,11 +665,11 @@ static struct _customttable {
 
 /* Interface used by the world */
 #ifndef MODULE
-SISINITSTATIC int sisfb_setup(char *options);
+static int sisfb_setup(char *options);
 #endif
 
 /* Interface to the low level console driver */
-SISINITSTATIC int sisfb_init(void);
+static int sisfb_init(void);
 
 /* fbdev routines */
 static int	sisfb_get_fix(struct fb_fix_screeninfo *fix, int con,
diff --git a/drivers/video/sis/vgatypes.h b/drivers/video/sis/vgatypes.h
index b532fbd2b04..81a22eaabfd 100644
--- a/drivers/video/sis/vgatypes.h
+++ b/drivers/video/sis/vgatypes.h
@@ -53,10 +53,6 @@
 #ifndef _VGATYPES_H_
 #define _VGATYPES_H_
 
-#ifdef SIS_LINUX_KERNEL
-#include <linux/version.h>
-#endif
-
 #define SISIOMEMTYPE
 
 #ifdef SIS_LINUX_KERNEL
-- 
GitLab


From b91dbce56a8dbf312f6255d5121b295553d2b4db Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias@kaehlcke.net>
Date: Wed, 23 Jul 2008 21:31:14 -0700
Subject: [PATCH 389/853] pxafb: convert ctrlr_sem in a mutex

The semaphore ctrlr_sem is used as a mutex.  Convert it to the mutex API.

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Cc: Daniel Mack <daniel@caiaq.de>
Cc: Eric Miao <eric.miao@marvell.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/pxafb.c | 7 ++++---
 drivers/video/pxafb.h | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index 5e8a140399f..2b707a8ce5d 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -41,6 +41,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/completion.h>
+#include <linux/mutex.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
@@ -1117,7 +1118,7 @@ static void set_ctrlr_state(struct pxafb_info *fbi, u_int state)
 {
 	u_int old_state;
 
-	down(&fbi->ctrlr_sem);
+	mutex_lock(&fbi->ctrlr_lock);
 
 	old_state = fbi->state;
 
@@ -1205,7 +1206,7 @@ static void set_ctrlr_state(struct pxafb_info *fbi, u_int state)
 		}
 		break;
 	}
-	up(&fbi->ctrlr_sem);
+	mutex_unlock(&fbi->ctrlr_lock);
 }
 
 /*
@@ -1458,7 +1459,7 @@ static struct pxafb_info * __devinit pxafb_init_fbinfo(struct device *dev)
 
 	init_waitqueue_head(&fbi->ctrlr_wait);
 	INIT_WORK(&fbi->task, pxafb_task);
-	init_MUTEX(&fbi->ctrlr_sem);
+	mutex_init(&fbi->ctrlr_lock);
 	init_completion(&fbi->disable_done);
 #ifdef CONFIG_FB_PXA_SMARTPANEL
 	init_completion(&fbi->command_done);
diff --git a/drivers/video/pxafb.h b/drivers/video/pxafb.h
index 8238dc82642..31541b86f13 100644
--- a/drivers/video/pxafb.h
+++ b/drivers/video/pxafb.h
@@ -106,7 +106,7 @@ struct pxafb_info {
 
 	volatile u_char		state;
 	volatile u_char		task_state;
-	struct semaphore	ctrlr_sem;
+	struct mutex		ctrlr_lock;
 	wait_queue_head_t	ctrlr_wait;
 	struct work_struct	task;
 
-- 
GitLab


From 7951ac91c7d45b61f54f1cdabc24b52b40785de6 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias@kaehlcke.net>
Date: Wed, 23 Jul 2008 21:31:16 -0700
Subject: [PATCH 390/853] sa1100fb: convert ctrlr_sem in a mutex

The semaphore ctrlr_sem is used as a mutex.  Convert it to the mutex API

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sa1100fb.c | 7 ++++---
 drivers/video/sa1100fb.h | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c
index 4a9f7e12180..78bcdbc3f48 100644
--- a/drivers/video/sa1100fb.c
+++ b/drivers/video/sa1100fb.c
@@ -175,6 +175,7 @@
 #include <linux/cpufreq.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/mutex.h>
 
 #include <asm/hardware.h>
 #include <asm/io.h>
@@ -1108,7 +1109,7 @@ static void set_ctrlr_state(struct sa1100fb_info *fbi, u_int state)
 {
 	u_int old_state;
 
-	down(&fbi->ctrlr_sem);
+	mutex_lock(&fbi->ctrlr_lock);
 
 	old_state = fbi->state;
 
@@ -1193,7 +1194,7 @@ static void set_ctrlr_state(struct sa1100fb_info *fbi, u_int state)
 		}
 		break;
 	}
-	up(&fbi->ctrlr_sem);
+	mutex_unlock(&fbi->ctrlr_lock);
 }
 
 /*
@@ -1445,7 +1446,7 @@ static struct sa1100fb_info * __init sa1100fb_init_fbinfo(struct device *dev)
 
 	init_waitqueue_head(&fbi->ctrlr_wait);
 	INIT_WORK(&fbi->task, sa1100fb_task);
-	init_MUTEX(&fbi->ctrlr_sem);
+	mutex_init(&fbi->ctrlr_lock);
 
 	return fbi;
 }
diff --git a/drivers/video/sa1100fb.h b/drivers/video/sa1100fb.h
index f465b27ed86..86831db9a04 100644
--- a/drivers/video/sa1100fb.h
+++ b/drivers/video/sa1100fb.h
@@ -100,7 +100,7 @@ struct sa1100fb_info {
 
 	volatile u_char		state;
 	volatile u_char		task_state;
-	struct semaphore	ctrlr_sem;
+	struct mutex		ctrlr_lock;
 	wait_queue_head_t	ctrlr_wait;
 	struct work_struct	task;
 
-- 
GitLab


From fcea8030b3c2e71ad89f080901c63a04f07881c8 Mon Sep 17 00:00:00 2001
From: Tony Breeds <tony@bakeyournoodle.com>
Date: Wed, 23 Jul 2008 21:31:16 -0700
Subject: [PATCH 391/853] drivers/video/aty/radeon_base.c: notify user if
 sysfs_create_bin_file() failed

Current kernel builds warn about:
drivers/video/aty/radeon_base.c: In function 'radeonfb_pci_register':
drivers/video/aty/radeon_base.c:2334: warning: ignoring return value of 'sysfs_create_bin_file', declared with attribute warn_unused_result
drivers/video/aty/radeon_base.c:2336: warning: ignoring return value of 'sysfs_create_bin_file', declared with attribute warn_unused_result

Do minimal checking of these functions and issue a warning if either
fails.  They don't seem to be critical..

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/radeon_base.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 400e9264e45..3c0a03f69d8 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -2161,6 +2161,7 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev,
 	struct radeonfb_info *rinfo;
 	int ret;
 	unsigned char c1, c2;
+	int err = 0;
 
 	pr_debug("radeonfb_pci_register BEGIN\n");
 	
@@ -2340,9 +2341,14 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev,
 
 	/* Register some sysfs stuff (should be done better) */
 	if (rinfo->mon1_EDID)
-		sysfs_create_bin_file(&rinfo->pdev->dev.kobj, &edid1_attr);
+		err |= sysfs_create_bin_file(&rinfo->pdev->dev.kobj,
+						&edid1_attr);
 	if (rinfo->mon2_EDID)
-		sysfs_create_bin_file(&rinfo->pdev->dev.kobj, &edid2_attr);
+		err |= sysfs_create_bin_file(&rinfo->pdev->dev.kobj,
+						&edid2_attr);
+	if (err)
+		pr_warning("%s() Creating sysfs files failed, continuing\n",
+			   __func__);
 
 	/* save current mode regs before we switch into the new one
 	 * so we can restore this upon __exit
-- 
GitLab


From 816664f88707b03fde24fb09759d569ed42406cb Mon Sep 17 00:00:00 2001
From: Roel Kluin <12o3l@tiscali.nl>
Date: Wed, 23 Jul 2008 21:31:17 -0700
Subject: [PATCH 392/853] aty128fb: test below 0 on unsigned pll->post_divider

pll->post_divider is unsigned, so the test "pll->post_divider < 0" is always false

Signed-off-by: Roel Kluin <12o3l@tiscali.nl>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/aty128fb.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 07b6addbb3c..243ea4ab20c 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -1339,10 +1339,8 @@ static int aty128_var_to_pll(u32 period_in_ps, struct aty128_pll *pll,
 	if (vclk * 12 < c.ppll_min)
 		vclk = c.ppll_min/12;
 
-	pll->post_divider = -1;
-
 	/* now, find an acceptable divider */
-	for (i = 0; i < sizeof(post_dividers); i++) {
+	for (i = 0; i < ARRAY_SIZE(post_dividers); i++) {
 		output_freq = post_dividers[i] * vclk;
 		if (output_freq >= c.ppll_min && output_freq <= c.ppll_max) {
 			pll->post_divider = post_dividers[i];
@@ -1350,7 +1348,7 @@ static int aty128_var_to_pll(u32 period_in_ps, struct aty128_pll *pll,
 		}
 	}
 
-	if (pll->post_divider < 0)
+	if (i == ARRAY_SIZE(post_dividers))
 		return -EINVAL;
 
 	/* calculate feedback divider */
-- 
GitLab


From 091c82c01295719d47b89b38d24e41ad2066ead8 Mon Sep 17 00:00:00 2001
From: Roel Kluin <12o3l@tiscali.nl>
Date: Wed, 23 Jul 2008 21:31:18 -0700
Subject: [PATCH 393/853] amifb: test virtual screen range before subtraction
 on unsigned

dx and dy are u32's, so the test should occur before the subtraction

Signed-off-by: Roel Kluin <12o3l@tiscali.nl>
Cc: Antonino Daplas <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/amifb.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/video/amifb.c b/drivers/video/amifb.c
index 45c154ade9c..0c549aa1cf8 100644
--- a/drivers/video/amifb.c
+++ b/drivers/video/amifb.c
@@ -2048,13 +2048,16 @@ static void amifb_copyarea(struct fb_info *info,
 	width = x2 - dx;
 	height = y2 - dy;
 
+	if (area->sx + dx < area->dx || area->sy + dy < area->dy)
+		return;
+
 	/* update sx,sy */
 	sx = area->sx + (dx - area->dx);
 	sy = area->sy + (dy - area->dy);
 
 	/* the source must be completely inside the virtual screen */
-	if (sx < 0 || sy < 0 || (sx + width) > info->var.xres_virtual ||
-	    (sy + height) > info->var.yres_virtual)
+	if (sx + width > info->var.xres_virtual ||
+			sy + height > info->var.yres_virtual)
 		return;
 
 	if (dy > sy || (dy == sy && dx > sx)) {
-- 
GitLab


From 1c0face9d4024bf942096297937759bdf0e1aeac Mon Sep 17 00:00:00 2001
From: Roel Kluin <12o3l@tiscali.nl>
Date: Wed, 23 Jul 2008 21:31:18 -0700
Subject: [PATCH 394/853] atafb: test virtual screen range before subtraction
 on unsigned

dx and dy are u32's, so the test should occur before the subtraction

Signed-off-by: Roel Kluin <12o3l@tiscali.nl>
Cc: Tim Schmielau <tim@physik3.uni-rostock.de>
Cc: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: Antonino Daplas <adaplas@pol.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/atafb.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/video/atafb.c b/drivers/video/atafb.c
index fa55d356b53..77eb8b34fbf 100644
--- a/drivers/video/atafb.c
+++ b/drivers/video/atafb.c
@@ -2593,13 +2593,16 @@ static void atafb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
 	width = x2 - dx;
 	height = y2 - dy;
 
+	if (area->sx + dx < area->dx || area->sy + dy < area->dy)
+		return;
+
 	/* update sx,sy */
 	sx = area->sx + (dx - area->dx);
 	sy = area->sy + (dy - area->dy);
 
 	/* the source must be completely inside the virtual screen */
-	if (sx < 0 || sy < 0 || (sx + width) > info->var.xres_virtual ||
-	    (sy + height) > info->var.yres_virtual)
+	if (sx + width > info->var.xres_virtual ||
+			sy + height > info->var.yres_virtual)
 		return;
 
 	if (dy > sy || (dy == sy && dx > sx)) {
-- 
GitLab


From 77a6e7abb09de0e85a15e2fe42c21ffc59847759 Mon Sep 17 00:00:00 2001
From: Roel Kluin <12o3l@tiscali.nl>
Date: Wed, 23 Jul 2008 21:31:19 -0700
Subject: [PATCH 395/853] vga16fb: test virtual screen range before subtraction
 on unsigned

dx and dy are u32's, so the test should occur before the subtraction

Signed-off-by: Roel Kluin <12o3l@tiscali.nl>
Cc: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/vga16fb.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/video/vga16fb.c b/drivers/video/vga16fb.c
index 9b3c5923365..9d275171789 100644
--- a/drivers/video/vga16fb.c
+++ b/drivers/video/vga16fb.c
@@ -1087,12 +1087,15 @@ static void vga16fb_copyarea(struct fb_info *info, const struct fb_copyarea *are
 	width = x2 - dx;
 	height = y2 - dy;
 
+	if (sx + dx < old_dx || sy + dy < old_dy)
+		return;
+
 	/* update sx1,sy1 */
 	sx += (dx - old_dx);
 	sy += (dy - old_dy);
 
 	/* the source must be completely inside the virtual screen */
-	if (sx < 0 || sy < 0 || (sx + width) > vxres || (sy + height) > vyres)
+	if (sx + width > vxres || sy + height > vyres)
 		return;
 
 	switch (info->fix.type) {
-- 
GitLab


From d22579b837358cbef12ccca5adaf7e93ae09ab7a Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Wed, 23 Jul 2008 21:31:20 -0700
Subject: [PATCH 396/853] atmel_lcdfb: FIFO underflow management

Manage atmel_lcdfb FIFO underflow

Resetting the LCD and DMA fixes the screen shifting that occurs after a FIFO
underflow.  The code follows the reset sequence from the erratum "LCD Screen
Shifting After a Reset".

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Andrew Victor <linux@maxim.org.za>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/atmel_lcdfb.c | 57 ++++++++++++++++++++++++++++++++++++-
 include/video/atmel_lcdc.h  |  1 +
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c
index b004036d408..d335bb96b03 100644
--- a/drivers/video/atmel_lcdfb.c
+++ b/drivers/video/atmel_lcdfb.c
@@ -379,6 +379,35 @@ static int atmel_lcdfb_check_var(struct fb_var_screeninfo *var,
 	return 0;
 }
 
+/*
+ * LCD reset sequence
+ */
+static void atmel_lcdfb_reset(struct atmel_lcdfb_info *sinfo)
+{
+	might_sleep();
+
+	/* LCD power off */
+	lcdc_writel(sinfo, ATMEL_LCDC_PWRCON, sinfo->guard_time << ATMEL_LCDC_GUARDT_OFFSET);
+
+	/* wait for the LCDC core to become idle */
+	while (lcdc_readl(sinfo, ATMEL_LCDC_PWRCON) & ATMEL_LCDC_BUSY)
+		msleep(10);
+
+	/* DMA disable */
+	lcdc_writel(sinfo, ATMEL_LCDC_DMACON, 0);
+
+	/* wait for DMA engine to become idle */
+	while (lcdc_readl(sinfo, ATMEL_LCDC_DMACON) & ATMEL_LCDC_DMABUSY)
+		msleep(10);
+
+	/* LCD power on */
+	lcdc_writel(sinfo, ATMEL_LCDC_PWRCON,
+		(sinfo->guard_time << ATMEL_LCDC_GUARDT_OFFSET) | ATMEL_LCDC_PWR);
+
+	/* DMA enable */
+	lcdc_writel(sinfo, ATMEL_LCDC_DMACON, sinfo->default_dmacon);
+}
+
 /**
  *      atmel_lcdfb_set_par - Alters the hardware state.
  *      @info: frame buffer structure that represents a single frame buffer
@@ -401,6 +430,8 @@ static int atmel_lcdfb_set_par(struct fb_info *info)
 	unsigned long clk_value_khz;
 	unsigned long bits_per_line;
 
+	might_sleep();
+
 	dev_dbg(info->device, "%s:\n", __func__);
 	dev_dbg(info->device, "  * resolution: %ux%u (%ux%u virtual)\n",
 		 info->var.xres, info->var.yres,
@@ -511,6 +542,8 @@ static int atmel_lcdfb_set_par(struct fb_info *info)
 
 	/* Disable all interrupts */
 	lcdc_writel(sinfo, ATMEL_LCDC_IDR, ~0UL);
+	/* Enable FIFO & DMA errors */
+	lcdc_writel(sinfo, ATMEL_LCDC_IER, ATMEL_LCDC_UFLWI | ATMEL_LCDC_OWRI | ATMEL_LCDC_MERI);
 
 	/* ...wait for DMA engine to become idle... */
 	while (lcdc_readl(sinfo, ATMEL_LCDC_DMACON) & ATMEL_LCDC_DMABUSY)
@@ -645,10 +678,26 @@ static irqreturn_t atmel_lcdfb_interrupt(int irq, void *dev_id)
 	u32 status;
 
 	status = lcdc_readl(sinfo, ATMEL_LCDC_ISR);
-	lcdc_writel(sinfo, ATMEL_LCDC_IDR, status);
+	if (status & ATMEL_LCDC_UFLWI) {
+		dev_warn(info->device, "FIFO underflow %#x\n", status);
+		/* reset DMA and FIFO to avoid screen shifting */
+		schedule_work(&sinfo->task);
+	}
+	lcdc_writel(sinfo, ATMEL_LCDC_ICR, status);
 	return IRQ_HANDLED;
 }
 
+/*
+ * LCD controller task (to reset the LCD)
+ */
+static void atmel_lcdfb_task(struct work_struct *work)
+{
+	struct atmel_lcdfb_info *sinfo =
+		container_of(work, struct atmel_lcdfb_info, task);
+
+	atmel_lcdfb_reset(sinfo);
+}
+
 static int __init atmel_lcdfb_init_fbinfo(struct atmel_lcdfb_info *sinfo)
 {
 	struct fb_info *info = sinfo->info;
@@ -824,6 +873,10 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev)
 		goto unmap_mmio;
 	}
 
+	/* Some operations on the LCDC might sleep and
+	 * require a preemptible task context */
+	INIT_WORK(&sinfo->task, atmel_lcdfb_task);
+
 	ret = atmel_lcdfb_init_fbinfo(sinfo);
 	if (ret < 0) {
 		dev_err(dev, "init fbinfo failed: %d\n", ret);
@@ -866,6 +919,7 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev)
 free_cmap:
 	fb_dealloc_cmap(&info->cmap);
 unregister_irqs:
+	cancel_work_sync(&sinfo->task);
 	free_irq(sinfo->irq_base, info);
 unmap_mmio:
 	exit_backlight(sinfo);
@@ -903,6 +957,7 @@ static int __exit atmel_lcdfb_remove(struct platform_device *pdev)
 	if (!sinfo)
 		return 0;
 
+	cancel_work_sync(&sinfo->task);
 	exit_backlight(sinfo);
 	if (sinfo->atmel_lcdfb_power_control)
 		sinfo->atmel_lcdfb_power_control(0);
diff --git a/include/video/atmel_lcdc.h b/include/video/atmel_lcdc.h
index ed64862c4e1..1ccf462b433 100644
--- a/include/video/atmel_lcdc.h
+++ b/include/video/atmel_lcdc.h
@@ -37,6 +37,7 @@ struct atmel_lcdfb_info {
 	struct fb_info		*info;
 	void __iomem		*mmio;
 	unsigned long		irq_base;
+	struct work_struct	task;
 
 	unsigned int		guard_time;
 	struct platform_device	*pdev;
-- 
GitLab


From 49a1d28f57adc9cb064572f0373e26363b0a412f Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:21 -0700
Subject: [PATCH 397/853] fbcon: make logo_height a local variable

Make the logo_height variable local to the only function in which it is used.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/console/fbcon.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 4be3b46c069..3ccfa76d9b2 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -107,9 +107,7 @@ static struct display fb_display[MAX_NR_CONSOLES];
 
 static signed char con2fb_map[MAX_NR_CONSOLES];
 static signed char con2fb_map_boot[MAX_NR_CONSOLES];
-#ifndef MODULE
-static int logo_height;
-#endif
+
 static int logo_lines;
 /* logo_shown is an index to vc_cons when >= 0; otherwise follows FBCON_LOGO
    enums.  */
@@ -607,6 +605,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info,
 	struct fbcon_ops *ops = info->fbcon_par;
 	int cnt, erase = vc->vc_video_erase_char, step;
 	unsigned short *save = NULL, *r, *q;
+	int logo_height;
 
 	if (info->flags & FBINFO_MODULE) {
 		logo_shown = FBCON_LOGO_DONTSHOW;
-- 
GitLab


From 012e26096b36bfeacaba2c9e31eaf32d6faa6567 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:21 -0700
Subject: [PATCH 398/853] uvesafb: change mode parameter to mode_option

Make more drivers use the "mode_option" parameter.  This one is quite new
so drop the old "mode" parameter before someone starts using it seriously.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/uvesafb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index cdbb56edb6c..50744229c7a 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -2054,8 +2054,8 @@ MODULE_PARM_DESC(maxhf,
 module_param(maxvf, ushort, 0);
 MODULE_PARM_DESC(maxvf,
 	"Maximum vertical frequency [Hz], overrides EDID data");
-module_param_named(mode, mode_option, charp, 0);
-MODULE_PARM_DESC(mode,
+module_param(mode_option, charp, 0);
+MODULE_PARM_DESC(mode_option,
 	"Specify initial video mode as \"<xres>x<yres>[-<bpp>][@<refresh>]\"");
 module_param(vbemode, ushort, 0);
 MODULE_PARM_DESC(vbemode,
-- 
GitLab


From a90ed92ed852a3d4b8a6f20b10bba771997f5ede Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:22 -0700
Subject: [PATCH 399/853] tridentfb: documentation update

Bring the tridentfb documentation closer to the current state of the
tridentfb driver.  Also fix some formatting.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/fb/tridentfb.txt | 46 +++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/Documentation/fb/tridentfb.txt b/Documentation/fb/tridentfb.txt
index 8a6c8a43e6a..45d9de5b13a 100644
--- a/Documentation/fb/tridentfb.txt
+++ b/Documentation/fb/tridentfb.txt
@@ -3,11 +3,25 @@ Tridentfb is a framebuffer driver for some Trident chip based cards.
 The following list of chips is thought to be supported although not all are
 tested:
 
-those from the Image series with Cyber in their names - accelerated
-those with Blade in their names (Blade3D,CyberBlade...) - accelerated
-the newer CyberBladeXP family  - nonaccelerated
-
-Only PCI/AGP based cards are supported, none of the older Tridents.
+those from the TGUI series 9440/96XX and with Cyber in their names
+those from the Image series and with Cyber in their names
+those with Blade in their names (Blade3D,CyberBlade...)
+the newer CyberBladeXP family
+
+All families are accelerated. Only PCI/AGP based cards are supported,
+none of the older Tridents.
+The driver supports 8, 16 and 32 bits per pixel depths.
+The TGUI family requires a line length to be power of 2 if acceleration
+is enabled. This means that range of possible resolutions and bpp is
+limited comparing to the range if acceleration is disabled (see list
+of parameters below).
+
+Known bugs:
+1. The driver randomly locks up on 3DImage975 chip with acceleration
+   enabled. The same happens in X11 (Xorg).
+2. The ramdac speeds require some more fine tuning. It is possible to
+   switch resolution which the chip does not support at some depths for
+   older chips.
 
 How to use it?
 ==============
@@ -17,12 +31,11 @@ video=tridentfb
 
 The parameters for tridentfb are concatenated with a ':' as in this example.
 
-video=tridentfb:800x600,bpp=16,noaccel
+video=tridentfb:800x600-16@75,noaccel
 
 The second level parameters that tridentfb understands are:
 
 noaccel - turns off acceleration (when it doesn't work for your card)
-accel - force text acceleration (for boards which by default are noacceled)
 
 fp	- use flat panel related stuff
 crt 	- assume monitor is present instead of fp
@@ -31,21 +44,24 @@ center 	- for flat panels and resolutions smaller than native size center the
 	  image, otherwise use
 stretch
 
-memsize - integer value in Kb, use if your card's memory size is misdetected.
+memsize - integer value in KB, use if your card's memory size is misdetected.
 	  look at the driver output to see what it says when initializing.
-memdiff - integer value in Kb,should be nonzero if your card reports
-	  more memory than it actually has.For instance mine is 192K less than
+
+memdiff - integer value in KB, should be nonzero if your card reports
+	  more memory than it actually has. For instance mine is 192K less than
 	  detection says in all three BIOS selectable situations 2M, 4M, 8M.
 	  Only use if your video memory is taken from main memory hence of
-	  configurable size.Otherwise use memsize.
-	  If in some modes which barely fit the memory you see garbage at the bottom
-	  this might help by not letting change to that mode anymore.
+	  configurable size. Otherwise use memsize.
+	  If in some modes which barely fit the memory you see garbage
+	  at the bottom this might help by not letting change to that mode
+	  anymore.
 
 nativex - the width in pixels of the flat panel.If you know it (usually 1024
 	  800 or 1280) and it is not what the driver seems to detect use it.
 
-bpp  - bits per pixel (8,16 or 32)
-mode - a mode name like 800x600 (as described in Documentation/fb/modedb.txt)
+bpp	- bits per pixel (8,16 or 32)
+mode	- a mode name like 800x600-8@75 as described in
+	  Documentation/fb/modedb.txt
 
 Using insane values for the above parameters will probably result in driver
 misbehaviour so take care(for instance memsize=12345678 or memdiff=23784 or
-- 
GitLab


From ea9014bcacf236124d5e0ff971838049a98456cb Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:22 -0700
Subject: [PATCH 400/853] tdfxfb: add mode_option module parameter

Small step toward unification of the mode setting parameter.  This is
required to fix Bugzilla bug 9847.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tdfxfb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c
index ea9f19d2559..67664252868 100644
--- a/drivers/video/tdfxfb.c
+++ b/drivers/video/tdfxfb.c
@@ -1426,6 +1426,8 @@ MODULE_LICENSE("GPL");
 module_param(hwcursor, int, 0644);
 MODULE_PARM_DESC(hwcursor, "Enable hardware cursor "
 			"(1=enable, 0=disable, default=1)");
+module_param(mode_option, charp, 0);
+MODULE_PARM_DESC(mode_option, "Initial video mode e.g. '648x480-8@60'");
 #ifdef CONFIG_MTRR
 module_param(nomtrr, bool, 0);
 MODULE_PARM_DESC(nomtrr, "Disable MTRR support (default: enabled)");
-- 
GitLab


From 98219374d9ed2d257e56e8e1fcd9d16a083397bb Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:23 -0700
Subject: [PATCH 401/853] vga16fb: source code improvement

Use constants and functions from the vga.h file.  Also add a module
description.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/vga16fb.c | 117 ++++++++++++++--------------------------
 1 file changed, 39 insertions(+), 78 deletions(-)

diff --git a/drivers/video/vga16fb.c b/drivers/video/vga16fb.c
index 9d275171789..e31bca8a0cb 100644
--- a/drivers/video/vga16fb.c
+++ b/drivers/video/vga16fb.c
@@ -26,18 +26,6 @@
 #include <asm/io.h>
 #include <video/vga.h>
 
-#define GRAPHICS_ADDR_REG VGA_GFX_I	/* Graphics address register. */
-#define GRAPHICS_DATA_REG VGA_GFX_D	/* Graphics data register. */
-
-#define SET_RESET_INDEX 	VGA_GFX_SR_VALUE	/* Set/Reset Register index. */
-#define ENABLE_SET_RESET_INDEX	VGA_GFX_SR_ENABLE	/* Enable Set/Reset Register index. */
-#define DATA_ROTATE_INDEX	VGA_GFX_DATA_ROTATE	/* Data Rotate Register index. */
-#define GRAPHICS_MODE_INDEX	VGA_GFX_MODE		/* Graphics Mode Register index. */
-#define BIT_MASK_INDEX		VGA_GFX_BIT_MASK	/* Bit Mask Register index. */
-
-#define dac_reg	(VGA_PEL_IW)
-#define dac_val	(VGA_PEL_D)
-
 #define VGA_FB_PHYS 0xA0000
 #define VGA_FB_PHYS_LEN 65536
 
@@ -108,7 +96,7 @@ static struct fb_fix_screeninfo vga16fb_fix __initdata = {
 	.visual		= FB_VISUAL_PSEUDOCOLOR,
 	.xpanstep	= 8,
 	.ypanstep	= 1,
-	.line_length	= 640/8,
+	.line_length	= 640 / 8,
 	.accel		= FB_ACCEL_NONE
 };
 
@@ -135,23 +123,22 @@ static inline int setmode(int mode)
 {
 	int oldmode;
 	
-	vga_io_w(GRAPHICS_ADDR_REG, GRAPHICS_MODE_INDEX);
-	oldmode = vga_io_r(GRAPHICS_DATA_REG);
-	vga_io_w(GRAPHICS_DATA_REG, mode);
+	oldmode = vga_io_rgfx(VGA_GFX_MODE);
+	vga_io_w(VGA_GFX_D, mode);
 	return oldmode;
 }
 
 /* Select the Bit Mask Register and return its value. */
 static inline int selectmask(void)
 {
-	return vga_io_rgfx(BIT_MASK_INDEX);
+	return vga_io_rgfx(VGA_GFX_BIT_MASK);
 }
 
 /* Set the value of the Bit Mask Register.  It must already have been
    selected with selectmask(). */
 static inline void setmask(int mask)
 {
-	vga_io_w(GRAPHICS_DATA_REG, mask);
+	vga_io_w(VGA_GFX_D, mask);
 }
 
 /* Set the Data Rotate Register and return its old value. 
@@ -161,9 +148,8 @@ static inline int setop(int op)
 {
 	int oldop;
 	
-	vga_io_w(GRAPHICS_ADDR_REG, DATA_ROTATE_INDEX);
-	oldop = vga_io_r(GRAPHICS_DATA_REG);
-	vga_io_w(GRAPHICS_DATA_REG, op);
+	oldop = vga_io_rgfx(VGA_GFX_DATA_ROTATE);
+	vga_io_w(VGA_GFX_D, op);
 	return oldop;
 }
 
@@ -173,9 +159,8 @@ static inline int setsr(int sr)
 {
 	int oldsr;
 
-	vga_io_w(GRAPHICS_ADDR_REG, ENABLE_SET_RESET_INDEX);
-	oldsr = vga_io_r(GRAPHICS_DATA_REG);
-	vga_io_w(GRAPHICS_DATA_REG, sr);
+	oldsr = vga_io_rgfx(VGA_GFX_SR_ENABLE);
+	vga_io_w(VGA_GFX_D, sr);
 	return oldsr;
 }
 
@@ -184,22 +169,21 @@ static inline int setcolor(int color)
 {
 	int oldcolor;
 
-	vga_io_w(GRAPHICS_ADDR_REG, SET_RESET_INDEX);
-	oldcolor = vga_io_r(GRAPHICS_DATA_REG);
-	vga_io_w(GRAPHICS_DATA_REG, color);
+	oldcolor = vga_io_rgfx(VGA_GFX_SR_VALUE);
+	vga_io_w(VGA_GFX_D, color);
 	return oldcolor;
 }
 
 /* Return the value in the Graphics Address Register. */
 static inline int getindex(void)
 {
-	return vga_io_r(GRAPHICS_ADDR_REG);
+	return vga_io_r(VGA_GFX_I);
 }
 
 /* Set the value in the Graphics Address Register. */
 static inline void setindex(int index)
 {
-	vga_io_w(GRAPHICS_ADDR_REG, index);
+	vga_io_w(VGA_GFX_I, index);
 }
 
 static void vga16fb_pan_var(struct fb_info *info, 
@@ -672,10 +656,10 @@ static void ega16_setpalette(int regno, unsigned red, unsigned green, unsigned b
 
 static void vga16_setpalette(int regno, unsigned red, unsigned green, unsigned blue)
 {
-	outb(regno,       dac_reg);
-	outb(red   >> 10, dac_val);
-	outb(green >> 10, dac_val);
-	outb(blue  >> 10, dac_val);
+	outb(regno,       VGA_PEL_IW);
+	outb(red   >> 10, VGA_PEL_D);
+	outb(green >> 10, VGA_PEL_D);
+	outb(blue  >> 10, VGA_PEL_D);
 }
 
 static int vga16fb_setcolreg(unsigned regno, unsigned red, unsigned green,
@@ -719,28 +703,15 @@ static int vga16fb_pan_display(struct fb_var_screeninfo *var,
    blanking code was originally by Huang shi chao, and modified by
    Christoph Rimek (chrimek@toppoint.de) and todd j. derr
    (tjd@barefoot.org) for Linux. */
-#define attrib_port		VGA_ATC_IW
-#define seq_port_reg		VGA_SEQ_I
-#define seq_port_val		VGA_SEQ_D
-#define gr_port_reg		VGA_GFX_I
-#define gr_port_val		VGA_GFX_D
-#define video_misc_rd		VGA_MIS_R
-#define video_misc_wr		VGA_MIS_W
-#define vga_video_port_reg	VGA_CRT_IC
-#define vga_video_port_val	VGA_CRT_DC
 
 static void vga_vesa_blank(struct vga16fb_par *par, int mode)
 {
-	unsigned char SeqCtrlIndex;
-	unsigned char CrtCtrlIndex;
+	unsigned char SeqCtrlIndex = vga_io_r(VGA_SEQ_I);
+	unsigned char CrtCtrlIndex = vga_io_r(VGA_CRT_IC);
 	
-	//cli();
-	SeqCtrlIndex = vga_io_r(seq_port_reg);
-	CrtCtrlIndex = vga_io_r(vga_video_port_reg);
-
 	/* save original values of VGA controller registers */
 	if(!par->vesa_blanked) {
-		par->vga_state.CrtMiscIO = vga_io_r(video_misc_rd);
+		par->vga_state.CrtMiscIO = vga_io_r(VGA_MIS_R);
 		//sti();
 
 		par->vga_state.HorizontalTotal = vga_io_rcrt(0x00);	/* HorizontalTotal */
@@ -756,12 +727,11 @@ static void vga_vesa_blank(struct vga16fb_par *par, int mode)
 
 	/* assure that video is enabled */
 	/* "0x20" is VIDEO_ENABLE_bit in register 01 of sequencer */
-	//cli();
 	vga_io_wseq(0x01, par->vga_state.ClockingMode | 0x20);
 
 	/* test for vertical retrace in process.... */
 	if ((par->vga_state.CrtMiscIO & 0x80) == 0x80)
-		vga_io_w(video_misc_wr, par->vga_state.CrtMiscIO & 0xef);
+		vga_io_w(VGA_MIS_W, par->vga_state.CrtMiscIO & 0xef);
 
 	/*
 	 * Set <End of vertical retrace> to minimum (0) and
@@ -769,12 +739,10 @@ static void vga_vesa_blank(struct vga16fb_par *par, int mode)
 	 * Result: turn off vertical sync (VSync) pulse.
 	 */
 	if (mode & FB_BLANK_VSYNC_SUSPEND) {
-		outb_p(0x10,vga_video_port_reg);	/* StartVertRetrace */
-		outb_p(0xff,vga_video_port_val); 	/* maximum value */
-		outb_p(0x11,vga_video_port_reg);	/* EndVertRetrace */
-		outb_p(0x40,vga_video_port_val);	/* minimum (bits 0..3)  */
-		outb_p(0x07,vga_video_port_reg);	/* Overflow */
-		outb_p(par->vga_state.Overflow | 0x84,vga_video_port_val); /* bits 9,10 of vert. retrace */
+		vga_io_wcrt(VGA_CRTC_V_SYNC_START, 0xff);
+		vga_io_wcrt(VGA_CRTC_V_SYNC_END, 0x40);
+		/* bits 9,10 of vert. retrace */
+		vga_io_wcrt(VGA_CRTC_OVERFLOW, par->vga_state.Overflow | 0x84);
 	}
 
 	if (mode & FB_BLANK_HSYNC_SUSPEND) {
@@ -783,29 +751,22 @@ static void vga_vesa_blank(struct vga16fb_par *par, int mode)
 		 *  <Start of horizontal Retrace> to maximum
 		 * Result: turn off horizontal sync (HSync) pulse.
 		 */
-		outb_p(0x04,vga_video_port_reg);	/* StartHorizRetrace */
-		outb_p(0xff,vga_video_port_val);	/* maximum */
-		outb_p(0x05,vga_video_port_reg);	/* EndHorizRetrace */
-		outb_p(0x00,vga_video_port_val);	/* minimum (0) */
+		vga_io_wcrt(VGA_CRTC_H_SYNC_START, 0xff);
+		vga_io_wcrt(VGA_CRTC_H_SYNC_END, 0x00);
 	}
 
 	/* restore both index registers */
-	outb_p(SeqCtrlIndex,seq_port_reg);
-	outb_p(CrtCtrlIndex,vga_video_port_reg);
-	//sti();
+	outb_p(SeqCtrlIndex, VGA_SEQ_I);
+	outb_p(CrtCtrlIndex, VGA_CRT_IC);
 }
 
 static void vga_vesa_unblank(struct vga16fb_par *par)
 {
-	unsigned char SeqCtrlIndex;
-	unsigned char CrtCtrlIndex;
+	unsigned char SeqCtrlIndex = vga_io_r(VGA_SEQ_I);
+	unsigned char CrtCtrlIndex = vga_io_r(VGA_CRT_IC);
 	
-	//cli();
-	SeqCtrlIndex = vga_io_r(seq_port_reg);
-	CrtCtrlIndex = vga_io_r(vga_video_port_reg);
-
 	/* restore original values of VGA controller registers */
-	vga_io_w(video_misc_wr, par->vga_state.CrtMiscIO);
+	vga_io_w(VGA_MIS_W, par->vga_state.CrtMiscIO);
 
 	/* HorizontalTotal */
 	vga_io_wcrt(0x00, par->vga_state.HorizontalTotal);
@@ -827,9 +788,8 @@ static void vga_vesa_unblank(struct vga16fb_par *par)
 	vga_io_wseq(0x01, par->vga_state.ClockingMode);
 
 	/* restore index/control registers */
-	vga_io_w(seq_port_reg, SeqCtrlIndex);
-	vga_io_w(vga_video_port_reg, CrtCtrlIndex);
-	//sti();
+	vga_io_w(VGA_SEQ_I, SeqCtrlIndex);
+	vga_io_w(VGA_CRT_IC, CrtCtrlIndex);
 }
 
 static void vga_pal_blank(void)
@@ -837,10 +797,10 @@ static void vga_pal_blank(void)
 	int i;
 
 	for (i=0; i<16; i++) {
-		outb_p (i, dac_reg) ;
-		outb_p (0, dac_val) ;
-		outb_p (0, dac_val) ;
-		outb_p (0, dac_val) ;
+		outb_p(i, VGA_PEL_IW);
+		outb_p(0, VGA_PEL_D);
+		outb_p(0, VGA_PEL_D);
+		outb_p(0, VGA_PEL_D);
 	}
 }
 
@@ -1485,6 +1445,7 @@ static void __exit vga16fb_exit(void)
 	platform_driver_unregister(&vga16fb_driver);
 }
 
+MODULE_DESCRIPTION("Legacy VGA framebuffer device driver");
 MODULE_LICENSE("GPL");
 module_init(vga16fb_init);
 module_exit(vga16fb_exit);
-- 
GitLab


From c2c12155cf05bf3e25eeae5711beffc634505400 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:24 -0700
Subject: [PATCH 402/853] tdfxfb: remove ypan checks done by a higher layer

These checks and assignments are done by a higher layer so remove them
from the driver.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tdfxfb.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c
index 67664252868..77aafcfae03 100644
--- a/drivers/video/tdfxfb.c
+++ b/drivers/video/tdfxfb.c
@@ -836,16 +836,12 @@ static int tdfxfb_pan_display(struct fb_var_screeninfo *var,
 	struct tdfx_par *par = info->par;
 	u32 addr = var->yoffset * info->fix.line_length;
 
-	if (nopan || var->xoffset || (var->yoffset > var->yres_virtual))
-		return -EINVAL;
-	if ((var->yoffset + var->yres > var->yres_virtual && nowrap))
+	if (nopan || var->xoffset)
 		return -EINVAL;
 
 	banshee_make_room(par, 1);
 	tdfx_outl(par, VIDDESKSTART, addr);
 
-	info->var.xoffset = var->xoffset;
-	info->var.yoffset = var->yoffset;
 	return 0;
 }
 
-- 
GitLab


From cfb4f5d1750e05f43902197713c50c29e7dfbc99 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Wed, 23 Jul 2008 21:31:24 -0700
Subject: [PATCH 403/853] fbdev: SuperH Mobile LCDC Driver

This is the SuperH Mobile LCDC frame buffer driver V2, adding support for
the LCDC block found in SuperH Mobile processors.  The hardware supports
up to two LCD panels per LCDC block, and both RGB and SYS interfaces can
be used to hook up LCD panels/modules.

The device driver is a regular platform driver, so LCD configuration and
board specific hooks are passed to the driver using platform data.  LCD
modules using SYS interface often require special configuration using the
SYS bus, and to solve this cleanly the driver provides SYS interface
operations to the board code.

Tested on sh7723 and sh7722 processors with a SYS16A QVGA panel and WVGA
panels using RGB16 and RGB18 interfaces.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Reviewed-by: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/Kconfig            |  10 +
 drivers/video/Makefile           |   2 +
 drivers/video/sh_mobile_lcdcfb.c | 725 +++++++++++++++++++++++++++++++
 include/asm-sh/sh_mobile_lcdc.h  |  66 +++
 4 files changed, 803 insertions(+)
 create mode 100644 drivers/video/sh_mobile_lcdcfb.c
 create mode 100644 include/asm-sh/sh_mobile_lcdc.h

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 7072d2c5a04..80fa066416b 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1866,6 +1866,16 @@ config FB_W100
 
 	  If unsure, say N.
 
+config FB_SH_MOBILE_LCDC
+	tristate "SuperH Mobile LCDC framebuffer support"
+	depends on FB && SUPERH
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	default m
+	---help---
+	  Frame buffer driver for the on-chip SH-Mobile LCD controller.
+
 config FB_S3C2410
 	tristate "S3C2410 LCD framebuffer support"
 	depends on FB && ARCH_S3C2410
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 7ee85c0d2e5..4809f8b9bb2 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_FB_IBM_GXT4500)	  += gxt4500.o
 obj-$(CONFIG_FB_PS3)		  += ps3fb.o
 obj-$(CONFIG_FB_SM501)            += sm501fb.o
 obj-$(CONFIG_FB_XILINX)           += xilinxfb.o
+obj-$(CONFIG_FB_SH_MOBILE_LCDC)	  += sh_mobile_lcdcfb.o
+obj-$(CONFIG_FB_SH7343VOU)	  += sh7343_voufb.o
 obj-$(CONFIG_FB_OMAP)             += omap/
 obj-$(CONFIG_XEN_FBDEV_FRONTEND)  += xen-fbfront.o
 obj-$(CONFIG_FB_CARMINE)          += carminefb.o
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
new file mode 100644
index 00000000000..f6ef6cca73c
--- /dev/null
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -0,0 +1,725 @@
+/*
+ * SuperH Mobile LCDC Framebuffer
+ *
+ * Copyright (c) 2008 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <linux/fb.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <asm/sh_mobile_lcdc.h>
+
+#define PALETTE_NR 16
+
+struct sh_mobile_lcdc_priv;
+struct sh_mobile_lcdc_chan {
+	struct sh_mobile_lcdc_priv *lcdc;
+	unsigned long *reg_offs;
+	unsigned long ldmt1r_value;
+	unsigned long enabled; /* ME and SE in LDCNT2R */
+	struct sh_mobile_lcdc_chan_cfg cfg;
+	u32 pseudo_palette[PALETTE_NR];
+	struct fb_info info;
+	dma_addr_t dma_handle;
+};
+
+struct sh_mobile_lcdc_priv {
+	void __iomem *base;
+	struct clk *clk;
+	unsigned long lddckr;
+	struct sh_mobile_lcdc_chan ch[2];
+};
+
+/* shared registers */
+#define _LDDCKR 0x410
+#define _LDDCKSTPR 0x414
+#define _LDINTR 0x468
+#define _LDSR 0x46c
+#define _LDCNT1R 0x470
+#define _LDCNT2R 0x474
+#define _LDDDSR 0x47c
+#define _LDDWD0R 0x800
+#define _LDDRDR 0x840
+#define _LDDWAR 0x900
+#define _LDDRAR 0x904
+
+/* per-channel registers */
+enum { LDDCKPAT1R, LDDCKPAT2R, LDMT1R, LDMT2R, LDMT3R, LDDFR, LDSM1R,
+       LDSA1R, LDMLSR, LDHCNR, LDHSYNR, LDVLNR, LDVSYNR, LDPMR };
+
+static unsigned long lcdc_offs_mainlcd[] = {
+	[LDDCKPAT1R] = 0x400,
+	[LDDCKPAT2R] = 0x404,
+	[LDMT1R] = 0x418,
+	[LDMT2R] = 0x41c,
+	[LDMT3R] = 0x420,
+	[LDDFR] = 0x424,
+	[LDSM1R] = 0x428,
+	[LDSA1R] = 0x430,
+	[LDMLSR] = 0x438,
+	[LDHCNR] = 0x448,
+	[LDHSYNR] = 0x44c,
+	[LDVLNR] = 0x450,
+	[LDVSYNR] = 0x454,
+	[LDPMR] = 0x460,
+};
+
+static unsigned long lcdc_offs_sublcd[] = {
+	[LDDCKPAT1R] = 0x408,
+	[LDDCKPAT2R] = 0x40c,
+	[LDMT1R] = 0x600,
+	[LDMT2R] = 0x604,
+	[LDMT3R] = 0x608,
+	[LDDFR] = 0x60c,
+	[LDSM1R] = 0x610,
+	[LDSA1R] = 0x618,
+	[LDMLSR] = 0x620,
+	[LDHCNR] = 0x624,
+	[LDHSYNR] = 0x628,
+	[LDVLNR] = 0x62c,
+	[LDVSYNR] = 0x630,
+	[LDPMR] = 0x63c,
+};
+
+#define START_LCDC	0x00000001
+#define LCDC_RESET	0x00000100
+#define DISPLAY_BEU	0x00000008
+#define LCDC_ENABLE	0x00000001
+
+static void lcdc_write_chan(struct sh_mobile_lcdc_chan *chan,
+			    int reg_nr, unsigned long data)
+{
+	iowrite32(data, chan->lcdc->base + chan->reg_offs[reg_nr]);
+}
+
+static unsigned long lcdc_read_chan(struct sh_mobile_lcdc_chan *chan,
+				    int reg_nr)
+{
+	return ioread32(chan->lcdc->base + chan->reg_offs[reg_nr]);
+}
+
+static void lcdc_write(struct sh_mobile_lcdc_priv *priv,
+		       unsigned long reg_offs, unsigned long data)
+{
+	iowrite32(data, priv->base + reg_offs);
+}
+
+static unsigned long lcdc_read(struct sh_mobile_lcdc_priv *priv,
+			       unsigned long reg_offs)
+{
+	return ioread32(priv->base + reg_offs);
+}
+
+static void lcdc_wait_bit(struct sh_mobile_lcdc_priv *priv,
+			  unsigned long reg_offs,
+			  unsigned long mask, unsigned long until)
+{
+	while ((lcdc_read(priv, reg_offs) & mask) != until)
+		cpu_relax();
+}
+
+static int lcdc_chan_is_sublcd(struct sh_mobile_lcdc_chan *chan)
+{
+	return chan->cfg.chan == LCDC_CHAN_SUBLCD;
+}
+
+static void lcdc_sys_write_index(void *handle, unsigned long data)
+{
+	struct sh_mobile_lcdc_chan *ch = handle;
+
+	lcdc_write(ch->lcdc, _LDDWD0R, data | 0x10000000);
+	lcdc_wait_bit(ch->lcdc, _LDSR, 2, 0);
+	lcdc_write(ch->lcdc, _LDDWAR, 1 | (lcdc_chan_is_sublcd(ch) ? 2 : 0));
+}
+
+static void lcdc_sys_write_data(void *handle, unsigned long data)
+{
+	struct sh_mobile_lcdc_chan *ch = handle;
+
+	lcdc_write(ch->lcdc, _LDDWD0R, data | 0x11000000);
+	lcdc_wait_bit(ch->lcdc, _LDSR, 2, 0);
+	lcdc_write(ch->lcdc, _LDDWAR, 1 | (lcdc_chan_is_sublcd(ch) ? 2 : 0));
+}
+
+static unsigned long lcdc_sys_read_data(void *handle)
+{
+	struct sh_mobile_lcdc_chan *ch = handle;
+
+	lcdc_write(ch->lcdc, _LDDRDR, 0x01000000);
+	lcdc_wait_bit(ch->lcdc, _LDSR, 2, 0);
+	lcdc_write(ch->lcdc, _LDDRAR, 1 | (lcdc_chan_is_sublcd(ch) ? 2 : 0));
+	udelay(1);
+
+	return lcdc_read(ch->lcdc, _LDDRDR) & 0xffff;
+}
+
+struct sh_mobile_lcdc_sys_bus_ops sh_mobile_lcdc_sys_bus_ops = {
+	lcdc_sys_write_index,
+	lcdc_sys_write_data,
+	lcdc_sys_read_data,
+};
+
+static void sh_mobile_lcdc_start_stop(struct sh_mobile_lcdc_priv *priv,
+				      int start)
+{
+	unsigned long tmp = lcdc_read(priv, _LDCNT2R);
+	int k;
+
+	/* start or stop the lcdc */
+	if (start)
+		lcdc_write(priv, _LDCNT2R, tmp | START_LCDC);
+	else
+		lcdc_write(priv, _LDCNT2R, tmp & ~START_LCDC);
+
+	/* wait until power is applied/stopped on all channels */
+	for (k = 0; k < ARRAY_SIZE(priv->ch); k++)
+		if (lcdc_read(priv, _LDCNT2R) & priv->ch[k].enabled)
+			while (1) {
+				tmp = lcdc_read_chan(&priv->ch[k], LDPMR) & 3;
+				if (start && tmp == 3)
+					break;
+				if (!start && tmp == 0)
+					break;
+				cpu_relax();
+			}
+
+	if (!start)
+		lcdc_write(priv, _LDDCKSTPR, 1); /* stop dotclock */
+}
+
+static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
+{
+	struct sh_mobile_lcdc_chan *ch;
+	struct fb_videomode *lcd_cfg;
+	struct sh_mobile_lcdc_board_cfg	*board_cfg;
+	unsigned long tmp;
+	int k, m;
+	int ret = 0;
+
+	/* reset */
+	lcdc_write(priv, _LDCNT2R, lcdc_read(priv, _LDCNT2R) | LCDC_RESET);
+	lcdc_wait_bit(priv, _LDCNT2R, LCDC_RESET, 0);
+
+	/* enable LCDC channels */
+	tmp = lcdc_read(priv, _LDCNT2R);
+	tmp |= priv->ch[0].enabled;
+	tmp |= priv->ch[1].enabled;
+	lcdc_write(priv, _LDCNT2R, tmp);
+
+	/* read data from external memory, avoid using the BEU for now */
+	lcdc_write(priv, _LDCNT2R, lcdc_read(priv, _LDCNT2R) & ~DISPLAY_BEU);
+
+	/* stop the lcdc first */
+	sh_mobile_lcdc_start_stop(priv, 0);
+
+	/* configure clocks */
+	tmp = priv->lddckr;
+	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
+		ch = &priv->ch[k];
+
+		if (!priv->ch[k].enabled)
+			continue;
+
+		m = ch->cfg.clock_divider;
+		if (!m)
+			continue;
+
+		if (m == 1)
+			m = 1 << 6;
+		tmp |= m << (lcdc_chan_is_sublcd(ch) ? 8 : 0);
+
+		lcdc_write_chan(ch, LDDCKPAT1R, 0x00000000);
+		lcdc_write_chan(ch, LDDCKPAT2R, (1 << (m/2)) - 1);
+	}
+
+	lcdc_write(priv, _LDDCKR, tmp);
+
+	/* start dotclock again */
+	lcdc_write(priv, _LDDCKSTPR, 0);
+	lcdc_wait_bit(priv, _LDDCKSTPR, ~0, 0);
+
+	/* interrupts are disabled */
+	lcdc_write(priv, _LDINTR, 0);
+
+	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
+		ch = &priv->ch[k];
+		lcd_cfg = &ch->cfg.lcd_cfg;
+
+		if (!ch->enabled)
+			continue;
+
+		tmp = ch->ldmt1r_value;
+		tmp |= (lcd_cfg->sync & FB_SYNC_VERT_HIGH_ACT) ? 0 : 1 << 28;
+		tmp |= (lcd_cfg->sync & FB_SYNC_HOR_HIGH_ACT) ? 0 : 1 << 27;
+		lcdc_write_chan(ch, LDMT1R, tmp);
+
+		/* setup SYS bus */
+		lcdc_write_chan(ch, LDMT2R, ch->cfg.sys_bus_cfg.ldmt2r);
+		lcdc_write_chan(ch, LDMT3R, ch->cfg.sys_bus_cfg.ldmt3r);
+
+		/* horizontal configuration */
+		tmp = lcd_cfg->xres + lcd_cfg->hsync_len;
+		tmp += lcd_cfg->left_margin;
+		tmp += lcd_cfg->right_margin;
+		tmp /= 8; /* HTCN */
+		tmp |= (lcd_cfg->xres / 8) << 16; /* HDCN */
+		lcdc_write_chan(ch, LDHCNR, tmp);
+
+		tmp = lcd_cfg->xres;
+		tmp += lcd_cfg->right_margin;
+		tmp /= 8; /* HSYNP */
+		tmp |= (lcd_cfg->hsync_len / 8) << 16; /* HSYNW */
+		lcdc_write_chan(ch, LDHSYNR, tmp);
+
+		/* power supply */
+		lcdc_write_chan(ch, LDPMR, 0);
+
+		/* vertical configuration */
+		tmp = lcd_cfg->yres + lcd_cfg->vsync_len;
+		tmp += lcd_cfg->upper_margin;
+		tmp += lcd_cfg->lower_margin; /* VTLN */
+		tmp |= lcd_cfg->yres << 16; /* VDLN */
+		lcdc_write_chan(ch, LDVLNR, tmp);
+
+		tmp = lcd_cfg->yres;
+		tmp += lcd_cfg->lower_margin; /* VSYNP */
+		tmp |= lcd_cfg->vsync_len << 16; /* VSYNW */
+		lcdc_write_chan(ch, LDVSYNR, tmp);
+
+		board_cfg = &ch->cfg.board_cfg;
+		if (board_cfg->setup_sys)
+			ret = board_cfg->setup_sys(board_cfg->board_data, ch,
+						   &sh_mobile_lcdc_sys_bus_ops);
+		if (ret)
+			return ret;
+	}
+
+	/* --- display_lcdc_data() --- */
+	lcdc_write(priv, _LDINTR, 0x00000f00);
+
+	/* word and long word swap */
+	lcdc_write(priv, _LDDDSR, lcdc_read(priv, _LDDDSR) | 6);
+
+	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
+		ch = &priv->ch[k];
+
+		if (!priv->ch[k].enabled)
+			continue;
+
+		/* set bpp format in PKF[4:0] */
+		tmp = lcdc_read_chan(ch, LDDFR);
+		tmp &= ~(0x0001001f);
+		tmp |= (priv->ch[k].info.var.bits_per_pixel == 16) ? 3 : 0;
+		lcdc_write_chan(ch, LDDFR, tmp);
+
+		/* point out our frame buffer */
+		lcdc_write_chan(ch, LDSA1R, ch->info.fix.smem_start);
+
+		/* set line size */
+		lcdc_write_chan(ch, LDMLSR, ch->info.fix.line_length);
+
+		/* continuous read mode */
+		lcdc_write_chan(ch, LDSM1R, 0);
+	}
+
+	/* display output */
+	lcdc_write(priv, _LDCNT1R, LCDC_ENABLE);
+
+	/* start the lcdc */
+	sh_mobile_lcdc_start_stop(priv, 1);
+
+	/* tell the board code to enable the panel */
+	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
+		ch = &priv->ch[k];
+		board_cfg = &ch->cfg.board_cfg;
+		if (board_cfg->display_on)
+			board_cfg->display_on(board_cfg->board_data);
+	}
+
+	return 0;
+}
+
+static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv)
+{
+	struct sh_mobile_lcdc_chan *ch;
+	struct sh_mobile_lcdc_board_cfg	*board_cfg;
+	int k;
+
+	/* tell the board code to disable the panel */
+	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
+		ch = &priv->ch[k];
+		board_cfg = &ch->cfg.board_cfg;
+		if (board_cfg->display_off)
+			board_cfg->display_off(board_cfg->board_data);
+	}
+
+	/* stop the lcdc */
+	sh_mobile_lcdc_start_stop(priv, 0);
+}
+
+static int sh_mobile_lcdc_check_interface(struct sh_mobile_lcdc_chan *ch)
+{
+	int ifm, miftyp;
+
+	switch (ch->cfg.interface_type) {
+	case RGB8: ifm = 0; miftyp = 0; break;
+	case RGB9: ifm = 0; miftyp = 4; break;
+	case RGB12A: ifm = 0; miftyp = 5; break;
+	case RGB12B: ifm = 0; miftyp = 6; break;
+	case RGB16: ifm = 0; miftyp = 7; break;
+	case RGB18: ifm = 0; miftyp = 10; break;
+	case RGB24: ifm = 0; miftyp = 11; break;
+	case SYS8A: ifm = 1; miftyp = 0; break;
+	case SYS8B: ifm = 1; miftyp = 1; break;
+	case SYS8C: ifm = 1; miftyp = 2; break;
+	case SYS8D: ifm = 1; miftyp = 3; break;
+	case SYS9: ifm = 1; miftyp = 4; break;
+	case SYS12: ifm = 1; miftyp = 5; break;
+	case SYS16A: ifm = 1; miftyp = 7; break;
+	case SYS16B: ifm = 1; miftyp = 8; break;
+	case SYS16C: ifm = 1; miftyp = 9; break;
+	case SYS18: ifm = 1; miftyp = 10; break;
+	case SYS24: ifm = 1; miftyp = 11; break;
+	default: goto bad;
+	}
+
+	/* SUBLCD only supports SYS interface */
+	if (lcdc_chan_is_sublcd(ch)) {
+		if (ifm == 0)
+			goto bad;
+		else
+			ifm = 0;
+	}
+
+	ch->ldmt1r_value = (ifm << 12) | miftyp;
+	return 0;
+ bad:
+	return -EINVAL;
+}
+
+static int sh_mobile_lcdc_setup_clocks(struct device *dev, int clock_source,
+				       struct sh_mobile_lcdc_priv *priv)
+{
+	char *str;
+	int icksel;
+
+	switch (clock_source) {
+	case LCDC_CLK_BUS: str = "bus_clk"; icksel = 0; break;
+	case LCDC_CLK_PERIPHERAL: str = "peripheral_clk"; icksel = 1; break;
+	case LCDC_CLK_EXTERNAL: str = NULL; icksel = 2; break;
+	default:
+		return -EINVAL;
+	}
+
+	priv->lddckr = icksel << 16;
+
+	if (str) {
+		priv->clk = clk_get(dev, str);
+		if (IS_ERR(priv->clk)) {
+			dev_err(dev, "cannot get clock %s\n", str);
+			return PTR_ERR(priv->clk);
+		}
+
+		clk_enable(priv->clk);
+	}
+
+	return 0;
+}
+
+static int sh_mobile_lcdc_setcolreg(u_int regno,
+				    u_int red, u_int green, u_int blue,
+				    u_int transp, struct fb_info *info)
+{
+	u32 *palette = info->pseudo_palette;
+
+	if (regno >= PALETTE_NR)
+		return -EINVAL;
+
+	/* only FB_VISUAL_TRUECOLOR supported */
+	/* treat each input as 16 bits wide and keep only its top <length> bits */
+	red >>= 16 - info->var.red.length;
+	green >>= 16 - info->var.green.length;
+	blue >>= 16 - info->var.blue.length;
+	transp >>= 16 - info->var.transp.length;
+	/* pack the reduced channels at the bit offsets given by info->var */
+	palette[regno] = (red << info->var.red.offset) |
+	  (green << info->var.green.offset) |
+	  (blue << info->var.blue.offset) |
+	  (transp << info->var.transp.offset);
+
+	return 0;
+}
+
+static struct fb_fix_screeninfo sh_mobile_lcdc_fix  = {
+	.id =		"SH Mobile LCDC",
+	.type =		FB_TYPE_PACKED_PIXELS,
+	.visual =	FB_VISUAL_TRUECOLOR,
+	.accel =	FB_ACCEL_NONE,
+};
+
+static struct fb_ops sh_mobile_lcdc_ops = {
+	.fb_setcolreg	= sh_mobile_lcdc_setcolreg,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+};
+
+static int sh_mobile_lcdc_set_bpp(struct fb_var_screeninfo *var, int bpp)
+{
+	switch (bpp) {
+	case 16: /* PKF[4:0] = 00011 - RGB 565 */
+		var->red.offset = 11;
+		var->red.length = 5;
+		var->green.offset = 5;
+		var->green.length = 6;
+		var->blue.offset = 0;
+		var->blue.length = 5;
+		var->transp.offset = 0;
+		var->transp.length = 0;
+		break;
+
+	case 32: /* PKF[4:0] = 00000 - RGB 888
+		  * sh7722 pdf says 00RRGGBB but reality is GGBB00RR
+		  * this may be because LDDDSR has word swap enabled..
+		  */
+		var->red.offset = 0;
+		var->red.length = 8;
+		var->green.offset = 24;
+		var->green.length = 8;
+		var->blue.offset = 16;
+		var->blue.length = 8;
+		var->transp.offset = 0;
+		var->transp.length = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+	var->bits_per_pixel = bpp;
+	var->red.msb_right = 0;
+	var->green.msb_right = 0;
+	var->blue.msb_right = 0;
+	var->transp.msb_right = 0;
+	return 0;
+}
+
+static int sh_mobile_lcdc_remove(struct platform_device *pdev);
+
+static int __init sh_mobile_lcdc_probe(struct platform_device *pdev)
+{
+	struct fb_info *info;
+	struct sh_mobile_lcdc_priv *priv;
+	struct sh_mobile_lcdc_info *pdata;
+	struct sh_mobile_lcdc_chan_cfg *cfg;
+	struct resource *res;
+	int error;
+	void *buf;
+	int i, j;
+
+	if (!pdev->dev.platform_data) {
+		dev_err(&pdev->dev, "no platform data defined\n");
+		error = -EINVAL;
+		goto err0;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		dev_err(&pdev->dev, "cannot find IO resource\n");
+		error = -ENOENT;
+		goto err0;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(&pdev->dev, "cannot allocate device data\n");
+		error = -ENOMEM;
+		goto err0;
+	}
+
+	platform_set_drvdata(pdev, priv);
+	pdata = pdev->dev.platform_data;
+	/* i walks the platform-data slots, j the driver's own channel slots */
+	j = 0;
+	for (i = 0; i < ARRAY_SIZE(pdata->ch); i++) {
+		priv->ch[j].lcdc = priv;
+		memcpy(&priv->ch[j].cfg, &pdata->ch[i], sizeof(pdata->ch[i]));
+		/* cfg was copied into slot j, so that is the slot to validate */
+		error = sh_mobile_lcdc_check_interface(&priv->ch[j]);
+		if (error) {
+			dev_err(&pdev->dev, "unsupported interface type\n");
+			goto err1;
+		}
+
+		switch (pdata->ch[i].chan) {
+		case LCDC_CHAN_MAINLCD:
+			priv->ch[j].enabled = 1 << 1;
+			priv->ch[j].reg_offs = lcdc_offs_mainlcd;
+			j++;
+			break;
+		case LCDC_CHAN_SUBLCD:
+			priv->ch[j].enabled = 1 << 2;
+			priv->ch[j].reg_offs = lcdc_offs_sublcd;
+			j++;
+			break;
+		}
+	}
+
+	if (!j) {
+		dev_err(&pdev->dev, "no channels defined\n");
+		error = -EINVAL;
+		goto err1;
+	}
+
+	error = sh_mobile_lcdc_setup_clocks(&pdev->dev,
+					    pdata->clock_source, priv);
+	if (error) {
+		dev_err(&pdev->dev, "unable to setup clocks\n");
+		goto err1;
+	}
+	/* NOTE(review): this replaces the lddckr value set by setup_clocks() */
+	priv->lddckr = pdata->lddckr;
+	priv->base = ioremap_nocache(res->start, (res->end - res->start) + 1);
+
+	for (i = 0; i < j; i++) {
+		info = &priv->ch[i].info;
+		cfg = &priv->ch[i].cfg;
+
+		info->fbops = &sh_mobile_lcdc_ops;
+		info->var.xres = info->var.xres_virtual = cfg->lcd_cfg.xres;
+		info->var.yres = info->var.yres_virtual = cfg->lcd_cfg.yres;
+		info->var.activate = FB_ACTIVATE_NOW;
+		error = sh_mobile_lcdc_set_bpp(&info->var, cfg->bpp);
+		if (error)
+			break;
+
+		info->fix = sh_mobile_lcdc_fix;
+		info->fix.line_length = cfg->lcd_cfg.xres * (cfg->bpp / 8);
+		info->fix.smem_len = info->fix.line_length * cfg->lcd_cfg.yres;
+
+		buf = dma_alloc_coherent(&pdev->dev, info->fix.smem_len,
+					 &priv->ch[i].dma_handle, GFP_KERNEL);
+		if (!buf) {
+			dev_err(&pdev->dev, "unable to allocate buffer\n");
+			error = -ENOMEM;
+			break;
+		}
+
+		info->pseudo_palette = &priv->ch[i].pseudo_palette;
+		info->flags = FBINFO_FLAG_DEFAULT;
+
+		error = fb_alloc_cmap(&info->cmap, PALETTE_NR, 0);
+		if (error < 0) {
+			dev_err(&pdev->dev, "unable to allocate cmap\n");
+			dma_free_coherent(&pdev->dev, info->fix.smem_len,
+					  buf, priv->ch[i].dma_handle);
+			break;
+		}
+
+		memset(buf, 0, info->fix.smem_len);
+		info->fix.smem_start = priv->ch[i].dma_handle;
+		info->screen_base = buf;
+		info->device = &pdev->dev;
+	}
+
+	if (error)
+		goto err1;
+
+	error = sh_mobile_lcdc_start(priv);
+	if (error) {
+		dev_err(&pdev->dev, "unable to start hardware\n");
+		goto err1;
+	}
+
+	for (i = 0; i < j; i++) {
+		error = register_framebuffer(&priv->ch[i].info);
+		if (error < 0)
+			goto err1;
+	}
+
+	for (i = 0; i < j; i++) {
+		info = &priv->ch[i].info;
+		dev_info(info->dev,
+			 "registered %s/%s as %dx%d %dbpp.\n",
+			 pdev->name,
+			 (priv->ch[i].cfg.chan == LCDC_CHAN_MAINLCD) ?
+			 "mainlcd" : "sublcd",
+			 (int) priv->ch[i].cfg.lcd_cfg.xres,
+			 (int) priv->ch[i].cfg.lcd_cfg.yres,
+			 priv->ch[i].cfg.bpp);
+	}
+
+	return 0;
+ err1:
+	sh_mobile_lcdc_remove(pdev);
+ err0:
+	return error;
+}
+
+static int sh_mobile_lcdc_remove(struct platform_device *pdev)
+{
+	struct sh_mobile_lcdc_priv *priv = platform_get_drvdata(pdev);
+	struct fb_info *info;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(priv->ch); i++)
+		if (priv->ch[i].info.dev)
+			unregister_framebuffer(&priv->ch[i].info);
+
+	sh_mobile_lcdc_stop(priv);
+
+	for (i = 0; i < ARRAY_SIZE(priv->ch); i++) {
+		info = &priv->ch[i].info;
+
+		if (!info->device)
+			continue;
+
+		dma_free_coherent(&pdev->dev, info->fix.smem_len,
+				  info->screen_base, priv->ch[i].dma_handle);
+		fb_dealloc_cmap(&info->cmap);
+	}
+	/* priv->clk may hold an ERR_PTR if clk_get() failed during probe */
+	if (priv->clk && !IS_ERR(priv->clk)) {
+		clk_disable(priv->clk);
+		clk_put(priv->clk);
+	}
+
+	if (priv->base)
+		iounmap(priv->base);
+
+	kfree(priv);
+	return 0;
+}
+
+static struct platform_driver sh_mobile_lcdc_driver = {
+	.driver		= {
+		.name		= "sh_mobile_lcdc_fb",
+		.owner		= THIS_MODULE,
+	},
+	.probe		= sh_mobile_lcdc_probe,
+	.remove		= sh_mobile_lcdc_remove,
+};
+
+static int __init sh_mobile_lcdc_init(void)
+{
+	return platform_driver_register(&sh_mobile_lcdc_driver);
+}
+
+static void __exit sh_mobile_lcdc_exit(void)
+{
+	platform_driver_unregister(&sh_mobile_lcdc_driver);
+}
+
+module_init(sh_mobile_lcdc_init);
+module_exit(sh_mobile_lcdc_exit);
+
+MODULE_DESCRIPTION("SuperH Mobile LCDC Framebuffer driver");
+MODULE_AUTHOR("Magnus Damm <damm@opensource.se>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/asm-sh/sh_mobile_lcdc.h b/include/asm-sh/sh_mobile_lcdc.h
new file mode 100644
index 00000000000..27677727df4
--- /dev/null
+++ b/include/asm-sh/sh_mobile_lcdc.h
@@ -0,0 +1,66 @@
+#ifndef __ASM_SH_MOBILE_LCDC_H__
+#define __ASM_SH_MOBILE_LCDC_H__
+
+#include <linux/fb.h>
+
+enum { RGB8,   /* 24bpp, 8:8:8 */
+       RGB9,   /* 18bpp, 9:9 */
+       RGB12A, /* 24bpp, 12:12 */
+       RGB12B, /* 12bpp */
+       RGB16,  /* 16bpp */
+       RGB18,  /* 18bpp */
+       RGB24,  /* 24bpp */
+       SYS8A,  /* 24bpp, 8:8:8 */
+       SYS8B,  /* 18bpp, 8:8:2 */
+       SYS8C,  /* 18bpp, 2:8:8 */
+       SYS8D,  /* 16bpp, 8:8 */
+       SYS9,   /* 18bpp, 9:9 */
+       SYS12,  /* 24bpp, 12:12 */
+       SYS16A, /* 16bpp */
+       SYS16B, /* 18bpp, 16:2 */
+       SYS16C, /* 18bpp, 2:16 */
+       SYS18,  /* 18bpp */
+       SYS24 };/* 24bpp */
+
+enum { LCDC_CHAN_DISABLED = 0,
+       LCDC_CHAN_MAINLCD,
+       LCDC_CHAN_SUBLCD };
+
+enum { LCDC_CLK_BUS, LCDC_CLK_PERIPHERAL, LCDC_CLK_EXTERNAL };
+
+struct sh_mobile_lcdc_sys_bus_cfg {
+	unsigned long ldmt2r;
+	unsigned long ldmt3r;
+};
+
+struct sh_mobile_lcdc_sys_bus_ops {
+	void (*write_index)(void *handle, unsigned long data);
+	void (*write_data)(void *handle, unsigned long data);
+	unsigned long (*read_data)(void *handle);
+};
+
+struct sh_mobile_lcdc_board_cfg {
+	void *board_data;
+	int (*setup_sys)(void *board_data, void *sys_ops_handle,
+			 struct sh_mobile_lcdc_sys_bus_ops *sys_ops);
+	void (*display_on)(void *board_data);
+	void (*display_off)(void *board_data);
+};
+
+struct sh_mobile_lcdc_chan_cfg {
+	int chan;
+	int bpp;
+	int interface_type; /* selects RGBn or SYSn I/F, see above */
+	int clock_divider;
+	struct fb_videomode lcd_cfg;
+	struct sh_mobile_lcdc_board_cfg board_cfg;
+	struct sh_mobile_lcdc_sys_bus_cfg sys_bus_cfg; /* only for SYSn I/F */
+};
+
+struct sh_mobile_lcdc_info {
+	unsigned long lddckr;
+	int clock_source;
+	struct sh_mobile_lcdc_chan_cfg ch[2];
+};
+
+#endif /* __ASM_SH_MOBILE_LCDC_H__ */
-- 
GitLab


From b604838ac6d233fd6bffc0e758a818133a01ff22 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Wed, 23 Jul 2008 21:31:26 -0700
Subject: [PATCH 404/853] vfb: only enable if explicitly requested when
 compiled in

The Kconfig help for the vfb driver says:
   Do NOT enable it for normal systems! To protect the innocent, it
   has to be enabled explicitly at boot time using the kernel option
   `video=vfb:'.

This change lets the code match the description.

Support for vfb:disable is kept for backwards compatibility; vfb:off works
because it is tested at a higher level.

Note: any undefined option (e.g. vfb:enable) will also enable this driver.

The relevant code has been unchanged since before the migration to
git (2.6.12).

This patch fixes bugzilla #9310 and was the root cause behind
http://lkml.org/lkml/2008/5/31/220.

Signed-off-by: Frans Pop <elendil@planet.nl>
Cc: Antonino A. Daplas <adaplas@gmail.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/vfb.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index 072638a9528..93fe08d6c78 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -443,19 +443,29 @@ static int vfb_mmap(struct fb_info *info,
 }
 
 #ifndef MODULE
+/*
+ * The virtual framebuffer driver is only enabled if explicitly
+ * requested by passing 'video=vfb:' (or any actual options).
+ */
 static int __init vfb_setup(char *options)
 {
 	char *this_opt;
 
+	vfb_enable = 0;
+
+	if (!options)
+		return 1;
+
 	vfb_enable = 1;
 
-	if (!options || !*options)
+	if (!*options)
 		return 1;
 
 	while ((this_opt = strsep(&options, ",")) != NULL) {
 		if (!*this_opt)
 			continue;
-		if (!strncmp(this_opt, "disable", 7))
+		/* Test disable for backwards compatibility */
+		if (!strcmp(this_opt, "disable"))
 			vfb_enable = 0;
 	}
 	return 1;
-- 
GitLab


From 2870086e9f2032bdd95b8da9bd187e3c16fc6d49 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:26 -0700
Subject: [PATCH 405/853] hgafb: convert to new platform driver API

Convert the hgafb driver to use new platform driver API.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=9689

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/hgafb.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/video/hgafb.c b/drivers/video/hgafb.c
index c18880d9db1..0129c044f6d 100644
--- a/drivers/video/hgafb.c
+++ b/drivers/video/hgafb.c
@@ -551,7 +551,7 @@ static struct fb_ops hgafb_ops = {
 	 *  Initialization
 	 */
 
-static int __init hgafb_probe(struct device *device)
+static int __init hgafb_probe(struct platform_device *pdev)
 {
 	struct fb_info *info;
 
@@ -565,7 +565,7 @@ static int __init hgafb_probe(struct device *device)
 	printk(KERN_INFO "hgafb: %s with %ldK of memory detected.\n",
 		hga_type_name, hga_vram_len/1024);
 
-	info = framebuffer_alloc(0, NULL);
+	info = framebuffer_alloc(0, &pdev->dev);
 	if (!info) {
 		iounmap(hga_vram);
 		return -ENOMEM;
@@ -593,13 +593,13 @@ static int __init hgafb_probe(struct device *device)
 
         printk(KERN_INFO "fb%d: %s frame buffer device\n",
                info->node, info->fix.id);
-	dev_set_drvdata(device, info);
+	platform_set_drvdata(pdev, info);
 	return 0;
 }
 
-static int hgafb_remove(struct device *device)
+static int hgafb_remove(struct platform_device *pdev)
 {
-	struct fb_info *info = dev_get_drvdata(device);
+	struct fb_info *info = platform_get_drvdata(pdev);
 
 	hga_txt_mode();
 	hga_clear_screen();
@@ -620,16 +620,15 @@ static int hgafb_remove(struct device *device)
 	return 0;
 }
 
-static struct device_driver hgafb_driver = {
-	.name = "hgafb",
-	.bus  = &platform_bus_type,
+static struct platform_driver hgafb_driver = {
 	.probe = hgafb_probe,
 	.remove = hgafb_remove,
+	.driver = {
+		.name = "hgafb",
+	},
 };
 
-static struct platform_device hgafb_device = {
-	.name = "hgafb",
-};
+static struct platform_device *hgafb_device;
 
 static int __init hgafb_init(void)
 {
@@ -638,12 +637,15 @@ static int __init hgafb_init(void)
 	if (fb_get_options("hgafb", NULL))
 		return -ENODEV;
 
-	ret = driver_register(&hgafb_driver);
+	ret = platform_driver_register(&hgafb_driver);
 
 	if (!ret) {
-		ret = platform_device_register(&hgafb_device);
-		if (ret)
-			driver_unregister(&hgafb_driver);
+		hgafb_device = platform_device_register_simple("hgafb", 0, NULL, 0);
+
+		if (IS_ERR(hgafb_device)) {
+			platform_driver_unregister(&hgafb_driver);
+			ret = PTR_ERR(hgafb_device);
+		}
 	}
 
 	return ret;
@@ -651,8 +653,8 @@ static int __init hgafb_init(void)
 
 static void __exit hgafb_exit(void)
 {
-	platform_device_unregister(&hgafb_device);
-	driver_unregister(&hgafb_driver);
+	platform_device_unregister(hgafb_device);
+	platform_driver_unregister(&hgafb_driver);
 }
 
 /* -------------------------------------------------------------------------
-- 
GitLab


From 43a3abc6aca8505e708508e2c7c2f99a7f8f820b Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Wed, 23 Jul 2008 21:31:27 -0700
Subject: [PATCH 406/853] fbdev: width and height are unsigned

The width and height members of fb_var_screeninfo are __u32.  The code
initializes them to -1 which seems wrong, and 0 seems like an equally good
default value.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c
index 052e1805849..6a0aa180c26 100644
--- a/drivers/video/fbmon.c
+++ b/drivers/video/fbmon.c
@@ -879,7 +879,7 @@ int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var)
 		if (edid_is_timing_block(block)) {
 			var->xres = var->xres_virtual = H_ACTIVE;
 			var->yres = var->yres_virtual = V_ACTIVE;
-			var->height = var->width = -1;
+			var->height = var->width = 0;
 			var->right_margin = H_SYNC_OFFSET;
 			var->left_margin = (H_ACTIVE + H_BLANKING) -
 				(H_ACTIVE + H_SYNC_OFFSET + H_SYNC_WIDTH);
-- 
GitLab


From 7572a1ea034a8fc45e57de28cc7573264975532a Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Wed, 23 Jul 2008 21:31:28 -0700
Subject: [PATCH 407/853] fbdev: xoffset, yoffset and yres are unsigned

The xoffset, yoffset and yres members of fb_var_screeninfo are __u32.
Make them unsigned in the code as well.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbmem.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 1cd5071e536..6b487801eea 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -848,9 +848,8 @@ int
 fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var)
 {
 	struct fb_fix_screeninfo *fix = &info->fix;
-        int xoffset = var->xoffset;
-        int yoffset = var->yoffset;
-        int err = 0, yres = info->var.yres;
+	unsigned int yres = info->var.yres;
+	int err = 0;
 
 	if (var->yoffset > 0) {
 		if (var->vmode & FB_VMODE_YWRAP) {
@@ -866,8 +865,8 @@ fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var)
 				 (var->xoffset % fix->xpanstep)))
 		err = -EINVAL;
 
-        if (err || !info->fbops->fb_pan_display || xoffset < 0 ||
-	    yoffset < 0 || var->yoffset + yres > info->var.yres_virtual ||
+	if (err || !info->fbops->fb_pan_display ||
+	    var->yoffset + yres > info->var.yres_virtual ||
 	    var->xoffset + info->var.xres > info->var.xres_virtual)
 		return -EINVAL;
 
-- 
GitLab


From 50cd0221c9062ec5dac8a3620f36f568df052ac1 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Wed, 23 Jul 2008 21:31:29 -0700
Subject: [PATCH 408/853] atyfb: remove dead code

Remove dead code.  This will slightly change the behaviour of the driver
on systems that support backlight control.  Previously they would just
turn the backlight off using the backlight control but now the generic LCD
code will also turn off the LCD using the POWER_MANAGEMENT register.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/atyfb_base.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index bd4ac0bafec..d7ba3f1a4d8 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -2709,8 +2709,7 @@ static int atyfb_blank(int blank, struct fb_info *info)
 	if (par->lock_blank || par->asleep)
 		return 0;
 
-#ifdef CONFIG_FB_ATY_BACKLIGHT
-#elif defined(CONFIG_FB_ATY_GENERIC_LCD)
+#ifdef CONFIG_FB_ATY_GENERIC_LCD
 	if (par->lcd_table && blank > FB_BLANK_NORMAL &&
 	    (aty_ld_lcd(LCD_GEN_CNTL, par) & LCD_ON)) {
 		u32 pm = aty_ld_lcd(POWER_MANAGEMENT, par);
@@ -2739,8 +2738,7 @@ static int atyfb_blank(int blank, struct fb_info *info)
 	}
 	aty_st_le32(CRTC_GEN_CNTL, gen_cntl, par);
 
-#ifdef CONFIG_FB_ATY_BACKLIGHT
-#elif defined(CONFIG_FB_ATY_GENERIC_LCD)
+#ifdef CONFIG_FB_ATY_GENERIC_LCD
 	if (par->lcd_table && blank <= FB_BLANK_NORMAL &&
 	    (aty_ld_lcd(LCD_GEN_CNTL, par) & LCD_ON)) {
 		u32 pm = aty_ld_lcd(POWER_MANAGEMENT, par);
-- 
GitLab


From 3880b0b5297ae9bf58a7662d13a46b5d5f0b2af6 Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Wed, 23 Jul 2008 21:31:30 -0700
Subject: [PATCH 409/853] atyfb: correct_chipset() can fail

Atari probe code relies on correct_chipset() failing if the device is not
a mach64 GX/CX.  aty_chips[] array would be indexed with -1 in that case.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/atyfb_base.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index d7ba3f1a4d8..d6903c70bee 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -424,7 +424,6 @@ static struct {
 #endif /* CONFIG_FB_ATY_CT */
 };
 
-/* can not fail */
 static int __devinit correct_chipset(struct atyfb_par *par)
 {
 	u8 rev;
@@ -437,6 +436,9 @@ static int __devinit correct_chipset(struct atyfb_par *par)
 		if (par->pci_id == aty_chips[i].pci_id)
 			break;
 
+	if (i < 0)
+		return -ENODEV;
+
 	name = aty_chips[i].name;
 	par->pll_limits.pll_max = aty_chips[i].pll;
 	par->pll_limits.mclk = aty_chips[i].mclk;
-- 
GitLab


From 6cfafc15994ac2a2377b32b5a65cf62a90a80d49 Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Wed, 23 Jul 2008 21:31:31 -0700
Subject: [PATCH 410/853] atyfb: use a PCI device ID table

Convert atyfb to use a PCI device ID table.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/atyfb_base.c | 73 +++++++++++++++++++++++++---------
 1 file changed, 55 insertions(+), 18 deletions(-)

diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index d6903c70bee..872760accb9 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -3418,14 +3418,7 @@ static int __devinit atyfb_pci_probe(struct pci_dev *pdev, const struct pci_devi
 	struct fb_info *info;
 	struct resource *rp;
 	struct atyfb_par *par;
-	int i, rc = -ENOMEM;
-
-	for (i = ARRAY_SIZE(aty_chips) - 1; i >= 0; i--)
-		if (pdev->device == aty_chips[i].pci_id)
-			break;
-
-	if (i < 0)
-		return -ENODEV;
+	int rc = -ENOMEM;
 
 	/* Enable device in PCI config */
 	if (pci_enable_device(pdev)) {
@@ -3456,7 +3449,7 @@ static int __devinit atyfb_pci_probe(struct pci_dev *pdev, const struct pci_devi
 	par = info->par;
 	info->fix = atyfb_fix;
 	info->device = &pdev->dev;
-	par->pci_id = aty_chips[i].pci_id;
+	par->pci_id = pdev->device;
 	par->res_start = res_start;
 	par->res_size = res_size;
 	par->irq = pdev->irq;
@@ -3655,18 +3648,62 @@ static void __devexit atyfb_pci_remove(struct pci_dev *pdev)
 	atyfb_remove(info);
 }
 
-/*
- * This driver uses its own matching table. That will be more difficult
- * to fix, so for now, we just match against any ATI ID and let the
- * probe() function find out what's up. That also mean we don't have
- * a module ID table though.
- */
 static struct pci_device_id atyfb_pci_tbl[] = {
-	{ PCI_VENDOR_ID_ATI, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
-	  PCI_BASE_CLASS_DISPLAY << 16, 0xff0000, 0 },
-	{ 0, }
+#ifdef CONFIG_FB_ATY_GX
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GX) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64CX) },
+#endif /* CONFIG_FB_ATY_GX */
+
+#ifdef CONFIG_FB_ATY_CT
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64CT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64ET) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LT) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64VT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GT) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64VU) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GU) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LG) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64VV) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GV) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GW) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GY) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GZ) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GB) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GD) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GI) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GP) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GQ) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LB) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LD) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LI) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LP) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LQ) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GM) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GN) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GO) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GL) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GR) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64GS) },
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LM) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LN) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LR) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_CHIP_MACH64LS) },
+#endif /* CONFIG_FB_ATY_CT */
+	{ }
 };
 
+MODULE_DEVICE_TABLE(pci, atyfb_pci_tbl);
+
 static struct pci_driver atyfb_driver = {
 	.name		= "atyfb",
 	.id_table	= atyfb_pci_tbl,
-- 
GitLab


From 89c69d2b8eb3ee2338fded9d70a0795b4712f112 Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Wed, 23 Jul 2008 21:31:32 -0700
Subject: [PATCH 411/853] atyfb: report probe errors

Properly propagate errors to the probe function.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/atyfb_base.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 872760accb9..e38398f491f 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -2231,6 +2231,7 @@ static int __devinit aty_init(struct fb_info *info)
 	const char *ramname = NULL, *xtal;
 	int gtb_memsize, has_var = 0;
 	struct fb_var_screeninfo var;
+	int ret;
 
 	init_waitqueue_head(&par->vblank.wait);
 	spin_lock_init(&par->int_lock);
@@ -2612,7 +2613,8 @@ static int __devinit aty_init(struct fb_info *info)
 			var.yres_virtual = var.yres;
 	}
 
-	if (atyfb_check_var(&var, info)) {
+	ret = atyfb_check_var(&var, info);
+	if (ret) {
 		PRINTKE("can't set default video mode\n");
 		goto aty_init_exit;
 	}
@@ -2623,10 +2625,12 @@ static int __devinit aty_init(struct fb_info *info)
 #endif /* CONFIG_FB_ATY_CT */
 	info->var = var;
 
-	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0)
+	ret = fb_alloc_cmap(&info->cmap, 256, 0);
+	if (ret < 0)
 		goto aty_init_exit;
 
-	if (register_framebuffer(info) < 0) {
+	ret = register_framebuffer(info);
+	if (ret < 0) {
 		fb_dealloc_cmap(&info->cmap);
 		goto aty_init_exit;
 	}
@@ -2652,7 +2656,7 @@ aty_init_exit:
 	    par->mtrr_aper = -1;
 	}
 #endif
-	return -1;
+	return ret;
 }
 
 static void aty_resume_chip(struct fb_info *info)
@@ -3467,7 +3471,8 @@ static int __devinit atyfb_pci_probe(struct pci_dev *pdev, const struct pci_devi
 	pci_set_drvdata(pdev, info);
 
 	/* Init chip & register framebuffer */
-	if (aty_init(info))
+	rc = aty_init(info);
+	if (rc)
 		goto err_release_io;
 
 #ifdef __sparc__
-- 
GitLab


From 1c554ff9554d67b4db0fb5e2f78c7cb4b2e0d627 Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Wed, 23 Jul 2008 21:31:32 -0700
Subject: [PATCH 412/853] atyfb: fix a cast

The argument to iounmap() is void __iomem *. Fix the cast.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/atyfb_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index e38398f491f..620ba812036 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -3335,7 +3335,7 @@ static int __devinit init_from_bios(struct atyfb_par *par)
 		PRINTKE("no BIOS frequency table found, use parameters\n");
 		ret = -ENXIO;
 	}
-	iounmap((void* __iomem )bios_base);
+	iounmap((void __iomem *)bios_base);
 
 	return ret;
 }
-- 
GitLab


From a882ef47c7156e8cc47e72f2aa396f2514569c48 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Jul 2008 21:31:33 -0700
Subject: [PATCH 413/853] aty: use memory_read_from_buffer()

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/radeon_base.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 3c0a03f69d8..652273e9f5f 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -2098,15 +2098,7 @@ static void radeon_identify_vram(struct radeonfb_info *rinfo)
 
 static ssize_t radeon_show_one_edid(char *buf, loff_t off, size_t count, const u8 *edid)
 {
-	if (off > EDID_LENGTH)
-		return 0;
-
-	if (off + count > EDID_LENGTH)
-		count = EDID_LENGTH - off;
-
-	memcpy(buf, edid + off, count);
-
-	return count;
+	return memory_read_from_buffer(buf, count, &off, edid, EDID_LENGTH);
 }
 
 
-- 
GitLab


From 84c41ce83e9b2987ccef352f28ba0055b26c8f8e Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:34 -0700
Subject: [PATCH 414/853] skeletonfb: update to correct platform driver usage

Update skeletonfb to the new platform driver API.  The skeletonfb file is
a template for creating new drivers.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/skeletonfb.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c
index 62321458f71..df5336561d1 100644
--- a/drivers/video/skeletonfb.c
+++ b/drivers/video/skeletonfb.c
@@ -675,13 +675,13 @@ static struct fb_ops xxxfb_ops = {
      *  Initialization
      */
 
-/* static int __init xxfb_probe (struct device *device) -- for platform devs */
+/* static int __init xxfb_probe (struct platform_device *pdev) -- for platform devs */
 static int __devinit xxxfb_probe(struct pci_dev *dev,
 			      const struct pci_device_id *ent)
 {
     struct fb_info *info;
     struct xxx_par *par;
-    struct device* device = &dev->dev; /* for pci drivers */
+    struct device *device = &dev->dev; /* or &pdev->dev */
     int cmap_len, retval;	
    
     /*
@@ -824,18 +824,18 @@ static int __devinit xxxfb_probe(struct pci_dev *dev,
 	return -EINVAL;
     printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node,
 	   info->fix.id);
-    pci_set_drvdata(dev, info); /* or dev_set_drvdata(device, info) */
+    pci_set_drvdata(dev, info); /* or platform_set_drvdata(pdev, info) */
     return 0;
 }
 
     /*
      *  Cleanup
      */
-/* static void __devexit xxxfb_remove(struct device *device) */
+/* static void __devexit xxxfb_remove(struct platform_device *pdev) */
 static void __devexit xxxfb_remove(struct pci_dev *dev)
 {
 	struct fb_info *info = pci_get_drvdata(dev);
-	/* or dev_get_drvdata(device); */
+	/* or platform_get_drvdata(pdev); */
 
 	if (info) {
 		unregister_framebuffer(info);
@@ -961,18 +961,17 @@ static int xxxfb_resume(struct platform_dev *dev)
 #define xxxfb_resume NULL
 #endif /* CONFIG_PM */
 
-static struct device_driver xxxfb_driver = {
-	.name = "xxxfb",
-	.bus  = &platform_bus_type,
+static struct platform_driver xxxfb_driver = {
 	.probe = xxxfb_probe,
 	.remove = xxxfb_remove,
 	.suspend = xxxfb_suspend, /* optional but recommended */
 	.resume = xxxfb_resume,   /* optional but recommended */
+	.driver = {
+		.name = "xxxfb",
+	},
 };
 
-static struct platform_device xxxfb_device = {
-	.name = "xxxfb",
-};
+static struct platform_device *xxxfb_device;
 
 #ifndef MODULE
     /*
@@ -1002,12 +1001,16 @@ static int __init xxxfb_init(void)
 		return -ENODEV;
 	xxxfb_setup(option);
 #endif
-	ret = driver_register(&xxxfb_driver);
+	ret = platform_driver_register(&xxxfb_driver);
 
 	if (!ret) {
-		ret = platform_device_register(&xxxfb_device);
-		if (ret)
-			driver_unregister(&xxxfb_driver);
+		xxxfb_device = platform_device_register_simple("xxxfb", 0,
+								NULL, 0);
+
+		if (IS_ERR(xxxfb_device)) {
+			platform_driver_unregister(&xxxfb_driver);
+			ret = PTR_ERR(xxxfb_device);
+		}
 	}
 
 	return ret;
@@ -1015,8 +1018,8 @@ static int __init xxxfb_init(void)
 
 static void __exit xxxfb_exit(void)
 {
-	platform_device_unregister(&xxxfb_device);
-	driver_unregister(&xxxfb_driver);
+	platform_device_unregister(xxxfb_device);
+	platform_driver_unregister(&xxxfb_driver);
 }
 #endif /* CONFIG_PCI */
 
-- 
GitLab


From 968910bd03b226ed410d092c2da59dffe5bfe8de Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Wed, 23 Jul 2008 21:31:34 -0700
Subject: [PATCH 415/853] atmel_lcdfb: avoid division by zero

Avoid division by zero in the atmel_lcdfb_check_var() function.

If pixclock is not specified when passing a var structure to
the check_var() function, a division by zero occurs (when
translating pixclock to KHz).

This patch adds a check of this value and tries to choose a
video mode from the modelist.

The mode found in the probe function is added to the modelist.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Andrew Victor <linux@maxim.org.za>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/atmel_lcdfb.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c
index d335bb96b03..5b3a15dffb5 100644
--- a/drivers/video/atmel_lcdfb.c
+++ b/drivers/video/atmel_lcdfb.c
@@ -256,6 +256,20 @@ static int atmel_lcdfb_alloc_video_memory(struct atmel_lcdfb_info *sinfo)
 	return 0;
 }
 
+static const struct fb_videomode *atmel_lcdfb_choose_mode(struct fb_var_screeninfo *var,
+						     struct fb_info *info)
+{
+	struct fb_videomode varfbmode;
+	const struct fb_videomode *fbmode = NULL;
+
+	fb_var_to_videomode(&varfbmode, var);
+	fbmode = fb_find_nearest_mode(&varfbmode, &info->modelist);
+	if (fbmode)
+		fb_videomode_to_var(var, fbmode);
+	return fbmode;
+}
+
+
 /**
  *      atmel_lcdfb_check_var - Validates a var passed in.
  *      @var: frame buffer variable screen structure
@@ -289,6 +303,15 @@ static int atmel_lcdfb_check_var(struct fb_var_screeninfo *var,
 	clk_value_khz = clk_get_rate(sinfo->lcdc_clk) / 1000;
 
 	dev_dbg(dev, "%s:\n", __func__);
+
+	if (!(var->pixclock && var->bits_per_pixel)) {
+		/* choose a suitable mode if possible */
+		if (!atmel_lcdfb_choose_mode(var, info)) {
+			dev_err(dev, "needed value not specified\n");
+			return -EINVAL;
+		}
+	}
+
 	dev_dbg(dev, "  resolution: %ux%u\n", var->xres, var->yres);
 	dev_dbg(dev, "  pixclk:     %lu KHz\n", PICOS2KHZ(var->pixclock));
 	dev_dbg(dev, "  bpp:        %u\n", var->bits_per_pixel);
@@ -299,6 +322,13 @@ static int atmel_lcdfb_check_var(struct fb_var_screeninfo *var,
 		return -EINVAL;
 	}
 
+	/* Do not allow to have real resoulution larger than virtual */
+	if (var->xres > var->xres_virtual)
+		var->xres_virtual = var->xres;
+
+	if (var->yres > var->yres_virtual)
+		var->yres_virtual = var->yres;
+
 	/* Force same alignment for each line */
 	var->xres = (var->xres + 3) & ~3UL;
 	var->xres_virtual = (var->xres_virtual + 3) & ~3UL;
@@ -740,6 +770,7 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev)
 	struct fb_info *info;
 	struct atmel_lcdfb_info *sinfo;
 	struct atmel_lcdfb_info *pdata_sinfo;
+	struct fb_videomode fbmode;
 	struct resource *regs = NULL;
 	struct resource *map = NULL;
 	int ret;
@@ -906,6 +937,10 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev)
 		goto free_cmap;
 	}
 
+	/* add selected videomode to modelist */
+	fb_var_to_videomode(&fbmode, &info->var);
+	fb_add_videomode(&fbmode, &info->modelist);
+
 	/* Power up the LCDC screen */
 	if (sinfo->atmel_lcdfb_power_control)
 		sinfo->atmel_lcdfb_power_control(1);
-- 
GitLab


From 206c5d69d0540024faffd423fc703f1e457332d7 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:31:35 -0700
Subject: [PATCH 416/853] sm501: add inversion controls for VBIASEN and FPEN

Add flags to allow the driver to invert the sense of both VBIASEN and FPEN
signals coming from the SM501.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sm501fb.c | 26 ++++++++++++++++++++++----
 include/linux/sm501.h   |  2 ++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index 15d4a768b1f..122a0f8495c 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -663,15 +663,25 @@ static void sm501fb_panel_power(struct sm501fb_info *fbi, int to)
 		sm501fb_sync_regs(fbi);
 		mdelay(10);
 
+		/* VBIASEN */
+
 		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_VBIASEN)) {
-			control |= SM501_DC_PANEL_CONTROL_BIAS;	/* VBIASEN */
+			if (pd->flags & SM501FB_FLAG_PANEL_INV_VBIASEN)
+				control &= ~SM501_DC_PANEL_CONTROL_BIAS;
+			else
+				control |= SM501_DC_PANEL_CONTROL_BIAS;
+
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
 		}
 
 		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_FPEN)) {
-			control |= SM501_DC_PANEL_CONTROL_FPEN;
+			if (pd->flags & SM501FB_FLAG_PANEL_INV_FPEN)
+				control &= ~SM501_DC_PANEL_CONTROL_FPEN;
+			else
+				control |= SM501_DC_PANEL_CONTROL_FPEN;
+
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
@@ -679,14 +689,22 @@ static void sm501fb_panel_power(struct sm501fb_info *fbi, int to)
 	} else if (!to && (control & SM501_DC_PANEL_CONTROL_VDD) != 0) {
 		/* disable panel power */
 		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_FPEN)) {
-			control &= ~SM501_DC_PANEL_CONTROL_FPEN;
+			if (pd->flags & SM501FB_FLAG_PANEL_INV_FPEN)
+				control |= SM501_DC_PANEL_CONTROL_FPEN;
+			else
+				control &= ~SM501_DC_PANEL_CONTROL_FPEN;
+
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
 		}
 
 		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_VBIASEN)) {
-			control &= ~SM501_DC_PANEL_CONTROL_BIAS;
+			if (pd->flags & SM501FB_FLAG_PANEL_INV_VBIASEN)
+				control |= SM501_DC_PANEL_CONTROL_BIAS;
+			else
+				control &= ~SM501_DC_PANEL_CONTROL_BIAS;
+
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 95c1c39ba44..b530fa6a1d3 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -73,6 +73,8 @@ extern unsigned long sm501_gpio_get(struct device *dev,
 #define SM501FB_FLAG_USE_HWACCEL	(1<<3)
 #define SM501FB_FLAG_PANEL_NO_FPEN	(1<<4)
 #define SM501FB_FLAG_PANEL_NO_VBIASEN	(1<<5)
+#define SM501FB_FLAG_PANEL_INV_FPEN	(1<<6)
+#define SM501FB_FLAG_PANEL_INV_VBIASEN	(1<<7)
 
 struct sm501_platdata_fbsub {
 	struct fb_videomode	*def_mode;
-- 
GitLab


From 9b599fb2fc23386dfd2965bf7d10b2b0f628b208 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:31:36 -0700
Subject: [PATCH 417/853] sm501: restructure init to allow only 1 fb on an
 SM501

Add the ability to register only one of the two possible main framebuffer
devices on the SM501 by passing platform data for only the framebuffer
that you are interested in having.

As a side note, we update the init sequence to commonise the code that is
executed twice, and fix a pair of missing frees that we didn't do on
framebuffer exit, such as freeing the fb's cmap.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sm501fb.c | 264 +++++++++++++++++++++++-----------------
 1 file changed, 149 insertions(+), 115 deletions(-)

diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index 122a0f8495c..b473cf665d8 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -143,6 +143,8 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem,
 			   unsigned int why, size_t size)
 {
 	unsigned int ptr = 0;
+	unsigned int end;
+	struct fb_info *fbi;
 
 	switch (why) {
 	case SM501_MEMF_CURSOR:
@@ -152,7 +154,9 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem,
 
 	case SM501_MEMF_PANEL:
 		ptr = inf->fbmem_len - size;
-		if (ptr < inf->fb[0]->fix.smem_len)
+		fbi = inf->fb[0];
+
+		if (fbi && ptr < fbi->fix.smem_len)
 			return -ENOMEM;
 
 		break;
@@ -162,11 +166,18 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem,
 		break;
 
 	case SM501_MEMF_ACCEL:
-		ptr = inf->fb[0]->fix.smem_len;
+		fbi = inf->fb[0];
+		ptr = fbi ? fbi->fix.smem_len : 0;
+
+		fbi = inf->fb[1];
+		if (fbi)
+			end = (fbi->fix.smem_start - inf->fbmem_res->start);
+		else
+			end = inf->fbmem_len;
 
-		if ((ptr + size) >
-		    (inf->fb[1]->fix.smem_start - inf->fbmem_res->start))
+		if ((ptr + size) > end)
 			return -ENOMEM;
+
 		break;
 
 	default:
@@ -1228,39 +1239,6 @@ static struct fb_ops sm501fb_ops_pnl = {
 	.fb_imageblit	= cfb_imageblit,
 };
 
-/* sm501fb_info_alloc
- *
- * creates and initialises an sm501fb_info structure
-*/
-
-static struct sm501fb_info *sm501fb_info_alloc(struct fb_info *fbinfo_crt,
-					       struct fb_info *fbinfo_pnl)
-{
-	struct sm501fb_info *info;
-	struct sm501fb_par  *par;
-
-	info = kzalloc(sizeof(struct sm501fb_info), GFP_KERNEL);
-	if (info) {
-		/* set the references back */
-
-		par = fbinfo_crt->par;
-		par->info = info;
-		par->head = HEAD_CRT;
-		fbinfo_crt->pseudo_palette = &par->pseudo_palette;
-
-		par = fbinfo_pnl->par;
-		par->info = info;
-		par->head = HEAD_PANEL;
-		fbinfo_pnl->pseudo_palette = &par->pseudo_palette;
-
-		/* store the two fbs into our info */
-		info->fb[HEAD_CRT] = fbinfo_crt;
-		info->fb[HEAD_PANEL] = fbinfo_pnl;
-	}
-
-	return info;
-}
-
 /* sm501_init_cursor
  *
  * initialise hw cursor parameters
@@ -1268,10 +1246,16 @@ static struct sm501fb_info *sm501fb_info_alloc(struct fb_info *fbinfo_crt,
 
 static int sm501_init_cursor(struct fb_info *fbi, unsigned int reg_base)
 {
-	struct sm501fb_par *par = fbi->par;
-	struct sm501fb_info *info = par->info;
+	struct sm501fb_par *par;
+	struct sm501fb_info *info;
 	int ret;
 
+	if (fbi == NULL)
+		return 0;
+
+	par = fbi->par;
+	info = par->info;
+
 	par->cursor_regs = info->regs + reg_base;
 
 	ret = sm501_alloc_mem(info, &par->cursor, SM501_MEMF_CURSOR, 1024);
@@ -1299,13 +1283,10 @@ static int sm501fb_start(struct sm501fb_info *info,
 			 struct platform_device *pdev)
 {
 	struct resource	*res;
-	struct device *dev;
+	struct device *dev = &pdev->dev;
 	int k;
 	int ret;
 
-	info->dev = dev = &pdev->dev;
-	platform_set_drvdata(pdev, info);
-
 	info->irq = ret = platform_get_irq(pdev, 0);
 	if (ret < 0) {
 		/* we currently do not use the IRQ */
@@ -1408,11 +1389,6 @@ static void sm501fb_stop(struct sm501fb_info *info)
 	kfree(info->regs_res);
 }
 
-static void sm501fb_info_release(struct sm501fb_info *info)
-{
-	kfree(info);
-}
-
 static int sm501fb_init_fb(struct fb_info *fb,
 			   enum sm501_controller head,
 			   const char *fbname)
@@ -1557,36 +1533,93 @@ static struct sm501_platdata_fb sm501fb_def_pdata = {
 static char driver_name_crt[] = "sm501fb-crt";
 static char driver_name_pnl[] = "sm501fb-panel";
 
-static int __init sm501fb_probe(struct platform_device *pdev)
+static int __devinit sm501fb_probe_one(struct sm501fb_info *info,
+				       enum sm501_controller head)
 {
-	struct sm501fb_info *info;
-	struct device	    *dev = &pdev->dev;
-	struct fb_info	    *fbinfo_crt;
-	struct fb_info	    *fbinfo_pnl;
-	int		     ret;
+	unsigned char *name = (head == HEAD_CRT) ? "crt" : "panel";
+	struct sm501_platdata_fbsub *pd;
+	struct sm501fb_par *par;
+	struct fb_info *fbi;
 
-	/* allocate our framebuffers */
+	pd = (head == HEAD_CRT) ? info->pdata->fb_crt : info->pdata->fb_pnl;
 
-	fbinfo_crt = framebuffer_alloc(sizeof(struct sm501fb_par), dev);
-	if (fbinfo_crt == NULL) {
-		dev_err(dev, "cannot allocate crt framebuffer\n");
+	/* Do not initialise if we've not been given any platform data */
+	if (pd == NULL) {
+		dev_info(info->dev, "no data for fb %s (disabled)\n", name);
+		return 0;
+	}
+
+	fbi = framebuffer_alloc(sizeof(struct sm501fb_par), info->dev);
+	if (fbi == NULL) {
+		dev_err(info->dev, "cannot allocate %s framebuffer\n", name);
 		return -ENOMEM;
 	}
 
-	fbinfo_pnl = framebuffer_alloc(sizeof(struct sm501fb_par), dev);
-	if (fbinfo_pnl == NULL) {
-		dev_err(dev, "cannot allocate panel framebuffer\n");
-		ret = -ENOMEM;
-		goto fbinfo_crt_alloc_fail;
+	par = fbi->par;
+	par->info = info;
+	par->head = head;
+	fbi->pseudo_palette = &par->pseudo_palette;
+
+	info->fb[head] = fbi;
+
+	return 0;
+}
+
+/* Free up anything allocated by sm501fb_init_fb */
+
+static void sm501_free_init_fb(struct sm501fb_info *info,
+				enum sm501_controller head)
+{
+	struct fb_info *fbi = info->fb[head];
+
+	fb_dealloc_cmap(&fbi->cmap);
+}
+
+static int __devinit sm501fb_start_one(struct sm501fb_info *info,
+				       enum sm501_controller head,
+				       const char *drvname)
+{
+	struct fb_info *fbi = info->fb[head];
+	int ret;
+
+	if (!fbi)
+		return 0;
+
+	ret = sm501fb_init_fb(info->fb[head], head, drvname);
+	if (ret) {
+		dev_err(info->dev, "cannot initialise fb %s\n", drvname);
+		return ret;
+	}
+
+	ret = register_framebuffer(info->fb[head]);
+	if (ret) {
+		dev_err(info->dev, "failed to register fb %s\n", drvname);
+		sm501_free_init_fb(info, head);
+		return ret;
 	}
 
-	info = sm501fb_info_alloc(fbinfo_crt, fbinfo_pnl);
-	if (info == NULL) {
-		dev_err(dev, "cannot allocate par\n");
-		ret = -ENOMEM;
-		goto sm501fb_alloc_fail;
+	dev_info(info->dev, "fb%d: %s frame buffer\n", fbi->node, fbi->fix.id);
+
+	return 0;
+}
+
+static int __devinit sm501fb_probe(struct platform_device *pdev)
+{
+	struct sm501fb_info *info;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	/* allocate our framebuffers */
+
+	info = kzalloc(sizeof(struct sm501fb_info), GFP_KERNEL);
+	if (!info) {
+		dev_err(dev, "failed to allocate state\n");
+		return -ENOMEM;
 	}
 
+	info->dev = dev = &pdev->dev;
+	platform_set_drvdata(pdev, info);
+
 	if (dev->parent->platform_data) {
 		struct sm501_platdata *pd = dev->parent->platform_data;
 		info->pdata = pd->fb;
@@ -1597,90 +1630,88 @@ static int __init sm501fb_probe(struct platform_device *pdev)
 		info->pdata = &sm501fb_def_pdata;
 	}
 
-	/* start the framebuffers */
+	/* probe for the presence of each panel */
 
-	ret = sm501fb_start(info, pdev);
-	if (ret) {
-		dev_err(dev, "cannot initialise SM501\n");
-		goto sm501fb_start_fail;
+	ret = sm501fb_probe_one(info, HEAD_CRT);
+	if (ret < 0) {
+		dev_err(dev, "failed to probe CRT\n");
+		goto err_alloc;
 	}
 
-	/* CRT framebuffer setup */
+	ret = sm501fb_probe_one(info, HEAD_PANEL);
+	if (ret < 0) {
+		dev_err(dev, "failed to probe PANEL\n");
+		goto err_probed_crt;
+	}
 
-	ret = sm501fb_init_fb(fbinfo_crt, HEAD_CRT, driver_name_crt);
-	if (ret) {
-		dev_err(dev, "cannot initialise CRT fb\n");
-		goto sm501fb_start_fail;
+	if (info->fb[HEAD_PANEL] == NULL &&
+	    info->fb[HEAD_CRT] == NULL) {
+		dev_err(dev, "no framebuffers found\n");
+		goto err_alloc;
 	}
 
-	/* Panel framebuffer setup */
+	/* get the resources for both of the framebuffers */
 
-	ret = sm501fb_init_fb(fbinfo_pnl, HEAD_PANEL, driver_name_pnl);
+	ret = sm501fb_start(info, pdev);
 	if (ret) {
-		dev_err(dev, "cannot initialise Panel fb\n");
-		goto sm501fb_start_fail;
+		dev_err(dev, "cannot initialise SM501\n");
+		goto err_probed_panel;
 	}
 
-	/* register framebuffers */
-
-	ret = register_framebuffer(fbinfo_crt);
-	if (ret < 0) {
-		dev_err(dev, "failed to register CRT fb (%d)\n", ret);
-		goto register_crt_fail;
+	ret = sm501fb_start_one(info, HEAD_CRT, driver_name_crt);
+	if (ret) {
+		dev_err(dev, "failed to start CRT\n");
+		goto err_started;
 	}
 
-	ret = register_framebuffer(fbinfo_pnl);
-	if (ret < 0) {
-		dev_err(dev, "failed to register panel fb (%d)\n", ret);
-		goto register_pnl_fail;
+	ret = sm501fb_start_one(info, HEAD_PANEL, driver_name_pnl);
+	if (ret) {
+		dev_err(dev, "failed to start Panel\n");
+		goto err_started_crt;
 	}
 
-	dev_info(dev, "fb%d: %s frame buffer device\n",
-		 fbinfo_crt->node, fbinfo_crt->fix.id);
-
-	dev_info(dev, "fb%d: %s frame buffer device\n",
-	       fbinfo_pnl->node, fbinfo_pnl->fix.id);
-
 	/* create device files */
 
 	ret = device_create_file(dev, &dev_attr_crt_src);
 	if (ret)
-		goto crtsrc_fail;
+		goto err_started_panel;
 
 	ret = device_create_file(dev, &dev_attr_fbregs_pnl);
 	if (ret)
-		goto fbregs_pnl_fail;
+		goto err_attached_crtsrc_file;
 
 	ret = device_create_file(dev, &dev_attr_fbregs_crt);
 	if (ret)
-		goto fbregs_crt_fail;
+		goto err_attached_pnlregs_file;
 
 	/* we registered, return ok */
 	return 0;
 
- fbregs_crt_fail:
+err_attached_pnlregs_file:
 	device_remove_file(dev, &dev_attr_fbregs_pnl);
 
- fbregs_pnl_fail:
+err_attached_crtsrc_file:
 	device_remove_file(dev, &dev_attr_crt_src);
 
- crtsrc_fail:
-	unregister_framebuffer(fbinfo_pnl);
+err_started_panel:
+	unregister_framebuffer(info->fb[HEAD_PANEL]);
+	sm501_free_init_fb(info, HEAD_PANEL);
 
- register_pnl_fail:
-	unregister_framebuffer(fbinfo_crt);
+err_started_crt:
+	unregister_framebuffer(info->fb[HEAD_CRT]);
+	sm501_free_init_fb(info, HEAD_CRT);
 
- register_crt_fail:
+err_started:
 	sm501fb_stop(info);
 
- sm501fb_start_fail:
-	sm501fb_info_release(info);
+err_probed_panel:
+	framebuffer_release(info->fb[HEAD_PANEL]);
 
- sm501fb_alloc_fail:
-	framebuffer_release(fbinfo_pnl);
+err_probed_crt:
+	framebuffer_release(info->fb[HEAD_CRT]);
 
- fbinfo_crt_alloc_fail:
-	framebuffer_release(fbinfo_crt);
+err_alloc:
+	kfree(info);
 
 	return ret;
 }
@@ -1699,11 +1730,14 @@ static int sm501fb_remove(struct platform_device *pdev)
 	device_remove_file(&pdev->dev, &dev_attr_fbregs_pnl);
 	device_remove_file(&pdev->dev, &dev_attr_crt_src);
 
+	sm501_free_init_fb(info, HEAD_CRT);
+	sm501_free_init_fb(info, HEAD_PANEL);
+
 	unregister_framebuffer(fbinfo_crt);
 	unregister_framebuffer(fbinfo_pnl);
 
 	sm501fb_stop(info);
-	sm501fb_info_release(info);
+	kfree(info);
 
 	framebuffer_release(fbinfo_pnl);
 	framebuffer_release(fbinfo_crt);
-- 
GitLab


From d05254190dd1a4751284f4a51efb70fcc16c45a4 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:31:37 -0700
Subject: [PATCH 418/853] sm501: fixup allocation code to be 64bit resource
 compliant

As pointed out by Andrew Morton, we have a problem when setting the 64bit
resources option.  Alter the allocation routines to remove the need to use
the start and end fields, use the proper HEAD_PANEL/HEAD_CRT and update
the comments.

Note, we also fix the bug where we failed to check the size of the
CRT memory allocation.

[akpm@linux-foundation.org: cleanup]
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sm501fb.c | 53 ++++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index b473cf665d8..f94ae84a58c 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -48,10 +48,15 @@ enum sm501_controller {
 	HEAD_PANEL	= 1,
 };
 
-/* SM501 memory address */
+/* SM501 memory address.
+ *
+ * This structure is used to track memory usage within the SM501 framebuffer
+ * allocation. The sm_addr field is stored as an offset as it is often used
+ * against both the physical and mapped addresses.
+ */
 struct sm501_mem {
 	unsigned long	 size;
-	unsigned long	 sm_addr;
+	unsigned long	 sm_addr;	/* offset from base of sm501 fb. */
 	void __iomem	*k_addr;
 };
 
@@ -142,37 +147,63 @@ static inline void sm501fb_sync_regs(struct sm501fb_info *info)
 static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem,
 			   unsigned int why, size_t size)
 {
-	unsigned int ptr = 0;
-	unsigned int end;
+	struct sm501fb_par *par;
 	struct fb_info *fbi;
+	unsigned int ptr;
+	unsigned int end;
 
 	switch (why) {
 	case SM501_MEMF_CURSOR:
 		ptr = inf->fbmem_len - size;
-		inf->fbmem_len = ptr;
+		inf->fbmem_len = ptr;	/* adjust available memory. */
 		break;
 
 	case SM501_MEMF_PANEL:
 		ptr = inf->fbmem_len - size;
-		fbi = inf->fb[0];
+		fbi = inf->fb[HEAD_CRT];
+
+		/* round down, some programs such as directfb do not draw
+		 * 0,0 correctly unless the start is aligned to a page start.
+		 */
+
+		if (ptr > 0)
+			ptr &= ~(PAGE_SIZE - 1);
 
 		if (fbi && ptr < fbi->fix.smem_len)
 			return -ENOMEM;
 
+		if (ptr < 0)
+			return -ENOMEM;
+
 		break;
 
 	case SM501_MEMF_CRT:
 		ptr = 0;
+
+		/* check to see if we have panel memory allocated
+		 * which would put an limit on available memory. */
+
+		fbi = inf->fb[HEAD_PANEL];
+		if (fbi) {
+			par = fbi->par;
+			end = par->screen.k_addr ? par->screen.sm_addr : inf->fbmem_len;
+		} else
+			end = inf->fbmem_len;
+
+		if ((ptr + size) > end)
+			return -ENOMEM;
+
 		break;
 
 	case SM501_MEMF_ACCEL:
-		fbi = inf->fb[0];
+		fbi = inf->fb[HEAD_CRT];
 		ptr = fbi ? fbi->fix.smem_len : 0;
 
-		fbi = inf->fb[1];
-		if (fbi)
-			end = (fbi->fix.smem_start - inf->fbmem_res->start);
-		else
+		fbi = inf->fb[HEAD_PANEL];
+		if (fbi) {
+			par = fbi->par;
+			end = par->screen.sm_addr;
+		} else
 			end = inf->fbmem_len;
 
 		if ((ptr + size) > end)
-- 
GitLab


From cccb6d3c149603b9c15d3c460dff317455df1766 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:31:37 -0700
Subject: [PATCH 419/853] fb: add support for the ILI9320 video display
 controller

Provide support for the ILI9320 display controller chip which is found in
many LCD displays.  Included with this is support for an example LCD using
this chip, the VGG2432A4.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/Kconfig     |  17 ++
 drivers/video/backlight/Makefile    |   4 +-
 drivers/video/backlight/ili9320.c   | 330 ++++++++++++++++++++++++++++
 drivers/video/backlight/ili9320.h   |  80 +++++++
 drivers/video/backlight/vgg2432a4.c | 284 ++++++++++++++++++++++++
 include/video/ili9320.h             | 201 +++++++++++++++++
 6 files changed, 915 insertions(+), 1 deletion(-)
 create mode 100644 drivers/video/backlight/ili9320.c
 create mode 100644 drivers/video/backlight/ili9320.h
 create mode 100644 drivers/video/backlight/vgg2432a4.c
 create mode 100644 include/video/ili9320.h

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 30bf7f2f163..a5b3a92ffdc 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -36,6 +36,23 @@ config LCD_LTV350QV
 
 	  The LTV350QV panel is present on all ATSTK1000 boards.
 
+config LCD_ILI9320
+	tristate
+	depends on LCD_CLASS_DEVICE && BACKLIGHT_LCD_SUPPORT
+	default n
+	help
+	  If you have a panel based on the ILI9320 controller chip
+	  then say y to include a power driver for it.
+
+config LCD_VGG2432A4
+	tristate "VGG2432A4 LCM device support"
+	depends on BACKLIGHT_LCD_SUPPORT && LCD_CLASS_DEVICE && SPI_MASTER
+	select LCD_ILI9320
+	default n
+	help
+	  If you have a VGG2432A4 panel based on the ILI9320 controller chip
+	  then say y to include a power driver for it.
+
 #
 # Backlight
 #
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index b51a7cd1250..366d84e380c 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -1,7 +1,9 @@
 # Backlight & LCD drivers
 
 obj-$(CONFIG_LCD_CLASS_DEVICE)     += lcd.o
-obj-$(CONFIG_LCD_LTV350QV)	+= ltv350qv.o
+obj-$(CONFIG_LCD_LTV350QV)	   += ltv350qv.o
+obj-$(CONFIG_LCD_ILI9320)	   += ili9320.o
+obj-$(CONFIG_LCD_VGG2432A4)	   += vgg2432a4.o
 
 obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o
 obj-$(CONFIG_BACKLIGHT_CORGI)	+= corgi_bl.o
diff --git a/drivers/video/backlight/ili9320.c b/drivers/video/backlight/ili9320.c
new file mode 100644
index 00000000000..ba89b41b639
--- /dev/null
+++ b/drivers/video/backlight/ili9320.c
@@ -0,0 +1,330 @@
+/* drivers/video/backlight/ili9320.c
+ *
+ * ILI9320 LCD controller driver core.
+ *
+ * Copyright 2007 Simtec Electronics
+ *	http://armlinux.simtec.co.uk/
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/lcd.h>
+#include <linux/module.h>
+
+#include <linux/spi/spi.h>
+
+#include <video/ili9320.h>
+
+#include "ili9320.h"
+
+
+static inline int ili9320_write_spi(struct ili9320 *ili,
+				    unsigned int reg,
+				    unsigned int value)
+{
+	struct ili9320_spi *spi = &ili->access.spi;
+	unsigned char *addr = spi->buffer_addr;
+	unsigned char *data = spi->buffer_data;
+
+	/* spi message consits of:
+	 * first byte: ID and operation
+	 */
+
+	addr[0] = spi->id | ILI9320_SPI_INDEX | ILI9320_SPI_WRITE;
+	addr[1] = reg >> 8;
+	addr[2] = reg;
+
+	/* second message is the data to transfer */
+
+	data[0] = spi->id | ILI9320_SPI_DATA  | ILI9320_SPI_WRITE;
+ 	data[1] = value >> 8;
+	data[2] = value;
+
+	return spi_sync(spi->dev, &spi->message);
+}
+
+int ili9320_write(struct ili9320 *ili, unsigned int reg, unsigned int value)
+{
+	dev_dbg(ili->dev, "write: reg=%02x, val=%04x\n", reg, value);
+	return ili->write(ili, reg, value);
+}
+
+EXPORT_SYMBOL_GPL(ili9320_write);
+
+int ili9320_write_regs(struct ili9320 *ili,
+		       struct ili9320_reg *values,
+		       int nr_values)
+{
+	int index;
+	int ret;
+
+	for (index = 0; index < nr_values; index++, values++) {
+		ret = ili9320_write(ili, values->address, values->value);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ili9320_write_regs);
+
+static void ili9320_reset(struct ili9320 *lcd)
+{
+	struct ili9320_platdata *cfg = lcd->platdata;
+
+	cfg->reset(1);
+	mdelay(50);
+
+	cfg->reset(0);
+	mdelay(50);
+
+	cfg->reset(1);
+	mdelay(100);
+}
+
+static inline int ili9320_init_chip(struct ili9320 *lcd)
+{
+	int ret;
+
+	ili9320_reset(lcd);
+
+	ret = lcd->client->init(lcd, lcd->platdata);
+	if (ret != 0) {
+		dev_err(lcd->dev, "failed to initialise display\n");
+		return ret;
+	}
+
+	lcd->initialised = 1;
+	return 0;
+}
+
+static inline int ili9320_power_on(struct ili9320 *lcd)
+{
+	if (!lcd->initialised)
+		ili9320_init_chip(lcd);
+
+	lcd->display1 |= (ILI9320_DISPLAY1_D(3) | ILI9320_DISPLAY1_BASEE);
+	ili9320_write(lcd, ILI9320_DISPLAY1, lcd->display1);
+
+	return 0;
+}
+
+static inline int ili9320_power_off(struct ili9320 *lcd)
+{
+	lcd->display1 &= ~(ILI9320_DISPLAY1_D(3) | ILI9320_DISPLAY1_BASEE);
+	ili9320_write(lcd, ILI9320_DISPLAY1, lcd->display1);
+
+	return 0;
+}
+
+#define POWER_IS_ON(pwr)	((pwr) <= FB_BLANK_NORMAL)
+
+static int ili9320_power(struct ili9320 *lcd, int power)
+{
+	int ret = 0;
+
+	dev_dbg(lcd->dev, "power %d => %d\n", lcd->power, power);
+
+	if (POWER_IS_ON(power) && !POWER_IS_ON(lcd->power))
+		ret = ili9320_power_on(lcd);
+	else if (!POWER_IS_ON(power) && POWER_IS_ON(lcd->power))
+		ret = ili9320_power_off(lcd);
+
+	if (ret == 0)
+		lcd->power = power;
+	else
+		dev_warn(lcd->dev, "failed to set power mode %d\n", power);
+
+	return ret;
+}
+
+static inline struct ili9320 *to_our_lcd(struct lcd_device *lcd)
+{
+	return lcd_get_data(lcd);
+}
+
+static int ili9320_set_power(struct lcd_device *ld, int power)
+{
+	struct ili9320 *lcd = to_our_lcd(ld);
+
+	return ili9320_power(lcd, power);
+}
+
+static int ili9320_get_power(struct lcd_device *ld)
+{
+	struct ili9320 *lcd = to_our_lcd(ld);
+
+	return lcd->power;
+}
+
+static struct lcd_ops ili9320_ops = {
+	.get_power	= ili9320_get_power,
+	.set_power	= ili9320_set_power,
+};
+
+static void __devinit ili9320_setup_spi(struct ili9320 *ili,
+					struct spi_device *dev)
+{
+	struct ili9320_spi *spi = &ili->access.spi;
+
+	ili->write = ili9320_write_spi;
+	spi->dev = dev;
+
+	/* fill the two messages we are going to use to send the data
+	 * with, the first the address followed by the data. The datasheet
+	 * says they should be done as two distinct cycles of the SPI CS line.
+	 */
+
+	spi->xfer[0].tx_buf = spi->buffer_addr;
+	spi->xfer[1].tx_buf = spi->buffer_data;
+	spi->xfer[0].len = 3;
+	spi->xfer[1].len = 3;
+	spi->xfer[0].bits_per_word = 8;
+	spi->xfer[1].bits_per_word = 8;
+	spi->xfer[0].cs_change = 1;
+
+	spi_message_init(&spi->message);
+	spi_message_add_tail(&spi->xfer[0], &spi->message);
+	spi_message_add_tail(&spi->xfer[1], &spi->message);
+}
+
+int __devinit ili9320_probe_spi(struct spi_device *spi,
+				struct ili9320_client *client)
+{
+	struct ili9320_platdata *cfg = spi->dev.platform_data;
+	struct device *dev = &spi->dev;
+	struct ili9320 *ili;
+	struct lcd_device *lcd;
+	int ret = 0;
+
+	/* verify we where given some information */
+
+	if (cfg == NULL) {
+		dev_err(dev, "no platform data supplied\n");
+		return -EINVAL;
+	}
+
+	if (cfg->hsize <= 0 || cfg->vsize <= 0 || cfg->reset == NULL) {
+		dev_err(dev, "invalid platform data supplied\n");
+		return -EINVAL;
+	}
+
+	/* allocate and initialse our state */
+
+	ili = kzalloc(sizeof(struct ili9320), GFP_KERNEL);
+	if (ili == NULL) {
+		dev_err(dev, "no memory for device\n");
+		return -ENOMEM;
+	}
+
+	ili->access.spi.id = ILI9320_SPI_IDCODE | ILI9320_SPI_ID(1);
+
+	ili->dev = dev;
+	ili->client = client;
+	ili->power = FB_BLANK_POWERDOWN;
+	ili->platdata = cfg;
+
+	dev_set_drvdata(&spi->dev, ili);
+
+	ili9320_setup_spi(ili, spi);
+
+	lcd = lcd_device_register("ili9320", dev, ili, &ili9320_ops);
+	if (IS_ERR(lcd)) {
+		dev_err(dev, "failed to register lcd device\n");
+		ret = PTR_ERR(lcd);
+		goto err_free;
+	}
+
+	ili->lcd = lcd;
+
+	dev_info(dev, "initialising %s\n", client->name);
+
+	ret = ili9320_power(ili, FB_BLANK_UNBLANK);
+	if (ret != 0) {
+		dev_err(dev, "failed to set lcd power state\n");
+		goto err_unregister;
+	}
+
+	return 0;
+
+ err_unregister:
+	lcd_device_unregister(lcd);
+
+ err_free:
+	kfree(ili);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(ili9320_probe_spi);
+
+int __devexit ili9320_remove(struct ili9320 *ili)
+{
+	ili9320_power(ili, FB_BLANK_POWERDOWN);
+
+	lcd_device_unregister(ili->lcd);
+	kfree(ili);
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ili9320_remove);
+
+#ifdef CONFIG_PM
+int ili9320_suspend(struct ili9320 *lcd, pm_message_t state)
+{
+	int ret;
+
+	dev_dbg(lcd->dev, "%s: event %d\n", __func__, state.event);
+
+	if (state.event == PM_EVENT_SUSPEND) {
+		ret = ili9320_power(lcd, FB_BLANK_POWERDOWN);
+
+		if (lcd->platdata->suspend == ILI9320_SUSPEND_DEEP) {
+			ili9320_write(lcd, ILI9320_POWER1, lcd->power1 |
+				      ILI9320_POWER1_SLP |
+				      ILI9320_POWER1_DSTB);
+			lcd->initialised = 0;
+		}
+
+		return ret;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ili9320_suspend);
+
+int ili9320_resume(struct ili9320 *lcd)
+{
+	dev_info(lcd->dev, "resuming from power state %d\n", lcd->power);
+
+	if (lcd->platdata->suspend == ILI9320_SUSPEND_DEEP) {
+		ili9320_write(lcd, ILI9320_POWER1, 0x00);
+	}
+
+	return ili9320_power(lcd, FB_BLANK_UNBLANK);
+}
+
+EXPORT_SYMBOL_GPL(ili9320_resume);
+#endif
+
+/* Power down all displays on reboot, poweroff or halt */
+void ili9320_shutdown(struct ili9320 *lcd)
+{
+	ili9320_power(lcd, FB_BLANK_POWERDOWN);
+}
+
+EXPORT_SYMBOL_GPL(ili9320_shutdown);
+
+MODULE_AUTHOR("Ben Dooks <ben-linux@fluff.org>");
+MODULE_DESCRIPTION("ILI9320 LCD Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/backlight/ili9320.h b/drivers/video/backlight/ili9320.h
new file mode 100644
index 00000000000..e388eca7cac
--- /dev/null
+++ b/drivers/video/backlight/ili9320.h
@@ -0,0 +1,80 @@
+/* drivers/video/backlight/ili9320.h
+ *
+ * ILI9320 LCD controller driver core.
+ *
+ * Copyright 2007 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * http://armlinux.simtec.co.uk/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+/* Holder for register and value pairs. */
+struct ili9320_reg {
+	unsigned short		address;
+	unsigned short		value;
+};
+
+struct ili9320;
+
+struct ili9320_client {
+	const char	*name;
+	int	(*init)(struct ili9320 *ili, struct ili9320_platdata *cfg);
+
+};
+/* Device attached via an SPI bus. */
+struct  ili9320_spi {
+	struct spi_device	*dev;
+	struct spi_message	message;
+	struct spi_transfer	xfer[2];
+
+	unsigned char		id;
+	unsigned char		buffer_addr[4];
+	unsigned char		buffer_data[4];
+};
+
+/* ILI9320 device state. */
+struct ili9320 {
+	union {
+		struct ili9320_spi	spi;	/* SPI attached device. */
+	} access;				/* Register access method. */
+
+	struct device			*dev;
+	struct lcd_device		*lcd;	/* LCD device we created. */
+	struct ili9320_client		*client;
+	struct ili9320_platdata		*platdata;
+
+	int				 power; /* current power state. */
+	int				 initialised;
+
+	unsigned short			 display1;
+	unsigned short			 power1;
+
+	int (*write)(struct ili9320 *ili, unsigned int reg, unsigned int val);
+};
+
+
+/* ILI9320 register access routines */
+
+extern int ili9320_write(struct ili9320 *ili,
+			 unsigned int reg, unsigned int value);
+
+extern int ili9320_write_regs(struct ili9320 *ili,
+			      struct ili9320_reg *values,
+			      int nr_values);
+
+/* Device probe */
+
+extern int ili9320_probe_spi(struct spi_device *spi,
+			     struct ili9320_client *cli);
+
+extern int ili9320_remove(struct ili9320 *lcd);
+extern void ili9320_shutdown(struct ili9320 *lcd);
+
+/* PM */
+
+extern int ili9320_suspend(struct ili9320 *lcd, pm_message_t state);
+extern int ili9320_resume(struct ili9320 *lcd);
diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c
new file mode 100644
index 00000000000..593c7687d54
--- /dev/null
+++ b/drivers/video/backlight/vgg2432a4.c
@@ -0,0 +1,284 @@
+/* drivers/video/backlight/vgg2432a4.c
+ *
+ * VGG2432A4 (ILI9320) LCD controller driver.
+ *
+ * Copyright 2007 Simtec Electronics
+ *	http://armlinux.simtec.co.uk/
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/lcd.h>
+#include <linux/module.h>
+
+#include <linux/spi/spi.h>
+
+#include <video/ili9320.h>
+
+#include "ili9320.h"
+
+/* Device initialisation sequences */
+
+static struct ili9320_reg vgg_init1[] = {
+	{
+		.address = ILI9320_POWER1,
+		.value	 = ILI9320_POWER1_AP(0) | ILI9320_POWER1_BT(0),
+	}, {
+		.address = ILI9320_POWER2,
+		.value	 = (ILI9320_POWER2_VC(7) |
+			    ILI9320_POWER2_DC0(0) | ILI9320_POWER2_DC1(0)),
+	}, {
+		.address = ILI9320_POWER3,
+		.value	 = ILI9320_POWER3_VRH(0),
+	}, {
+		.address = ILI9320_POWER4,
+		.value	 = ILI9320_POWER4_VREOUT(0),
+	},
+};
+
+static struct ili9320_reg vgg_init2[] = {
+	{
+		.address = ILI9320_POWER1,
+		.value   = (ILI9320_POWER1_AP(3) | ILI9320_POWER1_APE |
+			    ILI9320_POWER1_BT(7) | ILI9320_POWER1_SAP),
+	}, {
+		.address = ILI9320_POWER2,
+		.value   = ILI9320_POWER2_VC(7) | ILI9320_POWER2_DC0(3),
+	}
+};
+
+static struct ili9320_reg vgg_gamma[] = {
+	{
+		.address = ILI9320_GAMMA1,
+		.value	 = 0x0000,
+	}, {
+		.address = ILI9320_GAMMA2,
+		.value   = 0x0505,
+	}, {
+		.address = ILI9320_GAMMA3,
+		.value	 = 0x0004,
+	}, {
+		.address = ILI9320_GAMMA4,
+		.value	 = 0x0006,
+	}, {
+		.address = ILI9320_GAMMA5,
+		.value	 = 0x0707,
+	}, {
+		.address = ILI9320_GAMMA6,
+		.value	 = 0x0105,
+	}, {
+		.address = ILI9320_GAMMA7,
+		.value	 = 0x0002,
+	}, {
+		.address = ILI9320_GAMMA8,
+		.value	 = 0x0707,
+	}, {
+		.address = ILI9320_GAMMA9,
+		.value	 = 0x0704,
+	}, {
+		.address = ILI9320_GAMMA10,
+		.value	 = 0x807,
+	}
+
+};
+
+static struct ili9320_reg vgg_init0[] = {
+	[0]	= {
+		/* set direction and scan mode gate */
+		.address = ILI9320_DRIVER,
+		.value	 = ILI9320_DRIVER_SS,
+	}, {
+		.address = ILI9320_DRIVEWAVE,
+		.value	 = (ILI9320_DRIVEWAVE_MUSTSET |
+			    ILI9320_DRIVEWAVE_EOR | ILI9320_DRIVEWAVE_BC),
+	}, {
+		.address = ILI9320_ENTRYMODE,
+		.value	 = ILI9320_ENTRYMODE_ID(3) | ILI9320_ENTRYMODE_BGR,
+	}, {
+		.address = ILI9320_RESIZING,
+		.value	 = 0x0,
+	},
+};
+
+
+static int vgg2432a4_lcd_init(struct ili9320 *lcd,
+			      struct ili9320_platdata *cfg)
+{
+	unsigned int addr;
+	int ret;
+
+	/* Set VCore before anything else (VGG243237-6UFLWA) */
+	ret = ili9320_write(lcd, 0x00e5, 0x8000);
+	if (ret)
+		goto err_initial;
+
+	/* Start the oscillator up before we can do anything else. */
+	ret = ili9320_write(lcd, ILI9320_OSCILATION, ILI9320_OSCILATION_OSC);
+	if (ret)
+		goto err_initial;
+
+	/* must wait at least 10ms after starting */
+	mdelay(15);
+
+	ret = ili9320_write_regs(lcd, vgg_init0, ARRAY_SIZE(vgg_init0));
+	if (ret != 0)
+		goto err_initial;
+
+	ili9320_write(lcd, ILI9320_DISPLAY2, cfg->display2);
+	ili9320_write(lcd, ILI9320_DISPLAY3, cfg->display3);
+	ili9320_write(lcd, ILI9320_DISPLAY4, cfg->display4);
+
+	ili9320_write(lcd, ILI9320_RGB_IF1, cfg->rgb_if1);
+	ili9320_write(lcd, ILI9320_FRAMEMAKER, 0x0);
+	ili9320_write(lcd, ILI9320_RGB_IF2, ILI9320_RGBIF2_DPL);
+
+	ret = ili9320_write_regs(lcd, vgg_init1, ARRAY_SIZE(vgg_init1));
+	if (ret != 0)
+		goto err_vgg;
+
+	mdelay(300);
+
+	ret = ili9320_write_regs(lcd, vgg_init2, ARRAY_SIZE(vgg_init2));
+	if (ret != 0)
+		goto err_vgg2;
+
+	mdelay(100);
+
+	ili9320_write(lcd, ILI9320_POWER3, 0x13c);
+
+	mdelay(100);
+
+	ili9320_write(lcd, ILI9320_POWER4, 0x1c00);
+	ili9320_write(lcd, ILI9320_POWER7, 0x000e);
+
+	mdelay(100);
+
+	ili9320_write(lcd, ILI9320_GRAM_HORIZ_ADDR, 0x00);
+	ili9320_write(lcd, ILI9320_GRAM_VERT_ADD, 0x00);
+
+	ret = ili9320_write_regs(lcd, vgg_gamma, ARRAY_SIZE(vgg_gamma));
+	if (ret != 0)
+		goto err_vgg3;
+
+	ili9320_write(lcd, ILI9320_HORIZ_START, 0x0);
+	ili9320_write(lcd, ILI9320_HORIZ_END, cfg->hsize - 1);
+	ili9320_write(lcd, ILI9320_VERT_START, 0x0);
+	ili9320_write(lcd, ILI9320_VERT_END, cfg->vsize - 1);
+
+	ili9320_write(lcd, ILI9320_DRIVER2,
+		      ILI9320_DRIVER2_NL(((cfg->vsize - 240) / 8) + 0x1D));
+
+	ili9320_write(lcd, ILI9320_BASE_IMAGE, 0x1);
+	ili9320_write(lcd, ILI9320_VERT_SCROLL, 0x00);
+
+	for (addr = ILI9320_PARTIAL1_POSITION; addr <= ILI9320_PARTIAL2_END;
+	     addr++) {
+		ili9320_write(lcd, addr, 0x0);
+	}
+
+	ili9320_write(lcd, ILI9320_INTERFACE1, 0x10);
+	ili9320_write(lcd, ILI9320_INTERFACE2, cfg->interface2);
+	ili9320_write(lcd, ILI9320_INTERFACE3, cfg->interface3);
+	ili9320_write(lcd, ILI9320_INTERFACE4, cfg->interface4);
+	ili9320_write(lcd, ILI9320_INTERFACE5, cfg->interface5);
+	ili9320_write(lcd, ILI9320_INTERFACE6, cfg->interface6);
+
+	lcd->display1 = (ILI9320_DISPLAY1_D(3) | ILI9320_DISPLAY1_DTE |
+			 ILI9320_DISPLAY1_GON | ILI9320_DISPLAY1_BASEE |
+			 0x40);
+
+	ili9320_write(lcd, ILI9320_DISPLAY1, lcd->display1);
+
+	return 0;
+
+ err_vgg3:
+ err_vgg2:
+ err_vgg:
+ err_initial:
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int vgg2432a4_suspend(struct spi_device *spi, pm_message_t state)
+{
+	return ili9320_suspend(dev_get_drvdata(&spi->dev), state);
+}
+
+static int vgg2432a4_resume(struct spi_device *spi)
+{
+	return ili9320_resume(dev_get_drvdata(&spi->dev));
+}
+#else
+#define vgg2432a4_suspend	NULL
+#define vgg2432a4_resume 	NULL
+#endif
+
+static struct ili9320_client vgg2432a4_client = {
+	.name	= "VGG2432A4",
+	.init	= vgg2432a4_lcd_init,
+};
+
+/* Device probe */
+
+static int __devinit vgg2432a4_probe(struct spi_device *spi)
+{
+	int ret;
+
+	ret = ili9320_probe_spi(spi, &vgg2432a4_client);
+	if (ret != 0) {
+		dev_err(&spi->dev, "failed to initialise ili9320\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __devexit vgg2432a4_remove(struct spi_device *spi)
+{
+	return ili9320_remove(dev_get_drvdata(&spi->dev));
+}
+
+static void vgg2432a4_shutdown(struct spi_device *spi)
+{
+	ili9320_shutdown(dev_get_drvdata(&spi->dev));
+}
+
+static struct spi_driver vgg2432a4_driver = {
+	.driver = {
+		.name		= "VGG2432A4",
+		.owner		= THIS_MODULE,
+	},
+	.probe		= vgg2432a4_probe,
+	.remove		= __devexit_p(vgg2432a4_remove),
+	.shutdown	= vgg2432a4_shutdown,
+	.suspend	= vgg2432a4_suspend,
+	.resume		= vgg2432a4_resume,
+};
+
+/* Device driver initialisation */
+
+static int __init vgg2432a4_init(void)
+{
+	return spi_register_driver(&vgg2432a4_driver);
+}
+
+static void __exit vgg2432a4_exit(void)
+{
+	spi_unregister_driver(&vgg2432a4_driver);
+}
+
+module_init(vgg2432a4_init);
+module_exit(vgg2432a4_exit);
+
+MODULE_AUTHOR("Ben Dooks <ben-linux@fluff.org>");
+MODULE_DESCRIPTION("VGG2432A4 LCD Driver");
+MODULE_LICENSE("GPL v2");
+
+
diff --git a/include/video/ili9320.h b/include/video/ili9320.h
new file mode 100644
index 00000000000..e5d1622e3f3
--- /dev/null
+++ b/include/video/ili9320.h
@@ -0,0 +1,201 @@
+/* include/video/ili9320.h
+ *
+ * ILI9320 LCD controller configuration control.
+ *
+ * Copyright 2007 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * http://armlinux.simtec.co.uk/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#define ILI9320_REG(x)	(x)
+
+#define ILI9320_INDEX			ILI9320_REG(0x00)
+
+#define ILI9320_OSCILATION		ILI9320_REG(0x00)
+#define ILI9320_DRIVER			ILI9320_REG(0x01)
+#define ILI9320_DRIVEWAVE		ILI9320_REG(0x02)
+#define ILI9320_ENTRYMODE		ILI9320_REG(0x03)
+#define ILI9320_RESIZING		ILI9320_REG(0x04)
+#define ILI9320_DISPLAY1		ILI9320_REG(0x07)
+#define ILI9320_DISPLAY2		ILI9320_REG(0x08)
+#define ILI9320_DISPLAY3		ILI9320_REG(0x09)
+#define ILI9320_DISPLAY4		ILI9320_REG(0x0A)
+#define ILI9320_RGB_IF1			ILI9320_REG(0x0C)
+#define ILI9320_FRAMEMAKER		ILI9320_REG(0x0D)
+#define ILI9320_RGB_IF2			ILI9320_REG(0x0F)
+
+#define ILI9320_POWER1			ILI9320_REG(0x10)
+#define ILI9320_POWER2			ILI9320_REG(0x11)
+#define ILI9320_POWER3			ILI9320_REG(0x12)
+#define ILI9320_POWER4			ILI9320_REG(0x13)
+#define ILI9320_GRAM_HORIZ_ADDR		ILI9320_REG(0x20)
+#define ILI9320_GRAM_VERT_ADD		ILI9320_REG(0x21)
+#define ILI9320_POWER7			ILI9320_REG(0x29)
+#define ILI9320_FRAME_RATE_COLOUR	ILI9320_REG(0x2B)
+
+#define ILI9320_GAMMA1			ILI9320_REG(0x30)
+#define ILI9320_GAMMA2			ILI9320_REG(0x31)
+#define ILI9320_GAMMA3			ILI9320_REG(0x32)
+#define ILI9320_GAMMA4			ILI9320_REG(0x35)
+#define ILI9320_GAMMA5			ILI9320_REG(0x36)
+#define ILI9320_GAMMA6			ILI9320_REG(0x37)
+#define ILI9320_GAMMA7			ILI9320_REG(0x38)
+#define ILI9320_GAMMA8			ILI9320_REG(0x39)
+#define ILI9320_GAMMA9			ILI9320_REG(0x3C)
+#define ILI9320_GAMMA10			ILI9320_REG(0x3D)
+
+#define ILI9320_HORIZ_START		ILI9320_REG(0x50)
+#define ILI9320_HORIZ_END		ILI9320_REG(0x51)
+#define ILI9320_VERT_START		ILI9320_REG(0x52)
+#define ILI9320_VERT_END		ILI9320_REG(0x53)
+
+#define ILI9320_DRIVER2			ILI9320_REG(0x60)
+#define ILI9320_BASE_IMAGE		ILI9320_REG(0x61)
+#define ILI9320_VERT_SCROLL		ILI9320_REG(0x6a)
+
+#define ILI9320_PARTIAL1_POSITION	ILI9320_REG(0x80)
+#define ILI9320_PARTIAL1_START		ILI9320_REG(0x81)
+#define ILI9320_PARTIAL1_END		ILI9320_REG(0x82)
+#define ILI9320_PARTIAL2_POSITION	ILI9320_REG(0x83)
+#define ILI9320_PARTIAL2_START		ILI9320_REG(0x84)
+#define ILI9320_PARTIAL2_END		ILI9320_REG(0x85)
+
+#define ILI9320_INTERFACE1		ILI9320_REG(0x90)
+#define ILI9320_INTERFACE2		ILI9320_REG(0x92)
+#define ILI9320_INTERFACE3		ILI9320_REG(0x93)
+#define ILI9320_INTERFACE4		ILI9320_REG(0x95)
+#define ILI9320_INTERFACE5		ILI9320_REG(0x97)
+#define ILI9320_INTERFACE6		ILI9320_REG(0x98)
+
+/* Register contents definitions. */
+
+#define ILI9320_OSCILATION_OSC		(1 << 0)
+
+#define ILI9320_DRIVER_SS		(1 << 8)
+#define ILI9320_DRIVER_SM		(1 << 10)
+
+#define ILI9320_DRIVEWAVE_EOR		(1 << 8)
+#define ILI9320_DRIVEWAVE_BC		(1 << 9)
+#define ILI9320_DRIVEWAVE_MUSTSET	(1 << 10)
+
+#define ILI9320_ENTRYMODE_AM		(1 << 3)
+#define ILI9320_ENTRYMODE_ID(x)		((x) << 4)
+#define ILI9320_ENTRYMODE_ORG		(1 << 7)
+#define ILI9320_ENTRYMODE_HWM		(1 << 8)
+#define ILI9320_ENTRYMODE_BGR		(1 << 12)
+#define ILI9320_ENTRYMODE_DFM		(1 << 14)
+#define ILI9320_ENTRYMODE_TRI		(1 << 15)
+
+
+#define ILI9320_RESIZING_RSZ(x)		((x) << 0)
+#define ILI9320_RESIZING_RCH(x)		((x) << 4)
+#define ILI9320_RESIZING_RCV(x)		((x) << 8)
+
+
+#define ILI9320_DISPLAY1_D(x)		((x) << 0)
+#define ILI9320_DISPLAY1_CL		(1 << 3)
+#define ILI9320_DISPLAY1_DTE		(1 << 4)
+#define ILI9320_DISPLAY1_GON		(1 << 5)
+#define ILI9320_DISPLAY1_BASEE		(1 << 8)
+#define ILI9320_DISPLAY1_PTDE(x)	((x) << 12)
+
+
+#define ILI9320_DISPLAY2_BP(x)		((x) << 0)
+#define ILI9320_DISPLAY2_FP(x)		((x) << 8)
+
+
+#define ILI9320_RGBIF1_RIM_RGB18	(0 << 0)
+#define ILI9320_RGBIF1_RIM_RGB16	(1 << 0)
+#define ILI9320_RGBIF1_RIM_RGB6		(2 << 0)
+
+#define ILI9320_RGBIF1_CLK_INT		(0 << 4)
+#define ILI9320_RGBIF1_CLK_RGBIF	(1 << 4)
+#define ILI9320_RGBIF1_CLK_VSYNC	(2 << 4)
+
+#define ILI9320_RGBIF1_RM		(1 << 8)
+
+#define ILI9320_RGBIF1_ENC_FRAMES(x)	(((x) - 1)<< 13)
+
+#define ILI9320_RGBIF2_DPL		(1 << 0)
+#define ILI9320_RGBIF2_EPL		(1 << 1)
+#define ILI9320_RGBIF2_HSPL		(1 << 3)
+#define ILI9320_RGBIF2_VSPL		(1 << 4)
+
+
+#define ILI9320_POWER1_SLP		(1 << 1)
+#define ILI9320_POWER1_DSTB		(1 << 2)
+#define ILI9320_POWER1_AP(x)		((x) << 4)
+#define ILI9320_POWER1_APE		(1 << 7)
+#define ILI9320_POWER1_BT(x)		((x) << 8)
+#define ILI9320_POWER1_SAP		(1 << 12)
+
+
+#define ILI9320_POWER2_VC(x)		((x) << 0)
+#define ILI9320_POWER2_DC0(x)		((x) << 4)
+#define ILI9320_POWER2_DC1(x)		((x) << 8)
+
+
+#define ILI9320_POWER3_VRH(x)		((x) << 0)
+#define ILI9320_POWER3_PON		(1 << 4)
+#define ILI9320_POWER3_VCMR		(1 << 8)
+
+
+#define ILI9320_POWER4_VREOUT(x)	((x) << 8)
+
+
+#define ILI9320_DRIVER2_SCNL(x)		((x) << 0)
+#define ILI9320_DRIVER2_NL(x)		((x) << 8)
+#define ILI9320_DRIVER2_GS		(1 << 15)
+
+
+#define ILI9320_BASEIMAGE_REV		(1 << 0)
+#define ILI9320_BASEIMAGE_VLE		(1 << 1)
+#define ILI9320_BASEIMAGE_NDL		(1 << 2)
+
+
+#define ILI9320_INTERFACE4_RTNE(x)	(x)
+#define ILI9320_INTERFACE4_DIVE(x)	((x) << 8)
+
+/* SPI interface definitions */
+
+#define ILI9320_SPI_IDCODE		(0x70)
+#define ILI9320_SPI_ID(x)		((x) << 2)
+#define ILI9320_SPI_READ		(0x01)
+#define ILI9320_SPI_WRITE		(0x00)
+#define ILI9320_SPI_DATA		(0x02)
+#define ILI9320_SPI_INDEX		(0x00)
+
+/* platform data to pass configuration from lcd */
+
+enum ili9320_suspend {
+	ILI9320_SUSPEND_OFF,
+	ILI9320_SUSPEND_DEEP,
+};
+
+struct ili9320_platdata {
+	unsigned short	hsize;
+	unsigned short	vsize;
+
+	enum ili9320_suspend suspend;
+
+	/* set the reset line, 0 = reset asserted, 1 = normal */
+	void		(*reset)(unsigned int val);
+
+	unsigned short	entry_mode;
+	unsigned short	display2;
+	unsigned short	display3;
+	unsigned short	display4;
+	unsigned short	rgb_if1;
+	unsigned short	rgb_if2;
+	unsigned short	interface2;
+	unsigned short	interface3;
+	unsigned short	interface4;
+	unsigned short	interface5;
+	unsigned short	interface6;
+};
+
-- 
GitLab


From 0c531360ed504aa0ce995fcb8ef08e82b6534d0b Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:31:38 -0700
Subject: [PATCH 420/853] lcd: add lcd_device to check_fb() entry in lcd_ops

Add the lcd_device being checked to the check_fb entry of lcd_ops.  This
ensures that any driver using this to check against its own state can do
so, and also makes all the calls in lcd_ops more orthogonal in their
arguments.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/lcd.c    | 2 +-
 drivers/video/bf54x-lq043fb.c    | 2 +-
 drivers/video/bfin-t350mcqb-fb.c | 2 +-
 include/linux/lcd.h              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c
index 299fd318dd4..b15b2b84a6f 100644
--- a/drivers/video/backlight/lcd.c
+++ b/drivers/video/backlight/lcd.c
@@ -33,7 +33,7 @@ static int fb_notifier_callback(struct notifier_block *self,
 	ld = container_of(self, struct lcd_device, fb_notif);
 	mutex_lock(&ld->ops_lock);
 	if (ld->ops)
-		if (!ld->ops->check_fb || ld->ops->check_fb(evdata->info))
+		if (!ld->ops->check_fb || ld->ops->check_fb(ld, evdata->info))
 			ld->ops->set_power(ld, *(int *)evdata->data);
 	mutex_unlock(&ld->ops_lock);
 	return 0;
diff --git a/drivers/video/bf54x-lq043fb.c b/drivers/video/bf54x-lq043fb.c
index 49834a67a62..940467aed13 100644
--- a/drivers/video/bf54x-lq043fb.c
+++ b/drivers/video/bf54x-lq043fb.c
@@ -478,7 +478,7 @@ static int bfin_lcd_set_contrast(struct lcd_device *dev, int contrast)
 	return 0;
 }
 
-static int bfin_lcd_check_fb(struct fb_info *fi)
+static int bfin_lcd_check_fb(struct lcd_device *dev, struct fb_info *fi)
 {
 	if (!fi || (fi == &bfin_bf54x_fb))
 		return 1;
diff --git a/drivers/video/bfin-t350mcqb-fb.c b/drivers/video/bfin-t350mcqb-fb.c
index 135d6dd7e67..7d1b819e501 100644
--- a/drivers/video/bfin-t350mcqb-fb.c
+++ b/drivers/video/bfin-t350mcqb-fb.c
@@ -396,7 +396,7 @@ static int bfin_lcd_set_contrast(struct lcd_device *dev, int contrast)
 	return 0;
 }
 
-static int bfin_lcd_check_fb(struct fb_info *fi)
+static int bfin_lcd_check_fb(struct lcd_device *dev, struct fb_info *fi)
 {
 	if (!fi || (fi == &bfin_t350mcqb_fb))
 		return 1;
diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index 1d379787f2e..173febac665 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -47,7 +47,7 @@ struct lcd_ops {
         int (*set_contrast)(struct lcd_device *, int contrast);
 	/* Check if given framebuffer device is the one LCD is bound to;
 	   return 0 if not, !=0 if it is. If NULL, lcd always matches the fb. */
-	int (*check_fb)(struct fb_info *);
+	int (*check_fb)(struct lcd_device *, struct fb_info *);
 };
 
 struct lcd_device {
-- 
GitLab


From c25826a7cf1c61b5c6e6db8365172eb97ef39ef3 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:31:38 -0700
Subject: [PATCH 421/853] lcd: add platform_lcd driver

Add a platform_lcd driver to allow boards with simple lcd power controls
to register themselves easily.

[akpm@linux-foundation.org: build fix]
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/Kconfig        |   7 +
 drivers/video/backlight/Makefile       |   1 +
 drivers/video/backlight/platform_lcd.c | 172 +++++++++++++++++++++++++
 include/video/platform_lcd.h           |  21 +++
 4 files changed, 201 insertions(+)
 create mode 100644 drivers/video/backlight/platform_lcd.c
 create mode 100644 include/video/platform_lcd.h

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index a5b3a92ffdc..b289e197e55 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -53,6 +53,13 @@ config LCD_VGG2432A4
 	  If you have a VGG2432A4 panel based on the ILI9320 controller chip
 	  then say y to include a power driver for it.
 
+config LCD_PLATFORM
+	tristate "Platform LCD controls"
+	depends on LCD_CLASS_DEVICE
+	help
+	  This driver provides a platform-device registered LCD power
+	  control interface.
+
 #
 # Backlight
 #
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 366d84e380c..7d31c14088a 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -3,6 +3,7 @@
 obj-$(CONFIG_LCD_CLASS_DEVICE)     += lcd.o
 obj-$(CONFIG_LCD_LTV350QV)	   += ltv350qv.o
 obj-$(CONFIG_LCD_ILI9320)	   += ili9320.o
+obj-$(CONFIG_LCD_PLATFORM)	   += platform_lcd.o
 obj-$(CONFIG_LCD_VGG2432A4)	   += vgg2432a4.o
 
 obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o
diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c
new file mode 100644
index 00000000000..72d44dbfce8
--- /dev/null
+++ b/drivers/video/backlight/platform_lcd.c
@@ -0,0 +1,172 @@
+/* drivers/video/backlight/platform_lcd.c
+ *
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * Generic platform-device LCD power control interface.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+*/
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/fb.h>
+#include <linux/backlight.h>
+#include <linux/lcd.h>
+
+#include <video/platform_lcd.h>
+
+struct platform_lcd {
+	struct device		*us;
+	struct lcd_device	*lcd;
+	struct plat_lcd_data	*pdata;
+
+	unsigned int		 power;
+	unsigned int		 suspended : 1;
+};
+
+static inline struct platform_lcd *to_our_lcd(struct lcd_device *lcd)
+{
+	return lcd_get_data(lcd);
+}
+
+static int platform_lcd_get_power(struct lcd_device *lcd)
+{
+	struct platform_lcd *plcd = to_our_lcd(lcd);
+
+	return plcd->power;
+}
+
+static int platform_lcd_set_power(struct lcd_device *lcd, int power)
+{
+	struct platform_lcd *plcd = to_our_lcd(lcd);
+	int lcd_power = 1;
+
+	if (power == FB_BLANK_POWERDOWN || plcd->suspended)
+		lcd_power = 0;
+
+	plcd->pdata->set_power(plcd->pdata, lcd_power);
+	plcd->power = power;
+
+	return 0;
+}
+
+static int platform_lcd_match(struct lcd_device *lcd, struct fb_info *info)
+{
+	struct platform_lcd *plcd = to_our_lcd(lcd);
+	struct plat_lcd_data *pdata = plcd->pdata;
+
+	if (pdata->match_fb)
+		return pdata->match_fb(pdata, info);
+
+	return plcd->us->parent == info->device;
+}
+
+static struct lcd_ops platform_lcd_ops = {
+	.get_power	= platform_lcd_get_power,
+	.set_power	= platform_lcd_set_power,
+	.check_fb	= platform_lcd_match,
+};
+
+static int __devinit platform_lcd_probe(struct platform_device *pdev)
+{
+	struct plat_lcd_data *pdata;
+	struct platform_lcd *plcd;
+	struct device *dev = &pdev->dev;
+	int err;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(dev, "no platform data supplied\n");
+		return -EINVAL;
+	}
+
+	plcd = kzalloc(sizeof(struct platform_lcd), GFP_KERNEL);
+	if (!plcd) {
+		dev_err(dev, "no memory for state\n");
+		return -ENOMEM;
+	}
+
+	plcd->us = dev;
+	plcd->pdata = pdata;
+	plcd->lcd = lcd_device_register("platform-lcd", dev,
+					plcd, &platform_lcd_ops);
+	if (IS_ERR(plcd->lcd)) {
+		dev_err(dev, "cannot register lcd device\n");
+		err = PTR_ERR(plcd->lcd);
+		goto err_mem;
+	}
+
+	platform_set_drvdata(pdev, plcd);
+	return 0;
+
+ err_mem:
+	kfree(plcd);
+	return err;
+}
+
+static int __devexit platform_lcd_remove(struct platform_device *pdev)
+{
+	struct platform_lcd *plcd = platform_get_drvdata(pdev);
+
+	lcd_device_unregister(plcd->lcd);
+	kfree(plcd);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int platform_lcd_suspend(struct platform_device *pdev, pm_message_t st)
+{
+	struct platform_lcd *plcd = platform_get_drvdata(pdev);
+
+	plcd->suspended = 1;
+	platform_lcd_set_power(plcd->lcd, plcd->power);
+
+	return 0;
+}
+
+static int platform_lcd_resume(struct platform_device *pdev)
+{
+	struct platform_lcd *plcd = platform_get_drvdata(pdev);
+
+	plcd->suspended = 0;
+	platform_lcd_set_power(plcd->lcd, plcd->power);
+
+	return 0;
+}
+#else
+#define platform_lcd_suspend NULL
+#define platform_lcd_resume NULL
+#endif
+
+static struct platform_driver platform_lcd_driver = {
+	.driver		= {
+		.name	= "platform-lcd",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= platform_lcd_probe,
+	.remove		= __devexit_p(platform_lcd_remove),
+	.suspend        = platform_lcd_suspend,
+	.resume         = platform_lcd_resume,
+};
+
+static int __init platform_lcd_init(void)
+{
+	return platform_driver_register(&platform_lcd_driver);
+}
+
+static void __exit platform_lcd_cleanup(void)
+{
+	platform_driver_unregister(&platform_lcd_driver);
+}
+
+module_init(platform_lcd_init);
+module_exit(platform_lcd_cleanup);
+
+MODULE_AUTHOR("Ben Dooks <ben-linux@fluff.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:platform-lcd");
diff --git a/include/video/platform_lcd.h b/include/video/platform_lcd.h
new file mode 100644
index 00000000000..ad3bdfe743b
--- /dev/null
+++ b/include/video/platform_lcd.h
@@ -0,0 +1,21 @@
+/* include/video/platform_lcd.h
+ *
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * Generic platform-device LCD power control interface.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+*/
+
+struct plat_lcd_data;
+struct fb_info;
+
+struct plat_lcd_data {
+	void	(*set_power)(struct plat_lcd_data *, unsigned int power);
+	int	(*match_fb)(struct plat_lcd_data *, struct fb_info *);
+};
+
-- 
GitLab


From 6b51d51a9d24719f905ba9657b29e04efd82a7ea Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Wed, 23 Jul 2008 21:31:39 -0700
Subject: [PATCH 422/853] fsl-diu-fb: update Freescale DIU driver to use
 alloc_pages_exact()

Update the Freescale DIU driver to use alloc_pages_exact() to allocate a
DMA buffer.  This also eliminates the rheap-based memory allocator.  We
can do this now because commit 6ccf61f9 allows us to allocate 8MB
physically-contiguous memory blocks.

[akpm@linux-foundation.org: fix printk warnings]
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fsl-diu-fb.c | 60 ++++++++++++++------------------------
 1 file changed, 22 insertions(+), 38 deletions(-)

diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c
index 09d7e22c6fe..9cd36c223d3 100644
--- a/drivers/video/fsl-diu-fb.c
+++ b/drivers/video/fsl-diu-fb.c
@@ -279,58 +279,42 @@ static struct diu_hw dr = {
 
 static struct diu_pool pool;
 
-/*	To allocate memory for framebuffer. First try __get_free_pages(). If it
- *	fails, try rh_alloc. The reason is __get_free_pages() cannot allocate
- *	very large memory (more than 4MB). We don't want to allocate all memory
- *	in rheap since small memory allocation/deallocation will fragment the
- *	rheap and make the furture large allocation fail.
+/**
+ * fsl_diu_alloc - allocate memory for the DIU
+ * @size: number of bytes to allocate
+ * @phys: returned physical address of memory
+ *
+ * This function allocates a physically-contiguous block of memory.
  */
-
-static void *fsl_diu_alloc(unsigned long size, phys_addr_t *phys)
+static void *fsl_diu_alloc(size_t size, phys_addr_t *phys)
 {
 	void *virt;
 
-	pr_debug("size=%lu\n", size);
+	pr_debug("size=%zu\n", size);
 
-	virt = (void *)__get_free_pages(GFP_DMA | __GFP_ZERO, get_order(size));
+	virt = alloc_pages_exact(size, GFP_DMA | __GFP_ZERO);
 	if (virt) {
 		*phys = virt_to_phys(virt);
-		pr_debug("virt %p, phys=%llx\n", virt, (uint64_t) *phys);
-		return virt;
-	}
-	if (!diu_ops.diu_mem) {
-		printk(KERN_INFO "%s: no diu_mem."
-			" To reserve more memory, put 'diufb=15M' "
-			"in the command line\n", __func__);
-		return NULL;
-	}
-
-	virt = (void *)rh_alloc(&diu_ops.diu_rh_info, size, "DIU");
-	if (virt) {
-		*phys = virt_to_bus(virt);
-		memset(virt, 0, size);
+		pr_debug("virt=%p phys=%llx\n", virt,
+			(unsigned long long)*phys);
 	}
 
-	pr_debug("rh virt=%p phys=%llx\n", virt, (unsigned long long)*phys);
-
 	return virt;
 }
 
-static void fsl_diu_free(void *p, unsigned long size)
+/**
+ * fsl_diu_free - release DIU memory
+ * @virt: pointer returned by fsl_diu_alloc()
+ * @size: number of bytes allocated by fsl_diu_alloc()
+ *
+ * This function releases memory allocated by fsl_diu_alloc().
+ */
+static void fsl_diu_free(void *virt, size_t size)
 {
-	pr_debug("p=%p size=%lu\n", p, size);
+	pr_debug("virt=%p size=%zu\n", virt, size);
 
-	if (!p)
-		return;
-
-	if ((p >= diu_ops.diu_mem) &&
-	    (p < (diu_ops.diu_mem + diu_ops.diu_size))) {
-		pr_debug("rh\n");
-		rh_free(&diu_ops.diu_rh_info, (unsigned long) p);
-	} else {
-		pr_debug("dma\n");
-		free_pages((unsigned long)p, get_order(size));
-	}
+	if (virt && size)
+		free_pages_exact(virt, size);
 }
 
 static int fsl_diu_enable_panel(struct fb_info *info)
-- 
GitLab


From 5abe3b4063f16245b8fafbff37bd93814eb8e363 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Date: Wed, 23 Jul 2008 21:31:40 -0700
Subject: [PATCH 423/853] fbdev: add new Cobalt LCD framebuffer driver

Add new Cobalt LCD framebuffer driver.

[akpm@linux-foundation.org: fix build]
Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/Kconfig        |   4 +
 drivers/video/Makefile       |   1 +
 drivers/video/cobalt_lcdfb.c | 371 +++++++++++++++++++++++++++++++++++
 3 files changed, 376 insertions(+)
 create mode 100644 drivers/video/cobalt_lcdfb.c

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 80fa066416b..c1b7db84341 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1987,6 +1987,10 @@ config FB_AM200EPD
          This enables support for the Metronome display controller used on
          the E-Ink AM-200 EPD devkit.
 
+config FB_COBALT
+	tristate "Cobalt server LCD frame buffer support"
+	depends on FB && MIPS_COBALT
+
 config FB_VIRTUAL
 	tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
 	depends on FB
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 4809f8b9bb2..35803259775 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -109,6 +109,7 @@ obj-$(CONFIG_FB_S1D13XXX)	  += s1d13xxxfb.o
 obj-$(CONFIG_FB_IMX)              += imxfb.o
 obj-$(CONFIG_FB_S3C2410)	  += s3c2410fb.o
 obj-$(CONFIG_FB_FSL_DIU)	  += fsl-diu-fb.o
+obj-$(CONFIG_FB_COBALT)           += cobalt_lcdfb.o
 obj-$(CONFIG_FB_PNX4008_DUM)	  += pnx4008/
 obj-$(CONFIG_FB_PNX4008_DUM_RGB)  += pnx4008/
 obj-$(CONFIG_FB_IBM_GXT4500)	  += gxt4500.o
diff --git a/drivers/video/cobalt_lcdfb.c b/drivers/video/cobalt_lcdfb.c
new file mode 100644
index 00000000000..7bad24ed04e
--- /dev/null
+++ b/drivers/video/cobalt_lcdfb.c
@@ -0,0 +1,371 @@
+/*
+ *  Cobalt server LCD frame buffer driver.
+ *
+ *  Copyright (C) 2008  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/uaccess.h>
+#include <linux/platform_device.h>
+
+/*
+ * Cursor position address
+ * \X  0    1    2  ...  14   15
+ * Y+----+----+----+---+----+----+
+ * 0|0x00|0x01|0x02|...|0x0e|0x0f|
+ *  +----+----+----+---+----+----+
+ * 1|0x40|0x41|0x42|...|0x4e|0x4f|
+ *  +----+----+----+---+----+----+
+ */
+#define LCD_DATA_REG_OFFSET	0x10
+#define LCD_XRES_MAX		16
+#define LCD_YRES_MAX		2
+#define LCD_CHARS_MAX		32
+
+#define LCD_CLEAR		0x01
+#define LCD_CURSOR_MOVE_HOME	0x02
+#define LCD_RESET		0x06
+#define LCD_OFF			0x08
+#define LCD_CURSOR_OFF		0x0c
+#define LCD_CURSOR_BLINK_OFF	0x0e
+#define LCD_CURSOR_ON		0x0f
+#define LCD_ON			LCD_CURSOR_ON
+#define LCD_CURSOR_MOVE_LEFT	0x10
+#define LCD_CURSOR_MOVE_RIGHT	0x14
+#define LCD_DISPLAY_LEFT	0x18
+#define LCD_DISPLAY_RIGHT	0x1c
+#define LCD_PRERESET		0x3f	/* execute 4 times continuously */
+#define LCD_BUSY		0x80
+
+#define LCD_GRAPHIC_MODE	0x40
+#define LCD_TEXT_MODE		0x80
+#define LCD_CUR_POS_MASK	0x7f
+
+#define LCD_CUR_POS(x)		((x) & LCD_CUR_POS_MASK)
+#define LCD_TEXT_POS(x)		((x) | LCD_TEXT_MODE)
+
+static inline void lcd_write_control(struct fb_info *info, u8 control)
+{
+	writel((u32)control << 24, info->screen_base);
+}
+
+static inline u8 lcd_read_control(struct fb_info *info)
+{
+	return readl(info->screen_base) >> 24;
+}
+
+static inline void lcd_write_data(struct fb_info *info, u8 data)
+{
+	writel((u32)data << 24, info->screen_base + LCD_DATA_REG_OFFSET);
+}
+
+static inline u8 lcd_read_data(struct fb_info *info)
+{
+	return readl(info->screen_base + LCD_DATA_REG_OFFSET) >> 24;
+}
+
+static int lcd_busy_wait(struct fb_info *info)
+{
+	u8 val = 0;
+	int timeout = 10, retval = 0;	/* poll at most 10 times */
+
+	do {	/* poll the busy flag, sleeping 1 ms between reads */
+		val = lcd_read_control(info);
+		val &= LCD_BUSY;
+		if (val != LCD_BUSY)
+			break;
+
+		if (msleep_interruptible(1))	/* nonzero: sleep cut short */
+			return -EINTR;
+
+		timeout--;
+	} while (timeout);
+
+	if (val == LCD_BUSY)	/* still busy after the last poll */
+		retval = -EBUSY;
+
+	return retval;
+}
+
+static void lcd_clear(struct fb_info *info)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {	/* LCD_PRERESET must be sent 4 times */
+		udelay(150);
+
+		lcd_write_control(info, LCD_PRERESET);
+	}
+
+	udelay(150);	/* fixed 150 us gaps; the busy flag is not polled */
+
+	lcd_write_control(info, LCD_CLEAR);
+
+	udelay(150);
+
+	lcd_write_control(info, LCD_RESET);
+}
+
+static struct fb_fix_screeninfo cobalt_lcdfb_fix __initdata = {
+	.id		= "cobalt-lcd",
+	.type		= FB_TYPE_TEXT,
+	.type_aux	= FB_AUX_TEXT_MDA,
+	.visual		= FB_VISUAL_MONO01,
+	.line_length	= LCD_XRES_MAX,
+	.accel		= FB_ACCEL_NONE,
+};
+
+/* Read up to @count characters from the LCD, starting at offset *@ppos. */
+static ssize_t cobalt_lcdfb_read(struct fb_info *info, char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	char src[LCD_CHARS_MAX];
+	unsigned long pos, addr;
+	int len, retval = 0;
+
+	pos = *ppos;
+	if (pos >= LCD_CHARS_MAX || count == 0)
+		return 0;
+
+	if (count > LCD_CHARS_MAX)
+		count = LCD_CHARS_MAX;
+
+	if (pos + count > LCD_CHARS_MAX)
+		count = LCD_CHARS_MAX - pos;
+
+	for (len = 0; len < count; len++, pos++) {
+		/* Offsets 16..31 are row 1, which starts at address 0x40. */
+		addr = (pos / LCD_XRES_MAX) * 0x40 + pos % LCD_XRES_MAX;
+
+		retval = lcd_busy_wait(info);
+		if (retval < 0)
+			break;
+
+		lcd_write_control(info, LCD_TEXT_POS(addr));
+
+		retval = lcd_busy_wait(info);
+		if (retval < 0)
+			break;
+
+		src[len] = lcd_read_data(info);
+	}
+
+	if (retval < 0 && signal_pending(current))
+		return -ERESTARTSYS;
+	/* A busy timeout is not an error here: return the short count. */
+	if (copy_to_user(buf, src, len))
+		return -EFAULT;
+
+	*ppos += len;
+
+	return len;
+}
+
+/* Write up to @count characters to the LCD, starting at offset *@ppos. */
+static ssize_t cobalt_lcdfb_write(struct fb_info *info, const char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	char dst[LCD_CHARS_MAX];
+	unsigned long pos, addr;
+	int len, retval = 0;
+
+	pos = *ppos;
+	if (pos >= LCD_CHARS_MAX || count == 0)
+		return 0;
+
+	if (count > LCD_CHARS_MAX)
+		count = LCD_CHARS_MAX;
+
+	if (pos + count > LCD_CHARS_MAX)
+		count = LCD_CHARS_MAX - pos;
+
+	if (copy_from_user(dst, buf, count))
+		return -EFAULT;
+
+	for (len = 0; len < count; len++, pos++) {
+		/* Offsets 16..31 are row 1, which starts at address 0x40. */
+		addr = (pos / LCD_XRES_MAX) * 0x40 + pos % LCD_XRES_MAX;
+
+		retval = lcd_busy_wait(info);
+		if (retval < 0)
+			break;
+
+		lcd_write_control(info, LCD_TEXT_POS(addr));
+
+		retval = lcd_busy_wait(info);
+		if (retval < 0)
+			break;
+
+		lcd_write_data(info, dst[len]);
+	}
+
+	if (retval < 0 && signal_pending(current))
+		return -ERESTARTSYS;
+
+	*ppos += len;
+
+	return len;
+}
+
+static int cobalt_lcdfb_blank(int blank_mode, struct fb_info *info)
+{
+	int retval;
+
+	retval = lcd_busy_wait(info);	/* controller must be idle first */
+	if (retval < 0)
+		return retval;
+
+	switch (blank_mode) {
+	case FB_BLANK_UNBLANK:
+		lcd_write_control(info, LCD_ON);
+		break;
+	default:	/* every other blank level turns the display off */
+		lcd_write_control(info, LCD_OFF);
+		break;
+	}
+
+	return 0;
+}
+
+/* Move the hardware cursor to (dx, dy), then switch it on or off. */
+static int cobalt_lcdfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
+{
+	u32 x, y;
+	int retval;
+
+	switch (cursor->set) {
+	case FB_CUR_SETPOS:
+		x = cursor->image.dx;
+		y = cursor->image.dy;
+		if (x >= LCD_XRES_MAX || y >= LCD_YRES_MAX)
+			return -EINVAL;
+
+		retval = lcd_busy_wait(info);
+		if (retval < 0)
+			return retval;
+		/* Row 1 starts at controller address 0x40, not at 0x10. */
+		lcd_write_control(info, LCD_TEXT_POS(0x40 * y + x));
+		break;
+	default:	/* only position updates are supported */
+		return -EINVAL;
+	}
+
+	retval = lcd_busy_wait(info);
+	if (retval < 0)
+		return retval;
+
+	if (cursor->enable)
+		lcd_write_control(info, LCD_CURSOR_ON);
+	else
+		lcd_write_control(info, LCD_CURSOR_OFF);
+
+	return 0;
+}
+
+static struct fb_ops cobalt_lcd_fbops = {
+	.owner		= THIS_MODULE,
+	.fb_read	= cobalt_lcdfb_read,
+	.fb_write	= cobalt_lcdfb_write,
+	.fb_blank	= cobalt_lcdfb_blank,
+	.fb_cursor	= cobalt_lcdfb_cursor,
+};
+
+static int __init cobalt_lcdfb_probe(struct platform_device *dev)
+{
+	struct fb_info *info;
+	struct resource *res;
+	int retval;
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!res)	/* checked before allocating: nothing to undo */
+		return -EBUSY;
+
+	info = framebuffer_alloc(0, &dev->dev);
+	if (!info)
+		return -ENOMEM;
+
+	info->screen_size = res->end - res->start + 1;
+	info->screen_base = ioremap(res->start, info->screen_size);
+	if (!info->screen_base) {	/* all register I/O needs this */
+		framebuffer_release(info);
+		return -ENOMEM;
+	}
+
+	info->fbops = &cobalt_lcd_fbops;
+	info->fix = cobalt_lcdfb_fix;
+	info->fix.smem_start = res->start;
+	info->fix.smem_len = info->screen_size;
+	info->pseudo_palette = NULL;
+	info->par = NULL;
+	info->flags = FBINFO_DEFAULT;
+
+	retval = register_framebuffer(info);
+	if (retval < 0) {
+		iounmap(info->screen_base);
+		framebuffer_release(info);
+		return retval;
+	}
+
+	platform_set_drvdata(dev, info);
+	lcd_clear(info);
+	printk(KERN_INFO "fb%d: Cobalt server LCD frame buffer device\n",
+		info->node);
+	return 0;
+}
+
+static int __devexit cobalt_lcdfb_remove(struct platform_device *dev)
+{
+	struct fb_info *info = platform_get_drvdata(dev);
+
+	if (info) {
+		/* Unregister first so nobody uses the mapping we tear down. */
+		unregister_framebuffer(info);
+		iounmap(info->screen_base);
+		framebuffer_release(info);
+	}
+
+	return 0;
+}
+
+static struct platform_driver cobalt_lcdfb_driver = {
+	.probe	= cobalt_lcdfb_probe,
+	.remove	= __devexit_p(cobalt_lcdfb_remove),
+	.driver	= {
+		.name	= "cobalt-lcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init cobalt_lcdfb_init(void)
+{
+	return platform_driver_register(&cobalt_lcdfb_driver);
+}
+
+static void __exit cobalt_lcdfb_exit(void)
+{
+	platform_driver_unregister(&cobalt_lcdfb_driver);
+}
+
+module_init(cobalt_lcdfb_init);
+module_exit(cobalt_lcdfb_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Yoichi Yuasa");
+MODULE_DESCRIPTION("Cobalt server LCD frame buffer driver");
-- 
GitLab


From 18b095d4b847bb08bf8a1bace7711a93d27732c0 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Date: Wed, 23 Jul 2008 21:31:41 -0700
Subject: [PATCH 424/853] drivers/char: remove old broken Cobalt LCD driver

Remove old broken Cobalt LCD driver.

Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/Kconfig  |   7 -
 drivers/char/Makefile |   1 -
 drivers/char/lcd.c    | 516 ------------------------------------------
 drivers/char/lcd.h    | 154 -------------
 4 files changed, 678 deletions(-)
 delete mode 100644 drivers/char/lcd.c
 delete mode 100644 drivers/char/lcd.h

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index e0bbbfb6a36..67b07576f8b 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -857,13 +857,6 @@ config DS1302
 
 endif # RTC_LIB
 
-config COBALT_LCD
-	bool "Support for Cobalt LCD"
-	depends on MIPS_COBALT
-	help
-	  This option enables support for the LCD display and buttons found
-	  on Cobalt systems through a misc device.
-
 config DTLK
 	tristate "Double Talk PC internal speech card support"
 	depends on ISA
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index dc5a327d72d..4b6e736cfa0 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -88,7 +88,6 @@ obj-$(CONFIG_TOSHIBA)		+= toshiba.o
 obj-$(CONFIG_I8K)		+= i8k.o
 obj-$(CONFIG_DS1620)		+= ds1620.o
 obj-$(CONFIG_HW_RANDOM)		+= hw_random/
-obj-$(CONFIG_COBALT_LCD)	+= lcd.o
 obj-$(CONFIG_PPDEV)		+= ppdev.o
 obj-$(CONFIG_NWBUTTON)		+= nwbutton.o
 obj-$(CONFIG_NWFLASH)		+= nwflash.o
diff --git a/drivers/char/lcd.c b/drivers/char/lcd.c
deleted file mode 100644
index 1c29b20e4f4..00000000000
--- a/drivers/char/lcd.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * LCD, LED and Button interface for Cobalt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1996, 1997 by Andrew Bose
- *
- * Linux kernel version history:
- *       March 2001: Ported from 2.0.34  by Liam Davies
- *
- */
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/miscdevice.h>
-#include <linux/slab.h>
-#include <linux/ioport.h>
-#include <linux/fcntl.h>
-#include <linux/mc146818rtc.h>
-#include <linux/netdevice.h>
-#include <linux/sched.h>
-#include <linux/smp_lock.h>
-#include <linux/delay.h>
-
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include "lcd.h"
-
-static int lcd_ioctl(struct inode *inode, struct file *file,
-		     unsigned int cmd, unsigned long arg);
-
-static unsigned int lcd_present = 1;
-
-/* used in arch/mips/cobalt/reset.c */
-int led_state = 0;
-
-#if defined(CONFIG_TULIP) && 0
-
-#define MAX_INTERFACES	8
-static linkcheck_func_t linkcheck_callbacks[MAX_INTERFACES];
-static void *linkcheck_cookies[MAX_INTERFACES];
-
-int lcd_register_linkcheck_func(int iface_num, void *func, void *cookie)
-{
-	if (iface_num < 0 ||
-	    iface_num >= MAX_INTERFACES ||
-	    linkcheck_callbacks[iface_num] != NULL)
-		return -1;
-	linkcheck_callbacks[iface_num] = (linkcheck_func_t) func;
-	linkcheck_cookies[iface_num] = cookie;
-	return 0;
-}
-#endif
-
-static int lcd_ioctl(struct inode *inode, struct file *file,
-		     unsigned int cmd, unsigned long arg)
-{
-	struct lcd_display button_display;
-	unsigned long address, a;
-
-	switch (cmd) {
-	case LCD_On:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x0F);
-		break;
-
-	case LCD_Off:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x08);
-		break;
-
-	case LCD_Reset:
-		udelay(150);
-		LCDWriteInst(0x3F);
-		udelay(150);
-		LCDWriteInst(0x3F);
-		udelay(150);
-		LCDWriteInst(0x3F);
-		udelay(150);
-		LCDWriteInst(0x3F);
-		udelay(150);
-		LCDWriteInst(0x01);
-		udelay(150);
-		LCDWriteInst(0x06);
-		break;
-
-	case LCD_Clear:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x01);
-		break;
-
-	case LCD_Cursor_Left:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x10);
-		break;
-
-	case LCD_Cursor_Right:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x14);
-		break;
-
-	case LCD_Cursor_Off:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x0C);
-		break;
-
-	case LCD_Cursor_On:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x0F);
-		break;
-
-	case LCD_Blink_Off:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x0E);
-		break;
-
-	case LCD_Get_Cursor_Pos:{
-			struct lcd_display display;
-
-			udelay(150);
-			BusyCheck();
-			display.cursor_address = (LCDReadInst);
-			display.cursor_address =
-			    (display.cursor_address & 0x07F);
-			if (copy_to_user
-			    ((struct lcd_display *) arg, &display,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-
-			break;
-		}
-
-
-	case LCD_Set_Cursor_Pos:{
-			struct lcd_display display;
-
-			if (copy_from_user
-			    (&display, (struct lcd_display *) arg,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-
-			a = (display.cursor_address | kLCD_Addr);
-
-			udelay(150);
-			BusyCheck();
-			LCDWriteInst(a);
-
-			break;
-		}
-
-	case LCD_Get_Cursor:{
-			struct lcd_display display;
-
-			udelay(150);
-			BusyCheck();
-			display.character = LCDReadData;
-
-			if (copy_to_user
-			    ((struct lcd_display *) arg, &display,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-			udelay(150);
-			BusyCheck();
-			LCDWriteInst(0x10);
-
-			break;
-		}
-
-	case LCD_Set_Cursor:{
-			struct lcd_display display;
-
-			if (copy_from_user
-			    (&display, (struct lcd_display *) arg,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-
-			udelay(150);
-			BusyCheck();
-			LCDWriteData(display.character);
-			udelay(150);
-			BusyCheck();
-			LCDWriteInst(0x10);
-
-			break;
-		}
-
-
-	case LCD_Disp_Left:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x18);
-		break;
-
-	case LCD_Disp_Right:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x1C);
-		break;
-
-	case LCD_Home:
-		udelay(150);
-		BusyCheck();
-		LCDWriteInst(0x02);
-		break;
-
-	case LCD_Write:{
-			struct lcd_display display;
-			unsigned int index;
-
-
-			if (copy_from_user
-			    (&display, (struct lcd_display *) arg,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-
-			udelay(150);
-			BusyCheck();
-			LCDWriteInst(0x80);
-			udelay(150);
-			BusyCheck();
-
-			for (index = 0; index < (display.size1); index++) {
-				udelay(150);
-				BusyCheck();
-				LCDWriteData(display.line1[index]);
-				BusyCheck();
-			}
-
-			udelay(150);
-			BusyCheck();
-			LCDWriteInst(0xC0);
-			udelay(150);
-			BusyCheck();
-			for (index = 0; index < (display.size2); index++) {
-				udelay(150);
-				BusyCheck();
-				LCDWriteData(display.line2[index]);
-			}
-
-			break;
-		}
-
-	case LCD_Read:{
-			struct lcd_display display;
-
-			BusyCheck();
-			for (address = kDD_R00; address <= kDD_R01;
-			     address++) {
-				a = (address | kLCD_Addr);
-
-				udelay(150);
-				BusyCheck();
-				LCDWriteInst(a);
-				udelay(150);
-				BusyCheck();
-				display.line1[address] = LCDReadData;
-			}
-
-			display.line1[0x27] = '\0';
-
-			for (address = kDD_R10; address <= kDD_R11;
-			     address++) {
-				a = (address | kLCD_Addr);
-
-				udelay(150);
-				BusyCheck();
-				LCDWriteInst(a);
-
-				udelay(150);
-				BusyCheck();
-				display.line2[address - 0x40] =
-				    LCDReadData;
-			}
-
-			display.line2[0x27] = '\0';
-
-			if (copy_to_user
-			    ((struct lcd_display *) arg, &display,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-			break;
-		}
-
-//  set all GPIO leds to led_display.leds
-
-	case LED_Set:{
-			struct lcd_display led_display;
-
-
-			if (copy_from_user
-			    (&led_display, (struct lcd_display *) arg,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-
-			led_state = led_display.leds;
-			LEDSet(led_state);
-
-			break;
-		}
-
-
-//  set only bit led_display.leds
-
-	case LED_Bit_Set:{
-			unsigned int i;
-			int bit = 1;
-			struct lcd_display led_display;
-
-
-			if (copy_from_user
-			    (&led_display, (struct lcd_display *) arg,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-
-			for (i = 0; i < (int) led_display.leds; i++) {
-				bit = 2 * bit;
-			}
-
-			led_state = led_state | bit;
-			LEDSet(led_state);
-			break;
-		}
-
-//  clear only bit led_display.leds
-
-	case LED_Bit_Clear:{
-			unsigned int i;
-			int bit = 1;
-			struct lcd_display led_display;
-
-
-			if (copy_from_user
-			    (&led_display, (struct lcd_display *) arg,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-
-			for (i = 0; i < (int) led_display.leds; i++) {
-				bit = 2 * bit;
-			}
-
-			led_state = led_state & ~bit;
-			LEDSet(led_state);
-			break;
-		}
-
-
-	case BUTTON_Read:{
-			button_display.buttons = GPIRead;
-			if (copy_to_user
-			    ((struct lcd_display *) arg, &button_display,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-			break;
-		}
-
-	case LINK_Check:{
-			button_display.buttons =
-			    *((volatile unsigned long *) (0xB0100060));
-			if (copy_to_user
-			    ((struct lcd_display *) arg, &button_display,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-			break;
-		}
-
-	case LINK_Check_2:{
-			int iface_num;
-
-			/* panel-utils should pass in the desired interface status is wanted for
-			 * in "buttons" of the structure.  We will set this to non-zero if the
-			 * link is in fact up for the requested interface.  --DaveM
-			 */
-			if (copy_from_user
-			    (&button_display, (struct lcd_display *) arg,
-			     sizeof(button_display)))
-				return -EFAULT;
-			iface_num = button_display.buttons;
-#if defined(CONFIG_TULIP) && 0
-			if (iface_num >= 0 &&
-			    iface_num < MAX_INTERFACES &&
-			    linkcheck_callbacks[iface_num] != NULL) {
-				button_display.buttons =
-				    linkcheck_callbacks[iface_num]
-				    (linkcheck_cookies[iface_num]);
-			} else
-#endif
-				button_display.buttons = 0;
-
-			if (__copy_to_user
-			    ((struct lcd_display *) arg, &button_display,
-			     sizeof(struct lcd_display)))
-				return -EFAULT;
-			break;
-		}
-
-	default:
-		return -EINVAL;
-
-	}
-
-	return 0;
-
-}
-
-static int lcd_open(struct inode *inode, struct file *file)
-{
-	cycle_kernel_lock();
-
-	if (!lcd_present)
-		return -ENXIO;
-	else
-		return 0;
-}
-
-/* Only RESET or NEXT counts as button pressed */
-
-static inline int button_pressed(void)
-{
-	unsigned long buttons = GPIRead;
-
-	if ((buttons == BUTTON_Next) || (buttons == BUTTON_Next_B)
-	    || (buttons == BUTTON_Reset_B))
-		return buttons;
-	return 0;
-}
-
-/* LED daemon sits on this and we wake him up once a key is pressed. */
-
-static int lcd_waiters = 0;
-
-static ssize_t lcd_read(struct file *file, char *buf,
-		     size_t count, loff_t *ofs)
-{
-	long buttons_now;
-
-	if (lcd_waiters > 0)
-		return -EINVAL;
-
-	lcd_waiters++;
-	while (((buttons_now = (long) button_pressed()) == 0) &&
-	       !(signal_pending(current))) {
-		msleep_interruptible(2000);
-	}
-	lcd_waiters--;
-
-	if (signal_pending(current))
-		return -ERESTARTSYS;
-	return buttons_now;
-}
-
-/*
- *	The various file operations we support.
- */
-
-static const struct file_operations lcd_fops = {
-	.read = lcd_read,
-	.ioctl = lcd_ioctl,
-	.open = lcd_open,
-};
-
-static struct miscdevice lcd_dev = {
-	MISC_DYNAMIC_MINOR,
-	"lcd",
-	&lcd_fops
-};
-
-static int lcd_init(void)
-{
-	int ret;
-	unsigned long data;
-
-	pr_info("%s\n", LCD_DRIVER);
-	ret = misc_register(&lcd_dev);
-	if (ret) {
-		printk(KERN_WARNING LCD "Unable to register misc device.\n");
-		return ret;
-	}
-
-	/* Check region? Naaah! Just snarf it up. */
-/*	request_region(RTC_PORT(0), RTC_IO_EXTENT, "lcd");*/
-
-	udelay(150);
-	data = LCDReadData;
-	if ((data & 0x000000FF) == (0x00)) {
-		lcd_present = 0;
-		pr_info(LCD "LCD Not Present\n");
-	} else {
-		lcd_present = 1;
-		WRITE_GAL(kGal_DevBank2PReg, kGal_DevBank2Cfg);
-		WRITE_GAL(kGal_DevBank3PReg, kGal_DevBank3Cfg);
-	}
-
-	return 0;
-}
-
-static void __exit lcd_exit(void)
-{
-	misc_deregister(&lcd_dev);
-}
-
-module_init(lcd_init);
-module_exit(lcd_exit);
-
-MODULE_AUTHOR("Andrew Bose");
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/lcd.h b/drivers/char/lcd.h
deleted file mode 100644
index 290b3ff23b0..00000000000
--- a/drivers/char/lcd.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * LED, LCD and Button panel driver for Cobalt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1996, 1997 by Andrew Bose
- *
- * Linux kernel version history:
- *       March 2001: Ported from 2.0.34  by Liam Davies
- *
- */
-
-// function headers
-
-#define LCD_CHARS_PER_LINE 40
-#define MAX_IDLE_TIME 120
-
-struct lcd_display {
-        unsigned buttons;
-        int size1;
-        int size2;
-        unsigned char line1[LCD_CHARS_PER_LINE];
-        unsigned char line2[LCD_CHARS_PER_LINE];
-        unsigned char cursor_address;
-        unsigned char character;
-        unsigned char leds;
-        unsigned char *RomImage;
-};
-
-
-
-#define LCD_DRIVER	"Cobalt LCD Driver v2.10"
-
-#define LCD		"lcd: "
-
-#define kLCD_IR		0x0F000000
-#define kLCD_DR		0x0F000010
-#define kGPI		0x0D000000
-#define kLED		0x0C000000
-
-#define kDD_R00         0x00
-#define kDD_R01         0x27
-#define kDD_R10         0x40
-#define kDD_R11         0x67
-
-#define kLCD_Addr       0x00000080
-
-#define LCDTimeoutValue	0xfff
-
-
-// Macros
-
-#define LCDWriteData(x)	outl((x << 24), kLCD_DR)
-#define LCDWriteInst(x)	outl((x << 24), kLCD_IR)
-
-#define LCDReadData	(inl(kLCD_DR) >> 24)
-#define LCDReadInst	(inl(kLCD_IR) >> 24)
-
-#define GPIRead		(inl(kGPI) >> 24)
-
-#define LEDSet(x)	outb((char)x, kLED)
-
-#define WRITE_GAL(x,y)	outl(y, 0x04000000 | (x))
-#define BusyCheck()	while ((LCDReadInst & 0x80) == 0x80)
-
-
-
-/*
- * Function command codes for io_ctl.
- */
-#define LCD_On			1
-#define LCD_Off			2
-#define LCD_Clear		3
-#define LCD_Reset		4
-#define LCD_Cursor_Left		5
-#define LCD_Cursor_Right	6
-#define LCD_Disp_Left		7
-#define LCD_Disp_Right		8
-#define LCD_Get_Cursor		9
-#define LCD_Set_Cursor		10
-#define LCD_Home		11
-#define LCD_Read		12
-#define LCD_Write		13
-#define LCD_Cursor_Off		14
-#define LCD_Cursor_On		15
-#define LCD_Get_Cursor_Pos	16
-#define LCD_Set_Cursor_Pos	17
-#define LCD_Blink_Off           18
-
-#define LED_Set			40
-#define LED_Bit_Set		41
-#define LED_Bit_Clear		42
-
-
-//  Button defs
-#define BUTTON_Read             50
-
-
-// Ethernet LINK check hackaroo
-#define LINK_Check              90
-#define LINK_Check_2		91
-
-//  Button patterns  _B - single layer lcd boards
-
-#define BUTTON_NONE               0x3F
-#define BUTTON_NONE_B             0xFE
-
-#define BUTTON_Left               0x3B
-#define BUTTON_Left_B             0xFA
-
-#define BUTTON_Right              0x37
-#define BUTTON_Right_B            0xDE
-
-#define BUTTON_Up                 0x2F
-#define BUTTON_Up_B               0xF6
-
-#define BUTTON_Down               0x1F
-#define BUTTON_Down_B             0xEE
-
-#define BUTTON_Next               0x3D
-#define BUTTON_Next_B             0x7E
-
-#define BUTTON_Enter              0x3E
-#define BUTTON_Enter_B            0xBE
-
-#define BUTTON_Reset_B            0xFC
-
-
-// debounce constants
-
-#define BUTTON_SENSE            160000
-#define BUTTON_DEBOUNCE		5000
-
-
-//  Galileo register stuff
-
-#define kGal_DevBank2Cfg        0x1466DB33
-#define kGal_DevBank2PReg       0x464
-#define kGal_DevBank3Cfg        0x146FDFFB
-#define kGal_DevBank3PReg       0x468
-
-// Network
-
-#define kIPADDR			1
-#define kNETMASK		2
-#define kGATEWAY		3
-#define kDNS			4
-
-#define kClassA			5
-#define kClassB			6
-#define kClassC			7
-
-- 
GitLab


From be935d5b6301865b4e9ec35d79d398cedb3c82b7 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@queued.net>
Date: Wed, 23 Jul 2008 21:31:41 -0700
Subject: [PATCH 425/853] lxfb: drop dead declarations from header

We never sent the gamma stuff upstream, and don't really care about it.
However, lx_[gs]et_gamma prototypes snuck into lxfb.h anyways; there are
no definitions for them.  Drop the dead code.

Signed-off-by: Andres Salomon <dilinger@debian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/geode/lxfb.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/video/geode/lxfb.h b/drivers/video/geode/lxfb.h
index 3b9416f4ee2..6a51448fd3f 100644
--- a/drivers/video/geode/lxfb.h
+++ b/drivers/video/geode/lxfb.h
@@ -51,8 +51,6 @@ static inline unsigned int lx_get_pitch(unsigned int xres, int bpp)
 }
 
 void lx_set_mode(struct fb_info *);
-void lx_get_gamma(struct fb_info *, unsigned int *, int);
-void lx_set_gamma(struct fb_info *, unsigned int *, int);
 unsigned int lx_framebuffer_size(void);
 int lx_blank_display(struct fb_info *, int);
 void lx_set_palette_reg(struct fb_info *, unsigned int, unsigned int,
-- 
GitLab


From 104b198dd0b3b62a4fc4e9146f01f2abc718e926 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jordan.crouse@amd.com>
Date: Wed, 23 Jul 2008 21:31:43 -0700
Subject: [PATCH 426/853] lxfb: fix console blanking

Simply enabling DAC blanking without turning off the CRT seems to be resulting
in characters remaining on the screen when the monitor blanks.  This patch
turns off the CRT for all modes, and also powers down the DACs when vsync
and/or hsync are disabled.

Signed-off-by: Jordan Crouse <jordan.crouse@amd.com>
Acked-by: Andres Salomon <dilinger@debian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/geode/lxfb_ops.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/video/geode/lxfb_ops.c b/drivers/video/geode/lxfb_ops.c
index aaef9165ec9..b1cd49c9935 100644
--- a/drivers/video/geode/lxfb_ops.c
+++ b/drivers/video/geode/lxfb_ops.c
@@ -517,25 +517,25 @@ void lx_set_palette_reg(struct fb_info *info, unsigned regno,
 int lx_blank_display(struct fb_info *info, int blank_mode)
 {
 	struct lxfb_par *par = info->par;
-	u32 dcfg, fp_pm;
-	int blank, hsync, vsync, crt;
+	u32 dcfg, misc, fp_pm;
+	int blank, hsync, vsync;
 
 	/* CRT power saving modes. */
 	switch (blank_mode) {
 	case FB_BLANK_UNBLANK:
-		blank = 0; hsync = 1; vsync = 1; crt = 1;
+		blank = 0; hsync = 1; vsync = 1;
 		break;
 	case FB_BLANK_NORMAL:
-		blank = 1; hsync = 1; vsync = 1; crt = 1;
+		blank = 1; hsync = 1; vsync = 1;
 		break;
 	case FB_BLANK_VSYNC_SUSPEND:
-		blank = 1; hsync = 1; vsync = 0; crt = 1;
+		blank = 1; hsync = 1; vsync = 0;
 		break;
 	case FB_BLANK_HSYNC_SUSPEND:
-		blank = 1; hsync = 0; vsync = 1; crt = 1;
+		blank = 1; hsync = 0; vsync = 1;
 		break;
 	case FB_BLANK_POWERDOWN:
-		blank = 1; hsync = 0; vsync = 0; crt = 0;
+		blank = 1; hsync = 0; vsync = 0;
 		break;
 	default:
 		return -EINVAL;
@@ -545,15 +545,23 @@ int lx_blank_display(struct fb_info *info, int blank_mode)
 	dcfg &= ~(VP_DCFG_DAC_BL_EN | VP_DCFG_HSYNC_EN | VP_DCFG_VSYNC_EN |
 			VP_DCFG_CRT_EN);
 	if (!blank)
-		dcfg |= VP_DCFG_DAC_BL_EN;
+		dcfg |= VP_DCFG_DAC_BL_EN | VP_DCFG_CRT_EN;
 	if (hsync)
 		dcfg |= VP_DCFG_HSYNC_EN;
 	if (vsync)
 		dcfg |= VP_DCFG_VSYNC_EN;
-	if (crt)
-		dcfg |= VP_DCFG_CRT_EN;
+
 	write_vp(par, VP_DCFG, dcfg);
 
+	misc = read_vp(par, VP_MISC);
+
+	if (vsync && hsync)
+		misc &= ~VP_MISC_DACPWRDN;
+	else
+		misc |= VP_MISC_DACPWRDN;
+
+	write_vp(par, VP_MISC, misc);
+
 	/* Power on/off flat panel */
 
 	if (par->output & OUTPUT_PANEL) {
-- 
GitLab


From 5798712d608f5ebad994487748a2ccf3cc613d78 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:31:43 -0700
Subject: [PATCH 427/853] drivers/video/amifb.c cleanups

This patch contains the following cleanups:
- make the needlessly global amifb_init() static
- rename cleanup_module() to amifb_exit(),
  make it static __exit,
  use module_exit(),
  there's no need to #ifdef MODULE it

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/amifb.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/video/amifb.c b/drivers/video/amifb.c
index 0c549aa1cf8..b8e9a8682f2 100644
--- a/drivers/video/amifb.c
+++ b/drivers/video/amifb.c
@@ -1136,7 +1136,6 @@ static int amifb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg
 	 * Interface to the low level console driver
 	 */
 
-int amifb_init(void);
 static void amifb_deinit(void);
 
 	/*
@@ -2248,7 +2247,7 @@ static inline void chipfree(void)
 	 * Initialisation
 	 */
 
-int __init amifb_init(void)
+static int __init amifb_init(void)
 {
 	int tag, i, err = 0;
 	u_long chipptr;
@@ -3793,16 +3792,14 @@ static void ami_rebuild_copper(void)
 	}
 }
 
-
-module_init(amifb_init);
-
-#ifdef MODULE
-MODULE_LICENSE("GPL");
-
-void cleanup_module(void)
+static void __exit amifb_exit(void)
 {
 	unregister_framebuffer(&fb_info);
 	amifb_deinit();
 	amifb_video_off();
 }
-#endif /* MODULE */
+
+module_init(amifb_init);
+module_exit(amifb_exit);
+
+MODULE_LICENSE("GPL");
-- 
GitLab


From 7fc80b7bd682b47825e806018cca8ff7dc6bb55a Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:44 -0700
Subject: [PATCH 428/853] neofb: simplify clock calculation

There is nothing to gain by converting value in kHz to fixed point MHz.
Just calculate everything in kHz.

A reorder of the loop allows reducing number of iterations (check if
frequency is not too high already).

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/neofb.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index 5246b0402d7..b033e5a4dc8 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -201,7 +201,6 @@ static int neoFindMode(int xres, int yres, int depth)
  *
  * Determine the closest clock frequency to the one requested.
  */
-#define REF_FREQ 0xe517		/* 14.31818 in 20.12 fixed point */
 #define MAX_N 127
 #define MAX_D 31
 #define MAX_F 1
@@ -211,27 +210,24 @@ static void neoCalcVCLK(const struct fb_info *info,
 {
 	int n, d, f;
 	int n_best = 0, d_best = 0, f_best = 0;
-	long f_best_diff = (0x7ffff << 12);	/* 20.12 */
-	long f_target = (freq << 12) / 1000;	/* 20.12 */
+	long f_best_diff = 0x7ffff;
 
 	for (f = 0; f <= MAX_F; f++)
-		for (n = 0; n <= MAX_N; n++)
-			for (d = 0; d <= MAX_D; d++) {
-				long f_out;	/* 20.12 */
-				long f_diff;	/* 20.12 */
-
-				f_out =
-				    ((((n + 1) << 12) / ((d +
-							  1) *
-							 (1 << f))) >> 12)
-				    * REF_FREQ;
-				f_diff = abs(f_out - f_target);
-				if (f_diff < f_best_diff) {
+		for (d = 0; d <= MAX_D; d++)
+			for (n = 0; n <= MAX_N; n++) {
+				long f_out;
+				long f_diff;
+
+				f_out = ((14318 * (n + 1)) / (d + 1)) >> f;
+				f_diff = abs(f_out - freq);
+				if (f_diff <= f_best_diff) {
 					f_best_diff = f_diff;
 					n_best = n;
 					d_best = d;
 					f_best = f;
 				}
+				if (f_out > freq)
+					break;
 			}
 
 	if (info->fix.accel == FB_ACCEL_NEOMAGIC_NM2200 ||
@@ -248,11 +244,11 @@ static void neoCalcVCLK(const struct fb_info *info,
 	par->VCLK3Denominator = d_best;
 
 #ifdef NEOFB_DEBUG
-	printk("neoVCLK: f:%d NumLow=%d NumHi=%d Den=%d Df=%d\n",
-	       f_target >> 12,
+	printk(KERN_DEBUG "neoVCLK: f:%ld NumLow=%d NumHi=%d Den=%d Df=%ld\n",
+	       freq,
 	       par->VCLK3NumeratorLow,
 	       par->VCLK3NumeratorHigh,
-	       par->VCLK3Denominator, f_best_diff >> 12);
+	       par->VCLK3Denominator, f_best_diff);
 #endif
 }
 
-- 
GitLab


From 1ca6b62f8ca668ccfab0da9112c0125ef82343bd Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:45 -0700
Subject: [PATCH 429/853] neofb: drop redundant code

Drop structure which is only set but never read.  Drop variables which are
only set and never read.  Convert one long switch into two shorter ones.

Add cpu_relax() in busy waiting loop.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/neofb.c | 95 ++++++++++---------------------------------
 1 file changed, 22 insertions(+), 73 deletions(-)

diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index b033e5a4dc8..669b8c196bb 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -479,7 +479,8 @@ static inline int neo2200_sync(struct fb_info *info)
 {
 	struct neofb_par *par = info->par;
 
-	while (readl(&par->neo2200->bltStat) & 1);
+	while (readl(&par->neo2200->bltStat) & 1)
+		cpu_relax();
 	return 0;
 }
 
@@ -587,34 +588,14 @@ static int
 neofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
 	struct neofb_par *par = info->par;
-	unsigned int pixclock = var->pixclock;
-	struct xtimings timings;
 	int memlen, vramlen;
 	int mode_ok = 0;
 
 	DBG("neofb_check_var");
 
-	if (!pixclock)
-		pixclock = 10000;	/* 10ns = 100MHz */
-	timings.pixclock = 1000000000 / pixclock;
-	if (timings.pixclock < 1)
-		timings.pixclock = 1;
-
-	if (timings.pixclock > par->maxClock)
+	if (PICOS2KHZ(var->pixclock) > par->maxClock)
 		return -EINVAL;
 
-	timings.dblscan = var->vmode & FB_VMODE_DOUBLE;
-	timings.interlaced = var->vmode & FB_VMODE_INTERLACED;
-	timings.HDisplay = var->xres;
-	timings.HSyncStart = timings.HDisplay + var->right_margin;
-	timings.HSyncEnd = timings.HSyncStart + var->hsync_len;
-	timings.HTotal = timings.HSyncEnd + var->left_margin;
-	timings.VDisplay = var->yres;
-	timings.VSyncStart = timings.VDisplay + var->lower_margin;
-	timings.VSyncEnd = timings.VSyncStart + var->vsync_len;
-	timings.VTotal = timings.VSyncEnd + var->upper_margin;
-	timings.sync = var->sync;
-
 	/* Is the mode larger than the LCD panel? */
 	if (par->internal_display &&
             ((var->xres > par->NeoPanelWidth) ||
@@ -1923,9 +1904,6 @@ static int __devinit neo_init_hw(struct fb_info *info)
 	int maxClock = 65000;
 	int CursorMem = 1024;
 	int CursorOff = 0x100;
-	int linearSize = 1024;
-	int maxWidth = 1024;
-	int maxHeight = 1024;
 
 	DBG("neo_init_hw");
 
@@ -1944,81 +1922,52 @@ static int __devinit neo_init_hw(struct fb_info *info)
 	case FB_ACCEL_NEOMAGIC_NM2070:
 		videoRam = 896;
 		maxClock = 65000;
-		CursorMem = 2048;
-		CursorOff = 0x100;
-		linearSize = 1024;
-		maxWidth = 1024;
-		maxHeight = 1024;
 		break;
 	case FB_ACCEL_NEOMAGIC_NM2090:
 	case FB_ACCEL_NEOMAGIC_NM2093:
-		videoRam = 1152;
-		maxClock = 80000;
-		CursorMem = 2048;
-		CursorOff = 0x100;
-		linearSize = 2048;
-		maxWidth = 1024;
-		maxHeight = 1024;
-		break;
 	case FB_ACCEL_NEOMAGIC_NM2097:
 		videoRam = 1152;
 		maxClock = 80000;
-		CursorMem = 1024;
-		CursorOff = 0x100;
-		linearSize = 2048;
-		maxWidth = 1024;
-		maxHeight = 1024;
 		break;
 	case FB_ACCEL_NEOMAGIC_NM2160:
 		videoRam = 2048;
 		maxClock = 90000;
-		CursorMem = 1024;
-		CursorOff = 0x100;
-		linearSize = 2048;
-		maxWidth = 1024;
-		maxHeight = 1024;
 		break;
 	case FB_ACCEL_NEOMAGIC_NM2200:
 		videoRam = 2560;
 		maxClock = 110000;
-		CursorMem = 1024;
-		CursorOff = 0x1000;
-		linearSize = 4096;
-		maxWidth = 1280;
-		maxHeight = 1024;	/* ???? */
-
-		par->neo2200 = (Neo2200 __iomem *) par->mmio_vbase;
 		break;
 	case FB_ACCEL_NEOMAGIC_NM2230:
 		videoRam = 3008;
 		maxClock = 110000;
-		CursorMem = 1024;
-		CursorOff = 0x1000;
-		linearSize = 4096;
-		maxWidth = 1280;
-		maxHeight = 1024;	/* ???? */
-
-		par->neo2200 = (Neo2200 __iomem *) par->mmio_vbase;
 		break;
 	case FB_ACCEL_NEOMAGIC_NM2360:
 		videoRam = 4096;
 		maxClock = 110000;
-		CursorMem = 1024;
-		CursorOff = 0x1000;
-		linearSize = 4096;
-		maxWidth = 1280;
-		maxHeight = 1024;	/* ???? */
-
-		par->neo2200 = (Neo2200 __iomem *) par->mmio_vbase;
 		break;
 	case FB_ACCEL_NEOMAGIC_NM2380:
 		videoRam = 6144;
 		maxClock = 110000;
+		break;
+	}
+	switch (info->fix.accel) {
+	case FB_ACCEL_NEOMAGIC_NM2070:
+	case FB_ACCEL_NEOMAGIC_NM2090:
+	case FB_ACCEL_NEOMAGIC_NM2093:
+		CursorMem = 2048;
+		CursorOff = 0x100;
+		break;
+	case FB_ACCEL_NEOMAGIC_NM2097:
+	case FB_ACCEL_NEOMAGIC_NM2160:
+		CursorMem = 1024;
+		CursorOff = 0x100;
+		break;
+	case FB_ACCEL_NEOMAGIC_NM2200:
+	case FB_ACCEL_NEOMAGIC_NM2230:
+	case FB_ACCEL_NEOMAGIC_NM2360:
+	case FB_ACCEL_NEOMAGIC_NM2380:
 		CursorMem = 1024;
 		CursorOff = 0x1000;
-		linearSize = 8192;
-		maxWidth = 1280;
-		maxHeight = 1024;	/* ???? */
 
 		par->neo2200 = (Neo2200 __iomem *) par->mmio_vbase;
 		break;
@@ -2032,7 +1981,7 @@ static int __devinit neo_init_hw(struct fb_info *info)
 */
 	par->maxClock = maxClock;
 	par->cursorOff = CursorOff;
-	return ((videoRam * 1024));
+	return videoRam * 1024;
 }
 
 
-- 
GitLab


From c6b044d6bab5e2878d408666469362fc200a889a Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:45 -0700
Subject: [PATCH 430/853] neofb: drop the xtimings structure

Remove the xtimings structure which only stored some values to be used
later (mostly once).  Calculate and use these values in places they are
needed.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/neofb.c    | 88 ++++++++++++++++++----------------------
 include/video/neomagic.h | 17 --------
 2 files changed, 40 insertions(+), 65 deletions(-)

diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index 669b8c196bb..25172b2a2a9 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -259,15 +259,20 @@ static void neoCalcVCLK(const struct fb_info *info,
  */
 
 static int vgaHWInit(const struct fb_var_screeninfo *var,
-		     const struct fb_info *info,
-		     struct neofb_par *par, struct xtimings *timings)
+		     struct neofb_par *par)
 {
+	int hsync_end = var->xres + var->right_margin + var->hsync_len;
+	int htotal = (hsync_end + var->left_margin) >> 3;
+	int vsync_start = var->yres + var->lower_margin;
+	int vsync_end = vsync_start + var->vsync_len;
+	int vtotal = vsync_end + var->upper_margin;
+
 	par->MiscOutReg = 0x23;
 
-	if (!(timings->sync & FB_SYNC_HOR_HIGH_ACT))
+	if (!(var->sync & FB_SYNC_HOR_HIGH_ACT))
 		par->MiscOutReg |= 0x40;
 
-	if (!(timings->sync & FB_SYNC_VERT_HIGH_ACT))
+	if (!(var->sync & FB_SYNC_VERT_HIGH_ACT))
 		par->MiscOutReg |= 0x80;
 
 	/*
@@ -282,25 +287,25 @@ static int vgaHWInit(const struct fb_var_screeninfo *var,
 	/*
 	 * CRTC Controller
 	 */
-	par->CRTC[0] = (timings->HTotal >> 3) - 5;
-	par->CRTC[1] = (timings->HDisplay >> 3) - 1;
-	par->CRTC[2] = (timings->HDisplay >> 3) - 1;
-	par->CRTC[3] = (((timings->HTotal >> 3) - 1) & 0x1F) | 0x80;
-	par->CRTC[4] = (timings->HSyncStart >> 3);
-	par->CRTC[5] = ((((timings->HTotal >> 3) - 1) & 0x20) << 2)
-	    | (((timings->HSyncEnd >> 3)) & 0x1F);
-	par->CRTC[6] = (timings->VTotal - 2) & 0xFF;
-	par->CRTC[7] = (((timings->VTotal - 2) & 0x100) >> 8)
-	    | (((timings->VDisplay - 1) & 0x100) >> 7)
-	    | ((timings->VSyncStart & 0x100) >> 6)
-	    | (((timings->VDisplay - 1) & 0x100) >> 5)
-	    | 0x10 | (((timings->VTotal - 2) & 0x200) >> 4)
-	    | (((timings->VDisplay - 1) & 0x200) >> 3)
-	    | ((timings->VSyncStart & 0x200) >> 2);
+	par->CRTC[0] = htotal - 5;
+	par->CRTC[1] = (var->xres >> 3) - 1;
+	par->CRTC[2] = (var->xres >> 3) - 1;
+	par->CRTC[3] = ((htotal - 1) & 0x1F) | 0x80;
+	par->CRTC[4] = ((var->xres + var->right_margin) >> 3);
+	par->CRTC[5] = (((htotal - 1) & 0x20) << 2)
+	    | (((hsync_end >> 3)) & 0x1F);
+	par->CRTC[6] = (vtotal - 2) & 0xFF;
+	par->CRTC[7] = (((vtotal - 2) & 0x100) >> 8)
+	    | (((var->yres - 1) & 0x100) >> 7)
+	    | ((vsync_start & 0x100) >> 6)
+	    | (((var->yres - 1) & 0x100) >> 5)
+	    | 0x10 | (((vtotal - 2) & 0x200) >> 4)
+	    | (((var->yres - 1) & 0x200) >> 3)
+	    | ((vsync_start & 0x200) >> 2);
 	par->CRTC[8] = 0x00;
-	par->CRTC[9] = (((timings->VDisplay - 1) & 0x200) >> 4) | 0x40;
+	par->CRTC[9] = (((var->yres - 1) & 0x200) >> 4) | 0x40;
 
-	if (timings->dblscan)
+	if (var->vmode & FB_VMODE_DOUBLE)
 		par->CRTC[9] |= 0x80;
 
 	par->CRTC[10] = 0x00;
@@ -309,13 +314,13 @@ static int vgaHWInit(const struct fb_var_screeninfo *var,
 	par->CRTC[13] = 0x00;
 	par->CRTC[14] = 0x00;
 	par->CRTC[15] = 0x00;
-	par->CRTC[16] = timings->VSyncStart & 0xFF;
-	par->CRTC[17] = (timings->VSyncEnd & 0x0F) | 0x20;
-	par->CRTC[18] = (timings->VDisplay - 1) & 0xFF;
+	par->CRTC[16] = vsync_start & 0xFF;
+	par->CRTC[17] = (vsync_end & 0x0F) | 0x20;
+	par->CRTC[18] = (var->yres - 1) & 0xFF;
 	par->CRTC[19] = var->xres_virtual >> 4;
 	par->CRTC[20] = 0x00;
-	par->CRTC[21] = (timings->VDisplay - 1) & 0xFF;
-	par->CRTC[22] = (timings->VTotal - 1) & 0xFF;
+	par->CRTC[21] = (var->yres - 1) & 0xFF;
+	par->CRTC[22] = (vtotal - 1) & 0xFF;
 	par->CRTC[23] = 0xC3;
 	par->CRTC[24] = 0xFF;
 
@@ -736,11 +741,11 @@ neofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 static int neofb_set_par(struct fb_info *info)
 {
 	struct neofb_par *par = info->par;
-	struct xtimings timings;
 	unsigned char temp;
 	int i, clock_hi = 0;
 	int lcd_stretch;
 	int hoffset, voffset;
+	int vsync_start, vtotal;
 
 	DBG("neofb_set_par");
 
@@ -748,28 +753,15 @@ static int neofb_set_par(struct fb_info *info)
 
 	vgaHWProtect(1);	/* Blank the screen */
 
-	timings.dblscan = info->var.vmode & FB_VMODE_DOUBLE;
-	timings.interlaced = info->var.vmode & FB_VMODE_INTERLACED;
-	timings.HDisplay = info->var.xres;
-	timings.HSyncStart = timings.HDisplay + info->var.right_margin;
-	timings.HSyncEnd = timings.HSyncStart + info->var.hsync_len;
-	timings.HTotal = timings.HSyncEnd + info->var.left_margin;
-	timings.VDisplay = info->var.yres;
-	timings.VSyncStart = timings.VDisplay + info->var.lower_margin;
-	timings.VSyncEnd = timings.VSyncStart + info->var.vsync_len;
-	timings.VTotal = timings.VSyncEnd + info->var.upper_margin;
-	timings.sync = info->var.sync;
-	timings.pixclock = PICOS2KHZ(info->var.pixclock);
-
-	if (timings.pixclock < 1)
-		timings.pixclock = 1;
+	vsync_start = info->var.yres + info->var.lower_margin;
+	vtotal = vsync_start + info->var.vsync_len + info->var.upper_margin;
 
 	/*
 	 * This will allocate the datastructure and initialize all of the
 	 * generic VGA registers.
 	 */
 
-	if (vgaHWInit(&info->var, info, par, &timings))
+	if (vgaHWInit(&info->var, par))
 		return -EINVAL;
 
 	/*
@@ -808,10 +800,10 @@ static int neofb_set_par(struct fb_info *info)
 	par->ExtCRTDispAddr = 0x10;
 
 	/* Vertical Extension */
-	par->VerticalExt = (((timings.VTotal - 2) & 0x400) >> 10)
-	    | (((timings.VDisplay - 1) & 0x400) >> 9)
-	    | (((timings.VSyncStart) & 0x400) >> 8)
-	    | (((timings.VSyncStart) & 0x400) >> 7);
+	par->VerticalExt = (((vtotal - 2) & 0x400) >> 10)
+	    | (((info->var.yres - 1) & 0x400) >> 9)
+	    | (((vsync_start) & 0x400) >> 8)
+	    | (((vsync_start) & 0x400) >> 7);
 
 	/* Fast write bursts on unless disabled. */
 	if (par->pci_burst)
@@ -972,7 +964,7 @@ static int neofb_set_par(struct fb_info *info)
 	 * Calculate the VCLK that most closely matches the requested dot
 	 * clock.
 	 */
-	neoCalcVCLK(info, par, timings.pixclock);
+	neoCalcVCLK(info, par, PICOS2KHZ(info->var.pixclock));
 
 	/* Since we program the clocks ourselves, always use VCLK3. */
 	par->MiscOutReg |= 0x0C;
diff --git a/include/video/neomagic.h b/include/video/neomagic.h
index a9e118a1cd1..38910da0ae5 100644
--- a/include/video/neomagic.h
+++ b/include/video/neomagic.h
@@ -90,23 +90,6 @@
 #define PCI_CHIP_NM2360 0x0006
 #define PCI_CHIP_NM2380 0x0016
 
-
-struct xtimings {
-	unsigned int pixclock;
-	unsigned int HDisplay;
-	unsigned int HSyncStart;
-	unsigned int HSyncEnd;
-	unsigned int HTotal;
-	unsigned int VDisplay;
-	unsigned int VSyncStart;
-	unsigned int VSyncEnd;
-	unsigned int VTotal;
-	unsigned int sync;
-	int dblscan;
-	int interlaced;
-};
-
-
 /* --------------------------------------------------------------------- */
 
 typedef volatile struct {
-- 
GitLab


From 4a25e41831ee851c1365d8b41decc22493b18e6d Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
Date: Wed, 23 Jul 2008 21:31:46 -0700
Subject: [PATCH 431/853] video: sh7760fb: SH7760/SH7763 LCDC framebuffer
 driver

Framebuffer driver for the SH7760/SH7763 integrated LCD controller.

Signed-off-by: Manuel Lauss <mano@roarinelk.homelinux.net>
Signed-off-by: Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
Reviewed-by: Paul Mundt <lethal@linux-sh.org>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Siegfried Schaefer <s.schaefer@schaefer-edv.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/fb/sh7760fb.txt | 131 +++++++
 drivers/video/Kconfig         |  13 +
 drivers/video/Makefile        |   1 +
 drivers/video/sh7760fb.c      | 658 ++++++++++++++++++++++++++++++++++
 include/asm-sh/sh7760fb.h     | 197 ++++++++++
 5 files changed, 1000 insertions(+)
 create mode 100644 Documentation/fb/sh7760fb.txt
 create mode 100644 drivers/video/sh7760fb.c
 create mode 100644 include/asm-sh/sh7760fb.h

diff --git a/Documentation/fb/sh7760fb.txt b/Documentation/fb/sh7760fb.txt
new file mode 100644
index 00000000000..c87bfe5c630
--- /dev/null
+++ b/Documentation/fb/sh7760fb.txt
@@ -0,0 +1,131 @@
+SH7760/SH7763 integrated LCDC Framebuffer driver
+================================================
+
+0. Overview
+-----------
+The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which
+supports (in theory) resolutions ranging from 1x1 to 1024x1024,
+with color depths ranging from 1 to 16 bits, on STN, DSTN and TFT Panels.
+
+Caveats:
+* Framebuffer memory must be a large chunk allocated at the top
+  of Area3 (HW requirement). Because of this requirement you should NOT
+  make the driver a module since at runtime it may become impossible to
+  get a large enough contiguous chunk of memory.
+
+* The driver does not support changing resolution while loaded
+  (displays aren't hotpluggable anyway)
+
+* Heavy flickering may be observed
+  a) if you're using 15/16bit color modes at >= 640x480 px resolutions,
+  b) during PCMCIA (or any other slow bus) activity.
+
+* Rotation works only 90 degrees clockwise, and only if horizontal
+  resolution is <= 320 pixels.
+
+files:   drivers/video/sh7760fb.c
+        include/asm-sh/sh7760fb.h
+        Documentation/fb/sh7760fb.txt
+
+1. Platform setup
+-----------------
+SH7760:
+ Video data is fetched via the DMABRG DMA engine, so you have to
+ configure the SH DMAC for DMABRG mode (write 0x94808080 to the
+ DMARSRA register somewhere at boot).
+
+ PFC registers PCCR and PCDR must be set to peripheral mode.
+ (write zeros to both).
+
+The driver does NOT do the above for you since board setup is, well, the
+job of the board setup code.
+
+2. Panel definitions
+--------------------
+The LCDC must explicitly be told about the type of LCD panel
+attached.  Data must be wrapped in a "struct sh7760fb_platdata" and
+passed to the driver as platform_data.
+
+Suggest you take a closer look at the SH7760 Manual, Section 30.
+(http://documentation.renesas.com/eng/products/mpumcu/e602291_sh7760.pdf)
+
+The following code illustrates what needs to be done to
+get the framebuffer working on a 640x480 TFT:
+
+====================== cut here ======================================
+
+#include <linux/fb.h>
+#include <asm/sh7760fb.h>
+
+/*
+ * NEC NL6448BC26-01 640x480 TFT
+ * dotclock 25175 kHz
+ * Xres                640     Yres            480
+ * Htotal      800     Vtotal          525
+ * HsynStart   656     VsynStart       490
+ * HsynLenn    30      VsynLenn        2
+ *
+ * The linux framebuffer layer does not use the syncstart/synclen
+ * values but right/left/upper/lower margin values. The comments
+ * for the x_margin explain how to calculate those from given
+ * panel sync timings.
+ */
+static struct fb_videomode nl6448bc26 = {
+       .name           = "NL6448BC26",
+       .refresh        = 60,
+       .xres           = 640,
+       .yres           = 480,
+       .pixclock       = 39683,        /* in picoseconds! */
+       .hsync_len      = 30,
+       .vsync_len      = 2,
+       .left_margin    = 114,  /* HTOT - (HSYNLEN + HSYNSTART) */
+       .right_margin   = 16,   /* HSYNSTART - XRES */
+       .upper_margin   = 33,   /* VTOT - (VSYNLEN + VSYNSTART) */
+       .lower_margin   = 10,   /* VSYNSTART - YRES */
+       .sync           = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+       .vmode          = FB_VMODE_NONINTERLACED,
+       .flag           = 0,
+};
+
+static struct sh7760fb_platdata sh7760fb_nl6448 = {
+       .def_mode       = &nl6448bc26,
+       .ldmtr          = LDMTR_TFT_COLOR_16,   /* 16bit TFT panel */
+       .lddfr          = LDDFR_8BPP,           /* we want 8bit output */
+       .ldpmmr         = 0x0070,
+       .ldpspr         = 0x0500,
+       .ldaclnr        = 0,
+       .ldickr         = LDICKR_CLKSRC(LCDC_CLKSRC_EXTERNAL) |
+                         LDICKR_CLKDIV(1),
+       .rotate         = 0,
+       .novsync        = 1,
+       .blank          = NULL,
+};
+
+/* SH7760:
+ * 0xFE300800: 256 * 4byte xRGB palette ram
+ * 0xFE300C00: 42 bytes ctrl registers
+ */
+static struct resource sh7760_lcdc_res[] = {
+       [0] = {
+               .start  = 0xFE300800,
+               .end    = 0xFE300CFF,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = 65,
+               .end    = 65,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device sh7760_lcdc_dev = {
+       .dev    = {
+               .platform_data = &sh7760fb_nl6448,
+       },
+       .name           = "sh7760-lcdc",
+       .id             = -1,
+       .resource       = sh7760_lcdc_res,
+       .num_resources  = ARRAY_SIZE(sh7760_lcdc_res),
+};
+
+====================== cut here ======================================
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index c1b7db84341..70d135e0cc4 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1991,6 +1991,19 @@ config FB_COBALT
 	tristate "Cobalt server LCD frame buffer support"
 	depends on FB && MIPS_COBALT
 
+config FB_SH7760
+       bool "SH7760/SH7763 LCDC support"
+       depends on FB && (CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7763)
+       select FB_CFB_FILLRECT
+       select FB_CFB_COPYAREA
+       select FB_CFB_IMAGEBLIT
+       help
+         Support for the SH7760/SH7763 integrated (D)STN/TFT LCD Controller.
+         Supports display resolutions up to 1024x1024 pixels, grayscale and
+         color operation, with depths ranging from 1 bpp to 8 bpp monochrome
+         and 8, 15 or 16 bpp color; 90 degrees clockwise display rotation for
+         panels with a horizontal resolution of <= 320 pixels.
+
 config FB_VIRTUAL
 	tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
 	depends on FB
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 35803259775..0ebc1bfd251 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_FB_PMAGB_B)	  += pmagb-b-fb.o
 obj-$(CONFIG_FB_MAXINE)		  += maxinefb.o
 obj-$(CONFIG_FB_METRONOME)        += metronomefb.o
 obj-$(CONFIG_FB_S1D13XXX)	  += s1d13xxxfb.o
+obj-$(CONFIG_FB_SH7760)		  += sh7760fb.o
 obj-$(CONFIG_FB_IMX)              += imxfb.o
 obj-$(CONFIG_FB_S3C2410)	  += s3c2410fb.o
 obj-$(CONFIG_FB_FSL_DIU)	  += fsl-diu-fb.o
diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c
new file mode 100644
index 00000000000..4d0e28c5790
--- /dev/null
+++ b/drivers/video/sh7760fb.c
@@ -0,0 +1,658 @@
+/*
+ * SH7760/SH7763 LCDC Framebuffer driver.
+ *
+ * (c) 2006-2008 MSC Vertriebsges.m.b.H.,
+ *             Manuel Lauss <mano@roarinelk.homelinux.net>
+ * (c) 2008 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ *
+ *  This file is subject to the terms and conditions of the GNU General
+ *  Public License.  See the file COPYING in the main directory of this
+ *  archive for more details.
+ *
+ * PLEASE HAVE A LOOK AT Documentation/fb/sh7760fb.txt!
+ *
+ * Thanks to Siegfried Schaefer <s.schaefer at schaefer-edv.de>
+ *     for his original source and testing!
+ */
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/fb.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <asm/sh7760fb.h>
+
+struct sh7760fb_par {
+	void __iomem *base;
+	int irq;
+
+	struct sh7760fb_platdata *pd;	/* display information */
+
+	dma_addr_t fbdma;	/* physical address */
+
+	int rot;		/* rotation enabled? */
+
+	u32 pseudo_palette[16];
+
+	struct platform_device *dev;
+	struct resource *ioarea;
+	struct completion vsync;	/* vsync irq event */
+};
+
+static irqreturn_t sh7760fb_irq(int irq, void *data)
+{
+	struct completion *c = data;
+
+	complete(c);
+
+	return IRQ_HANDLED;
+}
+
+static void sh7760fb_wait_vsync(struct fb_info *info)
+{
+	struct sh7760fb_par *par = info->par;
+
+	if (par->pd->novsync)
+		return;
+
+	iowrite16(ioread16(par->base + LDINTR) & ~VINT_CHECK,
+		  par->base + LDINTR);
+
+	if (par->irq < 0) {
+		/* poll for vert. retrace: status bit is sticky */
+		while (!(ioread16(par->base + LDINTR) & VINT_CHECK))
+			cpu_relax();
+	} else {
+		/* a "wait_for_irq_event(par->irq)" would be extremely nice */
+		init_completion(&par->vsync);
+		enable_irq(par->irq);
+		wait_for_completion(&par->vsync);
+		disable_irq_nosync(par->irq);
+	}
+}
+
+/* wait_for_lps - wait until power supply has reached a certain state. */
+static int wait_for_lps(struct sh7760fb_par *par, int val)
+{
+	int i = 100;
+	while (--i && ((ioread16(par->base + LDPMMR) & 3) != val))
+		msleep(1);
+
+	if (i <= 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/* en/disable the LCDC */
+static int sh7760fb_blank(int blank, struct fb_info *info)
+{
+	struct sh7760fb_par *par = info->par;
+	struct sh7760fb_platdata *pd = par->pd;
+	unsigned short cntr = ioread16(par->base + LDCNTR);
+	unsigned short intr = ioread16(par->base + LDINTR);
+	int lps;
+
+	if (blank == FB_BLANK_UNBLANK) {
+		intr |= VINT_START;
+		cntr = LDCNTR_DON2 | LDCNTR_DON;
+		lps = 3;
+	} else {
+		intr &= ~VINT_START;
+		cntr = LDCNTR_DON2;
+		lps = 0;
+	}
+
+	if (pd->blank)
+		pd->blank(blank);
+
+	iowrite16(intr, par->base + LDINTR);
+	iowrite16(cntr, par->base + LDCNTR);
+
+	return wait_for_lps(par, lps);
+}
+
+/* set color registers */
+static int sh7760fb_setcmap(struct fb_cmap *cmap, struct fb_info *info)
+{
+	struct sh7760fb_par *par = info->par;
+	u32 s = cmap->start;
+	u32 l = cmap->len;
+	u16 *r = cmap->red;
+	u16 *g = cmap->green;
+	u16 *b = cmap->blue;
+	u32 col, tmo;
+	int ret;
+
+	ret = 0;
+
+	sh7760fb_wait_vsync(info);
+
+	/* request palette access */
+	iowrite16(LDPALCR_PALEN, par->base + LDPALCR);
+
+	/* poll for access grant */
+	tmo = 100;
+	while (!(ioread16(par->base + LDPALCR) & LDPALCR_PALS) && (--tmo))
+		cpu_relax();
+
+	if (!tmo) {
+		ret = 1;
+		dev_dbg(info->dev, "no palette access!\n");
+		goto out;
+	}
+
+	while (l && (s < 256)) {
+		col = ((*r) & 0xff) << 16;
+		col |= ((*g) & 0xff) << 8;
+		col |= ((*b) & 0xff);
+		col &= SH7760FB_PALETTE_MASK;
+
+		if (s < 16)
+			((u32 *) (info->pseudo_palette))[s] = s;
+
+		s++;
+		l--;
+		r++;
+		g++;
+		b++;
+	}
+out:
+	iowrite16(0, par->base + LDPALCR);
+	return ret;
+}
+
+static void encode_fix(struct fb_fix_screeninfo *fix, struct fb_info *info,
+		       unsigned long stride)
+{
+	memset(fix, 0, sizeof(struct fb_fix_screeninfo));
+	strcpy(fix->id, "sh7760-lcdc");
+
+	fix->smem_start = (unsigned long)info->screen_base;
+	fix->smem_len = info->screen_size;
+
+	fix->line_length = stride;
+}
+
+static int sh7760fb_get_color_info(struct device *dev,
+				   u16 lddfr, int *bpp, int *gray)
+{
+	int lbpp, lgray;
+
+	lgray = lbpp = 0;
+
+	switch (lddfr & LDDFR_COLOR_MASK) {
+	case LDDFR_1BPP_MONO:
+		lgray = 1;
+		lbpp = 1;
+		break;
+	case LDDFR_2BPP_MONO:
+		lgray = 1;
+		lbpp = 2;
+		break;
+	case LDDFR_4BPP_MONO:
+		lgray = 1;
+	case LDDFR_4BPP:
+		lbpp = 4;
+		break;
+	case LDDFR_6BPP_MONO:
+		lgray = 1;
+	case LDDFR_8BPP:
+		lbpp = 8;
+		break;
+	case LDDFR_16BPP_RGB555:
+	case LDDFR_16BPP_RGB565:
+		lbpp = 16;
+		lgray = 0;
+		break;
+	default:
+		dev_dbg(dev, "unsupported LDDFR bit depth.\n");
+		return -EINVAL;
+	}
+
+	if (bpp)
+		*bpp = lbpp;
+	if (gray)
+		*gray = lgray;
+
+	return 0;
+}
+
+static int sh7760fb_check_var(struct fb_var_screeninfo *var,
+			      struct fb_info *info)
+{
+	struct fb_fix_screeninfo *fix = &info->fix;
+	struct sh7760fb_par *par = info->par;
+	int ret, bpp;
+
+	/* get color info from register value */
+	ret = sh7760fb_get_color_info(info->dev, par->pd->lddfr, &bpp, NULL);
+	if (ret)
+		return ret;
+
+	var->bits_per_pixel = bpp;
+
+	if ((var->grayscale) && (var->bits_per_pixel == 1))
+		fix->visual = FB_VISUAL_MONO10;
+	else if (var->bits_per_pixel >= 15)
+		fix->visual = FB_VISUAL_TRUECOLOR;
+	else
+		fix->visual = FB_VISUAL_PSEUDOCOLOR;
+
+	/* TODO: add some more validation here */
+	return 0;
+}
+
+/*
+ * sh7760fb_set_par - set videomode.
+ *
+ * NOTE: The rotation, grayscale and DSTN codepaths are
+ *     totally untested!
+ */
+static int sh7760fb_set_par(struct fb_info *info)
+{
+	struct sh7760fb_par *par = info->par;
+	struct fb_videomode *vm = par->pd->def_mode;
+	unsigned long sbase, dstn_off, ldsarl, stride;
+	unsigned short hsynp, hsynw, htcn, hdcn;
+	unsigned short vsynp, vsynw, vtln, vdln;
+	unsigned short lddfr, ldmtr;
+	int ret, bpp, gray;
+
+	par->rot = par->pd->rotate;
+
+	/* rotate only works with xres <= 320 */
+	if (par->rot && (vm->xres > 320)) {
+		dev_dbg(info->dev, "rotation disabled due to display size\n");
+		par->rot = 0;
+	}
+
+	/* calculate LCDC reg vals from display parameters */
+	hsynp = vm->right_margin + vm->xres;
+	hsynw = vm->hsync_len;
+	htcn = vm->left_margin + hsynp + hsynw;
+	hdcn = vm->xres;
+	vsynp = vm->lower_margin + vm->yres;
+	vsynw = vm->vsync_len;
+	vtln = vm->upper_margin + vsynp + vsynw;
+	vdln = vm->yres;
+
+	/* get color info from register value */
+	ret = sh7760fb_get_color_info(info->dev, par->pd->lddfr, &bpp, &gray);
+	if (ret)
+		return ret;
+
+	dev_dbg(info->dev, "%dx%d %dbpp %s (orientation %s)\n", hdcn,
+		vdln, bpp, gray ? "grayscale" : "color",
+		par->rot ? "rotated" : "normal");
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+	lddfr = par->pd->lddfr | (1 << 8);
+#else
+	lddfr = par->pd->lddfr & ~(1 << 8);
+#endif
+
+	ldmtr = par->pd->ldmtr;
+
+	if (!(vm->sync & FB_SYNC_HOR_HIGH_ACT))
+		ldmtr |= LDMTR_CL1POL;
+	if (!(vm->sync & FB_SYNC_VERT_HIGH_ACT))
+		ldmtr |= LDMTR_FLMPOL;
+
+	/* shut down LCDC before changing display parameters */
+	sh7760fb_blank(FB_BLANK_POWERDOWN, info);
+
+	iowrite16(par->pd->ldickr, par->base + LDICKR);	/* pixclock */
+	iowrite16(ldmtr, par->base + LDMTR);	/* polarities */
+	iowrite16(lddfr, par->base + LDDFR);	/* color/depth */
+	iowrite16((par->rot ? 1 << 13 : 0), par->base + LDSMR);	/* rotate */
+	iowrite16(par->pd->ldpmmr, par->base + LDPMMR);	/* Power Management */
+	iowrite16(par->pd->ldpspr, par->base + LDPSPR);	/* Power Supply Ctrl */
+
+	/* display resolution */
+	iowrite16(((htcn >> 3) - 1) | (((hdcn >> 3) - 1) << 8),
+		  par->base + LDHCNR);
+	iowrite16(vdln - 1, par->base + LDVDLNR);
+	iowrite16(vtln - 1, par->base + LDVTLNR);
+	/* h/v sync signals */
+	iowrite16((vsynp - 1) | ((vsynw - 1) << 12), par->base + LDVSYNR);
+	iowrite16(((hsynp >> 3) - 1) | (((hsynw >> 3) - 1) << 12),
+		  par->base + LDHSYNR);
+	/* AC modulation sig */
+	iowrite16(par->pd->ldaclnr, par->base + LDACLNR);
+
+	stride = (par->rot) ? vtln : hdcn;
+	if (!gray)
+		stride *= (bpp + 7) >> 3;
+	else {
+		if (bpp == 1)
+			stride >>= 3;
+		else if (bpp == 2)
+			stride >>= 2;
+		else if (bpp == 4)
+			stride >>= 1;
+		/* 6 bpp == 8 bpp */
+	}
+
+	/* if rotated, stride must be power of 2 */
+	if (par->rot) {
+		unsigned long bit = 1 << 31;
+		while (bit) {
+			if (stride & bit)
+				break;
+			bit >>= 1;
+		}
+		if (stride & ~bit)
+			stride = bit << 1;	/* not P-o-2, round up */
+	}
+	iowrite16(stride, par->base + LDLAOR);
+
+	/* set display mem start address */
+	sbase = (unsigned long)par->fbdma;
+	if (par->rot)
+		sbase += (hdcn - 1) * stride;
+
+	iowrite32(sbase, par->base + LDSARU);
+
+	/*
+	 * for DSTN need to set address for lower half.
+	 * I (mlau) don't know which address to set it to,
+	 * so I guessed at (stride * yres/2).
+	 */
+	if (((ldmtr & 0x003f) >= LDMTR_DSTN_MONO_8) &&
+	    ((ldmtr & 0x003f) <= LDMTR_DSTN_COLOR_16)) {
+
+		dev_dbg(info->dev, " ***** DSTN untested! *****\n");
+
+		dstn_off = stride;
+		if (par->rot)
+			dstn_off *= hdcn >> 1;
+		else
+			dstn_off *= vdln >> 1;
+
+		ldsarl = sbase + dstn_off;
+	} else
+		ldsarl = 0;
+
+	iowrite32(ldsarl, par->base + LDSARL);	/* mem for lower half of DSTN */
+
+	encode_fix(&info->fix, info, stride);
+	sh7760fb_check_var(&info->var, info);
+
+	sh7760fb_blank(FB_BLANK_UNBLANK, info);	/* panel on! */
+
+	dev_dbg(info->dev, "hdcn  : %6d htcn  : %6d\n", hdcn, htcn);
+	dev_dbg(info->dev, "hsynw : %6d hsynp : %6d\n", hsynw, hsynp);
+	dev_dbg(info->dev, "vdln  : %6d vtln  : %6d\n", vdln, vtln);
+	dev_dbg(info->dev, "vsynw : %6d vsynp : %6d\n", vsynw, vsynp);
+	dev_dbg(info->dev, "clksrc: %6d clkdiv: %6d\n",
+		(par->pd->ldickr >> 12) & 3, par->pd->ldickr & 0x1f);
+	dev_dbg(info->dev, "ldpmmr: 0x%04x ldpspr: 0x%04x\n", par->pd->ldpmmr,
+		par->pd->ldpspr);
+	dev_dbg(info->dev, "ldmtr : 0x%04x lddfr : 0x%04x\n", ldmtr, lddfr);
+	dev_dbg(info->dev, "ldlaor: %ld\n", stride);
+	dev_dbg(info->dev, "ldsaru: 0x%08lx ldsarl: 0x%08lx\n", sbase, ldsarl);
+
+	return 0;
+}
+
+static struct fb_ops sh7760fb_ops = {
+	.owner = THIS_MODULE,
+	.fb_blank = sh7760fb_blank,
+	.fb_check_var = sh7760fb_check_var,
+	.fb_setcmap = sh7760fb_setcmap,
+	.fb_set_par = sh7760fb_set_par,
+	.fb_fillrect = cfb_fillrect,
+	.fb_copyarea = cfb_copyarea,
+	.fb_imageblit = cfb_imageblit,
+};
+
+static void sh7760fb_free_mem(struct fb_info *info)
+{
+	struct sh7760fb_par *par = info->par;
+
+	if (!info->screen_base)
+		return;
+
+	dma_free_coherent(info->dev, info->screen_size,
+			  info->screen_base, par->fbdma);
+
+	par->fbdma = 0;
+	info->screen_base = NULL;
+	info->screen_size = 0;
+}
+
+/* Allocate the framebuffer memory; returns 0 or a negative errno. The memory
+ * must be in Area3 (dictated by the DMA engine), contiguous, 512 byte aligned.
+ */
+static int sh7760fb_alloc_mem(struct fb_info *info)
+{
+	struct sh7760fb_par *par = info->par;
+	void *fbmem;
+	unsigned long vram;
+	int ret, bpp;
+
+	if (info->screen_base)
+		return 0;
+
+	/* get color info from register value */
+	ret = sh7760fb_get_color_info(info->dev, par->pd->lddfr, &bpp, NULL);
+	if (ret) {
+		printk(KERN_ERR "colinfo\n");
+		return ret;
+	}
+
+	/* min VRAM: xres_min = 16, yres_min = 1, bpp = 1: 2byte -> 1 page
+	   max VRAM: xres_max = 1024, yres_max = 1024, bpp = 16: 2MB */
+
+	vram = info->var.xres * info->var.yres;
+	if (info->var.grayscale) {
+		if (bpp == 1)
+			vram >>= 3;
+		else if (bpp == 2)
+			vram >>= 2;
+		else if (bpp == 4)
+			vram >>= 1;
+	} else if (bpp > 8)
+		vram *= 2;
+	if ((vram < 1) || (vram > 1024 * 2048)) {
+		dev_dbg(info->dev, "too much VRAM required. Check settings\n");
+		return -ENODEV;
+	}
+
+	if (vram < PAGE_SIZE)
+		vram = PAGE_SIZE;
+
+	fbmem = dma_alloc_coherent(info->dev, vram, &par->fbdma, GFP_KERNEL);
+	if (!fbmem)
+		return -ENOMEM;
+
+	if ((par->fbdma & SH7760FB_DMA_MASK) != SH7760FB_DMA_MASK) {
+		/* screen_base not set yet: free_mem() would do nothing */
+		dma_free_coherent(info->dev, vram, fbmem, par->fbdma);
+		dev_err(info->dev, "kernel gave me memory at 0x%08lx, which is"
+			" unusable for the LCDC\n", (unsigned long)par->fbdma);
+		return -ENOMEM;
+	}
+
+	info->screen_base = fbmem;
+	info->screen_size = vram;
+
+	return 0;
+}
+
+static int __devinit sh7760fb_probe(struct platform_device *pdev)
+{
+	struct fb_info *info;
+	struct resource *res;
+	struct sh7760fb_par *par;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (unlikely(res == NULL)) {
+		dev_err(&pdev->dev, "invalid resource\n");
+		return -EINVAL;
+	}
+
+	info = framebuffer_alloc(sizeof(struct sh7760fb_par), &pdev->dev);
+	if (!info)
+		return -ENOMEM;
+
+	par = info->par;
+	par->dev = pdev;
+
+	par->pd = pdev->dev.platform_data;
+	if (!par->pd) {
+		dev_dbg(info->dev, "no display setup data!\n");
+		ret = -ENODEV;
+		goto out_fb;
+	}
+
+	par->ioarea = request_mem_region(res->start,
+					 res->end - res->start + 1, pdev->name);
+	if (!par->ioarea) {
+		dev_err(&pdev->dev, "mmio area busy\n");
+		ret = -EBUSY;
+		goto out_fb;
+	}
+
+	par->base = ioremap_nocache(res->start, res->end - res->start + 1);
+	if (!par->base) {
+		dev_err(&pdev->dev, "cannot remap\n");
+		ret = -ENODEV;
+		goto out_res;
+	}
+
+	iowrite16(0, par->base + LDINTR);	/* disable vsync irq */
+	par->irq = platform_get_irq(pdev, 0);
+	if (par->irq >= 0) {
+		ret = request_irq(par->irq, sh7760fb_irq, 0,
+				  "sh7760-lcdc", &par->vsync);
+		if (ret) {
+			dev_err(&pdev->dev, "cannot grab IRQ\n");
+			par->irq = -ENXIO;
+		} else
+			disable_irq_nosync(par->irq);
+	}
+
+	fb_videomode_to_var(&info->var, par->pd->def_mode);
+
+	ret = sh7760fb_alloc_mem(info);
+	if (ret) {
+		dev_dbg(info->dev, "framebuffer memory allocation failed!\n");
+		goto out_unmap;
+	}
+
+	info->pseudo_palette = par->pseudo_palette;
+
+	/* fixup color register bitpositions. These are fixed by hardware */
+	info->var.red.offset = 11;
+	info->var.red.length = 5;
+	info->var.red.msb_right = 0;
+
+	info->var.green.offset = 5;
+	info->var.green.length = 6;
+	info->var.green.msb_right = 0;
+
+	info->var.blue.offset = 0;
+	info->var.blue.length = 5;
+	info->var.blue.msb_right = 0;
+
+	info->var.transp.offset = 0;
+	info->var.transp.length = 0;
+	info->var.transp.msb_right = 0;
+
+	/* set the DON2 bit now, before cmap allocation, as it will randomize
+	 * palette memory.
+	 */
+	iowrite16(LDCNTR_DON2, par->base + LDCNTR);
+	info->fbops = &sh7760fb_ops;
+
+	ret = fb_alloc_cmap(&info->cmap, 256, 0);
+	if (ret) {
+		dev_dbg(info->dev, "Unable to allocate cmap memory\n");
+		goto out_mem;
+	}
+
+	ret = register_framebuffer(info);
+	if (ret < 0) {
+		dev_dbg(info->dev, "cannot register fb!\n");
+		goto out_cmap;
+	}
+	platform_set_drvdata(pdev, info);
+
+	printk(KERN_INFO "%s: memory at phys 0x%08lx-0x%08lx, size %ld KiB\n",
+	       pdev->name,
+	       (unsigned long)par->fbdma,
+	       (unsigned long)(par->fbdma + info->screen_size - 1),
+	       info->screen_size >> 10);
+
+	return 0;
+
+out_cmap:
+	sh7760fb_blank(FB_BLANK_POWERDOWN, info);
+	fb_dealloc_cmap(&info->cmap);
+out_mem:
+	sh7760fb_free_mem(info);
+out_unmap:
+	if (par->irq >= 0)
+		free_irq(par->irq, &par->vsync);
+	iounmap(par->base);
+out_res:
+	release_resource(par->ioarea);
+	kfree(par->ioarea);
+out_fb:
+	framebuffer_release(info);
+	return ret;
+}
+
+static int __devexit sh7760fb_remove(struct platform_device *dev)
+{
+	struct fb_info *info = platform_get_drvdata(dev);
+	struct sh7760fb_par *par = info->par;
+
+	sh7760fb_blank(FB_BLANK_POWERDOWN, info);
+	unregister_framebuffer(info);
+	fb_dealloc_cmap(&info->cmap);
+	sh7760fb_free_mem(info);
+	if (par->irq >= 0)
+		free_irq(par->irq, &par->vsync);	/* dev_id from probe */
+	iounmap(par->base);
+	release_resource(par->ioarea);
+	kfree(par->ioarea);
+	framebuffer_release(info);
+	platform_set_drvdata(dev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver sh7760_lcdc_driver = {
+	.driver = {
+		   .name = "sh7760-lcdc",
+		   .owner = THIS_MODULE,
+		   },
+	.probe = sh7760fb_probe,
+	.remove = __devexit_p(sh7760fb_remove),
+};
+
+static int __init sh7760fb_init(void)
+{
+	return platform_driver_register(&sh7760_lcdc_driver);
+}
+
+static void __exit sh7760fb_exit(void)
+{
+	platform_driver_unregister(&sh7760_lcdc_driver);
+}
+
+module_init(sh7760fb_init);
+module_exit(sh7760fb_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu, Manuel Lauss");
+MODULE_DESCRIPTION("FBdev for SH7760/63 integrated LCD Controller");
+MODULE_LICENSE("GPL");
diff --git a/include/asm-sh/sh7760fb.h b/include/asm-sh/sh7760fb.h
new file mode 100644
index 00000000000..8767f61acec
--- /dev/null
+++ b/include/asm-sh/sh7760fb.h
@@ -0,0 +1,197 @@
+/*
+ * sh7760fb.h -- platform data for SH7760/SH7763 LCDC framebuffer driver.
+ *
+ * (c) 2006-2008 MSC Vertriebsges.m.b.H.,
+ * 			Manuel Lauss <mano@roarinelk.homelinux.net>
+ * (c) 2008 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ */
+
+#ifndef _ASM_SH_SH7760FB_H
+#define _ASM_SH_SH7760FB_H
+
+/*
+ * some bits of the colormap registers should be written as zero.
+ * create a mask for that.
+ */
+#define SH7760FB_PALETTE_MASK 0x00f8fcf8
+
+/* The LCDC dma engine always sets bits 27-26 to 1: this is Area3 */
+#define SH7760FB_DMA_MASK 0x0C000000
+
+/* palette */
+#define LDPR(x) (((x) << 2))
+
+/* framebuffer registers and bits */
+#define LDICKR 0x400
+#define LDMTR 0x402
+/* the LDMTR bits are defined further down in this header */
+#define LDDFR 0x404
+#define LDDFR_PABD (1 << 8)
+#define LDDFR_COLOR_MASK 0x7F
+#define LDSMR 0x406
+#define LDSMR_ROT (1 << 13)
+#define LDSARU 0x408
+#define LDSARL 0x40c
+#define LDLAOR 0x410
+#define LDPALCR 0x412
+#define LDPALCR_PALS (1 << 4)
+#define LDPALCR_PALEN (1 << 0)
+#define LDHCNR 0x414
+#define LDHSYNR 0x416
+#define LDVDLNR 0x418
+#define LDVTLNR 0x41a
+#define LDVSYNR 0x41c
+#define LDACLNR 0x41e
+#define LDINTR 0x420
+#define LDPMMR 0x424
+#define LDPSPR 0x426
+#define LDCNTR 0x428
+#define LDCNTR_DON (1 << 0)
+#define LDCNTR_DON2 (1 << 4)
+
+#ifdef CONFIG_CPU_SUBTYPE_SH7763
+# define LDLIRNR       0x440
+/* LDINTR bit */
+# define LDINTR_MINTEN (1 << 15)
+# define LDINTR_FINTEN (1 << 14)
+# define LDINTR_VSINTEN (1 << 13)
+# define LDINTR_VEINTEN (1 << 12)
+# define LDINTR_MINTS (1 << 11)
+# define LDINTR_FINTS (1 << 10)
+# define LDINTR_VSINTS (1 << 9)
+# define LDINTR_VEINTS (1 << 8)
+# define VINT_START (LDINTR_VSINTEN)
+# define VINT_CHECK (LDINTR_VSINTS)
+#else
+/* LDINTR bit */
+# define LDINTR_VINTSEL (1 << 12)
+# define LDINTR_VINTE (1 << 8)
+# define LDINTR_VINTS (1 << 0)
+# define VINT_START (LDINTR_VINTSEL)
+# define VINT_CHECK (LDINTR_VINTS)
+#endif
+
+/* HSYNC polarity inversion */
+#define LDMTR_FLMPOL (1 << 15)
+
+/* VSYNC polarity inversion */
+#define LDMTR_CL1POL (1 << 14)
+
+/* DISPLAY-ENABLE polarity inversion */
+#define LDMTR_DISPEN_LOWACT (1 << 13)
+
+/* DISPLAY DATA BUS polarity inversion */
+#define LDMTR_DPOL_LOWACT (1 << 12)
+
+/* AC modulation signal enable */
+#define LDMTR_MCNT (1 << 10)
+
+/* Disable output of HSYNC during VSYNC period */
+#define LDMTR_CL1CNT (1 << 9)
+
+/* Disable output of VSYNC during VSYNC period */
+#define LDMTR_CL2CNT (1 << 8)
+
+/* Display types supported by the LCDC */
+#define LDMTR_STN_MONO_4       0x00
+#define LDMTR_STN_MONO_8       0x01
+#define LDMTR_STN_COLOR_4      0x08
+#define LDMTR_STN_COLOR_8      0x09
+#define LDMTR_STN_COLOR_12     0x0A
+#define LDMTR_STN_COLOR_16     0x0B
+#define LDMTR_DSTN_MONO_8      0x11
+#define LDMTR_DSTN_MONO_16     0x13
+#define LDMTR_DSTN_COLOR_8     0x19
+#define LDMTR_DSTN_COLOR_12    0x1A
+#define LDMTR_DSTN_COLOR_16    0x1B
+#define LDMTR_TFT_COLOR_16     0x2B
+
+/* framebuffer color layout */
+#define LDDFR_1BPP_MONO 0x00
+#define LDDFR_2BPP_MONO 0x01
+#define LDDFR_4BPP_MONO 0x02
+#define LDDFR_6BPP_MONO 0x04
+#define LDDFR_4BPP 0x0A
+#define LDDFR_8BPP 0x0C
+#define LDDFR_16BPP_RGB555 0x1D
+#define LDDFR_16BPP_RGB565 0x2D
+
+/* LCDC Pixclock sources */
+#define LCDC_CLKSRC_BUSCLOCK 0
+#define LCDC_CLKSRC_PERIPHERAL 1
+#define LCDC_CLKSRC_EXTERNAL 2
+
+#define LDICKR_CLKSRC(x) \
+       (((x) & 3) << 12)
+
+/* LCDC pixclock input divider. Set to 1 at a minimum! */
+#define LDICKR_CLKDIV(x) \
+       ((x) & 0x1f)
+
+struct sh7760fb_platdata {
+
+	/* Set this member to a valid fb_videomode for the display you
+	 * wish to use.  The following members must be initialized:
+	 * xres, yres, hsync_len, vsync_len, sync,
+	 * {left,right,upper,lower}_margin.
+	 * The driver uses the above members to calculate register values
+	 * and memory requirements. Other members are ignored but may
+	 * be used by other framebuffer layer components.
+	 */
+	struct fb_videomode *def_mode;
+
+	/* LDMTR includes display type and signal polarity.  The
+	 * HSYNC/VSYNC polarities are derived from the fb_var_screeninfo
+	 * data above; however the polarities of the following signals
+	 * must be encoded in the ldmtr member:
+	 * Display Enable signal (default high-active)  DISPEN_LOWACT
+	 * Display Data signals (default high-active)   DPOL_LOWACT
+	 * AC Modulation signal (default off)           MCNT
+	 * Hsync-During-Vsync suppression (default off) CL1CNT
+	 * Vsync-during-vsync suppression (default off) CL2CNT
+	 * NOTE: also set a display type!
+	 * (one of LDMTR_{STN,DSTN,TFT}_{MONO,COLOR}_{4,8,12,16})
+	 */
+	u16 ldmtr;
+
+	/* LDDFR controls framebuffer image format (depth, organization)
+	 * Use ONE of the LDDFR_?BPP_* macros!
+	 */
+	u16 lddfr;
+
+	/* LDPMMR and LDPSPR control the timing of the power signals
+	 * for the display. Please read the SH7760 Hardware Manual,
+	 * Chapters 30.3.17, 30.3.18 and 30.4.6!
+	 */
+	u16 ldpmmr;
+	u16 ldpspr;
+
+	/* LDACLNR contains the line numbers after which the AC modulation
+	 * signal is to toggle. Set to ZERO for TFTs or displays which
+	 * do not need it. (Chapter 30.3.15 in SH7760 Hardware Manual).
+	 */
+	u16 ldaclnr;
+
+	/* LDICKR contains information on pixelclock source and config.
+	 * Please use the LDICKR_CLKSRC() and LDICKR_CLKDIV() macros.
+	 * The minimum value for CLKDIV() is 1!
+	 */
+	u16 ldickr;
+
+	/* set this member to 1 if you wish to use the LCDC's hardware
+	 * rotation function.  This is limited to displays <= 320x200
+	 * pixels resolution!
+	 */
+	int rotate;		/* set to 1 to rotate 90 CCW */
+
+	/* set this to 1 to suppress vsync irq use. */
+	int novsync;
+
+	/* blanking hook for platform. Set this if your platform can do
+	 * more than the LCDC in terms of blanking (e.g. disable clock
+	 * generator / backlight power supply / etc.).
+	 */
+	void (*blank) (int);
+};
+
+#endif /* _ASM_SH_SH7760FB_H */
-- 
GitLab


From 2d04a4a72d7e1519b4838f24bdd4b5d0f3f426dc Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed, 23 Jul 2008 21:31:48 -0700
Subject: [PATCH 432/853] fbcon: bgcolor fix

The fourth bit of the background color is the blink property bit, not the
intensity bit, as for the foreground color.  Therefore it shouldn't be
included in the background color.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/console/fbcon.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/console/fbcon.h b/drivers/video/console/fbcon.h
index 0135e039545..beb6a74fc7f 100644
--- a/drivers/video/console/fbcon.h
+++ b/drivers/video/console/fbcon.h
@@ -92,7 +92,7 @@ struct fbcon_ops {
 #define attr_fgcol(fgshift,s)    \
 	(((s) >> (fgshift)) & 0x0f)
 #define attr_bgcol(bgshift,s)    \
-	(((s) >> (bgshift)) & 0x0f)
+	(((s) >> (bgshift)) & 0x07)
 
 /* Monochrome */
 #define attr_bold(s) \
-- 
GitLab


From 3e074058d72486676f6fdf6fe803200c62dcb403 Mon Sep 17 00:00:00 2001
From: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Date: Wed, 23 Jul 2008 21:31:48 -0700
Subject: [PATCH 433/853] fbdev: LCD backlight driver using Atmel PWM driver

This patch adds a platform driver using the ATMEL PWM driver to control a
backlight which requires a PWM signal and optional GPIO signal for discrete
on/off signal.  It has been tested on Favr-32 board from EarthLCD.

The driver is configurable by supplying a struct with the platform data.  See
the include/linux/atmel-pwm-bl.h for details.

The board code for Favr-32 will be submitted to the AVR32 kernel list.

Signed-off-by: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/Kconfig        |  12 ++
 drivers/video/backlight/Makefile       |   1 +
 drivers/video/backlight/atmel-pwm-bl.c | 244 +++++++++++++++++++++++++
 include/linux/atmel-pwm-bl.h           |  43 +++++
 4 files changed, 300 insertions(+)
 create mode 100644 drivers/video/backlight/atmel-pwm-bl.c
 create mode 100644 include/linux/atmel-pwm-bl.h

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index b289e197e55..98d9faf4970 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -87,6 +87,18 @@ config BACKLIGHT_ATMEL_LCDC
 	  If in doubt, it's safe to enable this option; it doesn't kick
 	  in unless the board's description says it's wired that way.
 
+config BACKLIGHT_ATMEL_PWM
+	tristate "Atmel PWM backlight control"
+	depends on BACKLIGHT_CLASS_DEVICE && ATMEL_PWM
+	default n
+	help
+	  Say Y here if you want to use the PWM peripheral in Atmel AT91 and
+	  AVR32 devices. This driver will need additional platform data to know
+	  which PWM instance to use and how to configure it.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called atmel-pwm-bl.
+
 config BACKLIGHT_CORGI
 	tristate "Generic (aka Sharp Corgi) Backlight Driver"
 	depends on BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 7d31c14088a..d8a08e468cc 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_LCD_PLATFORM)	   += platform_lcd.o
 obj-$(CONFIG_LCD_VGG2432A4)	   += vgg2432a4.o
 
 obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o
+obj-$(CONFIG_BACKLIGHT_ATMEL_PWM)    += atmel-pwm-bl.o
 obj-$(CONFIG_BACKLIGHT_CORGI)	+= corgi_bl.o
 obj-$(CONFIG_BACKLIGHT_HP680)	+= hp680_bl.o
 obj-$(CONFIG_BACKLIGHT_LOCOMO)	+= locomolcd.o
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
new file mode 100644
index 00000000000..505c0823a10
--- /dev/null
+++ b/drivers/video/backlight/atmel-pwm-bl.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * Backlight driver using Atmel PWM peripheral.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/fb.h>
+#include <linux/clk.h>
+#include <linux/gpio.h>
+#include <linux/backlight.h>
+#include <linux/atmel_pwm.h>
+#include <linux/atmel-pwm-bl.h>
+
+struct atmel_pwm_bl {
+	const struct atmel_pwm_bl_platform_data	*pdata;
+	struct backlight_device			*bldev;
+	struct platform_device			*pdev;
+	struct pwm_channel			pwmc;
+	int					gpio_on;
+};
+
+static int atmel_pwm_bl_set_intensity(struct backlight_device *bd)
+{
+	struct atmel_pwm_bl *pwmbl = bl_get_data(bd);
+	int intensity = bd->props.brightness;
+	int pwm_duty;
+
+	if (bd->props.power != FB_BLANK_UNBLANK)
+		intensity = 0;
+	if (bd->props.fb_blank != FB_BLANK_UNBLANK)
+		intensity = 0;
+
+	if (pwmbl->pdata->pwm_active_low)
+		pwm_duty = pwmbl->pdata->pwm_duty_min + intensity;
+	else
+		pwm_duty = pwmbl->pdata->pwm_duty_max - intensity;
+
+	if (pwm_duty > pwmbl->pdata->pwm_duty_max)
+		pwm_duty = pwmbl->pdata->pwm_duty_max;
+	if (pwm_duty < pwmbl->pdata->pwm_duty_min)
+		pwm_duty = pwmbl->pdata->pwm_duty_min;
+
+	if (!intensity) {
+		if (pwmbl->gpio_on != -1) {
+			gpio_set_value(pwmbl->gpio_on,
+					0 ^ pwmbl->pdata->on_active_low);
+		}
+		pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty);
+		pwm_channel_disable(&pwmbl->pwmc);
+	} else {
+		pwm_channel_enable(&pwmbl->pwmc);
+		pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty);
+		if (pwmbl->gpio_on != -1) {
+			gpio_set_value(pwmbl->gpio_on,
+					1 ^ pwmbl->pdata->on_active_low);
+		}
+	}
+
+	return 0;
+}
+
+static int atmel_pwm_bl_get_intensity(struct backlight_device *bd)
+{
+	struct atmel_pwm_bl *pwmbl = bl_get_data(bd);
+	u32 intensity;	/* duty range may exceed 8 bits, do not truncate */
+
+	if (pwmbl->pdata->pwm_active_low) {
+		intensity = pwm_channel_readl(&pwmbl->pwmc, PWM_CDTY) -
+			pwmbl->pdata->pwm_duty_min;
+	} else {
+		intensity = pwmbl->pdata->pwm_duty_max -
+			pwm_channel_readl(&pwmbl->pwmc, PWM_CDTY);
+	}
+
+	return intensity;
+}
+
+static int atmel_pwm_bl_init_pwm(struct atmel_pwm_bl *pwmbl)
+{
+	unsigned long pwm_rate = pwmbl->pwmc.mck;
+	unsigned long prescale = DIV_ROUND_UP(pwm_rate,
+			(pwmbl->pdata->pwm_frequency *
+			 pwmbl->pdata->pwm_compare_max)) - 1;
+
+	/*
+	 * Prescale must be power of two and maximum 0xf in size because of
+	 * hardware limit. PWM speed will be:
+	 *	PWM module clock speed / (2 ^ prescale).
+	 */
+	prescale = fls(prescale);
+	if (prescale > 0xf)
+		prescale = 0xf;
+
+	pwm_channel_writel(&pwmbl->pwmc, PWM_CMR, prescale);
+	pwm_channel_writel(&pwmbl->pwmc, PWM_CDTY,
+			pwmbl->pdata->pwm_duty_min +
+			pwmbl->bldev->props.brightness);
+	pwm_channel_writel(&pwmbl->pwmc, PWM_CPRD,
+			pwmbl->pdata->pwm_compare_max);
+
+	dev_info(&pwmbl->pdev->dev, "Atmel PWM backlight driver "
+			"(%lu Hz)\n", pwmbl->pwmc.mck /
+			pwmbl->pdata->pwm_compare_max /
+			(1 << prescale));
+
+	return pwm_channel_enable(&pwmbl->pwmc);
+}
+
+static struct backlight_ops atmel_pwm_bl_ops = {
+	.get_brightness = atmel_pwm_bl_get_intensity,
+	.update_status  = atmel_pwm_bl_set_intensity,
+};
+
+static int atmel_pwm_bl_probe(struct platform_device *pdev)
+{
+	const struct atmel_pwm_bl_platform_data *pdata;
+	struct backlight_device *bldev;
+	struct atmel_pwm_bl *pwmbl;
+	int retval;
+
+	pwmbl = kzalloc(sizeof(struct atmel_pwm_bl), GFP_KERNEL);
+	if (!pwmbl)
+		return -ENOMEM;
+
+	pwmbl->pdev = pdev;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		retval = -ENODEV;
+		goto err_free_mem;
+	}
+
+	if (pdata->pwm_compare_max < pdata->pwm_duty_max ||
+			pdata->pwm_duty_min > pdata->pwm_duty_max ||
+			pdata->pwm_frequency == 0) {
+		retval = -EINVAL;
+		goto err_free_mem;
+	}
+
+	pwmbl->pdata = pdata;
+	pwmbl->gpio_on = pdata->gpio_on;
+
+	retval = pwm_channel_alloc(pdata->pwm_channel, &pwmbl->pwmc);
+	if (retval)
+		goto err_free_mem;
+
+	if (pwmbl->gpio_on != -1) {
+		retval = gpio_request(pwmbl->gpio_on, "gpio_atmel_pwm_bl");
+		if (retval) {
+			pwmbl->gpio_on = -1;
+			goto err_free_pwm;
+		}
+
+		/* Turn display off by default. */
+		retval = gpio_direction_output(pwmbl->gpio_on,
+				0 ^ pdata->on_active_low);
+		if (retval)
+			goto err_free_gpio;
+	}
+
+	bldev = backlight_device_register("atmel-pwm-bl",
+			&pdev->dev, pwmbl, &atmel_pwm_bl_ops);
+	if (IS_ERR(bldev)) {
+		retval = PTR_ERR(bldev);
+		goto err_free_gpio;
+	}
+
+	pwmbl->bldev = bldev;
+
+	platform_set_drvdata(pdev, pwmbl);
+
+	/* Power up the backlight by default at middle intensity. */
+	bldev->props.power = FB_BLANK_UNBLANK;
+	bldev->props.max_brightness = pdata->pwm_duty_max - pdata->pwm_duty_min;
+	bldev->props.brightness = bldev->props.max_brightness / 2;
+
+	retval = atmel_pwm_bl_init_pwm(pwmbl);
+	if (retval)
+		goto err_free_bl_dev;
+
+	atmel_pwm_bl_set_intensity(bldev);
+
+	return 0;
+
+err_free_bl_dev:
+	platform_set_drvdata(pdev, NULL);
+	backlight_device_unregister(bldev);
+err_free_gpio:
+	if (pwmbl->gpio_on != -1)
+		gpio_free(pwmbl->gpio_on);
+err_free_pwm:
+	pwm_channel_free(&pwmbl->pwmc);
+err_free_mem:
+	kfree(pwmbl);
+	return retval;
+}
+
+static int __exit atmel_pwm_bl_remove(struct platform_device *pdev)
+{
+	struct atmel_pwm_bl *pwmbl = platform_get_drvdata(pdev);
+
+	if (pwmbl->gpio_on != -1) {
+		gpio_set_value(pwmbl->gpio_on, 0 ^ pwmbl->pdata->on_active_low);
+		gpio_free(pwmbl->gpio_on);
+	}
+	pwm_channel_disable(&pwmbl->pwmc);
+	pwm_channel_free(&pwmbl->pwmc);
+	backlight_device_unregister(pwmbl->bldev);
+	platform_set_drvdata(pdev, NULL);
+	kfree(pwmbl);
+
+	return 0;
+}
+
+static struct platform_driver atmel_pwm_bl_driver = {
+	.driver = {
+		.name = "atmel-pwm-bl",
+	},
+	/* REVISIT add suspend() and resume() */
+	.remove = __exit_p(atmel_pwm_bl_remove),
+};
+
+static int __init atmel_pwm_bl_init(void)
+{
+	return platform_driver_probe(&atmel_pwm_bl_driver, atmel_pwm_bl_probe);
+}
+module_init(atmel_pwm_bl_init);
+
+static void __exit atmel_pwm_bl_exit(void)
+{
+	platform_driver_unregister(&atmel_pwm_bl_driver);
+}
+module_exit(atmel_pwm_bl_exit);
+
+MODULE_AUTHOR("Hans-Christian egtvedt <hans-christian.egtvedt@atmel.com>");
+MODULE_DESCRIPTION("Atmel PWM backlight driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/atmel-pwm-bl.h b/include/linux/atmel-pwm-bl.h
new file mode 100644
index 00000000000..0153a47806c
--- /dev/null
+++ b/include/linux/atmel-pwm-bl.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * Platform data for the Atmel PWM backlight driver (atmel-pwm-bl).
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef __INCLUDE_ATMEL_PWM_BL_H
+#define __INCLUDE_ATMEL_PWM_BL_H
+
+/**
+ * struct atmel_pwm_bl_platform_data
+ * @pwm_channel: which PWM channel in the PWM module to use.
+ * @pwm_frequency: PWM frequency to generate, the driver will try to be as
+ *	close as the prescaler allows.
+ * @pwm_compare_max: value to use in the PWM channel compare register.
+ * @pwm_duty_max: maximum duty cycle value, must be less than or equal to
+ *	pwm_compare_max.
+ * @pwm_duty_min: minimum duty cycle value, must be less than pwm_duty_max.
+ * @pwm_active_low: set to one if the low part of the PWM signal increases the
+ *	brightness of the backlight.
+ * @gpio_on: GPIO line to control the backlight on/off, set to -1 if not used.
+ * @on_active_low: set to one if the on/off signal is on when GPIO is low.
+ *
+ * This struct must be added to the platform device in the board code. It is
+ * used by the atmel-pwm-bl driver to setup the GPIO to control on/off and the
+ * PWM device.
+ */
+struct atmel_pwm_bl_platform_data {
+	unsigned int pwm_channel;
+	unsigned int pwm_frequency;
+	unsigned int pwm_compare_max;
+	unsigned int pwm_duty_max;
+	unsigned int pwm_duty_min;
+	unsigned int pwm_active_low;
+	int gpio_on;
+	unsigned int on_active_low;
+};
+
+#endif /* __INCLUDE_ATMEL_PWM_BL_H */
-- 
GitLab


From cba603bf514c101bf48f6adf393c3d00ed457a57 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 23 Jul 2008 21:31:49 -0700
Subject: [PATCH 434/853] fbcon: remove stray semicolons

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/console/fbcon.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/video/console/fbcon.h b/drivers/video/console/fbcon.h
index beb6a74fc7f..de1b1365279 100644
--- a/drivers/video/console/fbcon.h
+++ b/drivers/video/console/fbcon.h
@@ -146,10 +146,8 @@ static inline int attr_col_ec(int shift, struct vc_data *vc,
 	return is_fg ? fg : bg;
 }
 
-#define attr_bgcol_ec(bgshift,vc,info)		\
-	attr_col_ec(bgshift,vc,info,0);
-#define attr_fgcol_ec(fgshift,vc,info)		\
-	attr_col_ec(fgshift,vc,info,1);
+#define attr_bgcol_ec(bgshift, vc, info) attr_col_ec(bgshift, vc, info, 0)
+#define attr_fgcol_ec(fgshift, vc, info) attr_col_ec(fgshift, vc, info, 1)
 
 /* Font */
 #define REFCOUNT(fd)	(((int *)(fd))[-1])
-- 
GitLab


From 5bb49fcd501aa9fd3d321a22b7c01d9b0db7ab36 Mon Sep 17 00:00:00 2001
From: Philippe De Muyter <phdm@macqel.be>
Date: Wed, 23 Jul 2008 21:31:50 -0700
Subject: [PATCH 435/853] video/fb: cleanup FB_MAJOR usage

Currently, linux/major.h defines a GRAPHDEV_MAJOR (29) that nobody uses,
and linux/fb.h defines the real FB_MAJOR (also 29), that only fbmem.c
needs.  Drop GRAPHDEV_MAJOR from major.h, move FB_MAJOR definition from
fb.h to major.h, and fix fbmem.c to use major.h's definition.

Signed-off-by: Philippe De Muyter <phdm@macqel.be>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbmem.c | 1 +
 include/linux/fb.h    | 1 -
 include/linux/major.h | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 6b487801eea..5d84b343109 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -35,6 +35,7 @@
 #include <linux/device.h>
 #include <linux/efi.h>
 #include <linux/fb.h>
+#include <linux/major.h>
 
 #include <asm/fb.h>
 
diff --git a/include/linux/fb.h b/include/linux/fb.h
index a084d133586..3b8870e32af 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -8,7 +8,6 @@ struct dentry;
 
 /* Definitions of frame buffers						*/
 
-#define FB_MAJOR		29
 #define FB_MAX			32	/* sufficient for now */
 
 /* ioctls
diff --git a/include/linux/major.h b/include/linux/major.h
index 0cb98053537..53d5fafd85c 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -53,7 +53,7 @@
 #define STL_SIOMEMMAJOR		28
 #define ACSI_MAJOR		28
 #define AZTECH_CDROM_MAJOR	29
-#define GRAPHDEV_MAJOR		29   /* SparcLinux & Linux/68k /dev/fb */
+#define FB_MAJOR		29   /* /dev/fb* framebuffers */
 #define CM206_CDROM_MAJOR	32
 #define IDE2_MAJOR		33
 #define IDE3_MAJOR		34
-- 
GitLab


From b340e8a57ef381e69c99a7a8ede61a6bf71a8014 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Jul 2008 21:31:51 -0700
Subject: [PATCH 436/853] auxdisplay: small cleanups

- Use BUILD_BUG_ON for CFAG12864B_SIZE instead of runtime-check

- Use get_zeroed_page()

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Miguel Ojeda Sandonis <maxextreme@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/auxdisplay/cfag12864b.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c
index 683509f013a..eacb175f6bd 100644
--- a/drivers/auxdisplay/cfag12864b.c
+++ b/drivers/auxdisplay/cfag12864b.c
@@ -336,16 +336,9 @@ static int __init cfag12864b_init(void)
 			"ks0108 is not initialized\n");
 		goto none;
 	}
+	BUILD_BUG_ON(PAGE_SIZE < CFAG12864B_SIZE);
 
-	if (PAGE_SIZE < CFAG12864B_SIZE) {
-		printk(KERN_ERR CFAG12864B_NAME ": ERROR: "
-			"page size (%i) < cfag12864b size (%i)\n",
-			(unsigned int)PAGE_SIZE, CFAG12864B_SIZE);
-		ret = -ENOMEM;
-		goto none;
-	}
-
-	cfag12864b_buffer = (unsigned char *) __get_free_page(GFP_KERNEL);
+	cfag12864b_buffer = (unsigned char *) get_zeroed_page(GFP_KERNEL);
 	if (cfag12864b_buffer == NULL) {
 		printk(KERN_ERR CFAG12864B_NAME ": ERROR: "
 			"can't get a free page\n");
@@ -367,8 +360,6 @@ static int __init cfag12864b_init(void)
 	if (cfag12864b_workqueue == NULL)
 		goto cachealloced;
 
-	memset(cfag12864b_buffer, 0, CFAG12864B_SIZE);
-
 	cfag12864b_clear();
 	cfag12864b_on();
 
-- 
GitLab


From f9247273cb69ba101877e946d2d83044409cc8c5 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 24 Jul 2008 17:22:13 +0100
Subject: [PATCH 437/853] UFS: add const to parser token table

This patch adds a "const" to the parser token table. I've done an
allmodconfig build to see if this produces any warnings/failures and the
patch includes a fix for the only warning that was produced.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Acked-by: Alexander Viro <aviro@redhat.com>
Acked-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/super.c         | 2 +-
 include/linux/parser.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85b22b5977f..506f724055c 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1232,7 +1232,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
 	unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
-	struct match_token *tp = tokens;
+	const struct match_token *tp = tokens;
 
 	while (tp->token != Opt_onerror_panic && tp->token != mval)
 		++tp;
diff --git a/include/linux/parser.h b/include/linux/parser.h
index 7dcd0507575..cc554ca8bc7 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -14,7 +14,7 @@ struct match_token {
 	const char *pattern;
 };
 
-typedef struct match_token match_table_t[];
+typedef const struct match_token match_table_t[];
 
 /* Maximum number of arguments that match_token will find in a pattern */
 enum {MAX_OPT_ARGS = 3};
-- 
GitLab


From 6209ed9d8443b63c36d340908530fa470c4d4fff Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 24 Jul 2008 12:49:26 -0700
Subject: [PATCH 438/853] x86-64: make BUILD_IRQ() also reset section back

Commit 9d25d4db81833029d30b7b03cc1000cbbe09e192 ("x86: BUILD_IRQ say
.text to avoid .data.percpu") added a ".text" specifier to make sure
that BUILD_IRQ() builds the irq trampoline in the text segment rather
than in some random left-over segment that the compiler happened to
leave the asm in.

However, we should also make sure that we switch back by adding a
".previous" at the end, so that there are no subtle issues with
subsequent compiler-generated code.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/irqinit_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 9414125f19c..1f26fd9ec4f 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -46,7 +46,8 @@
 	asm("\n.text\n.p2align\n"		\
 	    "IRQ" #nr "_interrupt:\n\t"		\
 	    "push $~(" #nr ") ; "		\
-	    "jmp common_interrupt");
+	    "jmp common_interrupt\n"		\
+	    ".previous");
 
 #define BI(x,y) \
 	BUILD_IRQ(x##y)
-- 
GitLab


From 708e5f9eb68589b87724af3f0fb4e681dfdfd69f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:11 +0200
Subject: [PATCH 439/853] ide: always call ->init_chipset method in
 do_ide_setup_pci_device()

Call ->init_chipset method also for 'tried_config' / '!pciirq' conditions.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/setup-pci.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index b15cad58dc8..a95d5108023 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -499,20 +499,21 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 	 */
 	pciirq = dev->irq;
 
+	/*
+	 * This allows offboard ide-pci cards the enable a BIOS,
+	 * verify interrupt settings of split-mirror pci-config
+	 * space, place chipset into init-mode, and/or preserve
+	 * an interrupt if the card is not native ide support.
+	 */
+	ret = d->init_chipset ? d->init_chipset(dev, d->name) : 0;
+	if (ret < 0)
+		goto out;
+
 	/* Is it an "IDE storage" device in non-PCI mode? */
 	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5) {
 		if (noisy)
 			printk(KERN_INFO "%s: not 100%% native mode: "
 				"will probe irqs later\n", d->name);
-		/*
-		 * This allows offboard ide-pci cards the enable a BIOS,
-		 * verify interrupt settings of split-mirror pci-config
-		 * space, place chipset into init-mode, and/or preserve
-		 * an interrupt if the card is not native ide support.
-		 */
-		ret = d->init_chipset ? d->init_chipset(dev, d->name) : 0;
-		if (ret < 0)
-			goto out;
 		pciirq = ret;
 	} else if (tried_config) {
 		if (noisy)
@@ -524,11 +525,6 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 				d->name, pciirq);
 		pciirq = 0;
 	} else {
-		if (d->init_chipset) {
-			ret = d->init_chipset(dev, d->name);
-			if (ret < 0)
-				goto out;
-		}
 		if (noisy)
 			printk(KERN_INFO "%s: 100%% native mode on irq %d\n",
 				d->name, pciirq);
-- 
GitLab


From a95925a309cd9a2e7f5a5713fd70e0dadb09890c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:11 +0200
Subject: [PATCH 440/853] ide: respect dev->irq in do_ide_setup_pci_device()
 also if 'tried_config'

* If the device is in PCI native mode, respect dev->irq regardless of
  'tried_config' in do_ide_setup_pci_device().

* Drop no longer needed 'config' argument from ide_setup_pci_controller().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/setup-pci.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index a95d5108023..58f4a95f6f5 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -393,14 +393,14 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
  *	@dev: PCI device
  *	@d: IDE port info
  *	@noisy: verbose flag
- *	@config: returned as 1 if we configured the hardware
  *
  *	Set up the PCI and controller side of the IDE interface. This brings
  *	up the PCI side of the device, checks that the device is enabled
  *	and enables it if need be
  */
 
-static int ide_setup_pci_controller(struct pci_dev *dev, const struct ide_port_info *d, int noisy, int *config)
+static int ide_setup_pci_controller(struct pci_dev *dev,
+				    const struct ide_port_info *d, int noisy)
 {
 	int ret;
 	u16 pcicmd;
@@ -421,7 +421,6 @@ static int ide_setup_pci_controller(struct pci_dev *dev, const struct ide_port_i
 		ret = ide_pci_configure(dev, d);
 		if (ret < 0)
 			goto out;
-		*config = 1;
 		printk(KERN_INFO "%s: device enabled (Linux)\n", d->name);
 	}
 
@@ -487,10 +486,9 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 				   const struct ide_port_info *d,
 				   u8 noisy)
 {
-	int tried_config = 0;
 	int pciirq, ret;
 
-	ret = ide_setup_pci_controller(dev, d, noisy, &tried_config);
+	ret = ide_setup_pci_controller(dev, d, noisy);
 	if (ret < 0)
 		goto out;
 
@@ -515,10 +513,6 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 			printk(KERN_INFO "%s: not 100%% native mode: "
 				"will probe irqs later\n", d->name);
 		pciirq = ret;
-	} else if (tried_config) {
-		if (noisy)
-			printk(KERN_INFO "%s: will probe irqs later\n", d->name);
-		pciirq = 0;
 	} else if (!pciirq) {
 		if (noisy)
 			printk(KERN_WARNING "%s: bad irq (%d): will probe later\n",
-- 
GitLab


From a742d6cf0b37b1a96a1549b1fda0d6b19e0185c2 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:12 +0200
Subject: [PATCH 441/853] ide: move ide_setup_pci_controller() call to
 ide_setup_pci_device[s]()

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/setup-pci.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 58f4a95f6f5..98c663d62b9 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -488,10 +488,6 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 {
 	int pciirq, ret;
 
-	ret = ide_setup_pci_controller(dev, d, noisy);
-	if (ret < 0)
-		goto out;
-
 	/*
 	 * Can we trust the reported IRQ?
 	 */
@@ -534,6 +530,10 @@ int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 	int ret;
 
+	ret = ide_setup_pci_controller(dev, d, 1);
+	if (ret < 0)
+		goto out;
+
 	ret = do_ide_setup_pci_device(dev, d, 1);
 
 	if (ret >= 0) {
@@ -542,7 +542,7 @@ int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 
 		ret = ide_host_add(d, hws, NULL);
 	}
-
+out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ide_setup_pci_device);
@@ -555,6 +555,10 @@ int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
 	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
 	for (i = 0; i < 2; i++) {
+		ret = ide_setup_pci_controller(pdev[i], d, !i);
+		if (ret < 0)
+			goto out;
+
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
 
 		/*
-- 
GitLab


From 8c2eece50a368c7986bae0b3e52739558dd71b51 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:12 +0200
Subject: [PATCH 442/853] ide: call ide_pci_setup_ports() before
 do_ide_setup_pci_device()

* Call ide_pci_setup_ports() before do_ide_setup_pci_device()
  in ide_setup_pci_device[s]().

While at it:

* Remove stale FIXMEs.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/setup-pci.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 98c663d62b9..b85de71fdc8 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -534,14 +534,16 @@ int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 	if (ret < 0)
 		goto out;
 
+	ide_pci_setup_ports(dev, d, 0, &hw[0], &hws[0]);
+
 	ret = do_ide_setup_pci_device(dev, d, 1);
+	if (ret < 0)
+		goto out;
 
-	if (ret >= 0) {
-		/* FIXME: silent failure can happen */
-		ide_pci_setup_ports(dev, d, ret, &hw[0], &hws[0]);
+	/* fixup IRQ */
+	hw[1].irq = hw[0].irq = ret;
 
-		ret = ide_host_add(d, hws, NULL);
-	}
+	ret = ide_host_add(d, hws, NULL);
 out:
 	return ret;
 }
@@ -559,6 +561,8 @@ int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
 		if (ret < 0)
 			goto out;
 
+		ide_pci_setup_ports(pdev[i], d, 0, &hw[i*2], &hws[i*2]);
+
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
 
 		/*
@@ -568,8 +572,8 @@ int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
 		if (ret < 0)
 			goto out;
 
-		/* FIXME: silent failure can happen */
-		ide_pci_setup_ports(pdev[i], d, ret, &hw[i*2], &hws[i*2]);
+		/* fixup IRQ */
+		hw[i*2 + 1].irq = hw[i*2].irq = ret;
 	}
 
 	ret = ide_host_add(d, hws, NULL);
-- 
GitLab


From 6cdf6eb357c2681596b7b1672b92396ba82333d4 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:14 +0200
Subject: [PATCH 443/853] ide: add ->dev and ->host_priv fields to struct
 ide_host

* Add 'struct device *dev[2]' and 'void *host_priv' fields
  to struct ide_host.

* Set ->dev[] in ide_host_alloc_all()/ide_setup_pci_device[s]().

* Pass 'void *priv' argument to ide_setup_pci_device[s]()
  and use it to set ->host_priv.

* Set the PCI device's ->driver_data to point to the struct ide_host
  instance if the PCI host driver wants to use ->host_priv (i.e. it
  passes a non-NULL 'priv').

* Rename ide_setup_pci_device[s]() to ide_pci_init_{one,two}().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c        |  3 ++
 drivers/ide/pci/aec62xx.c      |  2 +-
 drivers/ide/pci/alim15x3.c     |  2 +-
 drivers/ide/pci/amd74xx.c      |  2 +-
 drivers/ide/pci/atiixp.c       |  2 +-
 drivers/ide/pci/cmd64x.c       |  2 +-
 drivers/ide/pci/cs5530.c       |  2 +-
 drivers/ide/pci/cs5535.c       |  2 +-
 drivers/ide/pci/cy82c693.c     |  2 +-
 drivers/ide/pci/generic.c      |  2 +-
 drivers/ide/pci/hpt34x.c       |  2 +-
 drivers/ide/pci/hpt366.c       |  4 +--
 drivers/ide/pci/it8213.c       |  2 +-
 drivers/ide/pci/it821x.c       |  2 +-
 drivers/ide/pci/jmicron.c      |  2 +-
 drivers/ide/pci/ns87415.c      |  2 +-
 drivers/ide/pci/opti621.c      |  2 +-
 drivers/ide/pci/pdc202xx_new.c |  4 +--
 drivers/ide/pci/pdc202xx_old.c |  2 +-
 drivers/ide/pci/piix.c         |  2 +-
 drivers/ide/pci/rz1000.c       |  2 +-
 drivers/ide/pci/sc1200.c       |  2 +-
 drivers/ide/pci/serverworks.c  |  2 +-
 drivers/ide/pci/siimage.c      |  2 +-
 drivers/ide/pci/sis5513.c      |  2 +-
 drivers/ide/pci/sl82c105.c     |  2 +-
 drivers/ide/pci/slc90e66.c     |  2 +-
 drivers/ide/pci/tc86c001.c     |  2 +-
 drivers/ide/pci/triflex.c      |  2 +-
 drivers/ide/pci/trm290.c       |  2 +-
 drivers/ide/pci/via82cxxx.c    |  2 +-
 drivers/ide/setup-pci.c        | 52 +++++++++++++++++++++++++++++-----
 include/linux/ide.h            |  7 +++--
 33 files changed, 85 insertions(+), 41 deletions(-)

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 4aa76c45375..890c15b1b3a 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1604,6 +1604,9 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d,
 		return NULL;
 	}
 
+	if (hws[0])
+		host->dev[0] = hws[0]->dev;
+
 	return host;
 }
 EXPORT_SYMBOL_GPL(ide_host_alloc_all);
diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index fbc43e121e6..7a5d246fe9b 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -273,7 +273,7 @@ static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_devi
 		}
 	}
 
-	err = ide_setup_pci_device(dev, &d);
+	err = ide_pci_init_one(dev, &d, NULL);
 	if (err)
 		pci_disable_device(dev);
 
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index 5ef7817ac64..7f96e7ca386 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -565,7 +565,7 @@ static int __devinit alim15x3_init_one(struct pci_dev *dev, const struct pci_dev
 	if (idx == 0)
 		d.host_flags |= IDE_HFLAG_CLEAR_SIMPLEX;
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index ef7d971031e..b6a475313c7 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -302,7 +302,7 @@ static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_
 			 d.name, pci_name(dev), dev->revision,
 			 amd_dma[fls(d.udma_mask) - 1]);
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id amd74xx_pci_tbl[] = {
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index 8b637181681..b483a68b39f 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -167,7 +167,7 @@ static const struct ide_port_info atiixp_pci_info[] __devinitdata = {
 
 static int __devinit atiixp_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &atiixp_pci_info[id->driver_data]);
+	return ide_pci_init_one(dev, &atiixp_pci_info[id->driver_data], NULL);
 }
 
 static const struct pci_device_id atiixp_pci_tbl[] = {
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index ce58bfcdb3c..fc0333c9a4e 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -507,7 +507,7 @@ static int __devinit cmd64x_init_one(struct pci_dev *dev, const struct pci_devic
 		}
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id cmd64x_pci_tbl[] = {
diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index f5534c1ff34..ba82bad8bf4 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -256,7 +256,7 @@ static const struct ide_port_info cs5530_chipset __devinitdata = {
 
 static int __devinit cs5530_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &cs5530_chipset);
+	return ide_pci_init_one(dev, &cs5530_chipset, NULL);
 }
 
 static const struct pci_device_id cs5530_pci_tbl[] = {
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c
index 5404fe4f701..2161f43ca1b 100644
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -180,7 +180,7 @@ static const struct ide_port_info cs5535_chipset __devinitdata = {
 static int __devinit cs5535_init_one(struct pci_dev *dev,
 					const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &cs5535_chipset);
+	return ide_pci_init_one(dev, &cs5535_chipset, NULL);
 }
 
 static const struct pci_device_id cs5535_pci_tbl[] = {
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index e14ad5530fa..abd27ed7c30 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -419,7 +419,7 @@ static int __devinit cy82c693_init_one(struct pci_dev *dev, const struct pci_dev
 	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE &&
 	    PCI_FUNC(dev->devfn) == 1) {
 		dev2 = pci_get_slot(dev->bus, dev->devfn + 1);
-		ret = ide_setup_pci_devices(dev, dev2, &cy82c693_chipset);
+		ret = ide_pci_init_two(dev, dev2, &cy82c693_chipset, NULL);
 		/* We leak pci refs here but thats ok - we can't be unloaded */
 	}
 	return ret;
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index 041720e2276..dd0caea5e4f 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -139,7 +139,7 @@ static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_devi
 			goto out;
 		}
 	}
-	ret = ide_setup_pci_device(dev, d);
+	ret = ide_pci_init_one(dev, d, NULL);
 out:
 	return ret;
 }
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index 9e1d1c4741d..3d70c5150ac 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -156,7 +156,7 @@ static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_devic
 
 	d = &hpt34x_chipsets[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0];
 
-	return ide_setup_pci_device(dev, d);
+	return ide_pci_init_one(dev, d, NULL);
 }
 
 static const struct pci_device_id hpt34x_pci_tbl[] = {
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 1f1135ce7cd..b23b7a27800 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1608,13 +1608,13 @@ static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_devic
 				d.host_flags &= ~IDE_HFLAG_NON_BOOTABLE;
 		}
 
-		ret = ide_setup_pci_devices(dev, dev2, &d);
+		ret = ide_pci_init_two(dev, dev2, &d, NULL);
 		if (ret < 0)
 			pci_dev_put(dev2);
 		return ret;
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id hpt366_pci_tbl[] __devinitconst = {
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index 2b71bdf74e7..18219fa9ef0 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -184,7 +184,7 @@ static const struct ide_port_info it8213_chipsets[] __devinitdata = {
 
 static int __devinit it8213_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &it8213_chipsets[id->driver_data]);
+	return ide_pci_init_one(dev, &it8213_chipsets[id->driver_data], NULL);
 }
 
 static const struct pci_device_id it8213_pci_tbl[] = {
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index cbf64720299..40186f9e56a 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -664,7 +664,7 @@ static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_devic
 
 	pci_set_drvdata(dev, itdevs);
 
-	return ide_setup_pci_device(dev, &it821x_chipsets[id->driver_data]);
+	return ide_pci_init_one(dev, &it821x_chipsets[id->driver_data], NULL);
 }
 
 static const struct pci_device_id it821x_pci_tbl[] = {
diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c
index 96ef7394f28..a7e3c14f7b0 100644
--- a/drivers/ide/pci/jmicron.c
+++ b/drivers/ide/pci/jmicron.c
@@ -121,7 +121,7 @@ static const struct ide_port_info jmicron_chipset __devinitdata = {
 
 static int __devinit jmicron_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &jmicron_chipset);
+	return ide_pci_init_one(dev, &jmicron_chipset, NULL);
 }
 
 /* All JMB PATA controllers have and will continue to have the same
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index 5cd2b32ff0e..a45c33c0c79 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -324,7 +324,7 @@ static int __devinit ns87415_init_one(struct pci_dev *dev, const struct pci_devi
 		d.tp_ops = &superio_tp_ops;
 	}
 #endif
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id ns87415_pci_tbl[] = {
diff --git a/drivers/ide/pci/opti621.c b/drivers/ide/pci/opti621.c
index 725c80508d9..edb9132ffbe 100644
--- a/drivers/ide/pci/opti621.c
+++ b/drivers/ide/pci/opti621.c
@@ -209,7 +209,7 @@ static const struct ide_port_info opti621_chipset __devinitdata = {
 
 static int __devinit opti621_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &opti621_chipset);
+	return ide_pci_init_one(dev, &opti621_chipset, NULL);
 }
 
 static const struct pci_device_id opti621_pci_tbl[] = {
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index 070df8ab3b2..71a420feb98 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -524,7 +524,7 @@ static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_de
 		dev2 = pdc20270_get_dev2(dev);
 
 		if (dev2) {
-			int ret = ide_setup_pci_devices(dev, dev2, d);
+			int ret = ide_pci_init_two(dev, dev2, d, NULL);
 			if (ret < 0)
 				pci_dev_put(dev2);
 			return ret;
@@ -540,7 +540,7 @@ static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_de
 		return -ENODEV;
 	}
 
-	return ide_setup_pci_device(dev, d);
+	return ide_pci_init_one(dev, d, NULL);
 }
 
 static const struct pci_device_id pdc202new_pci_tbl[] = {
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index e54dc653b8c..eba1d60a73a 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -412,7 +412,7 @@ static int __devinit pdc202xx_init_one(struct pci_dev *dev, const struct pci_dev
 		}
 	}
 
-	return ide_setup_pci_device(dev, d);
+	return ide_pci_init_one(dev, d, NULL);
 }
 
 static const struct pci_device_id pdc202xx_pci_tbl[] = {
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index 0ce41b4ddda..359f65ddcbf 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -394,7 +394,7 @@ static const struct ide_port_info piix_pci_info[] __devinitdata = {
  
 static int __devinit piix_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &piix_pci_info[id->driver_data]);
+	return ide_pci_init_one(dev, &piix_pci_info[id->driver_data], NULL);
 }
 
 /**
diff --git a/drivers/ide/pci/rz1000.c b/drivers/ide/pci/rz1000.c
index 532154adba2..860ffdeca09 100644
--- a/drivers/ide/pci/rz1000.c
+++ b/drivers/ide/pci/rz1000.c
@@ -48,7 +48,7 @@ static const struct ide_port_info rz1000_chipset __devinitdata = {
 
 static int __devinit rz1000_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &rz1000_chipset);
+	return ide_pci_init_one(dev, &rz1000_chipset, NULL);
 }
 
 static const struct pci_device_id rz1000_pci_tbl[] = {
diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c
index 14c787b5d95..8fd9cc2119d 100644
--- a/drivers/ide/pci/sc1200.c
+++ b/drivers/ide/pci/sc1200.c
@@ -317,7 +317,7 @@ static const struct ide_port_info sc1200_chipset __devinitdata = {
 
 static int __devinit sc1200_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &sc1200_chipset);
+	return ide_pci_init_one(dev, &sc1200_chipset, NULL);
 }
 
 static const struct pci_device_id sc1200_pci_tbl[] = {
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index 127ccb45e26..34abdfc8d56 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -422,7 +422,7 @@ static int __devinit svwks_init_one(struct pci_dev *dev, const struct pci_device
 			d.host_flags &= ~IDE_HFLAG_SINGLE;
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id svwks_pci_tbl[] = {
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 5965a35d94a..48124133601 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -795,7 +795,7 @@ static int __devinit siimage_init_one(struct pci_dev *dev,
 		d.host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id siimage_pci_tbl[] = {
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index 2389945ca95..a2330c4ac75 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -583,7 +583,7 @@ static int __devinit sis5513_init_one(struct pci_dev *dev, const struct pci_devi
 
 	d.udma_mask = udma_rates[chipset_family];
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id sis5513_pci_tbl[] = {
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index f82a6502c1b..be22f8125d7 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -335,7 +335,7 @@ static int __devinit sl82c105_init_one(struct pci_dev *dev, const struct pci_dev
 		d.host_flags &= ~IDE_HFLAG_SERIALIZE_DMA;
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id sl82c105_pci_tbl[] = {
diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c
index dae6e2c94d8..2fc2f2cf220 100644
--- a/drivers/ide/pci/slc90e66.c
+++ b/drivers/ide/pci/slc90e66.c
@@ -144,7 +144,7 @@ static const struct ide_port_info slc90e66_chipset __devinitdata = {
 
 static int __devinit slc90e66_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &slc90e66_chipset);
+	return ide_pci_init_one(dev, &slc90e66_chipset, NULL);
 }
 
 static const struct pci_device_id slc90e66_pci_tbl[] = {
diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c
index 477e1979010..e16e79d2177 100644
--- a/drivers/ide/pci/tc86c001.c
+++ b/drivers/ide/pci/tc86c001.c
@@ -215,7 +215,7 @@ static const struct ide_port_info tc86c001_chipset __devinitdata = {
 static int __devinit tc86c001_init_one(struct pci_dev *dev,
 				       const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &tc86c001_chipset);
+	return ide_pci_init_one(dev, &tc86c001_chipset, NULL);
 }
 
 static const struct pci_device_id tc86c001_pci_tbl[] = {
diff --git a/drivers/ide/pci/triflex.c b/drivers/ide/pci/triflex.c
index db65a558d4e..60dcb645d1b 100644
--- a/drivers/ide/pci/triflex.c
+++ b/drivers/ide/pci/triflex.c
@@ -104,7 +104,7 @@ static const struct ide_port_info triflex_device __devinitdata = {
 static int __devinit triflex_init_one(struct pci_dev *dev, 
 		const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &triflex_device);
+	return ide_pci_init_one(dev, &triflex_device, NULL);
 }
 
 static const struct pci_device_id triflex_pci_tbl[] = {
diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c
index a8a3138682e..d8127b51a54 100644
--- a/drivers/ide/pci/trm290.c
+++ b/drivers/ide/pci/trm290.c
@@ -340,7 +340,7 @@ static const struct ide_port_info trm290_chipset __devinitdata = {
 
 static int __devinit trm290_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &trm290_chipset);
+	return ide_pci_init_one(dev, &trm290_chipset, NULL);
 }
 
 static const struct pci_device_id trm290_pci_tbl[] = {
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 09dc4803ef9..2f22abfe003 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -466,7 +466,7 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 
 	d.udma_mask = via_config->udma_mask;
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id via_pci_tbl[] = {
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index b85de71fdc8..ca17bf8896d 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -525,8 +525,10 @@ out:
 	return ret;
 }
 
-int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
+int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
+		     void *priv)
 {
+	struct ide_host *host;
 	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 	int ret;
 
@@ -536,6 +538,19 @@ int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 
 	ide_pci_setup_ports(dev, d, 0, &hw[0], &hws[0]);
 
+	host = ide_host_alloc(d, hws);
+	if (host == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	host->dev[0] = &dev->dev;
+
+	host->host_priv = priv;
+
+	if (priv)
+		pci_set_drvdata(dev, host);
+
 	ret = do_ide_setup_pci_device(dev, d, 1);
 	if (ret < 0)
 		goto out;
@@ -543,16 +558,19 @@ int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 	/* fixup IRQ */
 	hw[1].irq = hw[0].irq = ret;
 
-	ret = ide_host_add(d, hws, NULL);
+	ret = ide_host_register(host, d, hws);
+	if (ret)
+		ide_host_free(host);
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(ide_setup_pci_device);
+EXPORT_SYMBOL_GPL(ide_pci_init_one);
 
-int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
-			  const struct ide_port_info *d)
+int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
+		     const struct ide_port_info *d, void *priv)
 {
 	struct pci_dev *pdev[] = { dev1, dev2 };
+	struct ide_host *host;
 	int ret, i;
 	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
@@ -562,7 +580,25 @@ int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
 			goto out;
 
 		ide_pci_setup_ports(pdev[i], d, 0, &hw[i*2], &hws[i*2]);
+	}
 
+	host = ide_host_alloc(d, hws);
+	if (host == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	host->dev[0] = &dev1->dev;
+	host->dev[1] = &dev2->dev;
+
+	host->host_priv = priv;
+
+	if (priv) {
+		pci_set_drvdata(pdev[0], host);
+		pci_set_drvdata(pdev[1], host);
+	}
+
+	for (i = 0; i < 2; i++) {
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
 
 		/*
@@ -576,8 +612,10 @@ int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
 		hw[i*2 + 1].irq = hw[i*2].irq = ret;
 	}
 
-	ret = ide_host_add(d, hws, NULL);
+	ret = ide_host_register(host, d, hws);
+	if (ret)
+		ide_host_free(host);
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(ide_setup_pci_devices);
+EXPORT_SYMBOL_GPL(ide_pci_init_two);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index d67ccca2b96..776c574c964 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -626,6 +626,8 @@ typedef struct hwif_s {
 struct ide_host {
 	ide_hwif_t	*ports[MAX_HWIFS];
 	unsigned int	n_ports;
+	struct device	*dev[2];
+	void		*host_priv;
 };
 
 /*
@@ -1201,8 +1203,9 @@ struct ide_port_info {
 	u8			udma_mask;
 };
 
-int ide_setup_pci_device(struct pci_dev *, const struct ide_port_info *);
-int ide_setup_pci_devices(struct pci_dev *, struct pci_dev *, const struct ide_port_info *);
+int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *);
+int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
+		     const struct ide_port_info *, void *);
 
 void ide_map_sg(ide_drive_t *, struct request *);
 void ide_init_sg_cmd(ide_drive_t *, struct request *);
-- 
GitLab


From 08da591e14cf87247ec09b17c350235157a92fc3 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:15 +0200
Subject: [PATCH 444/853] ide: add ide_device_{get,put}() helpers

* Add 'struct ide_host *host' field to ide_hwif_t and set it
  in ide_host_alloc_all().

* Add ide_device_{get,put}() helpers loosely based on SCSI's
  scsi_device_{get,put}() ones.

* Convert IDE device drivers to use ide_device_{get,put}().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c     | 12 +++++++---
 drivers/ide/ide-disk.c   | 12 +++++++---
 drivers/ide/ide-floppy.c | 12 +++++++---
 drivers/ide/ide-probe.c  |  2 ++
 drivers/ide/ide-tape.c   | 12 +++++++---
 drivers/ide/ide.c        | 47 ++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/ide-scsi.c  |  8 ++++++-
 include/linux/ide.h      |  7 ++++++
 8 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 4e73aeee405..8f253e5f26a 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -57,23 +57,29 @@ static DEFINE_MUTEX(idecd_ref_mutex);
 #define ide_cd_g(disk) \
 	container_of((disk)->private_data, struct cdrom_info, driver)
 
+static void ide_cd_release(struct kref *);
+
 static struct cdrom_info *ide_cd_get(struct gendisk *disk)
 {
 	struct cdrom_info *cd = NULL;
 
 	mutex_lock(&idecd_ref_mutex);
 	cd = ide_cd_g(disk);
-	if (cd)
+	if (cd) {
 		kref_get(&cd->kref);
+		if (ide_device_get(cd->drive)) {
+			kref_put(&cd->kref, ide_cd_release);
+			cd = NULL;
+		}
+	}
 	mutex_unlock(&idecd_ref_mutex);
 	return cd;
 }
 
-static void ide_cd_release(struct kref *);
-
 static void ide_cd_put(struct cdrom_info *cd)
 {
 	mutex_lock(&idecd_ref_mutex);
+	ide_device_put(cd->drive);
 	kref_put(&cd->kref, ide_cd_release);
 	mutex_unlock(&idecd_ref_mutex);
 }
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index df5fe575687..28d85b410f7 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -56,23 +56,29 @@ static DEFINE_MUTEX(idedisk_ref_mutex);
 #define ide_disk_g(disk) \
 	container_of((disk)->private_data, struct ide_disk_obj, driver)
 
+static void ide_disk_release(struct kref *);
+
 static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
 {
 	struct ide_disk_obj *idkp = NULL;
 
 	mutex_lock(&idedisk_ref_mutex);
 	idkp = ide_disk_g(disk);
-	if (idkp)
+	if (idkp) {
 		kref_get(&idkp->kref);
+		if (ide_device_get(idkp->drive)) {
+			kref_put(&idkp->kref, ide_disk_release);
+			idkp = NULL;
+		}
+	}
 	mutex_unlock(&idedisk_ref_mutex);
 	return idkp;
 }
 
-static void ide_disk_release(struct kref *);
-
 static void ide_disk_put(struct ide_disk_obj *idkp)
 {
 	mutex_lock(&idedisk_ref_mutex);
+	ide_device_put(idkp->drive);
 	kref_put(&idkp->kref, ide_disk_release);
 	mutex_unlock(&idedisk_ref_mutex);
 }
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 3d8e6dd0f41..ca11a26746f 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -158,23 +158,29 @@ static DEFINE_MUTEX(idefloppy_ref_mutex);
 #define ide_floppy_g(disk) \
 	container_of((disk)->private_data, struct ide_floppy_obj, driver)
 
+static void idefloppy_cleanup_obj(struct kref *);
+
 static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
 {
 	struct ide_floppy_obj *floppy = NULL;
 
 	mutex_lock(&idefloppy_ref_mutex);
 	floppy = ide_floppy_g(disk);
-	if (floppy)
+	if (floppy) {
 		kref_get(&floppy->kref);
+		if (ide_device_get(floppy->drive)) {
+			kref_put(&floppy->kref, idefloppy_cleanup_obj);
+			floppy = NULL;
+		}
+	}
 	mutex_unlock(&idefloppy_ref_mutex);
 	return floppy;
 }
 
-static void idefloppy_cleanup_obj(struct kref *);
-
 static void ide_floppy_put(struct ide_floppy_obj *floppy)
 {
 	mutex_lock(&idefloppy_ref_mutex);
+	ide_device_put(floppy->drive);
 	kref_put(&floppy->kref, idefloppy_cleanup_obj);
 	mutex_unlock(&idefloppy_ref_mutex);
 }
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 890c15b1b3a..9ab5892eaea 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1595,6 +1595,8 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d,
 
 		ide_init_port_data(hwif, idx);
 
+		hwif->host = host;
+
 		host->ports[i] = hwif;
 		host->n_ports++;
 	}
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 6962ca4891a..789f3428f07 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -322,23 +322,29 @@ static struct class *idetape_sysfs_class;
 #define ide_tape_g(disk) \
 	container_of((disk)->private_data, struct ide_tape_obj, driver)
 
+static void ide_tape_release(struct kref *);
+
 static struct ide_tape_obj *ide_tape_get(struct gendisk *disk)
 {
 	struct ide_tape_obj *tape = NULL;
 
 	mutex_lock(&idetape_ref_mutex);
 	tape = ide_tape_g(disk);
-	if (tape)
+	if (tape) {
 		kref_get(&tape->kref);
+		if (ide_device_get(tape->drive)) {
+			kref_put(&tape->kref, ide_tape_release);
+			tape = NULL;
+		}
+	}
 	mutex_unlock(&idetape_ref_mutex);
 	return tape;
 }
 
-static void ide_tape_release(struct kref *);
-
 static void ide_tape_put(struct ide_tape_obj *tape)
 {
 	mutex_lock(&idetape_ref_mutex);
+	ide_device_put(tape->drive);
 	kref_put(&tape->kref, ide_tape_release);
 	mutex_unlock(&idetape_ref_mutex);
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 60f0ca66aa9..772451600e4 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -618,6 +618,53 @@ set_val:
 
 EXPORT_SYMBOL(generic_ide_ioctl);
 
+/**
+ * ide_device_get	-	get an additional reference to an ide_drive_t
+ * @drive:	device to get a reference to
+ *
+ * Gets a reference to the ide_drive_t and increments the use count of the
+ * underlying LLDD module.
+ */
+int ide_device_get(ide_drive_t *drive)
+{
+	struct device *host_dev;
+	struct module *module;
+
+	if (!get_device(&drive->gendev))
+		return -ENXIO;
+
+	host_dev = drive->hwif->host->dev[0];
+	module = host_dev ? host_dev->driver->owner : NULL;
+
+	if (module && !try_module_get(module)) {
+		put_device(&drive->gendev);
+		return -ENXIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ide_device_get);
+
+/**
+ * ide_device_put	-	release a reference to an ide_drive_t
+ * @drive:	device to release a reference on
+ *
+ * Release a reference to the ide_drive_t and decrements the use count of
+ * the underlying LLDD module.
+ */
+void ide_device_put(ide_drive_t *drive)
+{
+#ifdef CONFIG_MODULE_UNLOAD
+	struct device *host_dev = drive->hwif->host->dev[0];
+	struct module *module = host_dev ? host_dev->driver->owner : NULL;
+
+	if (module)
+		module_put(module);
+#endif
+	put_device(&drive->gendev);
+}
+EXPORT_SYMBOL_GPL(ide_device_put);
+
 static int ide_bus_match(struct device *dev, struct device_driver *drv)
 {
 	return 1;
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 538552495d4..318ef382448 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -101,8 +101,13 @@ static struct ide_scsi_obj *ide_scsi_get(struct gendisk *disk)
 
 	mutex_lock(&idescsi_ref_mutex);
 	scsi = ide_scsi_g(disk);
-	if (scsi)
+	if (scsi) {
 		scsi_host_get(scsi->host);
+		if (ide_device_get(scsi->drive)) {
+			scsi_host_put(scsi->host);
+			scsi = NULL;
+		}
+	}
 	mutex_unlock(&idescsi_ref_mutex);
 	return scsi;
 }
@@ -110,6 +115,7 @@ static struct ide_scsi_obj *ide_scsi_get(struct gendisk *disk)
 static void ide_scsi_put(struct ide_scsi_obj *scsi)
 {
 	mutex_lock(&idescsi_ref_mutex);
+	ide_device_put(scsi->drive);
 	scsi_host_put(scsi->host);
 	mutex_unlock(&idescsi_ref_mutex);
 }
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 776c574c964..3eccac0a2a3 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -532,12 +532,16 @@ struct ide_dma_ops {
 	void	(*dma_timeout)(struct ide_drive_s *);
 };
 
+struct ide_host;
+
 typedef struct hwif_s {
 	struct hwif_s *next;		/* for linked-list in ide_hwgroup_t */
 	struct hwif_s *mate;		/* other hwif from same PCI chip */
 	struct hwgroup_s *hwgroup;	/* actually (ide_hwgroup_t *) */
 	struct proc_dir_entry *proc;	/* /proc/ide/ directory entry */
 
+	struct ide_host *host;
+
 	char name[6];			/* name of interface, eg. "ide0" */
 
 	struct ide_io_ports	io_ports;
@@ -876,6 +880,9 @@ struct ide_driver_s {
 
 #define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver)
 
+int ide_device_get(ide_drive_t *);
+void ide_device_put(ide_drive_t *);
+
 int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long);
 
 extern int ide_vlb_clk;
-- 
GitLab


From 60e57ed7c12917932a01d1679d92a7a8735afbce Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:15 +0200
Subject: [PATCH 445/853] aec62xx: convert to use ->host_priv

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/aec62xx.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index 7a5d246fe9b..7ca7989bc35 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -59,10 +59,6 @@ static const struct chipset_bus_clock_list_entry aec6xxx_34_base [] = {
 	{	0,		0x00,	0x00	}
 };
 
-#define BUSCLOCK(D)	\
-	((struct chipset_bus_clock_list_entry *) pci_get_drvdata((D)))
-
-
 /*
  * TO DO: active tuning and correction of cards without a bios.
  */
@@ -88,6 +84,8 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
 	u16 d_conf		= 0;
 	u8 ultra = 0, ultra_conf = 0;
 	u8 tmp0 = 0, tmp1 = 0, tmp2 = 0;
@@ -96,7 +94,7 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
 	local_irq_save(flags);
 	/* 0x40|(2*drive->dn): Active, 0x41|(2*drive->dn): Recovery */
 	pci_read_config_word(dev, 0x40|(2*drive->dn), &d_conf);
-	tmp0 = pci_bus_clock_list(speed, BUSCLOCK(dev));
+	tmp0 = pci_bus_clock_list(speed, bus_clock);
 	d_conf = ((tmp0 & 0xf0) << 4) | (tmp0 & 0xf);
 	pci_write_config_word(dev, 0x40|(2*drive->dn), d_conf);
 
@@ -104,7 +102,7 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
 	tmp2 = 0x00;
 	pci_read_config_byte(dev, 0x54, &ultra);
 	tmp1 = ((0x00 << (2*drive->dn)) | (ultra & ~(3 << (2*drive->dn))));
-	ultra_conf = pci_bus_clock_list_ultra(speed, BUSCLOCK(dev));
+	ultra_conf = pci_bus_clock_list_ultra(speed, bus_clock);
 	tmp2 = ((ultra_conf << (2*drive->dn)) | (tmp1 & ~(3 << (2*drive->dn))));
 	pci_write_config_byte(dev, 0x54, tmp2);
 	local_irq_restore(flags);
@@ -114,6 +112,8 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
 	u8 unit		= (drive->select.b.unit & 0x01);
 	u8 tmp1 = 0, tmp2 = 0;
 	u8 ultra = 0, drive_conf = 0, ultra_conf = 0;
@@ -122,12 +122,12 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
 	local_irq_save(flags);
 	/* high 4-bits: Active, low 4-bits: Recovery */
 	pci_read_config_byte(dev, 0x40|drive->dn, &drive_conf);
-	drive_conf = pci_bus_clock_list(speed, BUSCLOCK(dev));
+	drive_conf = pci_bus_clock_list(speed, bus_clock);
 	pci_write_config_byte(dev, 0x40|drive->dn, drive_conf);
 
 	pci_read_config_byte(dev, (0x44|hwif->channel), &ultra);
 	tmp1 = ((0x00 << (4*unit)) | (ultra & ~(7 << (4*unit))));
-	ultra_conf = pci_bus_clock_list_ultra(speed, BUSCLOCK(dev));
+	ultra_conf = pci_bus_clock_list_ultra(speed, bus_clock);
 	tmp2 = ((ultra_conf << (4*unit)) | (tmp1 & ~(7 << (4*unit))));
 	pci_write_config_byte(dev, (0x44|hwif->channel), tmp2);
 	local_irq_restore(flags);
@@ -140,13 +140,6 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name)
 {
-	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
-
-	if (bus_speed <= 33)
-		pci_set_drvdata(dev, (void *) aec6xxx_33_base);
-	else
-		pci_set_drvdata(dev, (void *) aec6xxx_34_base);
-
 	/* These are necessary to get AEC6280 Macintosh cards to work */
 	if ((dev->device == PCI_DEVICE_ID_ARTOP_ATP865) ||
 	    (dev->device == PCI_DEVICE_ID_ARTOP_ATP865R)) {
@@ -254,10 +247,17 @@ static const struct ide_port_info aec62xx_chipsets[] __devinitdata = {
 
 static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
+	const struct chipset_bus_clock_list_entry *bus_clock;
 	struct ide_port_info d;
 	u8 idx = id->driver_data;
+	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
 	int err;
 
+	if (bus_speed <= 33)
+		bus_clock = aec6xxx_33_base;
+	else
+		bus_clock = aec6xxx_34_base;
+
 	err = pci_enable_device(dev);
 	if (err)
 		return err;
@@ -273,7 +273,7 @@ static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_devi
 		}
 	}
 
-	err = ide_pci_init_one(dev, &d, NULL);
+	err = ide_pci_init_one(dev, &d, (void *)bus_clock);
 	if (err)
 		pci_disable_device(dev);
 
-- 
GitLab


From 74811f355f4f69a187fa74892dcf2a684b84ce99 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:15 +0200
Subject: [PATCH 446/853] hpt366: convert to use ->host_priv

While at it:

* Allocate both struct hpt_info instances at once.

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/hpt366.c | 72 +++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 31 deletions(-)

diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index b23b7a27800..8f29571345a 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -620,7 +620,8 @@ static u8 hpt3xx_udma_filter(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	struct hpt_info *info	= pci_get_drvdata(dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct hpt_info *info	= (struct hpt_info *)host->host_priv + (hwif->dev == host->dev[1]);
 	u8 mask 		= hwif->ultra_mask;
 
 	switch (info->chip_type) {
@@ -660,7 +661,8 @@ static u8 hpt3xx_mdma_filter(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	struct hpt_info *info	= pci_get_drvdata(dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct hpt_info *info	= (struct hpt_info *)host->host_priv + (hwif->dev == host->dev[1]);
 
 	switch (info->chip_type) {
 	case HPT372 :
@@ -694,8 +696,10 @@ static u32 get_speed_setting(u8 speed, struct hpt_info *info)
 
 static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	struct pci_dev  *dev	= to_pci_dev(drive->hwif->dev);
-	struct hpt_info	*info	= pci_get_drvdata(dev);
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev	= to_pci_dev(hwif->dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct hpt_info *info	= (struct hpt_info *)host->host_priv + (hwif->dev == host->dev[1]);
 	struct hpt_timings *t	= info->timings;
 	u8  itr_addr		= 0x40 + (drive->dn * 4);
 	u32 old_itr		= 0;
@@ -738,7 +742,8 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
-	struct hpt_info *info	= pci_get_drvdata(dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct hpt_info *info	= (struct hpt_info *)host->host_priv + (hwif->dev == host->dev[1]);
 
 	if (drive->quirk_list) {
 		if (info->chip_type >= HPT370) {
@@ -965,22 +970,13 @@ static int __devinit hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f
 
 static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const char *name)
 {
-	struct hpt_info *info	= kmalloc(sizeof(struct hpt_info), GFP_KERNEL);
 	unsigned long io_base	= pci_resource_start(dev, 4);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct hpt_info *info	= (struct hpt_info *)host->host_priv + (&dev->dev == host->dev[1]);
 	u8 pci_clk,  dpll_clk	= 0;	/* PCI and DPLL clock in MHz */
 	u8 chip_type;
 	enum ata_clock	clock;
 
-	if (info == NULL) {
-		printk(KERN_ERR "%s: out of memory!\n", name);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Copy everything from a static "template" structure
-	 * to just allocated per-chip hpt_info structure.
-	 */
-	memcpy(info, pci_get_drvdata(dev), sizeof(struct hpt_info));
 	chip_type = info->chip_type;
 
 	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, (L1_CACHE_BYTES / 4));
@@ -1142,7 +1138,6 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 
 		if (info->timings->clock_table[clock] == NULL) {
 			printk(KERN_ERR "%s: unknown bus timing!\n", name);
-			kfree(info);
 			return -EIO;
 		}
 
@@ -1169,7 +1164,6 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 		}
 		if (adjust == 8) {
 			printk(KERN_ERR "%s: DPLL did not stabilize!\n", name);
-			kfree(info);
 			return -EIO;
 		}
 
@@ -1186,9 +1180,6 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 	info->pci_clk	= pci_clk;
 	info->clock	= clock;
 
-	/* Point to this chip's own instance of the hpt_info structure. */
-	pci_set_drvdata(dev, info);
-
 	if (chip_type >= HPT370) {
 		u8  mcr1, mcr4;
 
@@ -1218,7 +1209,8 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 static u8 __devinit hpt3xx_cable_detect(ide_hwif_t *hwif)
 {
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
-	struct hpt_info *info	= pci_get_drvdata(dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct hpt_info *info	= (struct hpt_info *)host->host_priv + (hwif->dev == host->dev[1]);
 	u8 chip_type		= info->chip_type;
 	u8 scr1 = 0, ata66	= hwif->channel ? 0x01 : 0x02;
 
@@ -1262,7 +1254,8 @@ static u8 __devinit hpt3xx_cable_detect(ide_hwif_t *hwif)
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	struct hpt_info *info	= pci_get_drvdata(dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	struct hpt_info *info	= (struct hpt_info *)host->host_priv + (hwif->dev == host->dev[1]);
 	int serialize		= HPT_SERIALIZE_IO;
 	u8  chip_type		= info->chip_type;
 	u8  new_mcr, old_mcr	= 0;
@@ -1542,10 +1535,12 @@ static const struct ide_port_info hpt366_chipsets[] __devinitdata = {
 static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	const struct hpt_info *info = NULL;
+	struct hpt_info *dyn_info;
 	struct pci_dev *dev2 = NULL;
 	struct ide_port_info d;
 	u8 idx = id->driver_data;
 	u8 rev = dev->revision;
+	int ret;
 
 	if ((idx == 0 || idx == 4) && (PCI_FUNC(dev->devfn) & 1))
 		return -ENODEV;
@@ -1591,15 +1586,24 @@ static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_devic
 	if (info == &hpt370 || info == &hpt370a)
 		d.dma_ops = &hpt370_dma_ops;
 
-	pci_set_drvdata(dev, (void *)info);
-
 	if (info == &hpt36x || info == &hpt374)
 		dev2 = pci_get_slot(dev->bus, dev->devfn + 1);
 
-	if (dev2) {
-		int ret;
+	dyn_info = kzalloc(sizeof(*dyn_info) * (dev2 ? 2 : 1), GFP_KERNEL);
+	if (dyn_info == NULL) {
+		printk(KERN_ERR "%s: out of memory!\n", d.name);
+		pci_dev_put(dev2);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Copy everything from a static "template" structure
+	 * to just allocated per-chip hpt_info structure.
+	 */
+	memcpy(dyn_info, info, sizeof(*dyn_info));
 
-		pci_set_drvdata(dev2, (void *)info);
+	if (dev2) {
+		memcpy(dyn_info + 1, info, sizeof(*dyn_info));
 
 		if (info == &hpt374)
 			hpt374_init(dev, dev2);
@@ -1608,13 +1612,19 @@ static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_devic
 				d.host_flags &= ~IDE_HFLAG_NON_BOOTABLE;
 		}
 
-		ret = ide_pci_init_two(dev, dev2, &d, NULL);
-		if (ret < 0)
+		ret = ide_pci_init_two(dev, dev2, &d, dyn_info);
+		if (ret < 0) {
 			pci_dev_put(dev2);
+			kfree(dyn_info);
+		}
 		return ret;
 	}
 
-	return ide_pci_init_one(dev, &d, NULL);
+	ret = ide_pci_init_one(dev, &d, dyn_info);
+	if (ret < 0)
+		kfree(dyn_info);
+
+	return ret;
 }
 
 static const struct pci_device_id hpt366_pci_tbl[] __devinitconst = {
-- 
GitLab


From 1d76d9dc448d5a6fc7b49ba06c634aa6927bcc3d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:16 +0200
Subject: [PATCH 447/853] it821x: convert to use ->host_priv

While at it:

* Allocate both struct it821x_dev instances at once.

* Don't leak itdevs on ide_pci_init_one() failure.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/it821x.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index 40186f9e56a..e63fdafe18f 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -534,8 +534,9 @@ static struct ide_dma_ops it821x_pass_through_dma_ops = {
 static void __devinit init_hwif_it821x(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct it821x_dev **itdevs = (struct it821x_dev **)pci_get_drvdata(dev);
-	struct it821x_dev *idev = itdevs[hwif->channel];
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct it821x_dev *itdevs = host->host_priv;
+	struct it821x_dev *idev = itdevs + hwif->channel;
 	u8 conf;
 
 	ide_set_hwifdata(hwif, idev);
@@ -648,23 +649,20 @@ static const struct ide_port_info it821x_chipsets[] __devinitdata = {
 
 static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	struct it821x_dev *itdevs[2] = { NULL, NULL} , *itdev;
-	unsigned int i;
-
-	for (i = 0; i < 2; i++) {
-		itdev = kzalloc(sizeof(*itdev), GFP_KERNEL);
-		if (itdev == NULL) {
-			kfree(itdevs[0]);
-			printk(KERN_ERR "it821x: out of memory\n");
-			return -ENOMEM;
-		}
+	struct it821x_dev *itdevs;
+	int rc;
 
-		itdevs[i] = itdev;
+	itdevs = kzalloc(2 * sizeof(*itdevs), GFP_KERNEL);
+	if (itdevs == NULL) {
+		printk(KERN_ERR "it821x: out of memory\n");
+		return -ENOMEM;
 	}
 
-	pci_set_drvdata(dev, itdevs);
+	rc = ide_pci_init_one(dev, &it821x_chipsets[id->driver_data], itdevs);
+	if (rc)
+		kfree(itdevs);
 
-	return ide_pci_init_one(dev, &it821x_chipsets[id->driver_data], NULL);
+	return rc;
 }
 
 static const struct pci_device_id it821x_pci_tbl[] = {
-- 
GitLab


From 96776f3b57eb7beb889a4368937cc9d74082a47e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:16 +0200
Subject: [PATCH 448/853] sc1200: convert to use ->host_priv

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sc1200.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c
index 8fd9cc2119d..fa2ce76837d 100644
--- a/drivers/ide/pci/sc1200.c
+++ b/drivers/ide/pci/sc1200.c
@@ -234,20 +234,10 @@ static int sc1200_suspend (struct pci_dev *dev, pm_message_t state)
 	 * we only save state when going from full power to less
 	 */
 	if (state.event == PM_EVENT_ON) {
-		struct sc1200_saved_state *ss;
+		struct ide_host *host = pci_get_drvdata(dev);
+		struct sc1200_saved_state *ss = host->host_priv;
 		unsigned int r;
 
-		/*
-		 * allocate a permanent save area, if not already allocated
-		 */
-		ss = (struct sc1200_saved_state *)pci_get_drvdata(dev);
-		if (ss == NULL) {
-			ss = kmalloc(sizeof(*ss), GFP_KERNEL);
-			if (ss == NULL)
-				return -ENOMEM;
-			pci_set_drvdata(dev, ss);
-		}
-
 		/*
 		 * save timing registers
 		 * (this may be unnecessary if BIOS also does it)
@@ -263,7 +253,8 @@ static int sc1200_suspend (struct pci_dev *dev, pm_message_t state)
 
 static int sc1200_resume (struct pci_dev *dev)
 {
-	struct sc1200_saved_state *ss;
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct sc1200_saved_state *ss = host->host_priv;
 	unsigned int r;
 	int i;
 
@@ -271,16 +262,12 @@ static int sc1200_resume (struct pci_dev *dev)
 	if (i)
 		return i;
 
-	ss = (struct sc1200_saved_state *)pci_get_drvdata(dev);
-
 	/*
 	 * restore timing registers
 	 * (this may be unnecessary if BIOS also does it)
 	 */
-	if (ss) {
-		for (r = 0; r < 8; r++)
-			pci_write_config_dword(dev, 0x40 + r * 4, ss->regs[r]);
-	}
+	for (r = 0; r < 8; r++)
+		pci_write_config_dword(dev, 0x40 + r * 4, ss->regs[r]);
 
 	return 0;
 }
@@ -317,7 +304,19 @@ static const struct ide_port_info sc1200_chipset __devinitdata = {
 
 static int __devinit sc1200_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_pci_init_one(dev, &sc1200_chipset, NULL);
+	struct sc1200_saved_state *ss = NULL;
+	int rc;
+
+#ifdef CONFIG_PM
+	ss = kmalloc(sizeof(*ss), GFP_KERNEL);
+	if (ss == NULL)
+		return -ENOMEM;
+#endif
+	rc = ide_pci_init_one(dev, &sc1200_chipset, ss);
+	if (rc)
+		kfree(ss);
+
+	return rc;
 }
 
 static const struct pci_device_id sc1200_pci_tbl[] = {
-- 
GitLab


From 4c674235d667d7ddc6b0c95a228a507eb94da2d6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:16 +0200
Subject: [PATCH 449/853] siimage: convert to use ->host_priv

While at it:

* Reserve PCI BAR 5 in siimage_init_one() and remove no longer needed
  setup_mmio_siimage().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/siimage.c | 114 +++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 56 deletions(-)

diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 48124133601..1388ffa9764 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -127,9 +127,10 @@ static inline unsigned long siimage_seldev(ide_drive_t *drive, int r)
 
 static u8 sil_ioread8(struct pci_dev *dev, unsigned long addr)
 {
+	struct ide_host *host = pci_get_drvdata(dev);
 	u8 tmp = 0;
 
-	if (pci_get_drvdata(dev))
+	if (host->host_priv)
 		tmp = readb((void __iomem *)addr);
 	else
 		pci_read_config_byte(dev, addr, &tmp);
@@ -139,9 +140,10 @@ static u8 sil_ioread8(struct pci_dev *dev, unsigned long addr)
 
 static u16 sil_ioread16(struct pci_dev *dev, unsigned long addr)
 {
+	struct ide_host *host = pci_get_drvdata(dev);
 	u16 tmp = 0;
 
-	if (pci_get_drvdata(dev))
+	if (host->host_priv)
 		tmp = readw((void __iomem *)addr);
 	else
 		pci_read_config_word(dev, addr, &tmp);
@@ -151,7 +153,9 @@ static u16 sil_ioread16(struct pci_dev *dev, unsigned long addr)
 
 static void sil_iowrite8(struct pci_dev *dev, u8 val, unsigned long addr)
 {
-	if (pci_get_drvdata(dev))
+	struct ide_host *host = pci_get_drvdata(dev);
+
+	if (host->host_priv)
 		writeb(val, (void __iomem *)addr);
 	else
 		pci_write_config_byte(dev, addr, val);
@@ -159,7 +163,9 @@ static void sil_iowrite8(struct pci_dev *dev, u8 val, unsigned long addr)
 
 static void sil_iowrite16(struct pci_dev *dev, u16 val, unsigned long addr)
 {
-	if (pci_get_drvdata(dev))
+	struct ide_host *host = pci_get_drvdata(dev);
+
+	if (host->host_priv)
 		writew(val, (void __iomem *)addr);
 	else
 		pci_write_config_word(dev, addr, val);
@@ -167,7 +173,9 @@ static void sil_iowrite16(struct pci_dev *dev, u16 val, unsigned long addr)
 
 static void sil_iowrite32(struct pci_dev *dev, u32 val, unsigned long addr)
 {
-	if (pci_get_drvdata(dev))
+	struct ide_host *host = pci_get_drvdata(dev);
+
+	if (host->host_priv)
 		writel(val, (void __iomem *)addr);
 	else
 		pci_write_config_dword(dev, addr, val);
@@ -444,44 +452,6 @@ static void sil_sata_pre_reset(ide_drive_t *drive)
 	}
 }
 
-/**
- *	setup_mmio_siimage	-	switch controller into MMIO mode
- *	@dev: PCI device we are configuring
- *	@name: device name
- *
- *	Attempt to put the device into MMIO mode. There are some slight
- *	complications here with certain systems where the MMIO BAR isn't
- *	mapped, so we have to be sure that we can fall back to I/O.
- */
-
-static unsigned int setup_mmio_siimage(struct pci_dev *dev, const char *name)
-{
-	resource_size_t bar5	= pci_resource_start(dev, 5);
-	unsigned long barsize	= pci_resource_len(dev, 5);
-	void __iomem *ioaddr;
-
-	/*
-	 *	Drop back to PIO if we can't map the MMIO. Some	systems
-	 *	seem to get terminally confused in the PCI spaces.
-	 */
-	if (!request_mem_region(bar5, barsize, name)) {
-		printk(KERN_WARNING "siimage: IDE controller MMIO ports not "
-				    "available.\n");
-		return 0;
-	}
-
-	ioaddr = ioremap(bar5, barsize);
-	if (ioaddr == NULL) {
-		release_mem_region(bar5, barsize);
-		return 0;
-	}
-
-	pci_set_master(dev);
-	pci_set_drvdata(dev, (void *) ioaddr);
-
-	return 1;
-}
-
 /**
  *	init_chipset_siimage	-	set up an SI device
  *	@dev: PCI device
@@ -494,17 +464,15 @@ static unsigned int setup_mmio_siimage(struct pci_dev *dev, const char *name)
 static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev,
 						   const char *name)
 {
+	struct ide_host *host = pci_get_drvdata(dev);
+	void __iomem *ioaddr = host->host_priv;
 	unsigned long base, scsc_addr;
-	void __iomem *ioaddr = NULL;
-	u8 rev = dev->revision, tmp, BA5_EN;
+	u8 rev = dev->revision, tmp;
 
 	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, rev ? 1 : 255);
 
-	pci_read_config_byte(dev, 0x8A, &BA5_EN);
-
-	if ((BA5_EN & 0x01) || pci_resource_start(dev, 5))
-		if (setup_mmio_siimage(dev, name))
-			ioaddr = pci_get_drvdata(dev);
+	if (ioaddr)
+		pci_set_master(dev);
 
 	base = (unsigned long)ioaddr;
 
@@ -592,7 +560,8 @@ static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev,
 static void __devinit init_mmio_iops_siimage(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	void *addr		= pci_get_drvdata(dev);
+	struct ide_host *host	= pci_get_drvdata(dev);
+	void *addr		= host->host_priv;
 	u8 ch			= hwif->channel;
 	struct ide_io_ports *io_ports = &hwif->io_ports;
 	unsigned long base;
@@ -691,16 +660,15 @@ static void __devinit sil_quirkproc(ide_drive_t *drive)
 static void __devinit init_iops_siimage(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
+	struct ide_host *host = pci_get_drvdata(dev);
 
 	hwif->hwif_data = NULL;
 
 	/* Pessimal until we finish probing */
 	hwif->rqsize = 15;
 
-	if (pci_get_drvdata(dev) == NULL)
-		return;
-
-	init_mmio_iops_siimage(hwif);
+	if (host->host_priv)
+		init_mmio_iops_siimage(hwif);
 }
 
 /**
@@ -778,8 +746,13 @@ static const struct ide_port_info siimage_chipsets[] __devinitdata = {
 static int __devinit siimage_init_one(struct pci_dev *dev,
 				      const struct pci_device_id *id)
 {
+	void __iomem *ioaddr = NULL;
+	resource_size_t bar5 = pci_resource_start(dev, 5);
+	unsigned long barsize = pci_resource_len(dev, 5);
+	int rc;
 	struct ide_port_info d;
 	u8 idx = id->driver_data;
+	u8 BA5_EN;
 
 	d = siimage_chipsets[idx];
 
@@ -795,7 +768,36 @@ static int __devinit siimage_init_one(struct pci_dev *dev,
 		d.host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
 	}
 
-	return ide_pci_init_one(dev, &d, NULL);
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	pci_read_config_byte(dev, 0x8A, &BA5_EN);
+	if ((BA5_EN & 0x01) || bar5) {
+		/*
+		 * Drop back to PIO if we can't map the MMIO. Some systems
+		 * seem to get terminally confused in the PCI spaces.
+		 */
+		if (!request_mem_region(bar5, barsize, d.name)) {
+			printk(KERN_WARNING "siimage: IDE controller MMIO "
+					    "ports not available.\n");
+		} else {
+			ioaddr = ioremap(bar5, barsize);
+			if (ioaddr == NULL)
+				release_mem_region(bar5, barsize);
+		}
+	}
+
+	rc = ide_pci_init_one(dev, &d, ioaddr);
+	if (rc) {
+		if (ioaddr) {
+			iounmap(ioaddr);
+			release_mem_region(bar5, barsize);
+		}
+		pci_disable_device(dev);
+	}
+
+	return rc;
 }
 
 static const struct pci_device_id siimage_pci_tbl[] = {
-- 
GitLab


From ee77325b074a73694b66ec9eca4f7e55dad58b84 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:17 +0200
Subject: [PATCH 450/853] via82cxxx: convert to use ->host_priv

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/via82cxxx.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 2f22abfe003..a8c050b462b 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -113,7 +113,8 @@ struct via82cxxx_dev
 static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct via82cxxx_dev *vdev = pci_get_drvdata(dev);
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct via82cxxx_dev *vdev = host->host_priv;
 	u8 t;
 
 	if (~vdev->via_config->flags & VIA_BAD_AST) {
@@ -153,7 +154,8 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed)
 	ide_hwif_t *hwif = drive->hwif;
 	ide_drive_t *peer = hwif->drives + (~drive->dn & 1);
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct via82cxxx_dev *vdev = pci_get_drvdata(dev);
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct via82cxxx_dev *vdev = host->host_priv;
 	struct ide_timing t, p;
 	unsigned int T, UT;
 
@@ -266,19 +268,13 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
 
 static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const char *name)
 {
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct via82cxxx_dev *vdev = host->host_priv;
 	struct pci_dev *isa = NULL;
-	struct via82cxxx_dev *vdev;
 	struct via_isa_bridge *via_config;
 	u8 t, v;
 	u32 u;
 
-	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
-	if (!vdev) {
-		printk(KERN_ERR "VP_IDE: out of memory :(\n");
-		return -ENOMEM;
-	}
-	pci_set_drvdata(dev, vdev);
-
 	/*
 	 * Find the ISA bridge to see how good the IDE is.
 	 */
@@ -402,7 +398,8 @@ static int via_cable_override(struct pci_dev *pdev)
 static u8 __devinit via82cxxx_cable_detect(ide_hwif_t *hwif)
 {
 	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	struct via82cxxx_dev *vdev = pci_get_drvdata(pdev);
+	struct ide_host *host = pci_get_drvdata(pdev);
+	struct via82cxxx_dev *vdev = host->host_priv;
 
 	if (via_cable_override(pdev))
 		return ATA_CBL_PATA40_SHORT;
@@ -436,6 +433,8 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 {
 	struct pci_dev *isa = NULL;
 	struct via_isa_bridge *via_config;
+	struct via82cxxx_dev *vdev;
+	int rc;
 	u8 idx = id->driver_data;
 	struct ide_port_info d;
 
@@ -466,7 +465,17 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 
 	d.udma_mask = via_config->udma_mask;
 
-	return ide_pci_init_one(dev, &d, NULL);
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev) {
+		printk(KERN_ERR "VP_IDE: out of memory :(\n");
+		return -ENOMEM;
+	}
+
+	rc = ide_pci_init_one(dev, &d, vdev);
+	if (rc)
+		kfree(vdev);
+
+	return rc;
 }
 
 static const struct pci_device_id via_pci_tbl[] = {
-- 
GitLab


From b16040b14e766d390138b04c8829c816f4c1d95b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:17 +0200
Subject: [PATCH 451/853] tc86c001: remove ->init_chipset method

* Reserve PCI BAR 5 in tc86c001_init_one() and remove no longer needed
  init_chipset_tc86c001().

While at it:

* Add & use DRV_NAME define.

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/tc86c001.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c
index e16e79d2177..bb329219935 100644
--- a/drivers/ide/pci/tc86c001.c
+++ b/drivers/ide/pci/tc86c001.c
@@ -11,6 +11,8 @@
 #include <linux/pci.h>
 #include <linux/ide.h>
 
+#define DRV_NAME "TC86C001"
+
 static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
@@ -173,16 +175,6 @@ static void __devinit init_hwif_tc86c001(ide_hwif_t *hwif)
 	hwif->rqsize	 = 0xffff;
 }
 
-static unsigned int __devinit init_chipset_tc86c001(struct pci_dev *dev,
-							const char *name)
-{
-	int err = pci_request_region(dev, 5, name);
-
-	if (err)
-		printk(KERN_ERR "%s: system control regs already in use", name);
-	return err;
-}
-
 static const struct ide_port_ops tc86c001_port_ops = {
 	.set_pio_mode		= tc86c001_set_pio_mode,
 	.set_dma_mode		= tc86c001_set_mode,
@@ -202,7 +194,6 @@ static const struct ide_dma_ops tc86c001_dma_ops = {
 
 static const struct ide_port_info tc86c001_chipset __devinitdata = {
 	.name		= "TC86C001",
-	.init_chipset	= init_chipset_tc86c001,
 	.init_hwif	= init_hwif_tc86c001,
 	.port_ops	= &tc86c001_port_ops,
 	.dma_ops	= &tc86c001_dma_ops,
@@ -215,7 +206,30 @@ static const struct ide_port_info tc86c001_chipset __devinitdata = {
 static int __devinit tc86c001_init_one(struct pci_dev *dev,
 				       const struct pci_device_id *id)
 {
-	return ide_pci_init_one(dev, &tc86c001_chipset, NULL);
+	int rc;
+
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	/* BAR 5 holds the system control registers; reserve it up front */
+	rc = pci_request_region(dev, 5, DRV_NAME);
+	if (rc) {
+		printk(KERN_ERR DRV_NAME
+		       ": system control regs already in use\n");
+		goto out_disable;
+	}
+
+	rc = ide_pci_init_one(dev, &tc86c001_chipset, NULL);
+	if (rc)
+		goto out_release;
+	return 0;
+
+out_release:
+	pci_release_region(dev, 5);
+out_disable:
+	pci_disable_device(dev);
+	return rc;
 }
 
 static const struct pci_device_id tc86c001_pci_tbl[] = {
-- 
GitLab


From d51f19c86583ca70468883d8137a92689f1a80c1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:17 +0200
Subject: [PATCH 452/853] amd74xx: cleanup ->init_chipset method

Move amd_clock setup from init_chipset_amd74xx() to amd74xx_probe().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/amd74xx.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index b6a475313c7..5115d448ff8 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -173,24 +173,6 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev,
 		t |= 0xf0;
 	pci_write_config_byte(dev, AMD_IDE_CONFIG + offset, t);
 
-/*
- * Determine the system bus clock.
- */
-
-	amd_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
-
-	switch (amd_clock) {
-		case 33000: amd_clock = 33333; break;
-		case 37000: amd_clock = 37500; break;
-		case 41000: amd_clock = 41666; break;
-	}
-
-	if (amd_clock < 20000 || amd_clock > 50000) {
-		printk(KERN_WARNING "%s: User given PCI clock speed impossible (%d), using 33 MHz instead.\n",
-				    name, amd_clock);
-		amd_clock = 33333;
-	}
-
 	return dev->irq;
 }
 
@@ -302,6 +284,24 @@ static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_
 			 d.name, pci_name(dev), dev->revision,
 			 amd_dma[fls(d.udma_mask) - 1]);
 
+	/*
+	 * Determine the system bus clock.
+	 */
+	amd_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
+
+	switch (amd_clock) {
+	case 33000: amd_clock = 33333; break;
+	case 37000: amd_clock = 37500; break;
+	case 41000: amd_clock = 41666; break;
+	}
+
+	if (amd_clock < 20000 || amd_clock > 50000) {
+		printk(KERN_WARNING "%s: User given PCI clock speed impossible"
+				    " (%d), using 33 MHz instead.\n",
+				    d.name, amd_clock);
+		amd_clock = 33333;
+	}
+
 	return ide_pci_init_one(dev, &d, NULL);
 }
 
-- 
GitLab


From 0794230fd4b1bf61af8aabd7e987a595d6dbc430 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:17 +0200
Subject: [PATCH 453/853] cmd64x: cleanup ->init_chipset method

Remove verbose reporting for CMD646 (PCI device revision is always
logged by IDE PCI layer).

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/cmd64x.c | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index fc0333c9a4e..4eebcf09e0d 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -334,24 +334,6 @@ static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev, const cha
 {
 	u8 mrdmode = 0;
 
-	if (dev->device == PCI_DEVICE_ID_CMD_646) {
-
-		switch (dev->revision) {
-		case 0x07:
-		case 0x05:
-			printk("%s: UltraDMA capable\n", name);
-			break;
-		case 0x03:
-		default:
-			printk("%s: MultiWord DMA force limited\n", name);
-			break;
-		case 0x01:
-			printk("%s: MultiWord DMA limited, "
-			       "IRQ workaround enabled\n", name);
-			break;
-		}
-	}
-
 	/* Set a good latency timer and cache line size value. */
 	(void) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64);
 	/* FIXME: pci_set_master() to ensure a good latency timer value */
-- 
GitLab


From 37525bebcfc15a1fe5a9cb50bf49b21bf43559c1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:18 +0200
Subject: [PATCH 454/853] via82cxxx: cleanup ->init_chipset method

* Move the boot message and via_clock setup from
  init_chipset_via82cxxx() to via_init_one().

* Set vdev->via_config in via_init_one() and cleanup
  init_chipset_via82cxxx() accordingly.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/via82cxxx.c | 81 ++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 46 deletions(-)

diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index a8c050b462b..bc1675da37d 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -270,21 +270,10 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
 {
 	struct ide_host *host = pci_get_drvdata(dev);
 	struct via82cxxx_dev *vdev = host->host_priv;
-	struct pci_dev *isa = NULL;
-	struct via_isa_bridge *via_config;
+	struct via_isa_bridge *via_config = vdev->via_config;
 	u8 t, v;
 	u32 u;
 
-	/*
-	 * Find the ISA bridge to see how good the IDE is.
-	 */
-	vdev->via_config = via_config = via_config_find(&isa);
-
-	/* We checked this earlier so if it fails here deeep badness
-	   is involved */
-
-	BUG_ON(!via_config->id);
-
 	/*
 	 * Detect cable and configure Clk66
 	 */
@@ -330,39 +319,6 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
 
 	pci_write_config_byte(dev, VIA_FIFO_CONFIG, t);
 
-	/*
-	 * Determine system bus clock.
-	 */
-
-	via_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
-
-	switch (via_clock) {
-		case 33000: via_clock = 33333; break;
-		case 37000: via_clock = 37500; break;
-		case 41000: via_clock = 41666; break;
-	}
-
-	if (via_clock < 20000 || via_clock > 50000) {
-		printk(KERN_WARNING "VP_IDE: User given PCI clock speed "
-			"impossible (%d), using 33 MHz instead.\n", via_clock);
-		printk(KERN_WARNING "VP_IDE: Use ide0=ata66 if you want "
-			"to assume 80-wire cable.\n");
-		via_clock = 33333;
-	}
-
-	/*
-	 * Print the boot message.
-	 */
-
-	printk(KERN_INFO "VP_IDE: VIA %s (rev %02x) IDE %sDMA%s "
-		"controller on pci%s\n",
-		via_config->name, isa->revision,
-		via_config->udma_mask ? "U" : "MW",
-		via_dma[via_config->udma_mask ?
-			(fls(via_config->udma_mask) - 1) : 0],
-		pci_name(dev));
-
-	pci_dev_put(isa);
 	return 0;
 }
 
@@ -444,12 +400,43 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 	 * Find the ISA bridge and check we know what it is.
 	 */
 	via_config = via_config_find(&isa);
-	pci_dev_put(isa);
 	if (!via_config->id) {
 		printk(KERN_WARNING "VP_IDE: Unknown VIA SouthBridge, disabling DMA.\n");
 		return -ENODEV;
 	}
 
+	/*
+	 * Print the boot message.
+	 */
+	printk(KERN_INFO "VP_IDE: VIA %s (rev %02x) IDE %sDMA%s "
+		"controller on pci%s\n",
+		via_config->name, isa->revision,
+		via_config->udma_mask ? "U" : "MW",
+		via_dma[via_config->udma_mask ?
+			(fls(via_config->udma_mask) - 1) : 0],
+		pci_name(dev));
+
+	pci_dev_put(isa);
+
+	/*
+	 * Determine system bus clock.
+	 */
+	via_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
+
+	switch (via_clock) {
+	case 33000: via_clock = 33333; break;
+	case 37000: via_clock = 37500; break;
+	case 41000: via_clock = 41666; break;
+	}
+
+	if (via_clock < 20000 || via_clock > 50000) {
+		printk(KERN_WARNING "VP_IDE: User given PCI clock speed "
+			"impossible (%d), using 33 MHz instead.\n", via_clock);
+		printk(KERN_WARNING "VP_IDE: Use ide0=ata66 if you want "
+			"to assume 80-wire cable.\n");
+		via_clock = 33333;
+	}
+
 	if (idx == 0)
 		d.host_flags |= IDE_HFLAG_NO_AUTODMA;
 	else
@@ -471,6 +458,8 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 		return -ENOMEM;
 	}
 
+	vdev->via_config = via_config;
+
 	rc = ide_pci_init_one(dev, &d, vdev);
 	if (rc)
 		kfree(vdev);
-- 
GitLab


From ef0b04276d8f719d754c092434fbd62c2aeb5307 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:19 +0200
Subject: [PATCH 455/853] ide: add ide_pci_remove() helper

* Add 'unsigned long host_flags' field to struct ide_host.

* Set ->host_flags in ide_host_alloc_all().

* Always set PCI dev's ->driver_data in ide_pci_init_{one,two}().

* Add ide_pci_remove() helper (the default implementation for
  struct pci_driver's ->remove method).

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c |  3 +++
 drivers/ide/setup-pci.c | 39 +++++++++++++++++++++++++++++++++------
 include/linux/ide.h     |  2 ++
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 9ab5892eaea..f0c162488ec 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1609,6 +1609,9 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d,
 	if (hws[0])
 		host->dev[0] = hws[0]->dev;
 
+	if (d)
+		host->host_flags = d->host_flags;
+
 	return host;
 }
 EXPORT_SYMBOL_GPL(ide_host_alloc_all);
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index ca17bf8896d..20f0ee00469 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -548,8 +548,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
 
 	host->host_priv = priv;
 
-	if (priv)
-		pci_set_drvdata(dev, host);
+	pci_set_drvdata(dev, host);
 
 	ret = do_ide_setup_pci_device(dev, d, 1);
 	if (ret < 0)
@@ -593,10 +592,8 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 
 	host->host_priv = priv;
 
-	if (priv) {
-		pci_set_drvdata(pdev[0], host);
-		pci_set_drvdata(pdev[1], host);
-	}
+	pci_set_drvdata(pdev[0], host);
+	pci_set_drvdata(pdev[1], host);
 
 	for (i = 0; i < 2; i++) {
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
@@ -619,3 +616,33 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ide_pci_init_two);
+
+/*
+ * Default pci_driver ->remove method for IDE PCI hosts whose drvdata was
+ * set by ide_pci_init_one() / ide_pci_init_two().
+ */
+void ide_pci_remove(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;
+	const unsigned long hflags = host->host_flags;
+	int bars;
+
+	/* BAR mask: bits 0-1 for IDE_HFLAG_SINGLE hosts, bits 0-3 otherwise */
+	bars = (hflags & IDE_HFLAG_SINGLE) ? (1 << 2) - 1 : (1 << 4) - 1;
+
+	/* without IDE_HFLAG_NO_DMA also take BAR 2 (CS5520) or BAR 4 */
+	if (!(hflags & IDE_HFLAG_NO_DMA))
+		bars |= (hflags & IDE_HFLAG_CS5520) ? (1 << 2) : (1 << 4);
+
+	ide_host_remove(host);
+
+	if (dev2)
+		pci_release_selected_regions(dev2, bars);
+	pci_release_selected_regions(dev, bars);
+
+	if (dev2)
+		pci_disable_device(dev2);
+	pci_disable_device(dev);
+}
+EXPORT_SYMBOL_GPL(ide_pci_remove);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 3eccac0a2a3..dbd0aeb3a56 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -631,6 +631,7 @@ struct ide_host {
 	ide_hwif_t	*ports[MAX_HWIFS];
 	unsigned int	n_ports;
 	struct device	*dev[2];
+	unsigned long	host_flags;
 	void		*host_priv;
 };
 
@@ -1213,6 +1214,7 @@ struct ide_port_info {
 int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *);
 int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
 		     const struct ide_port_info *, void *);
+void ide_pci_remove(struct pci_dev *);
 
 void ide_map_sg(ide_drive_t *, struct request *);
 void ide_init_sg_cmd(ide_drive_t *, struct request *);
-- 
GitLab


From eb7cb98b1cc8be1d4395d9accf49ae3924cd68f1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:19 +0200
Subject: [PATCH 456/853] aec62xx: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/aec62xx.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index 7ca7989bc35..253299961a1 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -280,6 +280,12 @@ static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_devi
 	return err;
 }
 
+static void __devexit aec62xx_remove(struct pci_dev *dev)
+{
+	ide_pci_remove(dev);
+	pci_disable_device(dev);
+}
+
 static const struct pci_device_id aec62xx_pci_tbl[] = {
 	{ PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP850UF), 0 },
 	{ PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP860),   1 },
@@ -294,6 +300,7 @@ static struct pci_driver driver = {
 	.name		= "AEC62xx_IDE",
 	.id_table	= aec62xx_pci_tbl,
 	.probe		= aec62xx_init_one,
+	.remove		= aec62xx_remove,
 };
 
 static int __init aec62xx_ide_init(void)
@@ -301,7 +308,13 @@ static int __init aec62xx_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit aec62xx_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(aec62xx_ide_init);
+module_exit(aec62xx_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for ARTOP AEC62xx IDE");
-- 
GitLab


From 8ee3f3b69d9c37f86a45862f53451699ec77fe12 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:19 +0200
Subject: [PATCH 457/853] alim15x3: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/alim15x3.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index 7f96e7ca386..ac171502b99 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -580,6 +580,7 @@ static struct pci_driver driver = {
 	.name		= "ALI15x3_IDE",
 	.id_table	= alim15x3_pci_tbl,
 	.probe		= alim15x3_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init ali15x3_ide_init(void)
@@ -587,7 +588,13 @@ static int __init ali15x3_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit ali15x3_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(ali15x3_ide_init);
+module_exit(ali15x3_ide_exit);
 
 MODULE_AUTHOR("Michael Aubry, Andrzej Krzysztofowicz, CJ, Andre Hedrick, Alan Cox");
 MODULE_DESCRIPTION("PCI driver module for ALi 15x3 IDE");
-- 
GitLab


From b2509ac1d9dbe7a9d3a9915afbe108978002c95b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:19 +0200
Subject: [PATCH 458/853] amd74xx: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/amd74xx.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 5115d448ff8..a91f2e873ba 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -341,6 +341,7 @@ static struct pci_driver driver = {
 	.name		= "AMD_IDE",
 	.id_table	= amd74xx_pci_tbl,
 	.probe		= amd74xx_probe,
+	.remove		= ide_pci_remove,
 };
 
 static int __init amd74xx_ide_init(void)
@@ -348,7 +349,13 @@ static int __init amd74xx_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit amd74xx_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(amd74xx_ide_init);
+module_exit(amd74xx_ide_exit);
 
 MODULE_AUTHOR("Vojtech Pavlik");
 MODULE_DESCRIPTION("AMD PCI IDE driver");
-- 
GitLab


From f354fbc4b45a730aa0f876322ea4f096b47d1013 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:20 +0200
Subject: [PATCH 459/853] atiixp: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/atiixp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index b483a68b39f..4c49c1ba618 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -184,6 +184,7 @@ static struct pci_driver driver = {
 	.name		= "ATIIXP_IDE",
 	.id_table	= atiixp_pci_tbl,
 	.probe		= atiixp_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init atiixp_ide_init(void)
@@ -191,7 +192,13 @@ static int __init atiixp_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit atiixp_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(atiixp_ide_init);
+module_exit(atiixp_ide_exit);
 
 MODULE_AUTHOR("HUI YU");
 MODULE_DESCRIPTION("PCI driver module for ATI IXP IDE");
-- 
GitLab


From e2b15b4765ca032d0837dfc8c195ecd3bc56a433 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:20 +0200
Subject: [PATCH 460/853] cmd64x: add ->remove method and module_exit()

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/cmd64x.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index 4eebcf09e0d..0a4d194bc35 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -505,6 +505,7 @@ static struct pci_driver driver = {
 	.name		= "CMD64x_IDE",
 	.id_table	= cmd64x_pci_tbl,
 	.probe		= cmd64x_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init cmd64x_ide_init(void)
@@ -512,7 +513,13 @@ static int __init cmd64x_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit cmd64x_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(cmd64x_ide_init);
+module_exit(cmd64x_ide_exit);
 
 MODULE_AUTHOR("Eddie Dost, David Miller, Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for CMD64x IDE");
-- 
GitLab


From d16492a9789982955e627a7ffdcd1c3b945f7e85 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:20 +0200
Subject: [PATCH 461/853] cs5530: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/cs5530.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index ba82bad8bf4..dff345c763e 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -269,6 +269,7 @@ static struct pci_driver driver = {
 	.name		= "CS5530 IDE",
 	.id_table	= cs5530_pci_tbl,
 	.probe		= cs5530_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init cs5530_ide_init(void)
@@ -276,7 +277,13 @@ static int __init cs5530_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit cs5530_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(cs5530_ide_init);
+module_exit(cs5530_ide_exit);
 
 MODULE_AUTHOR("Mark Lord");
 MODULE_DESCRIPTION("PCI driver module for Cyrix/NS 5530 IDE");
-- 
GitLab


From 40c8a7f67d38de87f97a548b81b6cd0621a3ff9a Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:20 +0200
Subject: [PATCH 462/853] cs5535: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/cs5535.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c
index 2161f43ca1b..a7300139153 100644
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -194,6 +194,7 @@ static struct pci_driver driver = {
 	.name       = "CS5535_IDE",
 	.id_table   = cs5535_pci_tbl,
 	.probe      = cs5535_init_one,
+	.remove     = ide_pci_remove,
 };
 
 static int __init cs5535_ide_init(void)
@@ -201,7 +202,13 @@ static int __init cs5535_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit cs5535_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(cs5535_ide_init);
+module_exit(cs5535_ide_exit);
 
 MODULE_AUTHOR("AMD");
 MODULE_DESCRIPTION("PCI driver module for AMD/NS CS5535 IDE");
-- 
GitLab


From cd68841b854e24076d41c32eae3ccfce6ae60a59 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:21 +0200
Subject: [PATCH 463/853] cy82c693: add ->remove method and module_exit()

Fix the refcounting for dev2 while at it.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/cy82c693.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index abd27ed7c30..04f268866b6 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -420,11 +420,21 @@ static int __devinit cy82c693_init_one(struct pci_dev *dev, const struct pci_dev
 	    PCI_FUNC(dev->devfn) == 1) {
 		dev2 = pci_get_slot(dev->bus, dev->devfn + 1);
 		ret = ide_pci_init_two(dev, dev2, &cy82c693_chipset, NULL);
-		/* We leak pci refs here but thats ok - we can't be unloaded */
+		if (ret)
+			pci_dev_put(dev2);
 	}
 	return ret;
 }
 
+static void __devexit cy82c693_remove(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;
+
+	ide_pci_remove(dev);
+	pci_dev_put(dev2);
+}
+
 static const struct pci_device_id cy82c693_pci_tbl[] = {
 	{ PCI_VDEVICE(CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693), 0 },
 	{ 0, },
@@ -435,6 +445,7 @@ static struct pci_driver driver = {
 	.name		= "Cypress_IDE",
 	.id_table	= cy82c693_pci_tbl,
 	.probe		= cy82c693_init_one,
+	.remove		= cy82c693_remove,
 };
 
 static int __init cy82c693_ide_init(void)
@@ -442,7 +453,13 @@ static int __init cy82c693_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit cy82c693_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(cy82c693_ide_init);
+module_exit(cy82c693_ide_exit);
 
 MODULE_AUTHOR("Andreas Krebs, Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for the Cypress CY82C693 IDE");
-- 
GitLab


From f566bcae9fb39b108e39a2f31594c028d6ee2e77 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:21 +0200
Subject: [PATCH 464/853] ide/pci/generic: add ->remove method and
 module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/generic.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index dd0caea5e4f..81fc171f8ff 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -174,6 +174,7 @@ static struct pci_driver driver = {
 	.name		= "PCI_IDE",
 	.id_table	= generic_pci_tbl,
 	.probe		= generic_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init generic_ide_init(void)
@@ -181,7 +182,13 @@ static int __init generic_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit generic_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(generic_ide_init);
+module_exit(generic_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for generic PCI IDE");
-- 
GitLab


From 741ac62f6fca55ddbef52513fbc687ba6b04f99e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:21 +0200
Subject: [PATCH 465/853] hpt34x: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/hpt34x.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index 3d70c5150ac..b52f8339102 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -169,6 +169,7 @@ static struct pci_driver driver = {
 	.name		= "HPT34x_IDE",
 	.id_table	= hpt34x_pci_tbl,
 	.probe		= hpt34x_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init hpt34x_ide_init(void)
@@ -176,7 +177,13 @@ static int __init hpt34x_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit hpt34x_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(hpt34x_ide_init);
+module_exit(hpt34x_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for Highpoint 34x IDE");
-- 
GitLab


From a6c43a2be9721d00ef9d6ef5b7b0e8113444577b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:21 +0200
Subject: [PATCH 466/853] hpt366: add ->remove method and module_exit()

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/hpt366.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 8f29571345a..398808905f9 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1627,6 +1627,17 @@ static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_devic
 	return ret;
 }
 
+static void __devexit hpt366_remove(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct hpt_info *info = host->host_priv;
+	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;
+
+	ide_pci_remove(dev);
+	pci_dev_put(dev2);
+	kfree(info);
+}
+
 static const struct pci_device_id hpt366_pci_tbl[] __devinitconst = {
 	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT366),  0 },
 	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT372),  1 },
@@ -1642,6 +1653,7 @@ static struct pci_driver driver = {
 	.name		= "HPT366_IDE",
 	.id_table	= hpt366_pci_tbl,
 	.probe		= hpt366_init_one,
+	.remove		= hpt366_remove,
 };
 
 static int __init hpt366_ide_init(void)
@@ -1649,7 +1661,13 @@ static int __init hpt366_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit hpt366_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(hpt366_ide_init);
+module_exit(hpt366_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for Highpoint HPT366 IDE");
-- 
GitLab


From 5102f768570b3486979afb68c595b71cfb7f026f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:22 +0200
Subject: [PATCH 467/853] it8213: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/it8213.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index 18219fa9ef0..575bf2386f8 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -198,6 +198,7 @@ static struct pci_driver driver = {
 	.name		= "ITE8213_IDE",
 	.id_table	= it8213_pci_tbl,
 	.probe		= it8213_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init it8213_ide_init(void)
@@ -205,7 +206,13 @@ static int __init it8213_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit it8213_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(it8213_ide_init);
+module_exit(it8213_ide_exit);
 
 MODULE_AUTHOR("Jack Lee, Alan Cox");
 MODULE_DESCRIPTION("PCI driver module for the ITE 8213");
-- 
GitLab


From 87d8b61356108835f5e91c0fb32b830ec585978c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:22 +0200
Subject: [PATCH 468/853] it821x: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/it821x.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index e63fdafe18f..c5c91f2109d 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -665,6 +665,15 @@ static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_devic
 	return rc;
 }
 
+static void __devexit it821x_remove(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct it821x_dev *itdevs = host->host_priv;
+
+	ide_pci_remove(dev);
+	kfree(itdevs);
+}
+
 static const struct pci_device_id it821x_pci_tbl[] = {
 	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8211), 0 },
 	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8212), 0 },
@@ -677,6 +686,7 @@ static struct pci_driver driver = {
 	.name		= "ITE821x IDE",
 	.id_table	= it821x_pci_tbl,
 	.probe		= it821x_init_one,
+	.remove		= it821x_remove,
 };
 
 static int __init it821x_ide_init(void)
@@ -684,7 +694,13 @@ static int __init it821x_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit it821x_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(it821x_ide_init);
+module_exit(it821x_ide_exit);
 
 module_param_named(noraid, it8212_noraid, int, S_IRUGO);
 MODULE_PARM_DESC(noraid, "Force card into bypass mode");
-- 
GitLab


From 1bcaaba7749dce7c0506cff0e811c9bed8121f38 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:22 +0200
Subject: [PATCH 469/853] jmicron: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/jmicron.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c
index a7e3c14f7b0..39e221b076e 100644
--- a/drivers/ide/pci/jmicron.c
+++ b/drivers/ide/pci/jmicron.c
@@ -152,6 +152,7 @@ static struct pci_driver driver = {
 	.name		= "JMicron IDE",
 	.id_table	= jmicron_pci_tbl,
 	.probe		= jmicron_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init jmicron_ide_init(void)
@@ -159,7 +160,13 @@ static int __init jmicron_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit jmicron_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(jmicron_ide_init);
+module_exit(jmicron_ide_exit);
 
 MODULE_AUTHOR("Alan Cox");
 MODULE_DESCRIPTION("PCI driver module for the JMicron in legacy modes");
-- 
GitLab


From aa6e518d75742fd3ac3d2cb4c2bcbae850319fc1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:23 +0200
Subject: [PATCH 470/853] ns87415: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/ns87415.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index a45c33c0c79..afcc742a63a 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -337,6 +337,7 @@ static struct pci_driver driver = {
 	.name		= "NS87415_IDE",
 	.id_table	= ns87415_pci_tbl,
 	.probe		= ns87415_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init ns87415_ide_init(void)
@@ -344,7 +345,13 @@ static int __init ns87415_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit ns87415_ide_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
 module_init(ns87415_ide_init);
+module_exit(ns87415_ide_exit);
 
 MODULE_AUTHOR("Mark Lord, Eddie Dost, Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for NS87415 IDE");
-- 
GitLab


From adc7f85ae68bd2e8db2e0136dcd4679891e5c321 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:23 +0200
Subject: [PATCH 471/853] opti621: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/opti621.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/opti621.c b/drivers/ide/pci/opti621.c
index edb9132ffbe..4895f2ff3f0 100644
--- a/drivers/ide/pci/opti621.c
+++ b/drivers/ide/pci/opti621.c
@@ -223,6 +223,7 @@ static struct pci_driver driver = {
 	.name		= "Opti621_IDE",
 	.id_table	= opti621_pci_tbl,
 	.probe		= opti621_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init opti621_ide_init(void)
@@ -230,7 +231,13 @@ static int __init opti621_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit opti621_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in opti621_ide_init() */
+}
+
 module_init(opti621_ide_init);
+module_exit(opti621_ide_exit);
 
 MODULE_AUTHOR("Jaromir Koutek, Jan Harkes, Mark Lord");
 MODULE_DESCRIPTION("PCI driver module for Opti621 IDE");
-- 
GitLab


From d69c8f8c0068b9fc7f5a5082d8a891618b732e2d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:23 +0200
Subject: [PATCH 472/853] pdc202xx_new: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/pdc202xx_new.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index 71a420feb98..9881a1a8b89 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -543,6 +543,15 @@ static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_de
 	return ide_pci_init_one(dev, d, NULL);
 }
 
+static void __devexit pdc202new_remove(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;	/* read before ide_pci_remove(); NULL when host->dev[1] is unset */
+
+	ide_pci_remove(dev);
+	pci_dev_put(dev2);	/* presumably drops a reference taken in the probe path -- TODO confirm */
+}
+
 static const struct pci_device_id pdc202new_pci_tbl[] = {
 	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20268), 0 },
 	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20269), 1 },
@@ -559,6 +568,7 @@ static struct pci_driver driver = {
 	.name		= "Promise_IDE",
 	.id_table	= pdc202new_pci_tbl,
 	.probe		= pdc202new_init_one,
+	.remove		= pdc202new_remove,
 };
 
 static int __init pdc202new_ide_init(void)
@@ -566,7 +576,13 @@ static int __init pdc202new_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit pdc202new_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in pdc202new_ide_init() */
+}
+
 module_init(pdc202new_ide_init);
+module_exit(pdc202new_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick, Frank Tiernan");
 MODULE_DESCRIPTION("PCI driver module for Promise PDC20268 and higher");
-- 
GitLab


From 574a1c24b63fdb584935b4924a38b451eeb0880e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:24 +0200
Subject: [PATCH 473/853] pdc202xx_old: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/pdc202xx_old.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index eba1d60a73a..17d99ce273a 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -429,6 +429,7 @@ static struct pci_driver driver = {
 	.name		= "Promise_Old_IDE",
 	.id_table	= pdc202xx_pci_tbl,
 	.probe		= pdc202xx_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init pdc202xx_ide_init(void)
@@ -436,7 +437,13 @@ static int __init pdc202xx_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit pdc202xx_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in pdc202xx_ide_init() */
+}
+
 module_init(pdc202xx_ide_init);
+module_exit(pdc202xx_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick, Frank Tiernan");
 MODULE_DESCRIPTION("PCI driver module for older Promise IDE");
-- 
GitLab


From da8c3e0d21c5dbb2815d7c8f1f09e0c68f626ed1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:24 +0200
Subject: [PATCH 474/853] piix: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/piix.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index 359f65ddcbf..c16b1ab4d4f 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -462,6 +462,7 @@ static struct pci_driver driver = {
 	.name		= "PIIX_IDE",
 	.id_table	= piix_pci_tbl,
 	.probe		= piix_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init piix_ide_init(void)
@@ -470,7 +471,13 @@ static int __init piix_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit piix_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in piix_ide_init() */
+}
+
 module_init(piix_ide_init);
+module_exit(piix_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick, Andrzej Krzysztofowicz");
 MODULE_DESCRIPTION("PCI driver module for Intel PIIX IDE");
-- 
GitLab


From 0fd188047ca75df85191cc55f929cb2889631430 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:24 +0200
Subject: [PATCH 475/853] rz1000: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/rz1000.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/rz1000.c b/drivers/ide/pci/rz1000.c
index 860ffdeca09..f7a3b9aff29 100644
--- a/drivers/ide/pci/rz1000.c
+++ b/drivers/ide/pci/rz1000.c
@@ -62,6 +62,7 @@ static struct pci_driver driver = {
 	.name		= "RZ1000_IDE",
 	.id_table	= rz1000_pci_tbl,
 	.probe		= rz1000_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init rz1000_ide_init(void)
@@ -69,7 +70,13 @@ static int __init rz1000_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit rz1000_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in rz1000_ide_init() */
+}
+
 module_init(rz1000_ide_init);
+module_exit(rz1000_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for RZ1000 IDE");
-- 
GitLab


From 991f5e69c512b284aaec81432dff0440b2a2f418 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:24 +0200
Subject: [PATCH 476/853] sc1200: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sc1200.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c
index fa2ce76837d..6509560ba66 100644
--- a/drivers/ide/pci/sc1200.c
+++ b/drivers/ide/pci/sc1200.c
@@ -329,6 +329,7 @@ static struct pci_driver driver = {
 	.name		= "SC1200_IDE",
 	.id_table	= sc1200_pci_tbl,
 	.probe		= sc1200_init_one,
+	.remove		= ide_pci_remove,
 #ifdef CONFIG_PM
 	.suspend	= sc1200_suspend,
 	.resume		= sc1200_resume,
@@ -340,7 +341,13 @@ static int __init sc1200_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit sc1200_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in sc1200_ide_init() */
+}
+
 module_init(sc1200_ide_init);
+module_exit(sc1200_ide_exit);
 
 MODULE_AUTHOR("Mark Lord");
 MODULE_DESCRIPTION("PCI driver module for NS SC1200 IDE");
-- 
GitLab


From bc2c9a8025921972f0774859b8f19b324734e824 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:25 +0200
Subject: [PATCH 477/853] serverworks: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/serverworks.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index 34abdfc8d56..eca52fede28 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -439,6 +439,7 @@ static struct pci_driver driver = {
 	.name		= "Serverworks_IDE",
 	.id_table	= svwks_pci_tbl,
 	.probe		= svwks_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init svwks_ide_init(void)
@@ -446,7 +447,13 @@ static int __init svwks_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit svwks_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in svwks_ide_init() */
+}
+
 module_init(svwks_ide_init);
+module_exit(svwks_ide_exit);
 
 MODULE_AUTHOR("Michael Aubry. Andrzej Krzysztofowicz, Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for Serverworks OSB4/CSB5/CSB6 IDE");
-- 
GitLab


From fe3825808ad67af02bd826a0d2ca6831e947e80e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:25 +0200
Subject: [PATCH 478/853] siimage: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/siimage.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 1388ffa9764..f2e4dac9b71 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -800,6 +800,24 @@ static int __devinit siimage_init_one(struct pci_dev *dev,
 	return rc;
 }
 
+static void __devexit siimage_remove(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	void __iomem *ioaddr = host->host_priv;	/* fetched up front, before ide_pci_remove() runs */
+
+	ide_pci_remove(dev);
+
+	if (ioaddr) {	/* only when host_priv was set; assumed to be the BAR 5 mapping -- TODO confirm in probe */
+		resource_size_t bar5 = pci_resource_start(dev, 5);
+		unsigned long barsize = pci_resource_len(dev, 5);
+
+		iounmap(ioaddr);
+		release_mem_region(bar5, barsize);	/* give back the BAR 5 address range after unmapping */
+	}
+
+	pci_disable_device(dev);
+}
+
 static const struct pci_device_id siimage_pci_tbl[] = {
 	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_SII_680),    0 },
 #ifdef CONFIG_BLK_DEV_IDE_SATA
@@ -814,6 +832,7 @@ static struct pci_driver driver = {
 	.name		= "SiI_IDE",
 	.id_table	= siimage_pci_tbl,
 	.probe		= siimage_init_one,
+	.remove		= siimage_remove,
 };
 
 static int __init siimage_ide_init(void)
@@ -821,7 +840,13 @@ static int __init siimage_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit siimage_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in siimage_ide_init() */
+}
+
 module_init(siimage_ide_init);
+module_exit(siimage_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick, Alan Cox");
 MODULE_DESCRIPTION("PCI driver module for SiI IDE");
-- 
GitLab


From 1ceb906b4062954e92295191402e9214345ee0e9 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:25 +0200
Subject: [PATCH 479/853] sis5513: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sis5513.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index a2330c4ac75..f3cf7c6c0cc 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -586,6 +586,12 @@ static int __devinit sis5513_init_one(struct pci_dev *dev, const struct pci_devi
 	return ide_pci_init_one(dev, &d, NULL);
 }
 
+static void __devexit sis5513_remove(struct pci_dev *dev)
+{
+	ide_pci_remove(dev);
+	pci_disable_device(dev);	/* disable the PCI device once ide_pci_remove() has returned */
+}
+
 static const struct pci_device_id sis5513_pci_tbl[] = {
 	{ PCI_VDEVICE(SI, PCI_DEVICE_ID_SI_5513), 0 },
 	{ PCI_VDEVICE(SI, PCI_DEVICE_ID_SI_5518), 0 },
@@ -598,6 +604,7 @@ static struct pci_driver driver = {
 	.name		= "SIS_IDE",
 	.id_table	= sis5513_pci_tbl,
 	.probe		= sis5513_init_one,
+	.remove		= sis5513_remove,
 };
 
 static int __init sis5513_ide_init(void)
@@ -605,7 +612,13 @@ static int __init sis5513_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit sis5513_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in sis5513_ide_init() */
+}
+
 module_init(sis5513_ide_init);
+module_exit(sis5513_ide_exit);
 
 MODULE_AUTHOR("Lionel Bouton, L C Chang, Andre Hedrick, Vojtech Pavlik");
 MODULE_DESCRIPTION("PCI driver module for SIS IDE");
-- 
GitLab


From 6ce7199897bcbad05ecd06a4df22795fb37f4d0a Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:25 +0200
Subject: [PATCH 480/853] sl82c105: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sl82c105.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index be22f8125d7..72899f85c5b 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -348,6 +348,7 @@ static struct pci_driver driver = {
 	.name		= "W82C105_IDE",
 	.id_table	= sl82c105_pci_tbl,
 	.probe		= sl82c105_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init sl82c105_ide_init(void)
@@ -355,7 +356,13 @@ static int __init sl82c105_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit sl82c105_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in sl82c105_ide_init() */
+}
+
 module_init(sl82c105_ide_init);
+module_exit(sl82c105_ide_exit);
 
 MODULE_DESCRIPTION("PCI driver module for W82C105 IDE");
 MODULE_LICENSE("GPL");
-- 
GitLab


From 64b0fed31d6704e4e2e42e9a1ac5995b0a1b54e4 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:25 +0200
Subject: [PATCH 481/853] slc90e66: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/slc90e66.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c
index 2fc2f2cf220..fee5ebe4bb9 100644
--- a/drivers/ide/pci/slc90e66.c
+++ b/drivers/ide/pci/slc90e66.c
@@ -157,6 +157,7 @@ static struct pci_driver driver = {
 	.name		= "SLC90e66_IDE",
 	.id_table	= slc90e66_pci_tbl,
 	.probe		= slc90e66_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init slc90e66_ide_init(void)
@@ -164,7 +165,13 @@ static int __init slc90e66_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit slc90e66_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in slc90e66_ide_init() */
+}
+
 module_init(slc90e66_ide_init);
+module_exit(slc90e66_ide_exit);
 
 MODULE_AUTHOR("Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for SLC90E66 IDE");
-- 
GitLab


From ea881d6d6c58aa6d56105d1faba7432243ea7118 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:26 +0200
Subject: [PATCH 482/853] tc86c001: add ->remove method and module_exit()

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/tc86c001.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c
index bb329219935..102cd7c40cd 100644
--- a/drivers/ide/pci/tc86c001.c
+++ b/drivers/ide/pci/tc86c001.c
@@ -232,6 +232,13 @@ out:
 	return rc;
 }
 
+static void __devexit tc86c001_remove(struct pci_dev *dev)
+{
+	ide_pci_remove(dev);
+	pci_release_region(dev, 5);	/* release BAR 5; presumably requested in the probe path -- TODO confirm */
+	pci_disable_device(dev);
+}
+
 static const struct pci_device_id tc86c001_pci_tbl[] = {
 	{ PCI_VDEVICE(TOSHIBA_2, PCI_DEVICE_ID_TOSHIBA_TC86C001_IDE), 0 },
 	{ 0, }
@@ -241,14 +248,22 @@ MODULE_DEVICE_TABLE(pci, tc86c001_pci_tbl);
 static struct pci_driver driver = {
 	.name		= "TC86C001",
 	.id_table	= tc86c001_pci_tbl,
-	.probe		= tc86c001_init_one
+	.probe		= tc86c001_init_one,
+	.remove		= __devexit_p(tc86c001_remove),	/* wrapper pairs with the __devexit marking on tc86c001_remove() */
 };
 
 static int __init tc86c001_ide_init(void)
 {
 	return ide_pci_register_driver(&driver);
 }
+
+static void __exit tc86c001_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in tc86c001_ide_init() */
+}
+
 module_init(tc86c001_ide_init);
+module_exit(tc86c001_ide_exit);
 
 MODULE_AUTHOR("MontaVista Software, Inc. <source@mvista.com>");
 MODULE_DESCRIPTION("PCI driver module for TC86C001 IDE");
-- 
GitLab


From 29d72f2df933ea5ecf294b170b2f02af2af88120 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:26 +0200
Subject: [PATCH 483/853] triflex: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/triflex.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/triflex.c b/drivers/ide/pci/triflex.c
index 60dcb645d1b..78e24ac8097 100644
--- a/drivers/ide/pci/triflex.c
+++ b/drivers/ide/pci/triflex.c
@@ -117,6 +117,7 @@ static struct pci_driver driver = {
 	.name		= "TRIFLEX_IDE",
 	.id_table	= triflex_pci_tbl,
 	.probe		= triflex_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init triflex_ide_init(void)
@@ -124,7 +125,13 @@ static int __init triflex_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit triflex_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in triflex_ide_init() */
+}
+
 module_init(triflex_ide_init);
+module_exit(triflex_ide_exit);
 
 MODULE_AUTHOR("Torben Mathiasen");
 MODULE_DESCRIPTION("PCI driver module for Compaq Triflex IDE");
-- 
GitLab


From fc2c32b737fa370683f8c44d74f41febe33b9c23 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:26 +0200
Subject: [PATCH 484/853] trm290: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/trm290.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c
index d8127b51a54..449f4de9a70 100644
--- a/drivers/ide/pci/trm290.c
+++ b/drivers/ide/pci/trm290.c
@@ -353,6 +353,7 @@ static struct pci_driver driver = {
 	.name		= "TRM290_IDE",
 	.id_table	= trm290_pci_tbl,
 	.probe		= trm290_init_one,
+	.remove		= ide_pci_remove,
 };
 
 static int __init trm290_ide_init(void)
@@ -360,7 +361,13 @@ static int __init trm290_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit trm290_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in trm290_ide_init() */
+}
+
 module_init(trm290_ide_init);
+module_exit(trm290_ide_exit);
 
 MODULE_AUTHOR("Mark Lord");
 MODULE_DESCRIPTION("PCI driver module for Tekram TRM290 IDE");
-- 
GitLab


From 585f67e736eece4cdf96b628042170273221e770 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:26 +0200
Subject: [PATCH 485/853] via82cxxx: add ->remove method and module_exit()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/via82cxxx.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index bc1675da37d..5f07eeb736a 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -467,6 +467,15 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 	return rc;
 }
 
+static void __devexit via_remove(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct via82cxxx_dev *vdev = host->host_priv;	/* saved before ide_pci_remove() so it can be freed afterwards */
+
+	ide_pci_remove(dev);
+	kfree(vdev);	/* presumably allocated in via_init_one() -- TODO confirm */
+}
+
 static const struct pci_device_id via_pci_tbl[] = {
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C576_1),  0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C586_1),  0 },
@@ -481,6 +490,7 @@ static struct pci_driver driver = {
 	.name 		= "VIA_IDE",
 	.id_table 	= via_pci_tbl,
 	.probe 		= via_init_one,
+	.remove		= via_remove,
 };
 
 static int __init via_ide_init(void)
@@ -488,7 +498,13 @@ static int __init via_ide_init(void)
 	return ide_pci_register_driver(&driver);
 }
 
+static void __exit via_ide_exit(void)
+{
+	pci_unregister_driver(&driver);	/* counterpart of ide_pci_register_driver(&driver) in via_ide_init() */
+}
+
 module_init(via_ide_init);
+module_exit(via_ide_exit);
 
 MODULE_AUTHOR("Vojtech Pavlik, Michel Aubry, Jeff Garzik, Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for VIA IDE");
-- 
GitLab


From 8e27cb1135de4cc69bf358209f91e1f7ba81eca1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:27 +0200
Subject: [PATCH 486/853] icside: add module_exit()

Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/arm/icside.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c
index f575e8341ae..df4af408395 100644
--- a/drivers/ide/arm/icside.c
+++ b/drivers/ide/arm/icside.c
@@ -710,8 +710,14 @@ static int __init icside_init(void)
 	return ecard_register_driver(&icside_driver);
 }
 
+static void __exit icside_exit(void)
+{
+	ecard_unregister_driver(&icside_driver);	/* counterpart of ecard_register_driver(&icside_driver) in icside_init() */
+}
+
 MODULE_AUTHOR("Russell King <rmk@arm.linux.org.uk>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("ICS IDE driver");
 
 module_init(icside_init);
+module_exit(icside_exit);
-- 
GitLab


From 37c5ef56989717d871d048f98fb6411e7a17c43d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:27 +0200
Subject: [PATCH 487/853] rapide: add module_exit()

Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/arm/rapide.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/ide/arm/rapide.c b/drivers/ide/arm/rapide.c
index 2bdd8b734af..78d27d9ae43 100644
--- a/drivers/ide/arm/rapide.c
+++ b/drivers/ide/arm/rapide.c
@@ -95,7 +95,13 @@ static int __init rapide_init(void)
 	return ecard_register_driver(&rapide_driver);
 }
 
+static void __exit rapide_exit(void)
+{
+	ecard_unregister_driver(&rapide_driver);	/* counterpart of ecard_register_driver(&rapide_driver) in rapide_init() */
+}
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Yellowstone RAPIDE driver");
 
 module_init(rapide_init);
+module_exit(rapide_exit);
-- 
GitLab


From b0a62817961796f6dcef5f316134d8bc7279bf6e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:27 +0200
Subject: [PATCH 488/853] ide: fix <asm-xtensa/ide.h>

* Add missing <asm-generic/ide_iops.h> include.

While at it:

* Remove needless ide_default_{irq,io_base}() inlines.

Cc: Chris Zankel <chris@zankel.net>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-xtensa/ide.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/include/asm-xtensa/ide.h b/include/asm-xtensa/ide.h
index 6b912742a42..cb995701c42 100644
--- a/include/asm-xtensa/ide.h
+++ b/include/asm-xtensa/ide.h
@@ -19,17 +19,8 @@
 # define MAX_HWIFS	1
 #endif
 
-static __inline__ int ide_default_irq(unsigned long base)
-{
-	/* Unsupported! */
-  	return 0;
-}
-
-static __inline__ unsigned long ide_default_io_base(int index)
-{
-	/* Unsupported! */
-  	return 0;
-}
+#include <asm-generic/ide_iops.h>
 
 #endif	/* __KERNEL__ */
+
 #endif	/* _XTENSA_IDE_H */
-- 
GitLab


From ac32f3238c1d95a6ebea2c312160dbdbd61bf91c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:27 +0200
Subject: [PATCH 489/853] ide-generic: fix ide_default_io_base() for m32r

Fix ide_default_io_base() to match ide_default_irq().

Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-m32r/ide.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/asm-m32r/ide.h b/include/asm-m32r/ide.h
index 1e7f6474d13..72798d62422 100644
--- a/include/asm-m32r/ide.h
+++ b/include/asm-m32r/ide.h
@@ -52,12 +52,20 @@ static __inline__ int ide_default_irq(unsigned long base)
 static __inline__ unsigned long ide_default_io_base(int index)
 {
 	switch (index) {
+#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_MAPPI2) \
+	|| defined(CONFIG_PLAT_OPSPUT)
+		case 0:	return 0x1f0;
+#elif defined(CONFIG_PLAT_MAPPI3)
+		case 0:	return 0x1f0;
+		case 1:	return 0x170;
+#else
 		case 0:	return 0x1f0;
 		case 1:	return 0x170;
 		case 2: return 0x1e8;
 		case 3: return 0x168;
 		case 4: return 0x1e0;
 		case 5: return 0x160;
+#endif
 		default:
 			return 0;
 	}
-- 
GitLab


From dbdec839c4c2bfc8f2da8e50c06b9947e5ad0394 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:28 +0200
Subject: [PATCH 490/853] ide-generic: minor fix for mips

Move ide_probe_legacy() call to ide_generic_init() so it fails
early if necessary and returns the proper error value (nowadays
ide_default_io_base() is used only by ide-generic).

Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-generic.c           | 4 ++++
 include/asm-mips/mach-generic/ide.h | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 31d98fec775..567fd843c7f 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -87,6 +87,10 @@ static int __init ide_generic_init(void)
 	unsigned long io_addr;
 	int i, rc;
 
+#ifdef CONFIG_MIPS
+	if (!ide_probe_legacy())
+		return -ENODEV;
+#endif
 	printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" module "
 			 "parameter for probing all legacy ISA IDE ports\n");
 
diff --git a/include/asm-mips/mach-generic/ide.h b/include/asm-mips/mach-generic/ide.h
index 0f6c251f5fe..71a01c5aec1 100644
--- a/include/asm-mips/mach-generic/ide.h
+++ b/include/asm-mips/mach-generic/ide.h
@@ -72,8 +72,6 @@ static __inline__ int ide_default_irq(unsigned long base)
 
 static __inline__ unsigned long ide_default_io_base(int index)
 {
-	if (!ide_probe_legacy())
-		return 0;
 	/*
 	 *      If PCI is present then it is not safe to poke around
 	 *      the other legacy IDE ports. Only 0x1f0 and 0x170 are
-- 
GitLab


From b6cd7da5be2522b62bbc48d41b36c828b88e02fe Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:28 +0200
Subject: [PATCH 491/853] ide-generic: remove "no_pci_devices()" quirk from
 ide_default_io_base()

Since the decision to probe for ISA ide2-6 is now left to the user,
the "no_pci_devices()" quirk is no longer needed and may be removed.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-mips/mach-generic/ide.h | 18 ++++--------------
 include/asm-x86/ide.h               | 18 ++++--------------
 2 files changed, 8 insertions(+), 28 deletions(-)

diff --git a/include/asm-mips/mach-generic/ide.h b/include/asm-mips/mach-generic/ide.h
index 71a01c5aec1..f34740ee677 100644
--- a/include/asm-mips/mach-generic/ide.h
+++ b/include/asm-mips/mach-generic/ide.h
@@ -72,23 +72,13 @@ static __inline__ int ide_default_irq(unsigned long base)
 
 static __inline__ unsigned long ide_default_io_base(int index)
 {
-	/*
-	 *      If PCI is present then it is not safe to poke around
-	 *      the other legacy IDE ports. Only 0x1f0 and 0x170 are
-	 *      defined compatibility mode ports for PCI. A user can
-	 *      override this using ide= but we must default safe.
-	 */
-	if (no_pci_devices()) {
-		switch (index) {
-		case 2: return 0x1e8;
-		case 3: return 0x168;
-		case 4: return 0x1e0;
-		case 5: return 0x160;
-		}
-	}
 	switch (index) {
 	case 0: return 0x1f0;
 	case 1: return 0x170;
+	case 2: return 0x1e8;
+	case 3: return 0x168;
+	case 4: return 0x1e0;
+	case 5: return 0x160;
 	default:
 		return 0;
 	}
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index cf9c98e5bdb..34050747f38 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -36,23 +36,13 @@ static __inline__ int ide_default_irq(unsigned long base)
 
 static __inline__ unsigned long ide_default_io_base(int index)
 {
-	/*
-	 *	If PCI is present then it is not safe to poke around
-	 *	the other legacy IDE ports. Only 0x1f0 and 0x170 are
-	 *	defined compatibility mode ports for PCI. A user can 
-	 *	override this using ide= but we must default safe.
-	 */
-	if (no_pci_devices()) {
-		switch(index) {
-			case 2: return 0x1e8;
-			case 3: return 0x168;
-			case 4: return 0x1e0;
-			case 5: return 0x160;
-		}
-	}
 	switch (index) {
 		case 0:	return 0x1f0;
 		case 1:	return 0x170;
+		case 2: return 0x1e8;
+		case 3: return 0x168;
+		case 4: return 0x1e0;
+		case 5: return 0x160;
 		default:
 			return 0;
 	}
-- 
GitLab


From 2c9d86438a0104800da2a8ecdc1e27baf38ba6a4 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:29 +0200
Subject: [PATCH 492/853] ide: remove <asm-cris/ide.h>

Remove <asm-cris/arch-v{10,32}/ide.h> and <asm-cris/ide.h>.

This code has been broken for some time now and needs a rewrite
to match the IDE core code / host driver model anyway.

Cc: Jesper Nilsson <Jesper.Nilsson@axis.com>
Cc: Mikael Starvik <mikael.starvik@axis.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-cris/arch-v10/ide.h | 91 ---------------------------------
 include/asm-cris/arch-v32/ide.h | 56 --------------------
 include/asm-cris/ide.h          |  1 -
 3 files changed, 148 deletions(-)
 delete mode 100644 include/asm-cris/arch-v10/ide.h
 delete mode 100644 include/asm-cris/arch-v32/ide.h
 delete mode 100644 include/asm-cris/ide.h

diff --git a/include/asm-cris/arch-v10/ide.h b/include/asm-cris/arch-v10/ide.h
deleted file mode 100644
index 5366e623932..00000000000
--- a/include/asm-cris/arch-v10/ide.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- *  linux/include/asm-cris/ide.h
- *
- *  Copyright (C) 2000, 2001, 2002  Axis Communications AB
- *
- *  Authors:    Bjorn Wesen
- *
- */
-
-/*
- *  This file contains the ETRAX 100LX specific IDE code.
- */
-
-#ifndef __ASMCRIS_IDE_H
-#define __ASMCRIS_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm/arch/svinto.h>
-#include <asm/io.h>
-#include <asm-generic/ide_iops.h>
-
-
-/* ETRAX 100 can support 4 IDE busses on the same pins (serialized) */
-
-#define MAX_HWIFS	4
-
-static inline int ide_default_irq(unsigned long base)
-{
-	/* all IDE busses share the same IRQ, number 4.
-	 * this has the side-effect that ide-probe.c will cluster our 4 interfaces
-	 * together in a hwgroup, and will serialize accesses. this is good, because
-	 * we can't access more than one interface at the same time on ETRAX100.
-	 */
-	return 4;
-}
-
-static inline unsigned long ide_default_io_base(int index)
-{
-	/* we have no real I/O base address per interface, since all go through the
-	 * same register. but in a bitfield in that register, we have the i/f number.
-	 * so we can use the io_base to remember that bitfield.
-	 */
-	static const unsigned long io_bases[MAX_HWIFS] = {
-		IO_FIELD(R_ATA_CTRL_DATA, sel, 0),
-		IO_FIELD(R_ATA_CTRL_DATA, sel, 1),
-		IO_FIELD(R_ATA_CTRL_DATA, sel, 2),
-		IO_FIELD(R_ATA_CTRL_DATA, sel, 3)
-	};
-	return io_bases[index];
-}
-
-/* this is called once for each interface, to setup the port addresses. data_port is the result
- * of the ide_default_io_base call above. ctrl_port will be 0, but that is don't care for us.
- */
-
-static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, unsigned long ctrl_port, int *irq)
-{
-	int i;
-
-	/* fill in ports for ATA addresses 0 to 7 */
-	for (i = 0; i <= 7; i++) {
-		hw->io_ports_array[i] = data_port |
-			IO_FIELD(R_ATA_CTRL_DATA, addr, i) |
-			IO_STATE(R_ATA_CTRL_DATA, cs0, active);
-	}
-
-	/* the IDE control register is at ATA address 6, with CS1 active instead of CS0 */
-	hw->io_ports.ctl_addr = data_port |
-			IO_FIELD(R_ATA_CTRL_DATA, addr, 6) |
-			IO_STATE(R_ATA_CTRL_DATA, cs1, active);
-
-	/* whats this for ? */
-	hw->io_ports.irq_addr = 0;
-}
-
-static inline void ide_init_default_hwifs(void)
-{
-	hw_regs_t hw;
-	int index;
-
-	for(index = 0; index < MAX_HWIFS; index++) {
-		ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
-		hw.irq = ide_default_irq(ide_default_io_base(index));
-		ide_register_hw(&hw, NULL);
-	}
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMCRIS_IDE_H */
diff --git a/include/asm-cris/arch-v32/ide.h b/include/asm-cris/arch-v32/ide.h
deleted file mode 100644
index fb9c3627a5b..00000000000
--- a/include/asm-cris/arch-v32/ide.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  linux/include/asm-cris/ide.h
- *
- *  Copyright (C) 2000-2004  Axis Communications AB
- *
- *  Authors:    Bjorn Wesen, Mikael Starvik
- *
- */
-
-/*
- *  This file contains the ETRAX FS specific IDE code.
- */
-
-#ifndef __ASMCRIS_IDE_H
-#define __ASMCRIS_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm/arch/hwregs/intr_vect.h>
-#include <asm/arch/hwregs/ata_defs.h>
-#include <asm/io.h>
-#include <asm-generic/ide_iops.h>
-
-
-/* ETRAX FS can support 4 IDE busses on the same pins (serialized) */
-
-#define MAX_HWIFS	4
-
-static inline int ide_default_irq(unsigned long base)
-{
-	/* all IDE busses share the same IRQ,
-	 * this has the side-effect that ide-probe.c will cluster our 4 interfaces
-	 * together in a hwgroup, and will serialize accesses. this is good, because
-	 * we can't access more than one interface at the same time on ETRAX100.
-	 */
-	return ATA_INTR_VECT;
-}
-
-static inline unsigned long ide_default_io_base(int index)
-{
-	reg_ata_rw_ctrl2 ctrl2 = {.sel = index};
-	/* we have no real I/O base address per interface, since all go through the
-	 * same register. but in a bitfield in that register, we have the i/f number.
-	 * so we can use the io_base to remember that bitfield.
-	 */
-        ctrl2.sel = index;
-
-	return REG_TYPE_CONV(unsigned long, reg_ata_rw_ctrl2, ctrl2);
-}
-
-#define IDE_ARCH_ACK_INTR
-#define ide_ack_intr(hwif)	((hwif)->ack_intr(hwif))
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMCRIS_IDE_H */
diff --git a/include/asm-cris/ide.h b/include/asm-cris/ide.h
deleted file mode 100644
index a894f66665f..00000000000
--- a/include/asm-cris/ide.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm/arch/ide.h>
-- 
GitLab


From d83b8b85cd56a083d30df73f3fd5e4714591b910 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:30 +0200
Subject: [PATCH 493/853] ide: define MAX_HWIFS in <linux/ide.h>

* Now that ide_hwif_t instances are allocated dynamically,
  the difference between MAX_HWIFS == 2 and MAX_HWIFS == 10
  is ~100 bytes (x86-32), so use MAX_HWIFS == 10 on all archs
  except the ones that use MAX_HWIFS == 1.

* Define MAX_HWIFS in <linux/ide.h> instead of <asm/ide.h>.

[ Please note that avr32/cris/v850 have no <asm/ide.h>
  and alpha/ia64/sh always define CONFIG_IDE_MAX_HWIFS. ]

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-arm/ide.h               | 4 ----
 include/asm-blackfin/ide.h          | 2 --
 include/asm-frv/ide.h               | 4 ----
 include/asm-h8300/ide.h             | 2 --
 include/asm-m32r/ide.h              | 8 --------
 include/asm-m68k/ide.h              | 4 ----
 include/asm-mips/mach-generic/ide.h | 8 --------
 include/asm-mn10300/ide.h           | 4 ----
 include/asm-parisc/ide.h            | 4 ----
 include/asm-powerpc/ide.h           | 8 --------
 include/asm-sparc/ide.h             | 3 ---
 include/asm-x86/ide.h               | 9 ---------
 include/asm-xtensa/ide.h            | 5 -----
 include/linux/ide.h                 | 8 ++++++++
 14 files changed, 8 insertions(+), 65 deletions(-)

diff --git a/include/asm-arm/ide.h b/include/asm-arm/ide.h
index 88f4d231ce4..a48019f99d0 100644
--- a/include/asm-arm/ide.h
+++ b/include/asm-arm/ide.h
@@ -13,10 +13,6 @@
 
 #ifdef __KERNEL__
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS	4
-#endif
-
 #define __ide_mm_insw(port,addr,len)	readsw(port,addr,len)
 #define __ide_mm_insl(port,addr,len)	readsl(port,addr,len)
 #define __ide_mm_outsw(port,addr,len)	writesw(port,addr,len)
diff --git a/include/asm-blackfin/ide.h b/include/asm-blackfin/ide.h
index 5b88de115bf..90bc50bd22e 100644
--- a/include/asm-blackfin/ide.h
+++ b/include/asm-blackfin/ide.h
@@ -17,8 +17,6 @@
 #ifdef __KERNEL__
 /****************************************************************************/
 
-#define MAX_HWIFS	1
-
 #include <asm-generic/ide_iops.h>
 
 /****************************************************************************/
diff --git a/include/asm-frv/ide.h b/include/asm-frv/ide.h
index 8c9a540d434..7ebcc56a222 100644
--- a/include/asm-frv/ide.h
+++ b/include/asm-frv/ide.h
@@ -18,10 +18,6 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS 8
-#endif
-
 /****************************************************************************/
 /*
  * some bits needed for parts of the IDE subsystem to compile
diff --git a/include/asm-h8300/ide.h b/include/asm-h8300/ide.h
index f8535ce7476..8f79ba2ff92 100644
--- a/include/asm-h8300/ide.h
+++ b/include/asm-h8300/ide.h
@@ -16,8 +16,6 @@
 #ifdef __KERNEL__
 /****************************************************************************/
 
-#define MAX_HWIFS	1
-
 #include <asm-generic/ide_iops.h>
 
 /****************************************************************************/
diff --git a/include/asm-m32r/ide.h b/include/asm-m32r/ide.h
index 72798d62422..d755d41b993 100644
--- a/include/asm-m32r/ide.h
+++ b/include/asm-m32r/ide.h
@@ -15,14 +15,6 @@
 
 #include <asm/m32r.h>
 
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS	10
-# else
-#define MAX_HWIFS	2
-# endif
-#endif
-
 static __inline__ int ide_default_irq(unsigned long base)
 {
 	switch (base) {
diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h
index 909c6dfd385..1daf6cbdd9f 100644
--- a/include/asm-m68k/ide.h
+++ b/include/asm-m68k/ide.h
@@ -45,10 +45,6 @@
 #include <asm/macints.h>
 #endif
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS	4	/* same as the other archs */
-#endif
-
 /*
  * Get rid of defs from io.h - ide has its private and conflicting versions
  * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we
diff --git a/include/asm-mips/mach-generic/ide.h b/include/asm-mips/mach-generic/ide.h
index f34740ee677..8ee6bff030d 100644
--- a/include/asm-mips/mach-generic/ide.h
+++ b/include/asm-mips/mach-generic/ide.h
@@ -19,14 +19,6 @@
 #include <linux/stddef.h>
 #include <asm/processor.h>
 
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS	10
-# else
-#define MAX_HWIFS	6
-# endif
-#endif
-
 static __inline__ int ide_probe_legacy(void)
 {
 #ifdef CONFIG_PCI
diff --git a/include/asm-mn10300/ide.h b/include/asm-mn10300/ide.h
index dc235121ec4..6adcdd92e83 100644
--- a/include/asm-mn10300/ide.h
+++ b/include/asm-mn10300/ide.h
@@ -23,10 +23,6 @@
 #undef SUPPORT_VLB_SYNC
 #define SUPPORT_VLB_SYNC 0
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS 8
-#endif
-
 /*
  * some bits needed for parts of the IDE subsystem to compile
  */
diff --git a/include/asm-parisc/ide.h b/include/asm-parisc/ide.h
index db0c9441009..c246ef75017 100644
--- a/include/asm-parisc/ide.h
+++ b/include/asm-parisc/ide.h
@@ -13,10 +13,6 @@
 
 #ifdef __KERNEL__
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS	2
-#endif
-
 #define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
 #define ide_free_irq(irq,dev_id)		free_irq((irq), (dev_id))
 #define ide_request_region(from,extent,name)	request_region((from), (extent), (name))
diff --git a/include/asm-powerpc/ide.h b/include/asm-powerpc/ide.h
index 3d90bf7d3d7..262def6a9f0 100644
--- a/include/asm-powerpc/ide.h
+++ b/include/asm-powerpc/ide.h
@@ -14,14 +14,6 @@
 #endif
 #include <asm/io.h>
 
-#ifndef MAX_HWIFS
-#ifdef __powerpc64__
-#define MAX_HWIFS	10
-#else
-#define MAX_HWIFS	8
-#endif
-#endif
-
 #define __ide_mm_insw(p, a, c)	readsw((void __iomem *)(p), (a), (c))
 #define __ide_mm_insl(p, a, c)	readsl((void __iomem *)(p), (a), (c))
 #define __ide_mm_outsw(p, a, c)	writesw((void __iomem *)(p), (a), (c))
diff --git a/include/asm-sparc/ide.h b/include/asm-sparc/ide.h
index 879fcec72dc..b7af3d65823 100644
--- a/include/asm-sparc/ide.h
+++ b/include/asm-sparc/ide.h
@@ -21,9 +21,6 @@
 #include <asm/psr.h>
 #endif
 
-#undef  MAX_HWIFS
-#define MAX_HWIFS	2
-
 #define __ide_insl(data_reg, buffer, wcount) \
 	__ide_insw(data_reg, buffer, (wcount)<<1)
 #define __ide_outsl(data_reg, buffer, wcount) \
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index 34050747f38..bc54879daed 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -11,15 +11,6 @@
 
 #ifdef __KERNEL__
 
-
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS	10
-# else
-#define MAX_HWIFS	6
-# endif
-#endif
-
 static __inline__ int ide_default_irq(unsigned long base)
 {
 	switch (base) {
diff --git a/include/asm-xtensa/ide.h b/include/asm-xtensa/ide.h
index cb995701c42..18342a2cc77 100644
--- a/include/asm-xtensa/ide.h
+++ b/include/asm-xtensa/ide.h
@@ -14,11 +14,6 @@
 
 #ifdef __KERNEL__
 
-
-#ifndef MAX_HWIFS
-# define MAX_HWIFS	1
-#endif
-
 #include <asm-generic/ide_iops.h>
 
 #endif	/* __KERNEL__ */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index dbd0aeb3a56..76fe00b24b5 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -213,6 +213,14 @@ static inline int __ide_default_irq(unsigned long base)
 
 #include <asm/ide.h>
 
+#ifndef MAX_HWIFS
+#if defined(CONFIG_BLACKFIN) || defined(CONFIG_H8300) || defined(CONFIG_XTENSA)
+# define MAX_HWIFS	1
+#else
+# define MAX_HWIFS	10
+#endif
+#endif
+
 #if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED)
 #undef MAX_HWIFS
 #define MAX_HWIFS	CONFIG_IDE_MAX_HWIFS
-- 
GitLab


From ffed0b6e1a6f5132681d4b521531d992f893190b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:30 +0200
Subject: [PATCH 494/853] ide-generic: remove broken PPC_PREP support

PPC_PREP has been depending on BROKEN for some time now.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-powerpc/ide.h | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/include/asm-powerpc/ide.h b/include/asm-powerpc/ide.h
index 262def6a9f0..1aaf27be874 100644
--- a/include/asm-powerpc/ide.h
+++ b/include/asm-powerpc/ide.h
@@ -31,16 +31,6 @@ static __inline__ int ide_default_irq(unsigned long base)
 	case 0x1f0:	return 14;
 	case 0x170:	return 15;
 	}
-#endif
-#ifdef CONFIG_PPC_PREP
-	switch (base) {
-	case 0x1f0:	return 13;
-	case 0x170:	return 13;
-	case 0x1e8:	return 11;
-	case 0x168:	return 10;
-	case 0xfff0:	return 14;	/* MCP(N)750 ide0 */
-	case 0xffe0:	return 15;	/* MCP(N)750 ide1 */
-	}
 #endif
 	return 0;
 }
@@ -53,14 +43,6 @@ static __inline__ unsigned long ide_default_io_base(int index)
 	case 0:		return 0x1f0;
 	case 1:		return 0x170;
 	}
-#endif
-#ifdef CONFIG_PPC_PREP
-	switch (index) {
-	case 0:		return 0x1f0;
-	case 1:		return 0x170;
-	case 2:		return 0x1e8;
-	case 3:		return 0x168;
-	}
 #endif
 	return 0;
 }
-- 
GitLab


From 35bbac9a2f73a7e0967d0a1d3e3673e2590ef716 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:30 +0200
Subject: [PATCH 495/853] ide-generic: is no longer needed on ppc32

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 04d9c4d459d..130ef64b44f 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -314,7 +314,7 @@ comment "IDE chipset support/bugfixes"
 
 config IDE_GENERIC
 	tristate "generic/default IDE chipset support"
-	depends on ALPHA || X86 || IA64 || M32R || MIPS || PPC32
+	depends on ALPHA || X86 || IA64 || M32R || MIPS
 	help
 	  If unsure, say N.
 
-- 
GitLab


From f01d35d87f39ab794ddcdefadb79c11054bcbfbc Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:31 +0200
Subject: [PATCH 496/853] ide-generic: remove ide_default_{io_base,irq}()
 inlines (take 3)

Replace ide_default_{io_base,irq}() inlines by legacy_{bases,irqs}[].

v2:
Add missing zero-ing of hws[] (caught during testing by Borislav Petkov).

v3:
Fix zeroing of hws[] for _real_ this time.

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-generic.c           | 32 ++++++++++++++++--
 include/asm-alpha/ide.h             | 24 --------------
 include/asm-ia64/ide.h              | 29 -----------------
 include/asm-m32r/ide.h              | 50 -----------------------------
 include/asm-mips/mach-generic/ide.h | 28 ----------------
 include/asm-x86/ide.h               | 28 ----------------
 6 files changed, 29 insertions(+), 162 deletions(-)

diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 567fd843c7f..8fe8b5b9cf7 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -20,6 +20,11 @@
 #include <linux/module.h>
 #include <linux/ide.h>
 
+/* FIXME: convert m32r to use ide_platform host driver */
+#ifdef CONFIG_M32R
+#include <asm/m32r.h>
+#endif
+
 #define DRV_NAME	"ide_generic"
 
 static int probe_mask = 0x03;
@@ -80,6 +85,21 @@ static int __init ide_generic_sysfs_init(void)
 	return 0;
 }
 
+#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_MAPPI2) \
+	|| defined(CONFIG_PLAT_OPSPUT)
+static const u16 legacy_bases[] = { 0x1f0 };
+static const int legacy_irqs[]  = { PLD_IRQ_CFIREQ };
+#elif defined(CONFIG_PLAT_MAPPI3)
+static const u16 legacy_bases[] = { 0x1f0, 0x170 };
+static const int legacy_irqs[]  = { PLD_IRQ_CFIREQ, PLD_IRQ_IDEIREQ };
+#elif defined(CONFIG_ALPHA)
+static const u16 legacy_bases[] = { 0x1f0, 0x170, 0x1e8, 0x168 };
+static const int legacy_irqs[]  = { 14, 15, 11, 10 };
+#else
+static const u16 legacy_bases[] = { 0x1f0, 0x170, 0x1e8, 0x168, 0x1e0, 0x160 };
+static const int legacy_irqs[]  = { 14, 15, 11, 10, 8, 12 };
+#endif
+
 static int __init ide_generic_init(void)
 {
 	hw_regs_t hw[MAX_HWIFS], *hws[MAX_HWIFS];
@@ -94,8 +114,10 @@ static int __init ide_generic_init(void)
 	printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" module "
 			 "parameter for probing all legacy ISA IDE ports\n");
 
-	for (i = 0; i < MAX_HWIFS; i++) {
-		io_addr = ide_default_io_base(i);
+	memset(hws, 0, sizeof(hw_regs_t *) * MAX_HWIFS);
+
+	for (i = 0; i < ARRAY_SIZE(legacy_bases); i++) {
+		io_addr = legacy_bases[i];
 
 		hws[i] = NULL;
 
@@ -117,7 +139,11 @@ static int __init ide_generic_init(void)
 
 			memset(&hw[i], 0, sizeof(hw[i]));
 			ide_std_init_ports(&hw[i], io_addr, io_addr + 0x206);
-			hw[i].irq = ide_default_irq(io_addr);
+#ifdef CONFIG_IA64
+			hw[i].irq = isa_irq_to_vector(legacy_irqs[i]);
+#else
+			hw[i].irq = legacy_irqs[i];
+#endif
 			hw[i].chipset = ide_generic;
 
 			hws[i] = &hw[i];
diff --git a/include/asm-alpha/ide.h b/include/asm-alpha/ide.h
index f44129abc02..55f9f687024 100644
--- a/include/asm-alpha/ide.h
+++ b/include/asm-alpha/ide.h
@@ -13,30 +13,6 @@
 
 #ifdef __KERNEL__
 
-static inline int ide_default_irq(unsigned long base)
-{
-	switch (base) {
-		case 0x1f0: return 14;
-		case 0x170: return 15;
-		case 0x1e8: return 11;
-		case 0x168: return 10;
-		default:
-			return 0;
-	}
-}
-
-static inline unsigned long ide_default_io_base(int index)
-{
-	switch (index) {
-		case 0:	return 0x1f0;
-		case 1:	return 0x170;
-		case 2: return 0x1e8;
-		case 3: return 0x168;
-		default:
-			return 0;
-	}
-}
-
 #include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-ia64/ide.h b/include/asm-ia64/ide.h
index 8fa3f8cd067..5a0aedea476 100644
--- a/include/asm-ia64/ide.h
+++ b/include/asm-ia64/ide.h
@@ -13,37 +13,8 @@
 
 #ifdef __KERNEL__
 
-
 #include <linux/irq.h>
 
-static inline int ide_default_irq(unsigned long base)
-{
-	switch (base) {
-	      case 0x1f0: return isa_irq_to_vector(14);
-	      case 0x170: return isa_irq_to_vector(15);
-	      case 0x1e8: return isa_irq_to_vector(11);
-	      case 0x168: return isa_irq_to_vector(10);
-	      case 0x1e0: return isa_irq_to_vector(8);
-	      case 0x160: return isa_irq_to_vector(12);
-	      default:
-		return 0;
-	}
-}
-
-static inline unsigned long ide_default_io_base(int index)
-{
-	switch (index) {
-	      case 0: return 0x1f0;
-	      case 1: return 0x170;
-	      case 2: return 0x1e8;
-	      case 3: return 0x168;
-	      case 4: return 0x1e0;
-	      case 5: return 0x160;
-	      default:
-		return 0;
-	}
-}
-
 #include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-m32r/ide.h b/include/asm-m32r/ide.h
index d755d41b993..0f1ec697387 100644
--- a/include/asm-m32r/ide.h
+++ b/include/asm-m32r/ide.h
@@ -13,56 +13,6 @@
 
 #ifdef __KERNEL__
 
-#include <asm/m32r.h>
-
-static __inline__ int ide_default_irq(unsigned long base)
-{
-	switch (base) {
-#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_MAPPI2) \
-	|| defined(CONFIG_PLAT_OPSPUT)
-		case 0x1f0: return PLD_IRQ_CFIREQ;
-		default:
-			return 0;
-#elif defined(CONFIG_PLAT_MAPPI3)
-		case 0x1f0: return PLD_IRQ_CFIREQ;
-		case 0x170: return PLD_IRQ_IDEIREQ;
-		default:
-			return 0;
-#else
-		case 0x1f0: return 14;
-		case 0x170: return 15;
-		case 0x1e8: return 11;
-		case 0x168: return 10;
-		case 0x1e0: return 8;
-		case 0x160: return 12;
-		default:
-			return 0;
-#endif
-	}
-}
-
-static __inline__ unsigned long ide_default_io_base(int index)
-{
-	switch (index) {
-#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_MAPPI2) \
-	|| defined(CONFIG_PLAT_OPSPUT)
-		case 0:	return 0x1f0;
-#elif defined(CONFIG_PLAT_MAPPI3)
-		case 0:	return 0x1f0;
-		case 1:	return 0x170;
-#else
-		case 0:	return 0x1f0;
-		case 1:	return 0x170;
-		case 2: return 0x1e8;
-		case 3: return 0x168;
-		case 4: return 0x1e0;
-		case 5: return 0x160;
-#endif
-		default:
-			return 0;
-	}
-}
-
 #include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-mips/mach-generic/ide.h b/include/asm-mips/mach-generic/ide.h
index 8ee6bff030d..73008f7bdc9 100644
--- a/include/asm-mips/mach-generic/ide.h
+++ b/include/asm-mips/mach-generic/ide.h
@@ -48,34 +48,6 @@ found:
 #endif
 }
 
-static __inline__ int ide_default_irq(unsigned long base)
-{
-	switch (base) {
-		case 0x1f0: return 14;
-		case 0x170: return 15;
-		case 0x1e8: return 11;
-		case 0x168: return 10;
-		case 0x1e0: return 8;
-		case 0x160: return 12;
-		default:
-			return 0;
-	}
-}
-
-static __inline__ unsigned long ide_default_io_base(int index)
-{
-	switch (index) {
-	case 0: return 0x1f0;
-	case 1: return 0x170;
-	case 2: return 0x1e8;
-	case 3: return 0x168;
-	case 4: return 0x1e0;
-	case 5: return 0x160;
-	default:
-		return 0;
-	}
-}
-
 /* MIPS port and memory-mapped I/O string operations.  */
 static inline void __ide_flush_prologue(void)
 {
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index bc54879daed..0289baf9ce0 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -11,34 +11,6 @@
 
 #ifdef __KERNEL__
 
-static __inline__ int ide_default_irq(unsigned long base)
-{
-	switch (base) {
-		case 0x1f0: return 14;
-		case 0x170: return 15;
-		case 0x1e8: return 11;
-		case 0x168: return 10;
-		case 0x1e0: return 8;
-		case 0x160: return 12;
-		default:
-			return 0;
-	}
-}
-
-static __inline__ unsigned long ide_default_io_base(int index)
-{
-	switch (index) {
-		case 0:	return 0x1f0;
-		case 1:	return 0x170;
-		case 2: return 0x1e8;
-		case 3: return 0x168;
-		case 4: return 0x1e0;
-		case 5: return 0x160;
-		default:
-			return 0;
-	}
-}
-
 #include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
-- 
GitLab


From 2a8f7450f828eaee49d66f41f99ac2e54f1160a6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:31 +0200
Subject: [PATCH 497/853] ide: remove <asm/ide.h> for some archs

* Remove <linux/irq.h> include from <asm-ia64/ide.h> (<linux/ide.h> includes
  <linux/interrupt.h> which is enough).

* Remove <asm/ide.h> for alpha/blackfin/h8300/ia64/m32r/sh/x86/xtensa
  (this leaves us with arm/frv/m68k/mips/mn10300/parisc/powerpc/sparc[64]).

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-alpha/ide.h    | 20 --------------------
 include/asm-blackfin/ide.h | 25 -------------------------
 include/asm-h8300/ide.h    | 24 ------------------------
 include/asm-ia64/ide.h     | 22 ----------------------
 include/asm-m32r/ide.h     | 20 --------------------
 include/asm-sh/ide.h       | 21 ---------------------
 include/asm-x86/ide.h      | 18 ------------------
 include/asm-xtensa/ide.h   | 21 ---------------------
 include/linux/ide.h        |  6 ++++++
 9 files changed, 6 insertions(+), 171 deletions(-)
 delete mode 100644 include/asm-alpha/ide.h
 delete mode 100644 include/asm-blackfin/ide.h
 delete mode 100644 include/asm-h8300/ide.h
 delete mode 100644 include/asm-ia64/ide.h
 delete mode 100644 include/asm-m32r/ide.h
 delete mode 100644 include/asm-sh/ide.h
 delete mode 100644 include/asm-x86/ide.h
 delete mode 100644 include/asm-xtensa/ide.h

diff --git a/include/asm-alpha/ide.h b/include/asm-alpha/ide.h
deleted file mode 100644
index 55f9f687024..00000000000
--- a/include/asm-alpha/ide.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- *  linux/include/asm-alpha/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the alpha architecture specific IDE code.
- */
-
-#ifndef __ASMalpha_IDE_H
-#define __ASMalpha_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMalpha_IDE_H */
diff --git a/include/asm-blackfin/ide.h b/include/asm-blackfin/ide.h
deleted file mode 100644
index 90bc50bd22e..00000000000
--- a/include/asm-blackfin/ide.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/****************************************************************************/
-
-/*
- *  linux/include/asm-blackfin/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- *  Copyright (C) 2001       Lineo Inc., davidm@snapgear.com
- *  Copyright (C) 2002       Greg Ungerer (gerg@snapgear.com)
- *  Copyright (C) 2002       Yoshinori Sato (ysato@users.sourceforge.jp)
- *  Copyright (C) 2005       Hennerich Michael (hennerich@blackfin.uclinux.org)
- */
-
-/****************************************************************************/
-#ifndef _BLACKFIN_IDE_H
-#define _BLACKFIN_IDE_H
-/****************************************************************************/
-#ifdef __KERNEL__
-/****************************************************************************/
-
-#include <asm-generic/ide_iops.h>
-
-/****************************************************************************/
-#endif				/* __KERNEL__ */
-#endif				/* _BLACKFIN_IDE_H */
-/****************************************************************************/
diff --git a/include/asm-h8300/ide.h b/include/asm-h8300/ide.h
deleted file mode 100644
index 8f79ba2ff92..00000000000
--- a/include/asm-h8300/ide.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/****************************************************************************/
-
-/*
- *  linux/include/asm-h8300/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- *  Copyright (C) 2001       Lineo Inc., davidm@snapgear.com
- *  Copyright (C) 2002       Greg Ungerer (gerg@snapgear.com)
- *  Copyright (C) 2002       Yoshinori Sato (ysato@users.sourceforge.jp)
- */
-
-/****************************************************************************/
-#ifndef _H8300_IDE_H
-#define _H8300_IDE_H
-/****************************************************************************/
-#ifdef __KERNEL__
-/****************************************************************************/
-
-#include <asm-generic/ide_iops.h>
-
-/****************************************************************************/
-#endif /* __KERNEL__ */
-#endif /* _H8300_IDE_H */
-/****************************************************************************/
diff --git a/include/asm-ia64/ide.h b/include/asm-ia64/ide.h
deleted file mode 100644
index 5a0aedea476..00000000000
--- a/include/asm-ia64/ide.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  linux/include/asm-ia64/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the ia64 architecture specific IDE code.
- */
-
-#ifndef __ASM_IA64_IDE_H
-#define __ASM_IA64_IDE_H
-
-#ifdef __KERNEL__
-
-#include <linux/irq.h>
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_IA64_IDE_H */
diff --git a/include/asm-m32r/ide.h b/include/asm-m32r/ide.h
deleted file mode 100644
index 0f1ec697387..00000000000
--- a/include/asm-m32r/ide.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_M32R_IDE_H
-#define _ASM_M32R_IDE_H
-
-/*
- *  linux/include/asm-m32r/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the i386 architecture specific IDE code.
- */
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_M32R_IDE_H */
diff --git a/include/asm-sh/ide.h b/include/asm-sh/ide.h
deleted file mode 100644
index 58e0bdd52be..00000000000
--- a/include/asm-sh/ide.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- *  linux/include/asm-sh/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the i386 architecture specific IDE code.
- *  In future, SuperH code.
- */
-
-#ifndef __ASM_SH_IDE_H
-#define __ASM_SH_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_SH_IDE_H */
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
deleted file mode 100644
index 0289baf9ce0..00000000000
--- a/include/asm-x86/ide.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the i386 architecture specific IDE code.
- */
-
-#ifndef __ASMi386_IDE_H
-#define __ASMi386_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMi386_IDE_H */
diff --git a/include/asm-xtensa/ide.h b/include/asm-xtensa/ide.h
deleted file mode 100644
index 18342a2cc77..00000000000
--- a/include/asm-xtensa/ide.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * include/asm-xtensa/ide.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1994 - 1996  Linus Torvalds & authors
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_IDE_H
-#define _XTENSA_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif	/* __KERNEL__ */
-
-#endif	/* _XTENSA_IDE_H */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 76fe00b24b5..fd78b401b03 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -211,7 +211,13 @@ static inline int __ide_default_irq(unsigned long base)
 	return 0;
 }
 
+#if defined(CONFIG_ARM) || defined(CONFIG_FRV) || defined(CONFIG_M68K) || \
+    defined(CONFIG_MIPS) || defined(CONFIG_MN10300) || defined(CONFIG_PARISC) \
+    || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || defined(CONFIG_SPARC64)
 #include <asm/ide.h>
+#else
+#include <asm-generic/ide_iops.h>
+#endif
 
 #ifndef MAX_HWIFS
 #if defined(CONFIG_BLACKFIN) || defined(CONFIG_H8300) || defined(CONFIG_XTENSA)
-- 
GitLab


From 28cfd8af52a9ed4e5bd1751ea6bc0b8c870f68ec Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:31 +0200
Subject: [PATCH 498/853] ide: include PCI device name in messages from IDE PCI
 host drivers

While at it:

* Apply small fixes to messages (s/dma/DMA/, remove trailing '.', etc).

* Fix printk() call in ide_setup_pci_baseregs() to use KERN_INFO.

* Move printk() call from ide_pci_clear_simplex() to the caller.

* Clean up do_ide_setup_pci_device() a bit.

* amd74xx.c: remove superfluous PCI device revision information.

* hpt366.c: fix two printk() calls in ->init_chipset to use KERN_INFO.

* pdc202xx_new.c: fix printk() call in ->init_chipset to use KERN_INFO.

* pdc202xx_old.c: fix driver message in pdc202xx_init_one().

* via82cxxx.c: fix driver warning message in via_init_one().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/amd74xx.c      | 11 ++--
 drivers/ide/pci/generic.c      |  4 +-
 drivers/ide/pci/hpt366.c       | 31 ++++++-----
 drivers/ide/pci/it821x.c       | 11 ++--
 drivers/ide/pci/pdc202xx_new.c | 21 ++++----
 drivers/ide/pci/pdc202xx_old.c |  9 ++--
 drivers/ide/pci/serverworks.c  |  3 +-
 drivers/ide/pci/siimage.c      |  7 +--
 drivers/ide/pci/sis5513.c      | 17 +++---
 drivers/ide/pci/trm290.c       |  4 +-
 drivers/ide/pci/via82cxxx.c    | 13 +++--
 drivers/ide/setup-pci.c        | 99 +++++++++++++++++++---------------
 12 files changed, 131 insertions(+), 99 deletions(-)

diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index a91f2e873ba..2b118f80fab 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -129,9 +129,9 @@ static void __devinit amd7411_cable_detect(struct pci_dev *dev,
 	amd_80w = ((t & 0x3) ? 1 : 0) | ((t & 0xc) ? 2 : 0);
 	for (i = 24; i >= 0; i -= 8)
 		if (((u >> i) & 4) && !(amd_80w & (1 << (1 - (i >> 4))))) {
-			printk(KERN_WARNING "%s: BIOS didn't set cable bits "
-					    "correctly. Enabling workaround.\n",
-					    name);
+			printk(KERN_WARNING "%s %s: BIOS didn't set cable bits "
+				"correctly. Enabling workaround.\n",
+				name, pci_name(dev));
 			amd_80w |= (1 << (1 - (i >> 4)));
 		}
 }
@@ -280,9 +280,8 @@ static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_
 			d.udma_mask = ATA_UDMA5;
 	}
 
-	printk(KERN_INFO "%s: %s (rev %02x) UDMA%s controller\n",
-			 d.name, pci_name(dev), dev->revision,
-			 amd_dma[fls(d.udma_mask) - 1]);
+	printk(KERN_INFO "%s %s: UDMA%s controller\n",
+		d.name, pci_name(dev), amd_dma[fls(d.udma_mask) - 1]);
 
 	/*
 	* Determine the system bus clock.
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index 81fc171f8ff..e034e21af7f 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -134,8 +134,8 @@ static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_devi
 		u16 command;
 		pci_read_config_word(dev, PCI_COMMAND, &command);
 		if (!(command & PCI_COMMAND_IO)) {
-			printk(KERN_INFO "Skipping disabled %s IDE "
-					"controller.\n", d->name);
+			printk(KERN_INFO "%s %s: skipping disabled "
+				"controller\n", d->name, pci_name(dev));
 			goto out;
 		}
 	}
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 398808905f9..b8004c331ed 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1044,8 +1044,8 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 		if ((temp & 0xFFFFF000) != 0xABCDE000) {
 			int i;
 
-			printk(KERN_WARNING "%s: no clock data saved by BIOS\n",
-			       name);
+			printk(KERN_WARNING "%s %s: no clock data saved by "
+				"BIOS\n", name, pci_name(dev));
 
 			/* Calculate the average value of f_CNT. */
 			for (temp = i = 0; i < 128; i++) {
@@ -1070,8 +1070,9 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 		else
 			pci_clk = 66;
 
-		printk(KERN_INFO "%s: DPLL base: %d MHz, f_CNT: %d, "
-		       "assuming %d MHz PCI\n", name, dpll_clk, f_cnt, pci_clk);
+		printk(KERN_INFO "%s %s: DPLL base: %d MHz, f_CNT: %d, "
+			"assuming %d MHz PCI\n", name, pci_name(dev),
+			dpll_clk, f_cnt, pci_clk);
 	} else {
 		u32 itr1 = 0;
 
@@ -1137,7 +1138,8 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 		}
 
 		if (info->timings->clock_table[clock] == NULL) {
-			printk(KERN_ERR "%s: unknown bus timing!\n", name);
+			printk(KERN_ERR "%s %s: unknown bus timing!\n",
+				name, pci_name(dev));
 			return -EIO;
 		}
 
@@ -1163,16 +1165,19 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 				f_low += adjust >> 1;
 		}
 		if (adjust == 8) {
-			printk(KERN_ERR "%s: DPLL did not stabilize!\n", name);
+			printk(KERN_ERR "%s %s: DPLL did not stabilize!\n",
+				name, pci_name(dev));
 			return -EIO;
 		}
 
-		printk("%s: using %d MHz DPLL clock\n", name, dpll_clk);
+		printk(KERN_INFO "%s %s: using %d MHz DPLL clock\n",
+			name, pci_name(dev), dpll_clk);
 	} else {
 		/* Mark the fact that we're not using the DPLL. */
 		dpll_clk = 0;
 
-		printk("%s: using %d MHz PCI clock\n", name, pci_clk);
+		printk(KERN_INFO "%s %s: using %d MHz PCI clock\n",
+			name, pci_name(dev), pci_clk);
 	}
 
 	/* Store the clock frequencies. */
@@ -1357,7 +1362,8 @@ static void __devinit hpt374_init(struct pci_dev *dev, struct pci_dev *dev2)
 	if (dev2->irq != dev->irq) {
 		/* FIXME: we need a core pci_set_interrupt() */
 		dev2->irq = dev->irq;
-		printk(KERN_INFO "HPT374: PCI config space interrupt fixed\n");
+		printk(KERN_INFO "HPT374 %s: PCI config space interrupt "
+			"fixed\n", pci_name(dev2));
 	}
 }
 
@@ -1392,8 +1398,8 @@ static int __devinit hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2)
 	pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin2);
 
 	if (pin1 != pin2 && dev->irq == dev2->irq) {
-		printk(KERN_INFO "HPT36x: onboard version of chipset, "
-				 "pin1=%d pin2=%d\n", pin1, pin2);
+		printk(KERN_INFO "HPT36x %s: onboard version of chipset, "
+			"pin1=%d pin2=%d\n", pci_name(dev), pin1, pin2);
 		return 1;
 	}
 
@@ -1591,7 +1597,8 @@ static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_devic
 
 	dyn_info = kzalloc(sizeof(*dyn_info) * (dev2 ? 2 : 1), GFP_KERNEL);
 	if (dyn_info == NULL) {
-		printk(KERN_ERR "%s: out of memory!\n", d.name);
+		printk(KERN_ERR "%s %s: out of memory!\n",
+			d.name, pci_name(dev));
 		pci_dev_put(dev2);
 		return -ENOMEM;
 	}
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index c5c91f2109d..4ce5db98f89 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -569,7 +569,8 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif)
 		idev->timing10 = 1;
 		hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
 		if (idev->smart == 0)
-			printk(KERN_WARNING "it821x: Revision 0x10, workarounds activated.\n");
+			printk(KERN_WARNING "it821x %s: revision 0x10, "
+				"workarounds activated\n", pci_name(dev));
 	}
 
 	if (idev->smart == 0) {
@@ -609,11 +610,13 @@ static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev, const cha
 
 	/* Force the card into bypass mode if so requested */
 	if (it8212_noraid) {
-		printk(KERN_INFO "it8212: forcing bypass mode.\n");
+		printk(KERN_INFO "it821x %s: forcing bypass mode\n",
+			pci_name(dev));
 		it8212_disable_raid(dev);
 	}
 	pci_read_config_byte(dev, 0x50, &conf);
-	printk(KERN_INFO "it821x: controller in %s mode.\n", mode[conf & 1]);
+	printk(KERN_INFO "it821x %s: controller in %s mode\n",
+		pci_name(dev), mode[conf & 1]);
 	return 0;
 }
 
@@ -654,7 +657,7 @@ static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_devic
 
 	itdevs = kzalloc(2 * sizeof(*itdevs), GFP_KERNEL);
 	if (itdevs == NULL) {
-		printk(KERN_ERR "it821x: out of memory\n");
+		printk(KERN_ERR "it821x %s: out of memory\n", pci_name(dev));
 		return -ENOMEM;
 	}
 
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index 9881a1a8b89..4c2b669d7de 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -358,12 +358,13 @@ static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev, const cha
 	 * registers setting.
 	 */
 	pll_input = detect_pll_input_clock(dma_base);
-	printk("%s: PLL input clock is %ld kHz\n", name, pll_input / 1000);
+	printk(KERN_INFO "%s %s: PLL input clock is %ld kHz\n",
+		name, pci_name(dev), pll_input / 1000);
 
 	/* Sanity check */
 	if (unlikely(pll_input < 5000000L || pll_input > 70000000L)) {
-		printk(KERN_ERR "%s: Bad PLL input clock %ld Hz, giving up!\n",
-		       name, pll_input);
+		printk(KERN_ERR "%s %s: Bad PLL input clock %ld Hz, giving up!"
+			"\n", name, pci_name(dev), pll_input);
 		goto out;
 	}
 
@@ -399,7 +400,8 @@ static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev, const cha
 		r = 0x00;
 	} else {
 		/* Invalid ratio */
-		printk(KERN_ERR "%s: Bad ratio %ld, giving up!\n", name, ratio);
+		printk(KERN_ERR "%s %s: Bad ratio %ld, giving up!\n",
+			name, pci_name(dev), ratio);
 		goto out;
 	}
 
@@ -409,7 +411,8 @@ static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev, const cha
 
 	if (unlikely(f < 0 || f > 127)) {
 		/* Invalid F */
-		printk(KERN_ERR "%s: F[%d] invalid!\n", name, f);
+		printk(KERN_ERR "%s %s: F[%d] invalid!\n",
+			name, pci_name(dev), f);
 		goto out;
 	}
 
@@ -455,8 +458,8 @@ static struct pci_dev * __devinit pdc20270_get_dev2(struct pci_dev *dev)
 
 		if (dev2->irq != dev->irq) {
 			dev2->irq = dev->irq;
-			printk(KERN_INFO "PDC20270: PCI config space "
-					 "interrupt fixed\n");
+			printk(KERN_INFO "PDC20270 %s: PCI config space "
+				"interrupt fixed\n", pci_name(dev));
 		}
 
 		return dev2;
@@ -535,8 +538,8 @@ static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_de
 	    bridge->vendor == PCI_VENDOR_ID_INTEL &&
 	    (bridge->device == PCI_DEVICE_ID_INTEL_I960 ||
 	     bridge->device == PCI_DEVICE_ID_INTEL_I960RM)) {
-		printk(KERN_INFO "PDC20276: attached to I2O RAID controller, "
-				 "skipping\n");
+		printk(KERN_INFO "PDC20276 %s: attached to I2O RAID controller,"
+			" skipping\n", pci_name(dev));
 		return -ENODEV;
 	}
 
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index 17d99ce273a..5cb2731047e 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -304,8 +304,8 @@ static void __devinit pdc202ata4_fixup_irq(struct pci_dev *dev,
 		if (irq != irq2) {
 			pci_write_config_byte(dev,
 				(PCI_INTERRUPT_LINE)|0x80, irq);     /* 0xbc */
-			printk(KERN_INFO "%s: PCI config space interrupt "
-					 "mirror fixed\n", name);
+			printk(KERN_INFO "%s %s: PCI config space interrupt "
+				"mirror fixed\n", name, pci_name(dev));
 		}
 	}
 }
@@ -406,8 +406,9 @@ static int __devinit pdc202xx_init_one(struct pci_dev *dev, const struct pci_dev
 		    bridge->vendor == PCI_VENDOR_ID_INTEL &&
 		    (bridge->device == PCI_DEVICE_ID_INTEL_I960 ||
 		     bridge->device == PCI_DEVICE_ID_INTEL_I960RM)) {
-			printk(KERN_INFO "ide: Skipping Promise PDC20265 "
-				"attached to I2O RAID controller\n");
+			printk(KERN_INFO "pdc202xx_old %s: skipping Promise "
+				"PDC20265 attached to I2O RAID controller\n",
+				pci_name(dev));
 			return -ENODEV;
 		}
 	}
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index eca52fede28..1106ff44cde 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -188,7 +188,8 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha
 			pci_read_config_dword(isa_dev, 0x64, &reg);
 			reg &= ~0x00002000; /* disable 600ns interrupt mask */
 			if(!(reg & 0x00004000))
-				printk(KERN_DEBUG "%s: UDMA not BIOS enabled.\n", name);
+				printk(KERN_DEBUG "%s %s: UDMA not BIOS "
+					"enabled.\n", name, pci_name(dev));
 			reg |=  0x00004000; /* enable UDMA/33 support */
 			pci_write_config_dword(isa_dev, 0x64, reg);
 		}
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index f2e4dac9b71..fc29f1ca503 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -539,7 +539,8 @@ static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev,
 			{ "== 100", "== 133", "== 2X PCI", "DISABLED!" };
 
 		tmp >>= 4;
-		printk(KERN_INFO "%s: BASE CLOCK %s\n", name, clk_str[tmp & 3]);
+		printk(KERN_INFO "%s %s: BASE CLOCK %s\n",
+			name, pci_name(dev), clk_str[tmp & 3]);
 	}
 
 	return 0;
@@ -779,8 +780,8 @@ static int __devinit siimage_init_one(struct pci_dev *dev,
 		* seem to get terminally confused in the PCI spaces.
 		*/
 		if (!request_mem_region(bar5, barsize, d.name)) {
-			printk(KERN_WARNING "siimage: IDE controller MMIO "
-					    "ports not available.\n");
+			printk(KERN_WARNING "siimage %s: MMIO ports not "
+				"available\n", pci_name(dev));
 		} else {
 			ioaddr = ioremap(bar5, barsize);
 			if (ioaddr == NULL)
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index f3cf7c6c0cc..518d8ab413f 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -380,8 +380,9 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 		}
 		pci_dev_put(host);
 
-		printk(KERN_INFO "SIS5513: %s %s controller\n",
-			 SiSHostChipInfo[i].name, chipset_capability[chipset_family]);
+		printk(KERN_INFO "SIS5513 %s: %s %s controller\n",
+			pci_name(dev), SiSHostChipInfo[i].name,
+			chipset_capability[chipset_family]);
 	}
 
 	if (!chipset_family) { /* Belongs to pci-quirks */
@@ -396,7 +397,8 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 			pci_write_config_dword(dev, 0x54, idemisc);
 
 			if (trueid == 0x5518) {
-				printk(KERN_INFO "SIS5513: SiS 962/963 MuTIOL IDE UDMA133 controller\n");
+				printk(KERN_INFO "SIS5513 %s: SiS 962/963 MuTIOL IDE UDMA133 controller\n",
+					pci_name(dev));
 				chipset_family = ATA_133;
 
 				/* Check for 5513 compability mapping
@@ -405,7 +407,8 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 				 */
 				if ((idemisc & 0x40000000) == 0) {
 					pci_write_config_dword(dev, 0x54, idemisc | 0x40000000);
-					printk(KERN_INFO "SIS5513: Switching to 5513 register mapping\n");
+					printk(KERN_INFO "SIS5513 %s: Switching to 5513 register mapping\n",
+						pci_name(dev));
 				}
 			}
 	}
@@ -429,10 +432,12 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 				pci_dev_put(lpc_bridge);
 
 				if (lpc_bridge->revision == 0x10 && (prefctl & 0x80)) {
-					printk(KERN_INFO "SIS5513: SiS 961B MuTIOL IDE UDMA133 controller\n");
+					printk(KERN_INFO "SIS5513 %s: SiS 961B MuTIOL IDE UDMA133 controller\n",
+						pci_name(dev));
 					chipset_family = ATA_133a;
 				} else {
-					printk(KERN_INFO "SIS5513: SiS 961 MuTIOL IDE UDMA100 controller\n");
+					printk(KERN_INFO "SIS5513 %s: SiS 961 MuTIOL IDE UDMA100 controller\n",
+						pci_name(dev));
 					chipset_family = ATA_100;
 				}
 			}
diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c
index 449f4de9a70..7bda5ed92e1 100644
--- a/drivers/ide/pci/trm290.c
+++ b/drivers/ide/pci/trm290.c
@@ -245,10 +245,10 @@ static void __devinit init_hwif_trm290(ide_hwif_t *hwif)
 	u8 reg = 0;
 
 	if ((dev->class & 5) && cfg_base)
-		printk(KERN_INFO "TRM290: chip");
+		printk(KERN_INFO "TRM290 %s: chip", pci_name(dev));
 	else {
 		cfg_base = 0x3df0;
-		printk(KERN_INFO "TRM290: using default");
+		printk(KERN_INFO "TRM290 %s: using default", pci_name(dev));
 	}
 	printk(KERN_CONT " config base at 0x%04x\n", cfg_base);
 	hwif->config_data = cfg_base;
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 5f07eeb736a..23332556e61 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -401,20 +401,19 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 	 */
 	via_config = via_config_find(&isa);
 	if (!via_config->id) {
-		printk(KERN_WARNING "VP_IDE: Unknown VIA SouthBridge, disabling DMA.\n");
+		printk(KERN_WARNING "VP_IDE %s: unknown chipset, skipping\n",
+			pci_name(dev));
 		return -ENODEV;
 	}
 
 	/*
 	 * Print the boot message.
 	 */
-	printk(KERN_INFO "VP_IDE: VIA %s (rev %02x) IDE %sDMA%s "
-		"controller on pci%s\n",
-		via_config->name, isa->revision,
+	printk(KERN_INFO "VP_IDE %s: VIA %s (rev %02x) IDE %sDMA%s\n",
+		pci_name(dev), via_config->name, isa->revision,
 		via_config->udma_mask ? "U" : "MW",
 		via_dma[via_config->udma_mask ?
-			(fls(via_config->udma_mask) - 1) : 0],
-		pci_name(dev));
+			(fls(via_config->udma_mask) - 1) : 0]);
 
 	pci_dev_put(isa);
 
@@ -454,7 +453,7 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 
 	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
 	if (!vdev) {
-		printk(KERN_ERR "VP_IDE: out of memory :(\n");
+		printk(KERN_ERR "VP_IDE %s: out of memory :(\n", pci_name(dev));
 		return -ENOMEM;
 	}
 
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 20f0ee00469..d9655aeb013 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -39,17 +39,18 @@ static int ide_setup_pci_baseregs(struct pci_dev *dev, const char *name)
 	if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) ||
 			 (progif & 5) != 5) {
 		if ((progif & 0xa) != 0xa) {
-			printk(KERN_INFO "%s: device not capable of full "
-				"native PCI mode\n", name);
+			printk(KERN_INFO "%s %s: device not capable of full "
+				"native PCI mode\n", name, pci_name(dev));
 			return -EOPNOTSUPP;
 		}
-		printk("%s: placing both ports into native PCI mode\n", name);
+		printk(KERN_INFO "%s %s: placing both ports into native PCI "
+			"mode\n", name, pci_name(dev));
 		(void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5);
 		if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) ||
 		    (progif & 5) != 5) {
-			printk(KERN_ERR "%s: rewrite of PROGIF failed, wanted "
-				"0x%04x, got 0x%04x\n",
-				name, progif|5, progif);
+			printk(KERN_ERR "%s %s: rewrite of PROGIF failed, "
+				"wanted 0x%04x, got 0x%04x\n",
+				name, pci_name(dev), progif | 5, progif);
 			return -EOPNOTSUPP;
 		}
 	}
@@ -57,14 +58,14 @@ static int ide_setup_pci_baseregs(struct pci_dev *dev, const char *name)
 }
 
 #ifdef CONFIG_BLK_DEV_IDEDMA_PCI
-static void ide_pci_clear_simplex(unsigned long dma_base, const char *name)
+static int ide_pci_clear_simplex(unsigned long dma_base, const char *name)
 {
 	u8 dma_stat = inb(dma_base + 2);
 
 	outb(dma_stat & 0x60, dma_base + 2);
 	dma_stat = inb(dma_base + 2);
-	if (dma_stat & 0x80)
-		printk(KERN_INFO "%s: simplex device: DMA forced\n", name);
+
+	return (dma_stat & 0x80) ? 1 : 0;
 }
 
 /**
@@ -91,7 +92,8 @@ unsigned long ide_pci_dma_base(ide_hwif_t *hwif, const struct ide_port_info *d)
 		dma_base = pci_resource_start(dev, baridx);
 
 		if (dma_base == 0) {
-			printk(KERN_ERR "%s: DMA base is invalid\n", d->name);
+			printk(KERN_ERR "%s %s: DMA base is invalid\n",
+				d->name, pci_name(dev));
 			return 0;
 		}
 	}
@@ -105,13 +107,16 @@ EXPORT_SYMBOL_GPL(ide_pci_dma_base);
 
 int ide_pci_check_simplex(ide_hwif_t *hwif, const struct ide_port_info *d)
 {
+	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u8 dma_stat;
 
 	if (d->host_flags & (IDE_HFLAG_MMIO | IDE_HFLAG_CS5520))
 		goto out;
 
 	if (d->host_flags & IDE_HFLAG_CLEAR_SIMPLEX) {
-		ide_pci_clear_simplex(hwif->dma_base, d->name);
+		if (ide_pci_clear_simplex(hwif->dma_base, d->name))
+			printk(KERN_INFO "%s %s: simplex device: DMA forced\n",
+				d->name, pci_name(dev));
 		goto out;
 	}
 
@@ -127,7 +132,8 @@ int ide_pci_check_simplex(ide_hwif_t *hwif, const struct ide_port_info *d)
 	 */
 	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
 	if ((dma_stat & 0x80) && hwif->mate && hwif->mate->dma_base) {
-		printk(KERN_INFO "%s: simplex device: DMA disabled\n", d->name);
+		printk(KERN_INFO "%s %s: simplex device: DMA disabled\n",
+			d->name, pci_name(dev));
 		return -1;
 	}
 out:
@@ -149,8 +155,8 @@ int ide_pci_set_master(struct pci_dev *dev, const char *name)
 
 		if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd) ||
 		    (pcicmd & PCI_COMMAND_MASTER) == 0) {
-			printk(KERN_ERR "%s: error updating PCICMD on %s\n",
-					name, pci_name(dev));
+			printk(KERN_ERR "%s %s: error updating PCICMD\n",
+				name, pci_name(dev));
 			return -EIO;
 		}
 	}
@@ -162,9 +168,9 @@ EXPORT_SYMBOL_GPL(ide_pci_set_master);
 
 void ide_setup_pci_noise(struct pci_dev *dev, const struct ide_port_info *d)
 {
-	printk(KERN_INFO "%s: IDE controller (0x%04x:0x%04x rev 0x%02x) at "
-			 " PCI slot %s\n", d->name, dev->vendor, dev->device,
-			 dev->revision, pci_name(dev));
+	printk(KERN_INFO "%s %s: IDE controller (0x%04x:0x%04x rev 0x%02x)\n",
+		d->name, pci_name(dev),
+		dev->vendor, dev->device, dev->revision);
 }
 EXPORT_SYMBOL_GPL(ide_setup_pci_noise);
 
@@ -189,11 +195,12 @@ static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d)
 	if (pci_enable_device(dev)) {
 		ret = pci_enable_device_io(dev);
 		if (ret < 0) {
-			printk(KERN_WARNING "%s: (ide_setup_pci_device:) "
-				"Could not enable device.\n", d->name);
+			printk(KERN_WARNING "%s %s: couldn't enable device\n",
+				d->name, pci_name(dev));
 			goto out;
 		}
-		printk(KERN_WARNING "%s: BIOS configuration fixed.\n", d->name);
+		printk(KERN_WARNING "%s %s: BIOS configuration fixed\n",
+			d->name, pci_name(dev));
 	}
 
 	/*
@@ -203,7 +210,8 @@ static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d)
 	 */
 	ret = pci_set_dma_mask(dev, DMA_32BIT_MASK);
 	if (ret < 0) {
-		printk(KERN_ERR "%s: can't set dma mask\n", d->name);
+		printk(KERN_ERR "%s %s: can't set DMA mask\n",
+			d->name, pci_name(dev));
 		goto out;
 	}
 
@@ -221,7 +229,8 @@ static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d)
 
 	ret = pci_request_selected_regions(dev, bars, d->name);
 	if (ret < 0)
-		printk(KERN_ERR "%s: can't reserve resources\n", d->name);
+		printk(KERN_ERR "%s %s: can't reserve resources\n",
+			d->name, pci_name(dev));
 out:
 	return ret;
 }
@@ -247,15 +256,18 @@ static int ide_pci_configure(struct pci_dev *dev, const struct ide_port_info *d)
 	 */
 	if (ide_setup_pci_baseregs(dev, d->name) ||
 	    pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_IO)) {
-		printk(KERN_INFO "%s: device disabled (BIOS)\n", d->name);
+		printk(KERN_INFO "%s %s: device disabled (BIOS)\n",
+			d->name, pci_name(dev));
 		return -ENODEV;
 	}
 	if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd)) {
-		printk(KERN_ERR "%s: error accessing PCI regs\n", d->name);
+		printk(KERN_ERR "%s %s: error accessing PCI regs\n",
+			d->name, pci_name(dev));
 		return -EIO;
 	}
 	if (!(pcicmd & PCI_COMMAND_IO)) {
-		printk(KERN_ERR "%s: unable to enable IDE controller\n", d->name);
+		printk(KERN_ERR "%s %s: unable to enable IDE controller\n",
+			d->name, pci_name(dev));
 		return -ENXIO;
 	}
 	return 0;
@@ -311,8 +323,9 @@ static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
 	if ((d->host_flags & IDE_HFLAG_ISA_PORTS) == 0) {
 		if (ide_pci_check_iomem(dev, d, 2 * port) ||
 		    ide_pci_check_iomem(dev, d, 2 * port + 1)) {
-			printk(KERN_ERR "%s: I/O baseregs (BIOS) are reported "
-					"as MEM for port %d!\n", d->name, port);
+			printk(KERN_ERR "%s %s: I/O baseregs (BIOS) are "
+				"reported as MEM for port %d!\n",
+				d->name, pci_name(dev), port);
 			return -EINVAL;
 		}
 
@@ -325,8 +338,8 @@ static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
 	}
 
 	if (!base || !ctl) {
-		printk(KERN_ERR "%s: bad PCI BARs for port %d, skipping\n",
-				d->name, port);
+		printk(KERN_ERR "%s %s: bad PCI BARs for port %d, skipping\n",
+			d->name, pci_name(dev), port);
 		return -EINVAL;
 	}
 
@@ -414,14 +427,16 @@ static int ide_setup_pci_controller(struct pci_dev *dev,
 
 	ret = pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
 	if (ret < 0) {
-		printk(KERN_ERR "%s: error accessing PCI regs\n", d->name);
+		printk(KERN_ERR "%s %s: error accessing PCI regs\n",
+			d->name, pci_name(dev));
 		goto out;
 	}
 	if (!(pcicmd & PCI_COMMAND_IO)) {	/* is device disabled? */
 		ret = ide_pci_configure(dev, d);
 		if (ret < 0)
 			goto out;
-		printk(KERN_INFO "%s: device enabled (Linux)\n", d->name);
+		printk(KERN_INFO "%s %s: device enabled (Linux)\n",
+			d->name, pci_name(dev));
 	}
 
 out:
@@ -460,7 +475,8 @@ void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
 
 		if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) ||
 		    (tmp & e->mask) != e->val)) {
-			printk(KERN_INFO "%s: IDE port disabled\n", d->name);
+			printk(KERN_INFO "%s %s: IDE port disabled\n",
+				d->name, pci_name(dev));
 			continue;	/* port not enabled */
 		}
 
@@ -506,18 +522,15 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 	/* Is it an "IDE storage" device in non-PCI mode? */
 	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5) {
 		if (noisy)
-			printk(KERN_INFO "%s: not 100%% native mode: "
-				"will probe irqs later\n", d->name);
+			printk(KERN_INFO "%s %s: not 100%% native mode: will "
+				"probe irqs later\n", d->name, pci_name(dev));
 		pciirq = ret;
-	} else if (!pciirq) {
-		if (noisy)
-			printk(KERN_WARNING "%s: bad irq (%d): will probe later\n",
-				d->name, pciirq);
-		pciirq = 0;
-	} else {
-		if (noisy)
-			printk(KERN_INFO "%s: 100%% native mode on irq %d\n",
-				d->name, pciirq);
+	} else if (!pciirq && noisy) {
+		printk(KERN_WARNING "%s %s: bad irq (%d): will probe later\n",
+			d->name, pci_name(dev), pciirq);
+	} else if (noisy) {
+		printk(KERN_INFO "%s %s: 100%% native mode on irq %d\n",
+			d->name, pci_name(dev), pciirq);
 	}
 
 	ret = pciirq;
-- 
GitLab


From 29f1ca920cb8d65b979f7edf2fc7d11095461306 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:32 +0200
Subject: [PATCH 499/853] it8213: remove DECLARE_ITE_DEV() macro

While at it:

* it8213_chipsets[] -> it8213_chipset.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/it8213.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index 575bf2386f8..451b87fd821 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -155,23 +155,17 @@ static const struct ide_port_ops it8213_port_ops = {
 	.cable_detect		= it8213_cable_detect,
 };
 
-#define DECLARE_ITE_DEV(name_str)			\
-	{						\
-		.name		= name_str,		\
-		.enablebits	= { {0x41, 0x80, 0x80} }, \
-		.port_ops	= &it8213_port_ops,	\
-		.host_flags	= IDE_HFLAG_SINGLE,	\
-		.pio_mask	= ATA_PIO4,		\
-		.swdma_mask	= ATA_SWDMA2_ONLY,	\
-		.mwdma_mask	= ATA_MWDMA12_ONLY,	\
-		.udma_mask	= ATA_UDMA6,		\
-	}
-
-static const struct ide_port_info it8213_chipsets[] __devinitdata = {
-	/* 0 */ DECLARE_ITE_DEV("IT8213"),
+static const struct ide_port_info it8213_chipset __devinitdata = {
+	.name		= "IT8213",
+	.enablebits	= { {0x41, 0x80, 0x80} },
+	.port_ops	= &it8213_port_ops,
+	.host_flags	= IDE_HFLAG_SINGLE,
+	.pio_mask	= ATA_PIO4,
+	.swdma_mask	= ATA_SWDMA2_ONLY,
+	.mwdma_mask	= ATA_MWDMA12_ONLY,
+	.udma_mask	= ATA_UDMA6,
 };
 
-
 /**
  *	it8213_init_one	-	pci layer discovery entry
  *	@dev: PCI device
@@ -184,7 +178,7 @@ static const struct ide_port_info it8213_chipsets[] __devinitdata = {
 
 static int __devinit it8213_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_pci_init_one(dev, &it8213_chipsets[id->driver_data], NULL);
+	return ide_pci_init_one(dev, &it8213_chipset, NULL);
 }
 
 static const struct pci_device_id it8213_pci_tbl[] = {
-- 
GitLab


From 04ba6e739e9c0623c25f94b191fd20dfbd1b26e3 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:32 +0200
Subject: [PATCH 500/853] it821x: remove DECLARE_ITE_DEV() macro

While at it:

* it821x_chipsets[] -> it821x_chipset.

* Fix it821x_chipset's name field (as it is used for IT8211/8212).

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/it821x.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index 4ce5db98f89..a3d8959436c 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -628,17 +628,12 @@ static const struct ide_port_ops it821x_port_ops = {
 	.cable_detect		= it821x_cable_detect,
 };
 
-#define DECLARE_ITE_DEV(name_str)			\
-	{						\
-		.name		= name_str,		\
-		.init_chipset	= init_chipset_it821x,	\
-		.init_hwif	= init_hwif_it821x,	\
-		.port_ops	= &it821x_port_ops,	\
-		.pio_mask	= ATA_PIO4,		\
-	}
-
-static const struct ide_port_info it821x_chipsets[] __devinitdata = {
-	/* 0 */ DECLARE_ITE_DEV("IT8212"),
+static const struct ide_port_info it821x_chipset __devinitdata = {
+	.name		= "IT821X",
+	.init_chipset	= init_chipset_it821x,
+	.init_hwif	= init_hwif_it821x,
+	.port_ops	= &it821x_port_ops,
+	.pio_mask	= ATA_PIO4,
 };
 
 /**
@@ -661,7 +656,7 @@ static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_devic
 		return -ENOMEM;
 	}
 
-	rc = ide_pci_init_one(dev, &it821x_chipsets[id->driver_data], itdevs);
+	rc = ide_pci_init_one(dev, &it821x_chipset, itdevs);
 	if (rc)
 		kfree(itdevs);
 
-- 
GitLab


From ced3ec8aa7d0fa3300187ee47c144a22ccfc974e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:32 +0200
Subject: [PATCH 501/853] ide: prefix messages from IDE PCI host drivers by
 driver name

Prefix messages from IDE PCI host drivers with the driver name instead of the
marketed chipset name (it is still possible to identify the exact chipset
based on the driver messages).

As a bonus this provides nice code savings for some drivers:

   text    data     bss     dec     hex filename
   3826     112       8    3946     f6a drivers/ide/pci/amd74xx.o.before
   2786     112       8    2906     b5a drivers/ide/pci/amd74xx.o.after
    764     108       0     872     368 drivers/ide/pci/cs5520.o.before
    680     108       0     788     314 drivers/ide/pci/cs5520.o.after
   1680     112       4    1796     704 drivers/ide/pci/generic.o.before
   1155     112       4    1271     4f7 drivers/ide/pci/generic.o.after
   7128     792       0    7920    1ef0 drivers/ide/pci/hpt366.o.before
   6984     792       0    7776    1e60 drivers/ide/pci/hpt366.o.after
   2800     148       0    2948     b84 drivers/ide/pci/pdc202xx_new.o.before
   2523     148       0    2671     a6f drivers/ide/pci/pdc202xx_new.o.after
   2831     148       0    2979     ba3 drivers/ide/pci/pdc202xx_old.o.before
   2683     148       0    2831     b0f drivers/ide/pci/pdc202xx_old.o.after
   3776     112       4    3892     f34 drivers/ide/pci/piix.o.before
   2804     112       4    2920     b68 drivers/ide/pci/piix.o.after
   4693     116       0    4809    12c9 drivers/ide/pci/siimage.o.before
   4600     116       0    4716    126c drivers/ide/pci/siimage.o.after

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/aec62xx.c      |  29 +++++----
 drivers/ide/pci/alim15x3.c     |   4 +-
 drivers/ide/pci/amd74xx.c      |  86 +++++++++++---------------
 drivers/ide/pci/atiixp.c       |  11 ++--
 drivers/ide/pci/cmd64x.c       |  21 ++++---
 drivers/ide/pci/cs5520.c       |  24 ++++----
 drivers/ide/pci/cs5530.c       |   4 +-
 drivers/ide/pci/cs5535.c       |   4 +-
 drivers/ide/pci/cy82c693.c     |   4 +-
 drivers/ide/pci/generic.c      |  65 ++++++++++----------
 drivers/ide/pci/hpt34x.c       |  10 ++--
 drivers/ide/pci/hpt366.c       |  65 ++++----------------
 drivers/ide/pci/it8213.c       |   4 +-
 drivers/ide/pci/it821x.c       |  12 ++--
 drivers/ide/pci/jmicron.c      |   4 +-
 drivers/ide/pci/ns87415.c      |   4 +-
 drivers/ide/pci/opti621.c      |   4 +-
 drivers/ide/pci/pdc202xx_new.c |  38 +++++-------
 drivers/ide/pci/pdc202xx_old.c |  30 +++++-----
 drivers/ide/pci/piix.c         | 106 +++++++++++++++------------------
 drivers/ide/pci/rz1000.c       |   4 +-
 drivers/ide/pci/sc1200.c       |   4 +-
 drivers/ide/pci/serverworks.c  |  26 ++++----
 drivers/ide/pci/siimage.c      |  17 +++---
 drivers/ide/pci/sis5513.c      |  14 +++--
 drivers/ide/pci/sl82c105.c     |   6 +-
 drivers/ide/pci/slc90e66.c     |   4 +-
 drivers/ide/pci/tc86c001.c     |   4 +-
 drivers/ide/pci/triflex.c      |   4 +-
 drivers/ide/pci/trm290.c       |   8 ++-
 drivers/ide/pci/via82cxxx.c    |  15 +++--
 31 files changed, 308 insertions(+), 327 deletions(-)

diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index 253299961a1..f6dc6c20f3a 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -13,6 +13,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "aec62xx"
+
 struct chipset_bus_clock_list_entry {
 	u8 xfer_speed;
 	u8 chipset_settings;
@@ -180,8 +182,8 @@ static const struct ide_port_ops atp86x_port_ops = {
 };
 
 static const struct ide_port_info aec62xx_chipsets[] __devinitdata = {
-	{	/* 0 */
-		.name		= "AEC6210",
+	{	/* 0: AEC6210 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_aec62xx,
 		.enablebits	= {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
 		.port_ops	= &atp850_port_ops,
@@ -192,8 +194,9 @@ static const struct ide_port_info aec62xx_chipsets[] __devinitdata = {
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA2,
-	},{	/* 1 */
-		.name		= "AEC6260",
+	},
+	{	/* 1: AEC6260 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_aec62xx,
 		.port_ops	= &atp86x_port_ops,
 		.host_flags	= IDE_HFLAG_NO_ATAPI_DMA | IDE_HFLAG_NO_AUTODMA |
@@ -201,8 +204,9 @@ static const struct ide_port_info aec62xx_chipsets[] __devinitdata = {
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA4,
-	},{	/* 2 */
-		.name		= "AEC6260R",
+	},
+	{	/* 2: AEC6260R */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_aec62xx,
 		.enablebits	= {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
 		.port_ops	= &atp86x_port_ops,
@@ -211,8 +215,9 @@ static const struct ide_port_info aec62xx_chipsets[] __devinitdata = {
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA4,
-	},{	/* 3 */
-		.name		= "AEC6280",
+	},
+	{	/* 3: AEC6280 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_aec62xx,
 		.port_ops	= &atp86x_port_ops,
 		.host_flags	= IDE_HFLAG_NO_ATAPI_DMA |
@@ -220,8 +225,9 @@ static const struct ide_port_info aec62xx_chipsets[] __devinitdata = {
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA5,
-	},{	/* 4 */
-		.name		= "AEC6280R",
+	},
+	{	/* 4: AEC6280R */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_aec62xx,
 		.enablebits	= {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
 		.port_ops	= &atp86x_port_ops,
@@ -268,7 +274,8 @@ static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_devi
 		unsigned long dma_base = pci_resource_start(dev, 4);
 
 		if (inb(dma_base + 2) & 0x10) {
-			d.name = (idx == 4) ? "AEC6880R" : "AEC6880";
+			printk(KERN_INFO DRV_NAME " %s: AEC6880%s card detected"
+				"\n", pci_name(dev), (idx == 4) ? "R" : "");
 			d.udma_mask = ATA_UDMA6;
 		}
 	}
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index ac171502b99..a099c4dd599 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -38,6 +38,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "alim15x3"
+
 /*
  * Allow UDMA on M1543C-E chipset for WDC disks that ignore CRC checking
  * (this is DANGEROUS and could result in data corruption).
@@ -515,7 +517,7 @@ static const struct ide_dma_ops ali_dma_ops = {
 };
 
 static const struct ide_port_info ali15x3_chipset __devinitdata = {
-	.name		= "ALI15X3",
+	.name		= DRV_NAME,
 	.init_chipset	= init_chipset_ali15x3,
 	.init_hwif	= init_hwif_ali15x3,
 	.init_dma	= init_dma_ali15x3,
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 2b118f80fab..cbf78edfe00 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -21,6 +21,8 @@
 #include <linux/init.h>
 #include <linux/ide.h>
 
+#define DRV_NAME "amd74xx"
+
 enum {
 	AMD_IDE_CONFIG		= 0x41,
 	AMD_CABLE_DETECT	= 0x42,
@@ -204,9 +206,9 @@ static const struct ide_port_ops amd_port_ops = {
 	 IDE_HFLAG_IO_32BIT | \
 	 IDE_HFLAG_UNMASK_IRQS)
 
-#define DECLARE_AMD_DEV(name_str, swdma, udma)				\
+#define DECLARE_AMD_DEV(swdma, udma)				\
 	{								\
-		.name		= name_str,				\
+		.name		= DRV_NAME,				\
 		.init_chipset	= init_chipset_amd74xx,			\
 		.init_hwif	= init_hwif_amd74xx,			\
 		.enablebits	= {{0x40,0x02,0x02}, {0x40,0x01,0x01}},	\
@@ -218,9 +220,9 @@ static const struct ide_port_ops amd_port_ops = {
 		.udma_mask	= udma,					\
 	}
 
-#define DECLARE_NV_DEV(name_str, udma)					\
+#define DECLARE_NV_DEV(udma)					\
 	{								\
-		.name		= name_str,				\
+		.name		= DRV_NAME,				\
 		.init_chipset	= init_chipset_amd74xx,			\
 		.init_hwif	= init_hwif_amd74xx,			\
 		.enablebits	= {{0x50,0x02,0x02}, {0x50,0x01,0x01}},	\
@@ -233,31 +235,15 @@ static const struct ide_port_ops amd_port_ops = {
 	}
 
 static const struct ide_port_info amd74xx_chipsets[] __devinitdata = {
-	/*  0 */ DECLARE_AMD_DEV("AMD7401",	  0x00, ATA_UDMA2),
-	/*  1 */ DECLARE_AMD_DEV("AMD7409", ATA_SWDMA2, ATA_UDMA4),
-	/*  2 */ DECLARE_AMD_DEV("AMD7411", ATA_SWDMA2, ATA_UDMA5),
-	/*  3 */ DECLARE_AMD_DEV("AMD7441", ATA_SWDMA2, ATA_UDMA5),
-	/*  4 */ DECLARE_AMD_DEV("AMD8111", ATA_SWDMA2, ATA_UDMA6),
-
-	/*  5 */ DECLARE_NV_DEV("NFORCE",		ATA_UDMA5),
-	/*  6 */ DECLARE_NV_DEV("NFORCE2",		ATA_UDMA6),
-	/*  7 */ DECLARE_NV_DEV("NFORCE2-U400R",	ATA_UDMA6),
-	/*  8 */ DECLARE_NV_DEV("NFORCE2-U400R-SATA",	ATA_UDMA6),
-	/*  9 */ DECLARE_NV_DEV("NFORCE3-150",		ATA_UDMA6),
-	/* 10 */ DECLARE_NV_DEV("NFORCE3-250",		ATA_UDMA6),
-	/* 11 */ DECLARE_NV_DEV("NFORCE3-250-SATA",	ATA_UDMA6),
-	/* 12 */ DECLARE_NV_DEV("NFORCE3-250-SATA2",	ATA_UDMA6),
-	/* 13 */ DECLARE_NV_DEV("NFORCE-CK804",		ATA_UDMA6),
-	/* 14 */ DECLARE_NV_DEV("NFORCE-MCP04",		ATA_UDMA6),
-	/* 15 */ DECLARE_NV_DEV("NFORCE-MCP51",		ATA_UDMA6),
-	/* 16 */ DECLARE_NV_DEV("NFORCE-MCP55",		ATA_UDMA6),
-	/* 17 */ DECLARE_NV_DEV("NFORCE-MCP61",		ATA_UDMA6),
-	/* 18 */ DECLARE_NV_DEV("NFORCE-MCP65",		ATA_UDMA6),
-	/* 19 */ DECLARE_NV_DEV("NFORCE-MCP67",		ATA_UDMA6),
-	/* 20 */ DECLARE_NV_DEV("NFORCE-MCP73",		ATA_UDMA6),
-	/* 21 */ DECLARE_NV_DEV("NFORCE-MCP77",		ATA_UDMA6),
-
-	/* 22 */ DECLARE_AMD_DEV("AMD5536", ATA_SWDMA2, ATA_UDMA5),
+	/* 0: AMD7401 */	DECLARE_AMD_DEV(0x00, ATA_UDMA2),
+	/* 1: AMD7409 */	DECLARE_AMD_DEV(ATA_SWDMA2, ATA_UDMA4),
+	/* 2: AMD7411/7441 */	DECLARE_AMD_DEV(ATA_SWDMA2, ATA_UDMA5),
+	/* 3: AMD8111 */	DECLARE_AMD_DEV(ATA_SWDMA2, ATA_UDMA6),
+
+	/* 4: NFORCE */		DECLARE_NV_DEV(ATA_UDMA5),
+	/* 5: >= NFORCE2 */	DECLARE_NV_DEV(ATA_UDMA6),
+
+	/* 6: AMD5536 */	DECLARE_AMD_DEV(ATA_SWDMA2, ATA_UDMA5),
 };
 
 static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_id *id)
@@ -274,7 +260,7 @@ static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_
 		if (dev->revision <= 7)
 			d.swdma_mask = 0;
 		d.host_flags |= IDE_HFLAG_CLEAR_SIMPLEX;
-	} else if (idx == 4) {
+	} else if (idx == 3) {
 		if (dev->subsystem_vendor == PCI_VENDOR_ID_AMD &&
 		    dev->subsystem_device == PCI_DEVICE_ID_AMD_SERENADE)
 			d.udma_mask = ATA_UDMA5;
@@ -308,30 +294,30 @@ static const struct pci_device_id amd74xx_pci_tbl[] = {
 	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_COBRA_7401),		 0 },
 	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_VIPER_7409),		 1 },
 	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_VIPER_7411),		 2 },
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_OPUS_7441),		 3 },
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_8111_IDE),		 4 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE),	 6 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE),	 7 },
+	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_OPUS_7441),		 2 },
+	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_8111_IDE),		 3 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_IDE),	 4 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE),	 5 },
 #ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA),	 8 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA),	 5 },
 #endif
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE),	 9 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE),	10 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE),	 5 },
 #ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA),	11 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2),	12 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2),	 5 },
 #endif
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE),	13 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE),	14 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE),	15 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE),	16 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE),	17 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE),	18 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE),	19 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE),	20 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE),	21 },
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_CS5536_IDE),		22 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE),	 5 },
+	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE),	 5 },
+	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_CS5536_IDE),		 6 },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, amd74xx_pci_tbl);
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index 4c49c1ba618..332f08f43b5 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -11,6 +11,8 @@
 #include <linux/ide.h>
 #include <linux/init.h>
 
+#define DRV_NAME "atiixp"
+
 #define ATIIXP_IDE_PIO_TIMING		0x40
 #define ATIIXP_IDE_MDMA_TIMING		0x44
 #define ATIIXP_IDE_PIO_CONTROL		0x48
@@ -137,16 +139,17 @@ static const struct ide_port_ops atiixp_port_ops = {
 };
 
 static const struct ide_port_info atiixp_pci_info[] __devinitdata = {
-	{	/* 0 */
-		.name		= "ATIIXP",
+	{	/* 0: IXP200/300/400/700 */
+		.name		= DRV_NAME,
 		.enablebits	= {{0x48,0x01,0x00}, {0x48,0x08,0x00}},
 		.port_ops	= &atiixp_port_ops,
 		.host_flags	= IDE_HFLAG_LEGACY_IRQS,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA5,
-	},{	/* 1 */
-		.name		= "SB600_PATA",
+	},
+	{	/* 1: IXP600 */
+		.name		= DRV_NAME,
 		.enablebits	= {{0x48,0x01,0x00}, {0x00,0x00,0x00}},
 		.port_ops	= &atiixp_port_ops,
 		.host_flags	= IDE_HFLAG_SINGLE | IDE_HFLAG_LEGACY_IRQS,
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index 0a4d194bc35..3d84debaf81 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -19,6 +19,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "cmd64x"
+
 #define CMD_DEBUG 0
 
 #if CMD_DEBUG
@@ -407,8 +409,8 @@ static const struct ide_dma_ops cmd648_dma_ops = {
 };
 
 static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
-	{	/* 0 */
-		.name		= "CMD643",
+	{	/* 0: CMD643 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_cmd64x,
 		.enablebits	= {{0x00,0x00,0x00}, {0x51,0x08,0x08}},
 		.port_ops	= &cmd64x_port_ops,
@@ -418,8 +420,9 @@ static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
 		.pio_mask	= ATA_PIO5,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= 0x00, /* no udma */
-	},{	/* 1 */
-		.name		= "CMD646",
+	},
+	{	/* 1: CMD646 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_cmd64x,
 		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
 		.chipset	= ide_cmd646,
@@ -429,8 +432,9 @@ static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
 		.pio_mask	= ATA_PIO5,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA2,
-	},{	/* 2 */
-		.name		= "CMD648",
+	},
+	{	/* 2: CMD648 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_cmd64x,
 		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
 		.port_ops	= &cmd64x_port_ops,
@@ -439,8 +443,9 @@ static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
 		.pio_mask	= ATA_PIO5,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA4,
-	},{	/* 3 */
-		.name		= "CMD649",
+	},
+	{	/* 3: CMD649 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_cmd64x,
 		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
 		.port_ops	= &cmd64x_port_ops,
diff --git a/drivers/ide/pci/cs5520.c b/drivers/ide/pci/cs5520.c
index b03d8ae947e..c0364b287f1 100644
--- a/drivers/ide/pci/cs5520.c
+++ b/drivers/ide/pci/cs5520.c
@@ -41,6 +41,8 @@
 #include <linux/ide.h>
 #include <linux/dma-mapping.h>
 
+#define DRV_NAME "cs5520"
+
 struct pio_clocks
 {
 	int address;
@@ -92,18 +94,11 @@ static const struct ide_port_ops cs5520_port_ops = {
 	.set_dma_mode		= cs5520_set_dma_mode,
 };
 
-#define DECLARE_CS_DEV(name_str)				\
-	{							\
-		.name		= name_str,			\
-		.port_ops	= &cs5520_port_ops,		\
-		.host_flags	= IDE_HFLAG_ISA_PORTS |		\
-				  IDE_HFLAG_CS5520,		\
-		.pio_mask	= ATA_PIO4,			\
-	}
-
-static const struct ide_port_info cyrix_chipsets[] __devinitdata = {
-	/* 0 */ DECLARE_CS_DEV("Cyrix 5510"),
-	/* 1 */ DECLARE_CS_DEV("Cyrix 5520")
+static const struct ide_port_info cyrix_chipset __devinitdata = {
+	.name		= DRV_NAME,
+	.port_ops	= &cs5520_port_ops,
+	.host_flags	= IDE_HFLAG_ISA_PORTS | IDE_HFLAG_CS5520,
+	.pio_mask	= ATA_PIO4,
 };
 
 /*
@@ -114,7 +109,7 @@ static const struct ide_port_info cyrix_chipsets[] __devinitdata = {
  
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	const struct ide_port_info *d = &cyrix_chipsets[id->driver_data];
+	const struct ide_port_info *d = &cyrix_chipset;
 	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
 	ide_setup_pci_noise(dev, d);
@@ -128,7 +123,8 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic
 	}
 	pci_set_master(dev);
 	if (pci_set_dma_mask(dev, DMA_32BIT_MASK)) {
-		printk(KERN_WARNING "cs5520: No suitable DMA available.\n");
+		printk(KERN_WARNING "%s: No suitable DMA available.\n",
+			d->name);
 		return -ENODEV;
 	}
 
diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index dff345c763e..5543c8677a5 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -22,6 +22,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "cs5530"
+
 /*
  * Here are the standard PIO mode 0-4 timings for each "format".
  * Format-0 uses fast data reg timings, with slower command reg timings.
@@ -243,7 +245,7 @@ static const struct ide_port_ops cs5530_port_ops = {
 };
 
 static const struct ide_port_info cs5530_chipset __devinitdata = {
-	.name		= "CS5530",
+	.name		= DRV_NAME,
 	.init_chipset	= init_chipset_cs5530,
 	.init_hwif	= init_hwif_cs5530,
 	.port_ops	= &cs5530_port_ops,
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c
index a7300139153..f7b50cdeefa 100644
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -26,6 +26,8 @@
 #include <linux/pci.h>
 #include <linux/ide.h>
 
+#define DRV_NAME "cs5535"
+
 #define MSR_ATAC_BASE		0x51300000
 #define ATAC_GLD_MSR_CAP	(MSR_ATAC_BASE+0)
 #define ATAC_GLD_MSR_CONFIG	(MSR_ATAC_BASE+0x01)
@@ -169,7 +171,7 @@ static const struct ide_port_ops cs5535_port_ops = {
 };
 
 static const struct ide_port_info cs5535_chipset __devinitdata = {
-	.name		= "CS5535",
+	.name		= DRV_NAME,
 	.port_ops	= &cs5535_port_ops,
 	.host_flags	= IDE_HFLAG_SINGLE | IDE_HFLAG_POST_SET_MODE,
 	.pio_mask	= ATA_PIO4,
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index 04f268866b6..41c7f3351eb 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -48,6 +48,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "cy82c693"
+
 /* the current version */
 #define CY82_VERSION	"CY82C693U driver v0.34 99-13-12 Andreas S. Krebs (akrebs@altavista.net)"
 
@@ -398,7 +400,7 @@ static const struct ide_port_ops cy82c693_port_ops = {
 };
 
 static const struct ide_port_info cy82c693_chipset __devinitdata = {
-	.name		= "CY82C693",
+	.name		= DRV_NAME,
 	.init_chipset	= init_chipset_cy82c693,
 	.init_iops	= init_iops_cy82c693,
 	.port_ops	= &cy82c693_port_ops,
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index e034e21af7f..b07d4f4273b 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -27,6 +27,8 @@
 #include <linux/ide.h>
 #include <linux/init.h>
 
+#define DRV_NAME "ide_pci_generic"
+
 static int ide_generic_all;		/* Set to claim all devices */
 
 module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
@@ -34,9 +36,9 @@ MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE st
 
 #define IDE_HFLAGS_UMC (IDE_HFLAG_NO_DMA | IDE_HFLAG_FORCE_LEGACY_IRQS)
 
-#define DECLARE_GENERIC_PCI_DEV(name_str, extra_flags) \
+#define DECLARE_GENERIC_PCI_DEV(extra_flags) \
 	{ \
-		.name		= name_str, \
+		.name		= DRV_NAME, \
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA | \
 				  extra_flags, \
 		.swdma_mask	= ATA_SWDMA2, \
@@ -45,10 +47,11 @@ MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE st
 	}
 
 static const struct ide_port_info generic_chipsets[] __devinitdata = {
-	/*  0 */ DECLARE_GENERIC_PCI_DEV("Unknown",	0),
+	/*  0: Unknown */
+	DECLARE_GENERIC_PCI_DEV(0),
 
-	{	/* 1 */
-		.name		= "NS87410",
+	{	/* 1: NS87410 */
+		.name		= DRV_NAME,
 		.enablebits	= { {0x43, 0x08, 0x08}, {0x47, 0x08, 0x08} },
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA,
 		.swdma_mask	= ATA_SWDMA2,
@@ -56,17 +59,15 @@ static const struct ide_port_info generic_chipsets[] __devinitdata = {
 		.udma_mask	= ATA_UDMA6,
 	},
 
-	/*  2 */ DECLARE_GENERIC_PCI_DEV("SAMURAI",	0),
-	/*  3 */ DECLARE_GENERIC_PCI_DEV("HT6565",	0),
-	/*  4 */ DECLARE_GENERIC_PCI_DEV("UM8673F",	IDE_HFLAGS_UMC),
-	/*  5 */ DECLARE_GENERIC_PCI_DEV("UM8886A",	IDE_HFLAGS_UMC),
-	/*  6 */ DECLARE_GENERIC_PCI_DEV("UM8886BF",	IDE_HFLAGS_UMC),
-	/*  7 */ DECLARE_GENERIC_PCI_DEV("HINT_IDE",	0),
-	/*  8 */ DECLARE_GENERIC_PCI_DEV("VIA_IDE",	IDE_HFLAG_NO_AUTODMA),
-	/*  9 */ DECLARE_GENERIC_PCI_DEV("OPTI621V",	IDE_HFLAG_NO_AUTODMA),
-
-	{	/* 10 */
-		.name		= "VIA8237SATA",
+	/*  2: SAMURAI / HT6565 / HINT_IDE */
+	DECLARE_GENERIC_PCI_DEV(0),
+	/*  3: UM8673F / UM8886A / UM8886BF */
+	DECLARE_GENERIC_PCI_DEV(IDE_HFLAGS_UMC),
+	/*  4: VIA_IDE / OPTI621V / Piccolo010{2,3,5} */
+	DECLARE_GENERIC_PCI_DEV(IDE_HFLAG_NO_AUTODMA),
+
+	{	/* 5: VIA8237SATA */
+		.name		= DRV_NAME,
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_OFF_BOARD,
 		.swdma_mask	= ATA_SWDMA2,
@@ -74,12 +75,8 @@ static const struct ide_port_info generic_chipsets[] __devinitdata = {
 		.udma_mask	= ATA_UDMA6,
 	},
 
-	/* 11 */ DECLARE_GENERIC_PCI_DEV("Piccolo0102",	IDE_HFLAG_NO_AUTODMA),
-	/* 12 */ DECLARE_GENERIC_PCI_DEV("Piccolo0103",	IDE_HFLAG_NO_AUTODMA),
-	/* 13 */ DECLARE_GENERIC_PCI_DEV("Piccolo0105",	IDE_HFLAG_NO_AUTODMA),
-
-	{	/* 14 */
-		.name		= "Revolution",
+	{	/* 6: Revolution */
+		.name		= DRV_NAME,
 		.host_flags	= IDE_HFLAG_CLEAR_SIMPLEX |
 				  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_OFF_BOARD,
@@ -147,20 +144,20 @@ out:
 static const struct pci_device_id generic_pci_tbl[] = {
 	{ PCI_VDEVICE(NS,	PCI_DEVICE_ID_NS_87410),		 1 },
 	{ PCI_VDEVICE(PCTECH,	PCI_DEVICE_ID_PCTECH_SAMURAI_IDE),	 2 },
-	{ PCI_VDEVICE(HOLTEK,	PCI_DEVICE_ID_HOLTEK_6565),		 3 },
-	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8673F),		 4 },
-	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8886A),		 5 },
-	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8886BF),		 6 },
-	{ PCI_VDEVICE(HINT,	PCI_DEVICE_ID_HINT_VXPROII_IDE),	 7 },
-	{ PCI_VDEVICE(VIA,	PCI_DEVICE_ID_VIA_82C561),		 8 },
-	{ PCI_VDEVICE(OPTI,	PCI_DEVICE_ID_OPTI_82C558),		 9 },
+	{ PCI_VDEVICE(HOLTEK,	PCI_DEVICE_ID_HOLTEK_6565),		 2 },
+	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8673F),		 3 },
+	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8886A),		 3 },
+	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8886BF),		 3 },
+	{ PCI_VDEVICE(HINT,	PCI_DEVICE_ID_HINT_VXPROII_IDE),	 2 },
+	{ PCI_VDEVICE(VIA,	PCI_DEVICE_ID_VIA_82C561),		 4 },
+	{ PCI_VDEVICE(OPTI,	PCI_DEVICE_ID_OPTI_82C558),		 4 },
 #ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(VIA,	PCI_DEVICE_ID_VIA_8237_SATA),		10 },
+	{ PCI_VDEVICE(VIA,	PCI_DEVICE_ID_VIA_8237_SATA),		 5 },
 #endif
-	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO),		11 },
-	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_1),	12 },
-	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_2),	13 },
-	{ PCI_VDEVICE(NETCELL,	PCI_DEVICE_ID_REVOLUTION),		14 },
+	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO),		 4 },
+	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_1),	 4 },
+	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_2),	 4 },
+	{ PCI_VDEVICE(NETCELL,	PCI_DEVICE_ID_REVOLUTION),		 6 },
 	/*
 	 * Must come last.  If you add entries adjust
 	 * this table and generic_chipsets[] appropriately.
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index b52f8339102..baabb4ce0d7 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -33,6 +33,8 @@
 #include <linux/init.h>
 #include <linux/ide.h>
 
+#define DRV_NAME "hpt34x"
+
 #define HPT343_DEBUG_DRIVE_INFO		0
 
 static void hpt34x_set_mode(ide_drive_t *drive, const u8 speed)
@@ -126,15 +128,15 @@ static const struct ide_port_ops hpt34x_port_ops = {
 	 IDE_HFLAG_NO_AUTODMA)
 
 static const struct ide_port_info hpt34x_chipsets[] __devinitdata = {
-	{ /* 0 */
-		.name		= "HPT343",
+	{ /* 0: HPT343 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_hpt34x,
 		.port_ops	= &hpt34x_port_ops,
 		.host_flags	= IDE_HFLAGS_HPT34X | IDE_HFLAG_NON_BOOTABLE,
 		.pio_mask	= ATA_PIO5,
 	},
-	{ /* 1 */
-		.name		= "HPT345",
+	{ /* 1: HPT345 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_hpt34x,
 		.port_ops	= &hpt34x_port_ops,
 		.host_flags	= IDE_HFLAGS_HPT34X | IDE_HFLAG_OFF_BOARD,
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index b8004c331ed..6a1c65c3be3 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -131,6 +131,8 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
+#define DRV_NAME "hpt366"
+
 /* various tuning parameters */
 #define HPT_RESET_STATE_ENGINE
 #undef	HPT_DELAY_INTERRUPT
@@ -1362,7 +1364,7 @@ static void __devinit hpt374_init(struct pci_dev *dev, struct pci_dev *dev2)
 	if (dev2->irq != dev->irq) {
 		/* FIXME: we need a core pci_set_interrupt() */
 		dev2->irq = dev->irq;
-		printk(KERN_INFO "HPT374 %s: PCI config space interrupt "
+		printk(KERN_INFO DRV_NAME " %s: PCI config space interrupt "
 			"fixed\n", pci_name(dev2));
 	}
 }
@@ -1398,7 +1400,7 @@ static int __devinit hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2)
 	pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin2);
 
 	if (pin1 != pin2 && dev->irq == dev2->irq) {
-		printk(KERN_INFO "HPT36x %s: onboard version of chipset, "
+		printk(KERN_INFO DRV_NAME " %s: onboard version of chipset, "
 			"pin1=%d pin2=%d\n", pci_name(dev), pin1, pin2);
 		return 1;
 	}
@@ -1454,8 +1456,8 @@ static const struct ide_dma_ops hpt36x_dma_ops = {
 };
 
 static const struct ide_port_info hpt366_chipsets[] __devinitdata = {
-	{	/* 0 */
-		.name		= "HPT36x",
+	{	/* 0: HPT36x */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
@@ -1471,53 +1473,9 @@ static const struct ide_port_info hpt366_chipsets[] __devinitdata = {
 		.host_flags	= IDE_HFLAGS_HPT3XX | IDE_HFLAG_SINGLE,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
-	},{	/* 1 */
-		.name		= "HPT372A",
-		.init_chipset	= init_chipset_hpt366,
-		.init_hwif	= init_hwif_hpt366,
-		.init_dma	= init_dma_hpt366,
-		.enablebits	= {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
-		.port_ops	= &hpt3xx_port_ops,
-		.dma_ops	= &hpt37x_dma_ops,
-		.host_flags	= IDE_HFLAGS_HPT3XX,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-	},{	/* 2 */
-		.name		= "HPT302",
-		.init_chipset	= init_chipset_hpt366,
-		.init_hwif	= init_hwif_hpt366,
-		.init_dma	= init_dma_hpt366,
-		.enablebits	= {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
-		.port_ops	= &hpt3xx_port_ops,
-		.dma_ops	= &hpt37x_dma_ops,
-		.host_flags	= IDE_HFLAGS_HPT3XX,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-	},{	/* 3 */
-		.name		= "HPT371",
-		.init_chipset	= init_chipset_hpt366,
-		.init_hwif	= init_hwif_hpt366,
-		.init_dma	= init_dma_hpt366,
-		.enablebits	= {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
-		.port_ops	= &hpt3xx_port_ops,
-		.dma_ops	= &hpt37x_dma_ops,
-		.host_flags	= IDE_HFLAGS_HPT3XX,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-	},{	/* 4 */
-		.name		= "HPT374",
-		.init_chipset	= init_chipset_hpt366,
-		.init_hwif	= init_hwif_hpt366,
-		.init_dma	= init_dma_hpt366,
-		.enablebits	= {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
-		.udma_mask	= ATA_UDMA5,
-		.port_ops	= &hpt3xx_port_ops,
-		.dma_ops	= &hpt37x_dma_ops,
-		.host_flags	= IDE_HFLAGS_HPT3XX,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-	},{	/* 5 */
-		.name		= "HPT372N",
+	},
+	{	/* 1: HPT3xx */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
@@ -1583,9 +1541,10 @@ static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_devic
 		break;
 	}
 
-	d = hpt366_chipsets[idx];
+	printk(KERN_INFO DRV_NAME ": %s chipset detected\n", info->chip_name);
+
+	d = hpt366_chipsets[min_t(u8, idx, 1)];
 
-	d.name = info->chip_name;
 	d.udma_mask = info->udma_mask;
 
 	/* fixup ->dma_ops for HPT370/HPT370A */
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index 451b87fd821..6eba8f18826 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -14,6 +14,8 @@
 #include <linux/ide.h>
 #include <linux/init.h>
 
+#define DRV_NAME "it8213"
+
 /**
  *	it8213_set_pio_mode	-	set host controller for PIO mode
  *	@drive: drive
@@ -156,7 +158,7 @@ static const struct ide_port_ops it8213_port_ops = {
 };
 
 static const struct ide_port_info it8213_chipset __devinitdata = {
-	.name		= "IT8213",
+	.name		= DRV_NAME,
 	.enablebits	= { {0x41, 0x80, 0x80} },
 	.port_ops	= &it8213_port_ops,
 	.host_flags	= IDE_HFLAG_SINGLE,
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index a3d8959436c..74173352741 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -67,6 +67,8 @@
 #include <linux/ide.h>
 #include <linux/init.h>
 
+#define DRV_NAME "it821x"
+
 struct it821x_dev
 {
 	unsigned int smart:1,		/* Are we in smart raid mode */
@@ -569,7 +571,7 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif)
 		idev->timing10 = 1;
 		hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
 		if (idev->smart == 0)
-			printk(KERN_WARNING "it821x %s: revision 0x10, "
+			printk(KERN_WARNING DRV_NAME " %s: revision 0x10, "
 				"workarounds activated\n", pci_name(dev));
 	}
 
@@ -610,12 +612,12 @@ static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev, const cha
 
 	/* Force the card into bypass mode if so requested */
 	if (it8212_noraid) {
-		printk(KERN_INFO "it821x %s: forcing bypass mode\n",
+		printk(KERN_INFO DRV_NAME " %s: forcing bypass mode\n",
 			pci_name(dev));
 		it8212_disable_raid(dev);
 	}
 	pci_read_config_byte(dev, 0x50, &conf);
-	printk(KERN_INFO "it821x %s: controller in %s mode\n",
+	printk(KERN_INFO DRV_NAME " %s: controller in %s mode\n",
 		pci_name(dev), mode[conf & 1]);
 	return 0;
 }
@@ -629,7 +631,7 @@ static const struct ide_port_ops it821x_port_ops = {
 };
 
 static const struct ide_port_info it821x_chipset __devinitdata = {
-	.name		= "IT821X",
+	.name		= DRV_NAME,
 	.init_chipset	= init_chipset_it821x,
 	.init_hwif	= init_hwif_it821x,
 	.port_ops	= &it821x_port_ops,
@@ -652,7 +654,7 @@ static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_devic
 
 	itdevs = kzalloc(2 * sizeof(*itdevs), GFP_KERNEL);
 	if (itdevs == NULL) {
-		printk(KERN_ERR "it821x %s: out of memory\n", pci_name(dev));
+		printk(KERN_ERR DRV_NAME " %s: out of memory\n", pci_name(dev));
 		return -ENOMEM;
 	}
 
diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c
index 39e221b076e..545b6e172d9 100644
--- a/drivers/ide/pci/jmicron.c
+++ b/drivers/ide/pci/jmicron.c
@@ -12,6 +12,8 @@
 #include <linux/ide.h>
 #include <linux/init.h>
 
+#define DRV_NAME "jmicron"
+
 typedef enum {
 	PORT_PATA0 = 0,
 	PORT_PATA1 = 1,
@@ -102,7 +104,7 @@ static const struct ide_port_ops jmicron_port_ops = {
 };
 
 static const struct ide_port_info jmicron_chipset __devinitdata = {
-	.name		= "JMB",
+	.name		= DRV_NAME,
 	.enablebits	= { { 0x40, 0x01, 0x01 }, { 0x40, 0x10, 0x10 } },
 	.port_ops	= &jmicron_port_ops,
 	.pio_mask	= ATA_PIO5,
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index afcc742a63a..ffefcd15196 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -19,6 +19,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "ns87415"
+
 #ifdef CONFIG_SUPERIO
 /* SUPERIO 87560 is a PoS chip that NatSem denies exists.
  * Unfortunately, it's built-in on all Astro-based PA-RISC workstations
@@ -305,7 +307,7 @@ static const struct ide_dma_ops ns87415_dma_ops = {
 };
 
 static const struct ide_port_info ns87415_chipset __devinitdata = {
-	.name		= "NS87415",
+	.name		= DRV_NAME,
 	.init_hwif	= init_hwif_ns87415,
 	.port_ops	= &ns87415_port_ops,
 	.dma_ops	= &ns87415_dma_ops,
diff --git a/drivers/ide/pci/opti621.c b/drivers/ide/pci/opti621.c
index 4895f2ff3f0..e28e672ddaf 100644
--- a/drivers/ide/pci/opti621.c
+++ b/drivers/ide/pci/opti621.c
@@ -90,6 +90,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "opti621"
+
 #define READ_REG 0	/* index of Read cycle timing register */
 #define WRITE_REG 1	/* index of Write cycle timing register */
 #define CNTRL_REG 3	/* index of Control register */
@@ -200,7 +202,7 @@ static const struct ide_port_ops opti621_port_ops = {
 };
 
 static const struct ide_port_info opti621_chipset __devinitdata = {
-	.name		= "OPTI621/X",
+	.name		= DRV_NAME,
 	.enablebits	= { {0x45, 0x80, 0x00}, {0x40, 0x08, 0x00} },
 	.port_ops	= &opti621_port_ops,
 	.host_flags	= IDE_HFLAG_NO_DMA,
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index 4c2b669d7de..1f679195722 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -31,6 +31,8 @@
 #include <asm/pci-bridge.h>
 #endif
 
+#define DRV_NAME "pdc202xx_new"
+
 #undef DEBUG
 
 #ifdef DEBUG
@@ -458,7 +460,7 @@ static struct pci_dev * __devinit pdc20270_get_dev2(struct pci_dev *dev)
 
 		if (dev2->irq != dev->irq) {
 			dev2->irq = dev->irq;
-			printk(KERN_INFO "PDC20270 %s: PCI config space "
+			printk(KERN_INFO DRV_NAME " %s: PCI config space "
 				"interrupt fixed\n", pci_name(dev));
 		}
 
@@ -476,9 +478,9 @@ static const struct ide_port_ops pdcnew_port_ops = {
 	.cable_detect		= pdcnew_cable_detect,
 };
 
-#define DECLARE_PDCNEW_DEV(name_str, udma) \
+#define DECLARE_PDCNEW_DEV(udma) \
 	{ \
-		.name		= name_str, \
+		.name		= DRV_NAME, \
 		.init_chipset	= init_chipset_pdcnew, \
 		.port_ops	= &pdcnew_port_ops, \
 		.host_flags	= IDE_HFLAG_POST_SET_MODE | \
@@ -490,13 +492,8 @@ static const struct ide_port_ops pdcnew_port_ops = {
 	}
 
 static const struct ide_port_info pdcnew_chipsets[] __devinitdata = {
-	/* 0 */ DECLARE_PDCNEW_DEV("PDC20268", ATA_UDMA5),
-	/* 1 */ DECLARE_PDCNEW_DEV("PDC20269", ATA_UDMA6),
-	/* 2 */ DECLARE_PDCNEW_DEV("PDC20270", ATA_UDMA5),
-	/* 3 */ DECLARE_PDCNEW_DEV("PDC20271", ATA_UDMA6),
-	/* 4 */ DECLARE_PDCNEW_DEV("PDC20275", ATA_UDMA6),
-	/* 5 */ DECLARE_PDCNEW_DEV("PDC20276", ATA_UDMA6),
-	/* 6 */ DECLARE_PDCNEW_DEV("PDC20277", ATA_UDMA6),
+	/* 0: PDC202{68,70} */		DECLARE_PDCNEW_DEV(ATA_UDMA5),
+	/* 1: PDC202{69,71,75,76,77} */	DECLARE_PDCNEW_DEV(ATA_UDMA6),
 };
 
 /**
@@ -510,13 +507,10 @@ static const struct ide_port_info pdcnew_chipsets[] __devinitdata = {
  
 static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	const struct ide_port_info *d;
+	const struct ide_port_info *d = &pdcnew_chipsets[id->driver_data];
 	struct pci_dev *bridge = dev->bus->self;
-	u8 idx = id->driver_data;
-
-	d = &pdcnew_chipsets[idx];
 
-	if (idx == 2 && bridge &&
+	if (dev->device == PCI_DEVICE_ID_PROMISE_20270 && bridge &&
 	    bridge->vendor == PCI_VENDOR_ID_DEC &&
 	    bridge->device == PCI_DEVICE_ID_DEC_21150) {
 		struct pci_dev *dev2;
@@ -534,11 +528,11 @@ static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_de
 		}
 	}
 
-	if (idx == 5 && bridge &&
+	if (dev->device == PCI_DEVICE_ID_PROMISE_20276 && bridge &&
 	    bridge->vendor == PCI_VENDOR_ID_INTEL &&
 	    (bridge->device == PCI_DEVICE_ID_INTEL_I960 ||
 	     bridge->device == PCI_DEVICE_ID_INTEL_I960RM)) {
-		printk(KERN_INFO "PDC20276 %s: attached to I2O RAID controller,"
+		printk(KERN_INFO DRV_NAME " %s: attached to I2O RAID controller,"
 			" skipping\n", pci_name(dev));
 		return -ENODEV;
 	}
@@ -558,11 +552,11 @@ static void __devexit pdc202new_remove(struct pci_dev *dev)
 static const struct pci_device_id pdc202new_pci_tbl[] = {
 	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20268), 0 },
 	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20269), 1 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20270), 2 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20271), 3 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20275), 4 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20276), 5 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20277), 6 },
+	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20270), 0 },
+	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20271), 1 },
+	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20275), 1 },
+	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20276), 1 },
+	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20277), 1 },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, pdc202new_pci_tbl);
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index 5cb2731047e..da92d127868 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -20,6 +20,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "pdc202xx_old"
+
 #define PDC202XX_DEBUG_DRIVE_INFO	0
 
 static const char *pdc_quirk_drives[] = {
@@ -350,9 +352,9 @@ static const struct ide_dma_ops pdc2026x_dma_ops = {
 	.dma_timeout		= pdc202xx_dma_timeout,
 };
 
-#define DECLARE_PDC2026X_DEV(name_str, udma, extra_flags) \
+#define DECLARE_PDC2026X_DEV(udma, extra_flags) \
 	{ \
-		.name		= name_str, \
+		.name		= DRV_NAME, \
 		.init_chipset	= init_chipset_pdc202xx, \
 		.port_ops	= &pdc2026x_port_ops, \
 		.dma_ops	= &pdc2026x_dma_ops, \
@@ -363,8 +365,8 @@ static const struct ide_dma_ops pdc2026x_dma_ops = {
 	}
 
 static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
-	{	/* 0 */
-		.name		= "PDC20246",
+	{	/* 0: PDC20246 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_pdc202xx,
 		.port_ops	= &pdc20246_port_ops,
 		.dma_ops	= &pdc20246_dma_ops,
@@ -374,10 +376,10 @@ static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
 		.udma_mask	= ATA_UDMA2,
 	},
 
-	/* 1 */ DECLARE_PDC2026X_DEV("PDC20262", ATA_UDMA4, 0),
-	/* 2 */ DECLARE_PDC2026X_DEV("PDC20263", ATA_UDMA4, 0),
-	/* 3 */ DECLARE_PDC2026X_DEV("PDC20265", ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
-	/* 4 */ DECLARE_PDC2026X_DEV("PDC20267", ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
+	/* 1: PDC2026{2,3} */
+	DECLARE_PDC2026X_DEV(ATA_UDMA4, 0),
+	/* 2: PDC2026{5,7} */
+	DECLARE_PDC2026X_DEV(ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
 };
 
 /**
@@ -396,17 +398,17 @@ static int __devinit pdc202xx_init_one(struct pci_dev *dev, const struct pci_dev
 
 	d = &pdc202xx_chipsets[idx];
 
-	if (idx < 3)
+	if (idx < 2)
 		pdc202ata4_fixup_irq(dev, d->name);
 
-	if (idx == 3) {
+	if (dev->device == PCI_DEVICE_ID_PROMISE_20265) {
 		struct pci_dev *bridge = dev->bus->self;
 
 		if (bridge &&
 		    bridge->vendor == PCI_VENDOR_ID_INTEL &&
 		    (bridge->device == PCI_DEVICE_ID_INTEL_I960 ||
 		     bridge->device == PCI_DEVICE_ID_INTEL_I960RM)) {
-			printk(KERN_INFO "pdc202xx_old %s: skipping Promise "
+			printk(KERN_INFO DRV_NAME " %s: skipping Promise "
 				"PDC20265 attached to I2O RAID controller\n",
 				pci_name(dev));
 			return -ENODEV;
@@ -419,9 +421,9 @@ static int __devinit pdc202xx_init_one(struct pci_dev *dev, const struct pci_dev
 static const struct pci_device_id pdc202xx_pci_tbl[] = {
 	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20246), 0 },
 	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20262), 1 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20263), 2 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20265), 3 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20267), 4 },
+	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20263), 1 },
+	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20265), 2 },
+	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20267), 2 },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, pdc202xx_pci_tbl);
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index c16b1ab4d4f..9eb411f5c35 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -54,6 +54,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "piix"
+
 static int no_piix_dma;
 
 /**
@@ -314,9 +316,9 @@ static const struct ide_port_ops piix_port_ops = {
  #define IDE_HFLAGS_PIIX 0
 #endif
 
-#define DECLARE_PIIX_DEV(name_str, udma) \
+#define DECLARE_PIIX_DEV(udma) \
 	{						\
-		.name		= name_str,		\
+		.name		= DRV_NAME,		\
 		.init_hwif	= init_hwif_piix,	\
 		.enablebits	= {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, \
 		.port_ops	= &piix_port_ops,	\
@@ -327,9 +329,9 @@ static const struct ide_port_ops piix_port_ops = {
 		.udma_mask	= udma,			\
 	}
 
-#define DECLARE_ICH_DEV(name_str, udma) \
+#define DECLARE_ICH_DEV(udma) \
 	{ \
-		.name		= name_str, \
+		.name		= DRV_NAME, \
 		.init_chipset	= init_chipset_ich, \
 		.init_hwif	= init_hwif_ich, \
 		.enablebits	= {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, \
@@ -342,45 +344,31 @@ static const struct ide_port_ops piix_port_ops = {
 	}
 
 static const struct ide_port_info piix_pci_info[] __devinitdata = {
-	/*  0 */ DECLARE_PIIX_DEV("PIIXa",	0x00),	/* no udma */
-	/*  1 */ DECLARE_PIIX_DEV("PIIXb",	0x00),	/* no udma */
-
-	/*  2 */
+	/* 0: MPIIX */
 	{	/*
 		 * MPIIX actually has only a single IDE channel mapped to
 		 * the primary or secondary ports depending on the value
 		 * of the bit 14 of the IDETIM register at offset 0x6c
 		 */
-		.name		= "MPIIX",
+		.name		= DRV_NAME,
 		.enablebits	= {{0x6d,0xc0,0x80}, {0x6d,0xc0,0xc0}},
 		.host_flags	= IDE_HFLAG_ISA_PORTS | IDE_HFLAG_NO_DMA |
 				  IDE_HFLAGS_PIIX,
 		.pio_mask	= ATA_PIO4,
 		/* This is a painful system best to let it self tune for now */
 	},
-
-	/*  3 */ DECLARE_PIIX_DEV("PIIX3",	0x00),	/* no udma */
-	/*  4 */ DECLARE_PIIX_DEV("PIIX4",	ATA_UDMA2),
-	/*  5 */ DECLARE_ICH_DEV("ICH0",	ATA_UDMA2),
-	/*  6 */ DECLARE_PIIX_DEV("PIIX4",	ATA_UDMA2),
-	/*  7 */ DECLARE_ICH_DEV("ICH",		ATA_UDMA4),
-	/*  8 */ DECLARE_PIIX_DEV("PIIX4",	ATA_UDMA4),
-	/*  9 */ DECLARE_PIIX_DEV("PIIX4",	ATA_UDMA2),
-	/* 10 */ DECLARE_ICH_DEV("ICH2",	ATA_UDMA5),
-	/* 11 */ DECLARE_ICH_DEV("ICH2M",	ATA_UDMA5),
-	/* 12 */ DECLARE_ICH_DEV("ICH3M",	ATA_UDMA5),
-	/* 13 */ DECLARE_ICH_DEV("ICH3",	ATA_UDMA5),
-	/* 14 */ DECLARE_ICH_DEV("ICH4",	ATA_UDMA5),
-	/* 15 */ DECLARE_ICH_DEV("ICH5",	ATA_UDMA5),
-	/* 16 */ DECLARE_ICH_DEV("C-ICH",	ATA_UDMA5),
-	/* 17 */ DECLARE_ICH_DEV("ICH4",	ATA_UDMA5),
-	/* 18 */ DECLARE_ICH_DEV("ICH5-SATA",	ATA_UDMA5),
-	/* 19 */ DECLARE_ICH_DEV("ICH5",	ATA_UDMA5),
-	/* 20 */ DECLARE_ICH_DEV("ICH6",	ATA_UDMA5),
-	/* 21 */ DECLARE_ICH_DEV("ICH7",	ATA_UDMA5),
-	/* 22 */ DECLARE_ICH_DEV("ICH4",	ATA_UDMA5),
-	/* 23 */ DECLARE_ICH_DEV("ESB2",	ATA_UDMA5),
-	/* 24 */ DECLARE_ICH_DEV("ICH8M",	ATA_UDMA5),
+	/* 1: PIIXa/PIIXb/PIIX3 */
+	DECLARE_PIIX_DEV(0x00), /* no udma */
+	/* 2: PIIX4 */
+	DECLARE_PIIX_DEV(ATA_UDMA2),
+	/* 3: ICH0 */
+	DECLARE_ICH_DEV(ATA_UDMA2),
+	/* 4: ICH */
+	DECLARE_ICH_DEV(ATA_UDMA4),
+	/* 5: PIIX4 */
+	DECLARE_PIIX_DEV(ATA_UDMA4),
+	/* 6: ICH[2-7]/ICH[2-3]M/C-ICH/ICH5-SATA/ESB2/ICH8M */
+	DECLARE_ICH_DEV(ATA_UDMA5),
 };
 
 /**
@@ -421,39 +409,39 @@ static void __devinit piix_check_450nx(void)
 			no_piix_dma = 2;
 	}
 	if(no_piix_dma)
-		printk(KERN_WARNING "piix: 450NX errata present, disabling IDE DMA.\n");
+		printk(KERN_WARNING DRV_NAME ": 450NX errata present, disabling IDE DMA.\n");
 	if(no_piix_dma == 2)
-		printk(KERN_WARNING "piix: A BIOS update may resolve this.\n");
+		printk(KERN_WARNING DRV_NAME ": A BIOS update may resolve this.\n");
 }		
 
 static const struct pci_device_id piix_pci_tbl[] = {
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371FB_0),   0 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371FB_1),   1 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371MX),     2 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371SB_1),   3 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371AB),     4 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801AB_1),   5 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82443MX_1),   6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801AA_1),   7 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82372FB_1),   8 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82451NX),     9 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801BA_9),  10 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801BA_8),  11 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801CA_10), 12 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801CA_11), 13 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_11), 14 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801EB_11), 15 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801E_11),  16 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_10), 17 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371FB_0),  1 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371FB_1),  1 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371MX),    0 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371SB_1),  1 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371AB),    2 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801AB_1),  3 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82443MX_1),  2 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801AA_1),  4 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82372FB_1),  5 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82451NX),    2 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801BA_9),  6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801BA_8),  6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801CA_10), 6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801CA_11), 6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_11), 6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801EB_11), 6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801E_11),  6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_10), 6 },
 #ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801EB_1),  18 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801EB_1),  6 },
 #endif
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB_2),      19 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH6_19),    20 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH7_21),    21 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_1),  22 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB2_18),    23 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH8_6),     24 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB_2),      6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH6_19),    6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH7_21),    6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_1),  6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB2_18),    6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH8_6),     6 },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, piix_pci_tbl);
diff --git a/drivers/ide/pci/rz1000.c b/drivers/ide/pci/rz1000.c
index f7a3b9aff29..8d11ee838a2 100644
--- a/drivers/ide/pci/rz1000.c
+++ b/drivers/ide/pci/rz1000.c
@@ -21,6 +21,8 @@
 #include <linux/ide.h>
 #include <linux/init.h>
 
+#define DRV_NAME "rz1000"
+
 static void __devinit init_hwif_rz1000 (ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
@@ -40,7 +42,7 @@ static void __devinit init_hwif_rz1000 (ide_hwif_t *hwif)
 }
 
 static const struct ide_port_info rz1000_chipset __devinitdata = {
-	.name		= "RZ100x",
+	.name		= DRV_NAME,
 	.init_hwif	= init_hwif_rz1000,
 	.chipset	= ide_rz1000,
 	.host_flags	= IDE_HFLAG_NO_DMA,
diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c
index 6509560ba66..8efaed16fea 100644
--- a/drivers/ide/pci/sc1200.c
+++ b/drivers/ide/pci/sc1200.c
@@ -22,6 +22,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "sc1200"
+
 #define SC1200_REV_A	0x00
 #define SC1200_REV_B1	0x01
 #define SC1200_REV_B3	0x02
@@ -291,7 +293,7 @@ static const struct ide_dma_ops sc1200_dma_ops = {
 };
 
 static const struct ide_port_info sc1200_chipset __devinitdata = {
-	.name		= "SC1200",
+	.name		= DRV_NAME,
 	.port_ops	= &sc1200_port_ops,
 	.dma_ops	= &sc1200_dma_ops,
 	.host_flags	= IDE_HFLAG_SERIALIZE |
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index 1106ff44cde..e26bc8326db 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -38,6 +38,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "serverworks"
+
 #define SVWKS_CSB5_REVISION_NEW	0x92 /* min PCI_REVISION_ID for UDMA5 (A2.0) */
 #define SVWKS_CSB6_REVISION	0xa0 /* min PCI_REVISION_ID for UDMA4 (A1.0) */
 
@@ -353,40 +355,44 @@ static const struct ide_port_ops svwks_port_ops = {
 #define IDE_HFLAGS_SVWKS IDE_HFLAG_LEGACY_IRQS
 
 static const struct ide_port_info serverworks_chipsets[] __devinitdata = {
-	{	/* 0 */
-		.name		= "SvrWks OSB4",
+	{	/* 0: OSB4 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_svwks,
 		.port_ops	= &osb4_port_ops,
 		.host_flags	= IDE_HFLAGS_SVWKS,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= 0x00, /* UDMA is problematic on OSB4 */
-	},{	/* 1 */
-		.name		= "SvrWks CSB5",
+	},
+	{	/* 1: CSB5 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_svwks,
 		.port_ops	= &svwks_port_ops,
 		.host_flags	= IDE_HFLAGS_SVWKS,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA5,
-	},{	/* 2 */
-		.name		= "SvrWks CSB6",
+	},
+	{	/* 2: CSB6 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_svwks,
 		.port_ops	= &svwks_port_ops,
 		.host_flags	= IDE_HFLAGS_SVWKS,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA5,
-	},{	/* 3 */
-		.name		= "SvrWks CSB6",
+	},
+	{	/* 3: CSB6-2 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_svwks,
 		.port_ops	= &svwks_port_ops,
 		.host_flags	= IDE_HFLAGS_SVWKS | IDE_HFLAG_SINGLE,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA5,
-	},{	/* 4 */
-		.name		= "SvrWks HT1000",
+	},
+	{	/* 4: HT1000 */
+		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_svwks,
 		.port_ops	= &svwks_port_ops,
 		.host_flags	= IDE_HFLAGS_SVWKS | IDE_HFLAG_SINGLE,
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index fc29f1ca503..572b479a392 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -44,6 +44,8 @@
 #include <linux/init.h>
 #include <linux/io.h>
 
+#define DRV_NAME "siimage"
+
 /**
  *	pdev_is_sata		-	check if device is SATA
  *	@pdev:	PCI device to check
@@ -717,9 +719,9 @@ static const struct ide_dma_ops sil_dma_ops = {
 	.dma_lost_irq		= ide_dma_lost_irq,
 };
 
-#define DECLARE_SII_DEV(name_str, p_ops)		\
+#define DECLARE_SII_DEV(p_ops)				\
 	{						\
-		.name		= name_str,		\
+		.name		= DRV_NAME,		\
 		.init_chipset	= init_chipset_siimage,	\
 		.init_iops	= init_iops_siimage,	\
 		.port_ops	= p_ops,		\
@@ -730,9 +732,8 @@ static const struct ide_dma_ops sil_dma_ops = {
 	}
 
 static const struct ide_port_info siimage_chipsets[] __devinitdata = {
-	/* 0 */ DECLARE_SII_DEV("SiI680",		&sil_pata_port_ops),
-	/* 1 */ DECLARE_SII_DEV("SiI3112 Serial ATA",	&sil_sata_port_ops),
-	/* 2 */ DECLARE_SII_DEV("Adaptec AAR-1210SA",	&sil_sata_port_ops)
+	/* 0: SiI680 */  DECLARE_SII_DEV(&sil_pata_port_ops),
+	/* 1: SiI3112 */ DECLARE_SII_DEV(&sil_sata_port_ops)
 };
 
 /**
@@ -761,7 +762,7 @@ static int __devinit siimage_init_one(struct pci_dev *dev,
 		static int first = 1;
 
 		if (first) {
-			printk(KERN_INFO "siimage: For full SATA support you "
+			printk(KERN_INFO DRV_NAME ": For full SATA support you "
 				"should use the libata sata_sil module.\n");
 			first = 0;
 		}
@@ -780,7 +781,7 @@ static int __devinit siimage_init_one(struct pci_dev *dev,
 		* seem to get terminally confused in the PCI spaces.
 		*/
 		if (!request_mem_region(bar5, barsize, d.name)) {
-			printk(KERN_WARNING "siimage %s: MMIO ports not "
+			printk(KERN_WARNING DRV_NAME " %s: MMIO ports not "
 				"available\n", pci_name(dev));
 		} else {
 			ioaddr = ioremap(bar5, barsize);
@@ -823,7 +824,7 @@ static const struct pci_device_id siimage_pci_tbl[] = {
 	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_SII_680),    0 },
 #ifdef CONFIG_BLK_DEV_IDE_SATA
 	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_SII_3112),   1 },
-	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_SII_1210SA), 2 },
+	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_SII_1210SA), 1 },
 #endif
 	{ 0, },
 };
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index 518d8ab413f..6fcb46c8787 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -52,6 +52,8 @@
 #include <linux/init.h>
 #include <linux/ide.h>
 
+#define DRV_NAME "sis5513"
+
 /* registers layout and init values are chipset family dependant */
 
 #define ATA_16		0x01
@@ -380,7 +382,7 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 		}
 		pci_dev_put(host);
 
-		printk(KERN_INFO "SIS5513 %s: %s %s controller\n",
+		printk(KERN_INFO DRV_NAME " %s: %s %s controller\n",
 			pci_name(dev), SiSHostChipInfo[i].name,
 			chipset_capability[chipset_family]);
 	}
@@ -397,7 +399,7 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 			pci_write_config_dword(dev, 0x54, idemisc);
 
 			if (trueid == 0x5518) {
-				printk(KERN_INFO "SIS5513 %s: SiS 962/963 MuTIOL IDE UDMA133 controller\n",
+				printk(KERN_INFO DRV_NAME " %s: SiS 962/963 MuTIOL IDE UDMA133 controller\n",
 					pci_name(dev));
 				chipset_family = ATA_133;
 
@@ -407,7 +409,7 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 				 */
 				if ((idemisc & 0x40000000) == 0) {
 					pci_write_config_dword(dev, 0x54, idemisc | 0x40000000);
-					printk(KERN_INFO "SIS5513 %s: Switching to 5513 register mapping\n",
+					printk(KERN_INFO DRV_NAME " %s: Switching to 5513 register mapping\n",
 						pci_name(dev));
 				}
 			}
@@ -432,11 +434,11 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 				pci_dev_put(lpc_bridge);
 
 				if (lpc_bridge->revision == 0x10 && (prefctl & 0x80)) {
-					printk(KERN_INFO "SIS5513 %s: SiS 961B MuTIOL IDE UDMA133 controller\n",
+					printk(KERN_INFO DRV_NAME " %s: SiS 961B MuTIOL IDE UDMA133 controller\n",
 						pci_name(dev));
 					chipset_family = ATA_133a;
 				} else {
-					printk(KERN_INFO "SIS5513 %s: SiS 961 MuTIOL IDE UDMA100 controller\n",
+					printk(KERN_INFO DRV_NAME " %s: SiS 961 MuTIOL IDE UDMA100 controller\n",
 						pci_name(dev));
 					chipset_family = ATA_100;
 				}
@@ -560,7 +562,7 @@ static const struct ide_port_ops sis_ata133_port_ops = {
 };
 
 static const struct ide_port_info sis5513_chipset __devinitdata = {
-	.name		= "SIS5513",
+	.name		= DRV_NAME,
 	.init_chipset	= init_chipset_sis5513,
 	.enablebits	= { {0x4a, 0x02, 0x02}, {0x4a, 0x04, 0x04} },
 	.host_flags	= IDE_HFLAG_LEGACY_IRQS | IDE_HFLAG_NO_AUTODMA,
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index 72899f85c5b..fa720db3de1 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -23,6 +23,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "sl82c105"
+
 #undef DEBUG
 
 #ifdef DEBUG
@@ -301,7 +303,7 @@ static const struct ide_dma_ops sl82c105_dma_ops = {
 };
 
 static const struct ide_port_info sl82c105_chipset __devinitdata = {
-	.name		= "W82C105",
+	.name		= DRV_NAME,
 	.init_chipset	= init_chipset_sl82c105,
 	.enablebits	= {{0x40,0x01,0x01}, {0x40,0x10,0x10}},
 	.port_ops	= &sl82c105_port_ops,
@@ -328,7 +330,7 @@ static int __devinit sl82c105_init_one(struct pci_dev *dev, const struct pci_dev
 		 * Never ever EVER under any circumstances enable
 		 * DMA when the bridge is this old.
 		 */
-		printk(KERN_INFO "W82C105_IDE: Winbond W83C553 bridge "
+		printk(KERN_INFO DRV_NAME ": Winbond W83C553 bridge "
 				 "revision %d, BM-DMA disabled\n", rev);
 		d.dma_ops = NULL;
 		d.mwdma_mask = 0;
diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c
index fee5ebe4bb9..13d1fa491f2 100644
--- a/drivers/ide/pci/slc90e66.c
+++ b/drivers/ide/pci/slc90e66.c
@@ -15,6 +15,8 @@
 #include <linux/ide.h>
 #include <linux/init.h>
 
+#define DRV_NAME "slc90e66"
+
 static DEFINE_SPINLOCK(slc90e66_lock);
 
 static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
@@ -132,7 +134,7 @@ static const struct ide_port_ops slc90e66_port_ops = {
 };
 
 static const struct ide_port_info slc90e66_chipset __devinitdata = {
-	.name		= "SLC90E66",
+	.name		= DRV_NAME,
 	.enablebits	= { {0x41, 0x80, 0x80}, {0x43, 0x80, 0x80} },
 	.port_ops	= &slc90e66_port_ops,
 	.host_flags	= IDE_HFLAG_LEGACY_IRQS,
diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c
index 102cd7c40cd..b1cb8a9ce5a 100644
--- a/drivers/ide/pci/tc86c001.c
+++ b/drivers/ide/pci/tc86c001.c
@@ -11,7 +11,7 @@
 #include <linux/pci.h>
 #include <linux/ide.h>
 
-#define DRV_NAME "TC86C001"
+#define DRV_NAME "tc86c001"
 
 static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
 {
@@ -193,7 +193,7 @@ static const struct ide_dma_ops tc86c001_dma_ops = {
 };
 
 static const struct ide_port_info tc86c001_chipset __devinitdata = {
-	.name		= "TC86C001",
+	.name		= DRV_NAME,
 	.init_hwif	= init_hwif_tc86c001,
 	.port_ops	= &tc86c001_port_ops,
 	.dma_ops	= &tc86c001_dma_ops,
diff --git a/drivers/ide/pci/triflex.c b/drivers/ide/pci/triflex.c
index 78e24ac8097..b77ec35151b 100644
--- a/drivers/ide/pci/triflex.c
+++ b/drivers/ide/pci/triflex.c
@@ -33,6 +33,8 @@
 #include <linux/ide.h>
 #include <linux/init.h>
 
+#define DRV_NAME "triflex"
+
 static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
 {
 	ide_hwif_t *hwif = HWIF(drive);
@@ -93,7 +95,7 @@ static const struct ide_port_ops triflex_port_ops = {
 };
 
 static const struct ide_port_info triflex_device __devinitdata = {
-	.name		= "TRIFLEX",
+	.name		= DRV_NAME,
 	.enablebits	= {{0x80, 0x01, 0x01}, {0x80, 0x02, 0x02}},
 	.port_ops	= &triflex_port_ops,
 	.pio_mask	= ATA_PIO4,
diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c
index 7bda5ed92e1..fd28b49977f 100644
--- a/drivers/ide/pci/trm290.c
+++ b/drivers/ide/pci/trm290.c
@@ -141,6 +141,8 @@
 
 #include <asm/io.h>
 
+#define DRV_NAME "trm290"
+
 static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
 {
 	ide_hwif_t *hwif = HWIF(drive);
@@ -245,10 +247,10 @@ static void __devinit init_hwif_trm290(ide_hwif_t *hwif)
 	u8 reg = 0;
 
 	if ((dev->class & 5) && cfg_base)
-		printk(KERN_INFO "TRM290 %s: chip", pci_name(dev));
+		printk(KERN_INFO DRV_NAME " %s: chip", pci_name(dev));
 	else {
 		cfg_base = 0x3df0;
-		printk(KERN_INFO "TRM290 %s: using default", pci_name(dev));
+		printk(KERN_INFO DRV_NAME " %s: using default", pci_name(dev));
 	}
 	printk(KERN_CONT " config base at 0x%04x\n", cfg_base);
 	hwif->config_data = cfg_base;
@@ -325,7 +327,7 @@ static struct ide_dma_ops trm290_dma_ops = {
 };
 
 static const struct ide_port_info trm290_chipset __devinitdata = {
-	.name		= "TRM290",
+	.name		= DRV_NAME,
 	.init_hwif	= init_hwif_trm290,
 	.chipset	= ide_trm290,
 	.port_ops	= &trm290_port_ops,
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 23332556e61..170e058f1fb 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -35,6 +35,8 @@
 #include <asm/processor.h>
 #endif
 
+#define DRV_NAME "via82cxxx"
+
 #define VIA_IDE_ENABLE		0x40
 #define VIA_IDE_CONFIG		0x41
 #define VIA_FIFO_CONFIG		0x43
@@ -373,7 +375,7 @@ static const struct ide_port_ops via_port_ops = {
 };
 
 static const struct ide_port_info via82cxxx_chipset __devinitdata = {
-	.name		= "VP_IDE",
+	.name		= DRV_NAME,
 	.init_chipset	= init_chipset_via82cxxx,
 	.enablebits	= { { 0x40, 0x02, 0x02 }, { 0x40, 0x01, 0x01 } },
 	.port_ops	= &via_port_ops,
@@ -401,7 +403,7 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 	 */
 	via_config = via_config_find(&isa);
 	if (!via_config->id) {
-		printk(KERN_WARNING "VP_IDE %s: unknown chipset, skipping\n",
+		printk(KERN_WARNING DRV_NAME " %s: unknown chipset, skipping\n",
 			pci_name(dev));
 		return -ENODEV;
 	}
@@ -409,7 +411,7 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 	/*
 	 * Print the boot message.
 	 */
-	printk(KERN_INFO "VP_IDE %s: VIA %s (rev %02x) IDE %sDMA%s\n",
+	printk(KERN_INFO DRV_NAME " %s: VIA %s (rev %02x) IDE %sDMA%s\n",
 		pci_name(dev), via_config->name, isa->revision,
 		via_config->udma_mask ? "U" : "MW",
 		via_dma[via_config->udma_mask ?
@@ -429,9 +431,9 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 	}
 
 	if (via_clock < 20000 || via_clock > 50000) {
-		printk(KERN_WARNING "VP_IDE: User given PCI clock speed "
+		printk(KERN_WARNING DRV_NAME ": User given PCI clock speed "
 			"impossible (%d), using 33 MHz instead.\n", via_clock);
-		printk(KERN_WARNING "VP_IDE: Use ide0=ata66 if you want "
+		printk(KERN_WARNING DRV_NAME ": Use ide0=ata66 if you want "
 			"to assume 80-wire cable.\n");
 		via_clock = 33333;
 	}
@@ -453,7 +455,8 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 
 	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
 	if (!vdev) {
-		printk(KERN_ERR "VP_IDE %s: out of memory :(\n", pci_name(dev));
+		printk(KERN_ERR DRV_NAME " %s: out of memory :(\n",
+			pci_name(dev));
 		return -ENOMEM;
 	}
 
-- 
GitLab


From a326b02b0c576001353dbc489154959b0889c6bf Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:33 +0200
Subject: [PATCH 502/853] ide: drop 'name' parameter from ->init_chipset method

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/aec62xx.c      |  2 +-
 drivers/ide/pci/alim15x3.c     |  5 ++---
 drivers/ide/pci/amd74xx.c      | 19 ++++++++-----------
 drivers/ide/pci/cmd64x.c       |  2 +-
 drivers/ide/pci/cs5530.c       |  7 +++----
 drivers/ide/pci/cy82c693.c     | 10 +++++-----
 drivers/ide/pci/hpt34x.c       |  2 +-
 drivers/ide/pci/hpt366.c       |  3 ++-
 drivers/ide/pci/it821x.c       |  2 +-
 drivers/ide/pci/pdc202xx_new.c |  3 ++-
 drivers/ide/pci/pdc202xx_old.c |  3 +--
 drivers/ide/pci/piix.c         |  3 +--
 drivers/ide/pci/serverworks.c  |  6 +++---
 drivers/ide/pci/siimage.c      |  8 +++-----
 drivers/ide/pci/sis5513.c      |  3 +--
 drivers/ide/pci/sl82c105.c     |  2 +-
 drivers/ide/pci/via82cxxx.c    |  3 +--
 drivers/ide/setup-pci.c        |  2 +-
 include/linux/ide.h            |  2 +-
 19 files changed, 39 insertions(+), 48 deletions(-)

diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index f6dc6c20f3a..e0c8fe7d9fe 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -140,7 +140,7 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0);
 }
 
-static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev)
 {
 	/* These are necessary to get AEC6280 Macintosh cards to work */
 	if ((dev->device == PCI_DEVICE_ID_ARTOP_ATP865) ||
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index a099c4dd599..b582687e0cd 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -209,13 +209,12 @@ static int ali15x3_dma_setup(ide_drive_t *drive)
 /**
  *	init_chipset_ali15x3	-	Initialise an ALi IDE controller
  *	@dev: PCI device
- *	@name: Name of the controller
  *
  *	This function initializes the ALI IDE controller and where 
  *	appropriate also sets up the 1533 southbridge.
  */
-  
-static unsigned int __devinit init_chipset_ali15x3 (struct pci_dev *dev, const char *name)
+
+static unsigned int __devinit init_chipset_ali15x3(struct pci_dev *dev)
 {
 	unsigned long flags;
 	u8 tmpbyte;
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index cbf78edfe00..2cea7bf51a0 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -112,15 +112,13 @@ static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	amd_set_drive(drive, XFER_PIO_0 + pio);
 }
 
-static void __devinit amd7409_cable_detect(struct pci_dev *dev,
-					   const char *name)
+static void __devinit amd7409_cable_detect(struct pci_dev *dev)
 {
 	/* no host side cable detection */
 	amd_80w = 0x03;
 }
 
-static void __devinit amd7411_cable_detect(struct pci_dev *dev,
-					   const char *name)
+static void __devinit amd7411_cable_detect(struct pci_dev *dev)
 {
 	int i;
 	u32 u = 0;
@@ -131,9 +129,9 @@ static void __devinit amd7411_cable_detect(struct pci_dev *dev,
 	amd_80w = ((t & 0x3) ? 1 : 0) | ((t & 0xc) ? 2 : 0);
 	for (i = 24; i >= 0; i -= 8)
 		if (((u >> i) & 4) && !(amd_80w & (1 << (1 - (i >> 4))))) {
-			printk(KERN_WARNING "%s %s: BIOS didn't set cable bits "
-				"correctly. Enabling workaround.\n",
-				name, pci_name(dev));
+			printk(KERN_WARNING DRV_NAME " %s: BIOS didn't set "
+				"cable bits correctly. Enabling workaround.\n",
+				pci_name(dev));
 			amd_80w |= (1 << (1 - (i >> 4)));
 		}
 }
@@ -142,8 +140,7 @@ static void __devinit amd7411_cable_detect(struct pci_dev *dev,
  * The initialization callback.  Initialize drive independent registers.
  */
 
-static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev,
-						   const char *name)
+static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev)
 {
 	u8 t = 0, offset = amd_offset(dev);
 
@@ -156,9 +153,9 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev,
 		; /* no UDMA > 2 */
 	else if (dev->vendor == PCI_VENDOR_ID_AMD &&
 		 dev->device == PCI_DEVICE_ID_AMD_VIPER_7409)
-		amd7409_cable_detect(dev, name);
+		amd7409_cable_detect(dev);
 	else
-		amd7411_cable_detect(dev, name);
+		amd7411_cable_detect(dev);
 
 /*
  * Take care of prefetch & postwrite.
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index 3d84debaf81..1360b4fa9fd 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -332,7 +332,7 @@ static int cmd646_1_dma_end(ide_drive_t *drive)
 	return (dma_stat & 7) != 4;
 }
 
-static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev)
 {
 	u8 mrdmode = 0;
 
diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index 5543c8677a5..f235db8c678 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -129,12 +129,11 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode)
 /**
  *	init_chipset_5530	-	set up 5530 bridge
  *	@dev: PCI device
- *	@name: device name
  *
  *	Initialize the cs5530 bridge for reliable IDE DMA operation.
  */
 
-static unsigned int __devinit init_chipset_cs5530 (struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_cs5530(struct pci_dev *dev)
 {
 	struct pci_dev *master_0 = NULL, *cs5530_0 = NULL;
 
@@ -153,11 +152,11 @@ static unsigned int __devinit init_chipset_cs5530 (struct pci_dev *dev, const ch
 		}
 	}
 	if (!master_0) {
-		printk(KERN_ERR "%s: unable to locate PCI MASTER function\n", name);
+		printk(KERN_ERR DRV_NAME ": unable to locate PCI MASTER function\n");
 		goto out;
 	}
 	if (!cs5530_0) {
-		printk(KERN_ERR "%s: unable to locate CS5530 LEGACY function\n", name);
+		printk(KERN_ERR DRV_NAME ": unable to locate CS5530 LEGACY function\n");
 		goto out;
 	}
 
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index 41c7f3351eb..bfae2f882f4 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -332,7 +332,7 @@ static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
 /*
  * this function is called during init and is used to setup the cy82c693 chip
  */
-static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev)
 {
 	if (PCI_FUNC(dev->devfn) != 1)
 		return 0;
@@ -351,8 +351,8 @@ static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev, const c
 	data = inb(CY82_DATA_PORT);
 
 #if CY82C693_DEBUG_INFO
-	printk(KERN_INFO "%s: Peripheral Configuration Register: 0x%X\n",
-		name, data);
+	printk(KERN_INFO DRV_NAME ": Peripheral Configuration Register: 0x%X\n",
+		data);
 #endif /* CY82C693_DEBUG_INFO */
 
 	/*
@@ -373,8 +373,8 @@ static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev, const c
 	outb(data, CY82_DATA_PORT);
 
 #if CY82C693_DEBUG_INFO
-	printk(KERN_INFO "%s: New Peripheral Configuration Register: 0x%X\n",
-		name, data);
+	printk(KERN_INFO DRV_NAME ": New Peripheral Configuration Register: 0x%X\n",
+		data);
 #endif /* CY82C693_DEBUG_INFO */
 
 #endif /* CY82C693_SETDMA_CLOCK */
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index baabb4ce0d7..6009b0b9655 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -79,7 +79,7 @@ static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio)
  */
 #define	HPT34X_PCI_INIT_REG		0x80
 
-static unsigned int __devinit init_chipset_hpt34x(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_hpt34x(struct pci_dev *dev)
 {
 	int i = 0;
 	unsigned long hpt34xIoBase = pci_resource_start(dev, 4);
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 6a1c65c3be3..5271b246b88 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -970,11 +970,12 @@ static int __devinit hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f
 	return 1;
 }
 
-static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev)
 {
 	unsigned long io_base	= pci_resource_start(dev, 4);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct hpt_info *info	= host->host_priv + (&dev->dev == host->dev[1]);
+	const char *name	= DRV_NAME;
 	u8 pci_clk,  dpll_clk	= 0;	/* PCI and DPLL clock in MHz */
 	u8 chip_type;
 	enum ata_clock	clock;
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index 74173352741..e16a1d113a2 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -605,7 +605,7 @@ static void __devinit it8212_disable_raid(struct pci_dev *dev)
 	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
 }
 
-static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev)
 {
 	u8 conf;
 	static char *mode[2] = { "pass through", "smart" };
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index 1f679195722..998615fa285 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -326,8 +326,9 @@ static void __devinit apple_kiwi_init(struct pci_dev *pdev)
 }
 #endif /* CONFIG_PPC_PMAC */
 
-static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev)
 {
+	const char *name = DRV_NAME;
 	unsigned long dma_base = pci_resource_start(dev, 4);
 	unsigned long sec_dma_base = dma_base + 0x08;
 	long pll_input, pll_output, ratio;
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index da92d127868..6ff2def58da 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -265,8 +265,7 @@ static void pdc202xx_dma_timeout(ide_drive_t *drive)
 	ide_dma_timeout(drive);
 }
 
-static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev,
-						    const char *name)
+static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev)
 {
 	unsigned long dmabase = pci_resource_start(dev, 4);
 	u8 udma_speed_flag = 0, primary_mode = 0, secondary_mode = 0;
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index 9eb411f5c35..7fc3022dcf6 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -200,13 +200,12 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
 /**
  *	init_chipset_ich	-	set up the ICH chipset
  *	@dev: PCI device to set up
- *	@name: Name of the device
  *
  *	Initialize the PCI device as required.  For the ICH this turns
  *	out to be nice and simple.
  */
 
-static unsigned int __devinit init_chipset_ich(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_ich(struct pci_dev *dev)
 {
 	u32 extra = 0;
 
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index e26bc8326db..d173f293772 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -174,7 +174,7 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	pci_write_config_byte(dev, 0x54, ultra_enable);
 }
 
-static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_svwks(struct pci_dev *dev)
 {
 	unsigned int reg;
 	u8 btr;
@@ -190,8 +190,8 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha
 			pci_read_config_dword(isa_dev, 0x64, &reg);
 			reg &= ~0x00002000; /* disable 600ns interrupt mask */
 			if(!(reg & 0x00004000))
-				printk(KERN_DEBUG "%s %s: UDMA not BIOS "
-					"enabled.\n", name, pci_name(dev));
+				printk(KERN_DEBUG DRV_NAME " %s: UDMA not BIOS "
+					"enabled.\n", pci_name(dev));
 			reg |=  0x00004000; /* enable UDMA/33 support */
 			pci_write_config_dword(isa_dev, 0x64, reg);
 		}
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 572b479a392..b8ad9ad6cf0 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -457,14 +457,12 @@ static void sil_sata_pre_reset(ide_drive_t *drive)
 /**
  *	init_chipset_siimage	-	set up an SI device
  *	@dev: PCI device
- *	@name: device name
  *
  *	Perform the initial PCI set up for this device. Attempt to switch
  *	to 133 MHz clocking if the system isn't already set up to do it.
  */
 
-static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev,
-						   const char *name)
+static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev)
 {
 	struct ide_host *host = pci_get_drvdata(dev);
 	void __iomem *ioaddr = host->host_priv;
@@ -541,8 +539,8 @@ static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev,
 			{ "== 100", "== 133", "== 2X PCI", "DISABLED!" };
 
 		tmp >>= 4;
-		printk(KERN_INFO "%s %s: BASE CLOCK %s\n",
-			name, pci_name(dev), clk_str[tmp & 3]);
+		printk(KERN_INFO DRV_NAME " %s: BASE CLOCK %s\n",
+			pci_name(dev), clk_str[tmp & 3]);
 	}
 
 	return 0;
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index 6fcb46c8787..cc95f90b53b 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -448,8 +448,7 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 	return chipset_family;
 }
 
-static unsigned int __devinit init_chipset_sis5513(struct pci_dev *dev,
-						   const char *name)
+static unsigned int __devinit init_chipset_sis5513(struct pci_dev *dev)
 {
 	/* Make general config ops here
 	   1/ tell IDE channels to operate in Compatibility mode only
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index fa720db3de1..73905bcc08f 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -272,7 +272,7 @@ static u8 sl82c105_bridge_revision(struct pci_dev *dev)
  * channel 0 here at least, but channel 1 has to be enabled by
  * firmware or arch code. We still set both to 16 bits mode.
  */
-static unsigned int __devinit init_chipset_sl82c105(struct pci_dev *dev, const char *msg)
+static unsigned int __devinit init_chipset_sl82c105(struct pci_dev *dev)
 {
 	u32 val;
 
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 170e058f1fb..454d2bf62dc 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -262,13 +262,12 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
 /**
  *	init_chipset_via82cxxx	-	initialization handler
  *	@dev: PCI device
- *	@name: Name of interface
  *
  *	The initialization callback. Here we determine the IDE chip type
  *	and initialize its drive independent registers.
  */
 
-static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev)
 {
 	struct ide_host *host = pci_get_drvdata(dev);
 	struct via82cxxx_dev *vdev = host->host_priv;
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index d9655aeb013..a8e9e8a69a5 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -515,7 +515,7 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 	 * space, place chipset into init-mode, and/or preserve
 	 * an interrupt if the card is not native ide support.
 	 */
-	ret = d->init_chipset ? d->init_chipset(dev, d->name) : 0;
+	ret = d->init_chipset ? d->init_chipset(dev) : 0;
 	if (ret < 0)
 		goto out;
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index fd78b401b03..b846bc44a27 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1206,7 +1206,7 @@ enum {
 
 struct ide_port_info {
 	char			*name;
-	unsigned int		(*init_chipset)(struct pci_dev *, const char *);
+	unsigned int		(*init_chipset)(struct pci_dev *);
 	void			(*init_iops)(ide_hwif_t *);
 	void                    (*init_hwif)(ide_hwif_t *);
 	int			(*init_dma)(ide_hwif_t *,
-- 
GitLab


From cd740ab0f69f6c94d9c7f916758e308f30a439fa Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 24 Jul 2008 22:53:33 +0200
Subject: [PATCH 503/853] ide: ide-tape.c sparse annotations and unaligned
 access removal

If this is actually unaligned, the access of speed/max_speed above
is already broken and needs a get_unaligned().  Otherwise it is
aligned and the put_unaligned() calls can be removed.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-tape.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 789f3428f07..82c2afe4d28 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -655,10 +655,10 @@ static void ide_tape_callback(ide_drive_t *drive)
 			uptodate = 0;
 		} else {
 			debug_log(DBG_SENSE, "Block Location - %u\n",
-					be32_to_cpu(*(u32 *)&readpos[4]));
+					be32_to_cpup((__be32 *)&readpos[4]));
 
 			tape->partition = readpos[1];
-			tape->first_frame = be32_to_cpu(*(u32 *)&readpos[4]);
+			tape->first_frame = be32_to_cpup((__be32 *)&readpos[4]);
 			set_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags);
 		}
 	}
@@ -2381,23 +2381,23 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
 	caps = pc.buf + 4 + pc.buf[3];
 
 	/* convert to host order and save for later use */
-	speed = be16_to_cpu(*(u16 *)&caps[14]);
-	max_speed = be16_to_cpu(*(u16 *)&caps[8]);
+	speed = be16_to_cpup((__be16 *)&caps[14]);
+	max_speed = be16_to_cpup((__be16 *)&caps[8]);
 
-	put_unaligned(max_speed, (u16 *)&caps[8]);
-	put_unaligned(be16_to_cpu(*(u16 *)&caps[12]), (u16 *)&caps[12]);
-	put_unaligned(speed, (u16 *)&caps[14]);
-	put_unaligned(be16_to_cpu(*(u16 *)&caps[16]), (u16 *)&caps[16]);
+	*(u16 *)&caps[8] = max_speed;
+	*(u16 *)&caps[12] = be16_to_cpup((__be16 *)&caps[12]);
+	*(u16 *)&caps[14] = speed;
+	*(u16 *)&caps[16] = be16_to_cpup((__be16 *)&caps[16]);
 
 	if (!speed) {
 		printk(KERN_INFO "ide-tape: %s: invalid tape speed "
 				"(assuming 650KB/sec)\n", drive->name);
-		put_unaligned(650, (u16 *)&caps[14]);
+		*(u16 *)&caps[14] = 650;
 	}
 	if (!max_speed) {
 		printk(KERN_INFO "ide-tape: %s: invalid max_speed "
 				"(assuming 650KB/sec)\n", drive->name);
-		put_unaligned(650, (u16 *)&caps[8]);
+		*(u16 *)&caps[8] = 650;
 	}
 
 	memcpy(&tape->caps, caps, 20);
-- 
GitLab


From 7fa897b91a3ea0f16c2873b869d7a0eef05acff4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 24 Jul 2008 22:53:34 +0200
Subject: [PATCH 504/853] ide: trivial sparse annotations

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-dma.c  | 2 +-
 drivers/ide/ide-iops.c | 6 ++----
 drivers/ide/ide-proc.c | 4 ++--
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index be99d463dcc..71c377a7bcf 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -173,7 +173,7 @@ EXPORT_SYMBOL_GPL(ide_build_sglist);
 int ide_build_dmatable (ide_drive_t *drive, struct request *rq)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
-	unsigned int *table	= hwif->dmatable_cpu;
+	__le32 *table = (__le32 *)hwif->dmatable_cpu;
 	unsigned int is_trm290	= (hwif->chipset == ide_trm290) ? 1 : 0;
 	unsigned int count = 0;
 	int i;
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 07da5fb9eaf..8aae9176451 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -510,10 +510,8 @@ void ide_fixstring (u8 *s, const int bytecount, const int byteswap)
 
 	if (byteswap) {
 		/* convert from big-endian to host byte order */
-		for (p = end ; p != s;) {
-			unsigned short *pp = (unsigned short *) (p -= 2);
-			*pp = ntohs(*pp);
-		}
+		for (p = end ; p != s;)
+			be16_to_cpus((u16 *)(p -= 2));
 	}
 	/* strip leading blanks */
 	while (s != end && *s == ' ')
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 151c91e933d..f66c9c3f6fc 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -105,7 +105,7 @@ static int proc_ide_read_identify
 	len = sprintf(page, "\n");
 
 	if (drive) {
-		unsigned short *val = (unsigned short *) page;
+		__le16 *val = (__le16 *)page;
 
 		err = taskfile_lib_get_identify(drive, page);
 		if (!err) {
@@ -113,7 +113,7 @@ static int proc_ide_read_identify
 			page = out;
 			do {
 				out += sprintf(out, "%04x%c",
-					le16_to_cpu(*val), (++i & 7) ? ' ' : '\n');
+					le16_to_cpup(val), (++i & 7) ? ' ' : '\n');
 				val += 1;
 			} while (i < (SECTOR_WORDS * 2));
 			len = out - page;
-- 
GitLab


From a0f403bc58dcaa118f02ec70c3ecfec1bc26e445 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Thu, 24 Jul 2008 22:53:34 +0200
Subject: [PATCH 505/853] palm_bk3710: add UltraDMA/100 support

This controller supports UltraDMA up to mode 5 but it should be clocked with
at least twice the data strobe frequency, so enable mode 5 for 100+ MHz IDECLK.

While at it, start passing the correct device to clk_get() -- it worked anyway
but WTF? :-/

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/arm/palm_bk3710.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c
index 65bb4b8fd57..3e842d60eae 100644
--- a/drivers/ide/arm/palm_bk3710.c
+++ b/drivers/ide/arm/palm_bk3710.c
@@ -82,6 +82,7 @@ static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
 	{100, 120},		/* UDMA Mode 2 */
 	{100, 90},		/* UDMA Mode 3 */
 	{100, 60},		/* UDMA Mode 4 */
+	{85,  40},		/* UDMA Mode 5 */
 };
 
 static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev,
@@ -334,12 +335,11 @@ static const struct ide_port_ops palm_bk3710_ports_ops = {
 	.cable_detect		= palm_bk3710_cable_detect,
 };
 
-static const struct ide_port_info __devinitdata palm_bk3710_port_info = {
+static struct ide_port_info __devinitdata palm_bk3710_port_info = {
 	.init_dma		= palm_bk3710_init_dma,
 	.port_ops		= &palm_bk3710_ports_ops,
 	.host_flags		= IDE_HFLAG_MMIO,
 	.pio_mask		= ATA_PIO4,
-	.udma_mask		= ATA_UDMA4,	/* (input clk 99MHz) */
 	.mwdma_mask		= ATA_MWDMA2,
 };
 
@@ -352,7 +352,7 @@ static int __devinit palm_bk3710_probe(struct platform_device *pdev)
 	int i, rc;
 	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
-	clk = clk_get(NULL, "IDECLK");
+	clk = clk_get(&pdev->dev, "IDECLK");
 	if (IS_ERR(clk))
 		return -ENODEV;
 
@@ -392,6 +392,9 @@ static int __devinit palm_bk3710_probe(struct platform_device *pdev)
 	hw.irq = irq->start;
 	hw.chipset = ide_palm3710;
 
+	palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 :
+							     ATA_UDMA5;
+
 	rc = ide_host_add(&palm_bk3710_port_info, hws, NULL);
 	if (rc)
 		goto out;
-- 
GitLab


From 96cc112c09b3c6674da01ef8b377f7a916883ea2 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:34 +0200
Subject: [PATCH 506/853] gayle: release resources on ide_host_add() failure

"gayle: reserve memory resources at once" patch temporarily removed
freeing of resources on failure (to ease conversion to ide_host_add()
interface).  This patch fixes it.

Thanks to Geert for noticing the issue.

Noticed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/legacy/gayle.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c
index dd5c467d8dd..51ba085d7aa 100644
--- a/drivers/ide/legacy/gayle.c
+++ b/drivers/ide/legacy/gayle.c
@@ -127,7 +127,7 @@ static int __init gayle_init(void)
     unsigned long phys_base, res_start, res_n;
     unsigned long base, ctrlport, irqport;
     ide_ack_intr_t *ack_intr;
-    int a4000, i;
+    int a4000, i, rc;
     hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
 
     if (!MACH_IS_AMIGA)
@@ -179,7 +179,11 @@ found:
 	hws[i] = &hw[i];
     }
 
-    return ide_host_add(NULL, hws, NULL);
+    rc = ide_host_add(NULL, hws, NULL);
+    if (rc)
+	release_mem_region(res_start, res_n);
+
+    return rc;
 }
 
 module_init(gayle_init);
-- 
GitLab


From e8e7b9eb11c34ee18bde8b7011af41938d1ad667 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 24 Jul 2008 22:53:35 +0200
Subject: [PATCH 507/853] ide-cd: fix oops when using growisofs

cdrom_read_capacity() will blindly return the capacity from the device
without sanity-checking it.  This later causes code in fs/buffer.c to
oops.

Fix this by checking that the device is telling us sensible things.

From: Jens Axboe <jens.axboe@oracle.com>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: <stable@kernel.org>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[bart: print device name instead of driver name]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
[harvey: blocklen is a big-endian value]
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 8f253e5f26a..e617cf08aef 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1311,13 +1311,30 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 
 	stat = ide_cd_queue_pc(drive, cmd, 0, &capbuf, &len, sense, 0,
 			       REQ_QUIET);
-	if (stat == 0) {
-		*capacity = 1 + be32_to_cpu(capbuf.lba);
-		*sectors_per_frame =
-			be32_to_cpu(capbuf.blocklen) >> SECTOR_BITS;
+	if (stat)
+		return stat;
+
+	/*
+	 * Sanity check the given block size
+	 */
+	switch (capbuf.blocklen) {
+	case __constant_cpu_to_be32(512):
+	case __constant_cpu_to_be32(1024):
+	case __constant_cpu_to_be32(2048):
+	case __constant_cpu_to_be32(4096):
+		break;
+	default:
+		printk(KERN_ERR "%s: weird block size %u\n",
+			drive->name, capbuf.blocklen);
+		printk(KERN_ERR "%s: default to 2kb block size\n",
+			drive->name);
+		capbuf.blocklen = __constant_cpu_to_be32(2048);
+		break;
 	}
 
-	return stat;
+	*capacity = 1 + be32_to_cpu(capbuf.lba);
+	*sectors_per_frame = be32_to_cpu(capbuf.blocklen) >> SECTOR_BITS;
+	return 0;
 }
 
 static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
-- 
GitLab


From eb34b2d90e71380ad19695188934230b06a3668b Mon Sep 17 00:00:00 2001
From: Jan Evert van Grootheest <j.e.van.grootheest@caiway.nl>
Date: Thu, 24 Jul 2008 22:53:35 +0200
Subject: [PATCH 508/853] ht6560b: update email address

Update email address.

From: Jan Evert van Grootheest <j.e.van.grootheest@caiway.nl>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/legacy/ht6560b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c
index 7bc8fd59ea9..7262b3ad45c 100644
--- a/drivers/ide/legacy/ht6560b.c
+++ b/drivers/ide/legacy/ht6560b.c
@@ -30,7 +30,7 @@
  *  Use hdparm utility to enable PIO mode support.
  *
  *  Author:    Mikko Ala-Fossi            <maf@iki.fi>
- *             Jan Evert van Grootheest   <janevert@caiway.nl>
+ *             Jan Evert van Grootheest   <j.e.van.grootheest@caiway.nl>
  *
  *  Try:  http://www.maf.iki.fi/~maf/ht6560b/
  */
-- 
GitLab


From 216f9a88feabf5ed574c3aa78447a6bd872910bc Mon Sep 17 00:00:00 2001
From: Jan Evert van Grootheest <j.e.van.grootheest@caiway.nl>
Date: Thu, 24 Jul 2008 22:53:35 +0200
Subject: [PATCH 509/853] ht6560b: remove old history

Remove the ancient version history. Git does a better job.

From: Jan Evert van Grootheest <j.e.van.grootheest@caiway.nl>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/legacy/ht6560b.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c
index 7262b3ad45c..98f7c95e39e 100644
--- a/drivers/ide/legacy/ht6560b.c
+++ b/drivers/ide/legacy/ht6560b.c
@@ -3,28 +3,6 @@
  */
 
 /*
- *
- *  Version 0.01        Initial version hacked out of ide.c
- *
- *  Version 0.02        Added support for PIO modes, auto-tune
- *
- *  Version 0.03        Some cleanups
- *
- *  Version 0.05        PIO mode cycle timings auto-tune using bus-speed
- *
- *  Version 0.06        Prefetch mode now defaults no OFF. To set
- *                      prefetch mode OFF/ON use "hdparm -p8/-p9".
- *                      Unmask irq is disabled when prefetch mode
- *                      is enabled.
- *
- *  Version 0.07        Trying to fix CD-ROM detection problem.
- *                      "Prefetch" mode bit OFF for ide disks and
- *                      ON for anything else.
- *
- *  Version 0.08        Need to force prefetch for CDs and other non-disk
- *                      devices. (not sure which devices exactly need
- *                      prefetch)
- *
  *  HT-6560B EIDE-controller support
  *  To activate controller support use kernel parameter "ide0=ht6560b".
  *  Use hdparm utility to enable PIO mode support.
-- 
GitLab


From e27420d046600cd3e4139ea1b6cba59a8b4050eb Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 24 Jul 2008 22:53:35 +0200
Subject: [PATCH 510/853] ide-scsi: remove kmalloced struct request

This converts ide-scsi to use blk_get/put_request instead of
kmalloc/kfree.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/scsi/ide-scsi.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 318ef382448..b40a673985a 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -207,15 +207,15 @@ static int idescsi_check_condition(ide_drive_t *drive,
 
 	/* stuff a sense request in front of our current request */
 	pc = kzalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC);
-	rq = kmalloc(sizeof(struct request), GFP_ATOMIC);
+	rq = blk_get_request(drive->queue, READ, GFP_ATOMIC);
 	buf = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_ATOMIC);
 	if (!pc || !rq || !buf) {
 		kfree(buf);
-		kfree(rq);
+		if (rq)
+			blk_put_request(rq);
 		kfree(pc);
 		return -ENOMEM;
 	}
-	blk_rq_init(NULL, rq);
 	rq->special = (char *) pc;
 	pc->rq = rq;
 	pc->buf = buf;
@@ -232,6 +232,7 @@ static int idescsi_check_condition(ide_drive_t *drive,
 		ide_scsi_hex_dump(pc->c, 6);
 	}
 	rq->rq_disk = scsi->disk;
+	rq->ref_count++;
 	memcpy(rq->cmd, pc->c, 12);
 	ide_do_drive_cmd(drive, rq);
 	return 0;
@@ -278,7 +279,7 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
 			SCSI_SENSE_BUFFERSIZE);
 		kfree(pc->buf);
 		kfree(pc);
-		kfree(rq);
+		blk_put_request(rq);
 		pc = opc;
 		rq = pc->rq;
 		pc->scsi_cmd->result = (CHECK_CONDITION << 1) |
@@ -309,7 +310,7 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
 	pc->done(pc->scsi_cmd);
 	spin_unlock_irqrestore(host->host_lock, flags);
 	kfree(pc);
-	kfree(rq);
+	blk_put_request(rq);
 	scsi->pc = NULL;
 	return 0;
 }
@@ -583,6 +584,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 	ide_drive_t *drive = scsi->drive;
 	struct request *rq = NULL;
 	struct ide_atapi_pc *pc = NULL;
+	int write = cmd->sc_data_direction == DMA_TO_DEVICE;
 
 	if (!drive) {
 		scmd_printk (KERN_ERR, cmd, "drive not present\n");
@@ -590,7 +592,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 	}
 	scsi = drive_to_idescsi(drive);
 	pc = kmalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC);
-	rq = kmalloc(sizeof(struct request), GFP_ATOMIC);
+	rq = blk_get_request(drive->queue, write, GFP_ATOMIC);
 	if (rq == NULL || pc == NULL) {
 		printk (KERN_ERR "ide-scsi: %s: out of memory\n", drive->name);
 		goto abort;
@@ -620,17 +622,18 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 		}
 	}
 
-	blk_rq_init(NULL, rq);
 	rq->special = (char *) pc;
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	spin_unlock_irq(host->host_lock);
+	rq->ref_count++;
 	memcpy(rq->cmd, pc->c, 12);
 	blk_execute_rq_nowait(drive->queue, scsi->disk, rq, 0, NULL);
 	spin_lock_irq(host->host_lock);
 	return 0;
 abort:
 	kfree (pc);
-	kfree (rq);
+	if (rq)
+		blk_put_request(rq);
 	cmd->result = DID_ERROR << 16;
 	done(cmd);
 	return 0;
@@ -678,7 +681,9 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 
 		if (blk_sense_request(scsi->pc->rq))
 			kfree(scsi->pc->buf);
-		kfree(scsi->pc->rq);
+		/* we need to call blk_put_request twice. */
+		blk_put_request(scsi->pc->rq);
+		blk_put_request(scsi->pc->rq);
 		kfree(scsi->pc);
 		scsi->pc = NULL;
 
@@ -730,7 +735,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 		kfree(scsi->pc->buf);
 	kfree(scsi->pc);
 	scsi->pc = NULL;
-	kfree(req);
+	blk_put_request(req);
 
 	/* now nuke the drive queue */
 	while ((req = elv_next_request(drive->queue))) {
-- 
GitLab


From 90d2c6bc68745d67cdbf00bab43818d90aa0dfb6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:36 +0200
Subject: [PATCH 511/853] ide: enable local IRQs in all handlers for
 TASKFILE_NO_DATA data phase

It is already done by task_no_data_intr() and there is no reason
not to do it in other TASKFILE_NO_DATA data phase handlers.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-taskfile.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index aeddbbd69e8..7fb6f1c8627 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -126,7 +126,10 @@ EXPORT_SYMBOL_GPL(do_rw_taskfile);
 static ide_startstop_t set_multmode_intr(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	u8 stat = hwif->tp_ops->read_status(hwif);
+	u8 stat;
+
+	local_irq_enable_in_hardirq();
+	stat = hwif->tp_ops->read_status(hwif);
 
 	if (OK_STAT(stat, READY_STAT, BAD_STAT))
 		drive->mult_count = drive->mult_req;
@@ -147,6 +150,8 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive)
 	int retries = 5;
 	u8 stat;
 
+	local_irq_enable_in_hardirq();
+
 	while (1) {
 		stat = hwif->tp_ops->read_status(hwif);
 		if ((stat & BUSY_STAT) == 0 || retries-- == 0)
@@ -170,7 +175,10 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive)
 static ide_startstop_t recal_intr(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	u8 stat = hwif->tp_ops->read_status(hwif);
+	u8 stat;
+
+	local_irq_enable_in_hardirq();
+	stat = hwif->tp_ops->read_status(hwif);
 
 	if (!OK_STAT(stat, READY_STAT, BAD_STAT))
 		return ide_error(drive, "recal_intr", stat);
-- 
GitLab


From d0b53f6866fa185da94968e62ae97923db18298c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:36 +0200
Subject: [PATCH 512/853] ide: remove stale comments from drivers/ide/Makefile

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Makefile | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 5d414e301a5..64e0ecdc4ed 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -1,13 +1,6 @@
 #
-# Makefile for the kernel ata, atapi, and ide block device drivers.
-#
-# 12 September 2000, Bartlomiej Zolnierkiewicz <bkz@linux-ide.org>
-# Rewritten to use lists instead of if-statements.
-#
-# Note : at this point, these files are compiled on all systems.
-# In the future, some of these should be built conditionally.
-#
 # link order is important here
+#
 
 EXTRA_CFLAGS				+= -Idrivers/ide
 
-- 
GitLab


From 52f3a771feafe3e9c56f8d00c8eb53fd8f578f2d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:36 +0200
Subject: [PATCH 513/853] ide: fix for EATA SCSI HBA in ATA emulating mode

IDE probing code used to skip devices attached to an EATA SCSI HBA
in ATA emulating mode, but because of warm-plug support, port I/O
resources are no longer freed if no devices are detected on a port,
and the decision about which driver to use is left up to the user.

Remove no longer valid EATA SCSI HBA quirk from do_identify().

Noticed-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f0c162488ec..d227fe425d2 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -134,18 +134,6 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 #endif
 	ide_fix_driveid(id);
 
-#if defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA)
-	/*
-	 * EATA SCSI controllers do a hardware ATA emulation:
-	 * Ignore them if there is a driver for them available.
-	 */
-	if ((id->model[0] == 'P' && id->model[1] == 'M') ||
-	    (id->model[0] == 'S' && id->model[1] == 'K')) {
-		printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model);
-		goto err_misc;
-	}
-#endif /* CONFIG_SCSI_EATA || CONFIG_SCSI_EATA_PIO */
-
 	/*
 	 *  WIN_IDENTIFY returns little-endian info,
 	 *  WIN_PIDENTIFY *usually* returns little-endian info.
-- 
GitLab


From 1b8ebad87b459e2e1333fbf28005977245ff5402 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:36 +0200
Subject: [PATCH 514/853] ide: use proper printk() KERN_* levels in ide-probe.c

While at it:

- fixup printk() messages in save_match() and hwif_init().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c | 51 ++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index d227fe425d2..994e41099b4 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -155,7 +155,8 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 	if (strstr(id->model, "E X A B Y T E N E S T"))
 		goto err_misc;
 
-	printk("%s: %s, ", drive->name, id->model);
+	printk(KERN_INFO "%s: %s, ", drive->name, id->model);
+
 	drive->present = 1;
 	drive->dead = 0;
 
@@ -164,16 +165,17 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 	 */
 	if (cmd == WIN_PIDENTIFY) {
 		u8 type = (id->config >> 8) & 0x1f;
-		printk("ATAPI ");
+
+		printk(KERN_CONT "ATAPI ");
 		switch (type) {
 			case ide_floppy:
 				if (!strstr(id->model, "CD-ROM")) {
 					if (!strstr(id->model, "oppy") &&
 					    !strstr(id->model, "poyp") &&
 					    !strstr(id->model, "ZIP"))
-						printk("cdrom or floppy?, assuming ");
+						printk(KERN_CONT "cdrom or floppy?, assuming ");
 					if (drive->media != ide_cdrom) {
-						printk ("FLOPPY");
+						printk(KERN_CONT "FLOPPY");
 						drive->removable = 1;
 						break;
 					}
@@ -186,25 +188,25 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 				/* kludge for Apple PowerBook internal zip */
 				if (!strstr(id->model, "CD-ROM") &&
 				    strstr(id->model, "ZIP")) {
-					printk ("FLOPPY");
+					printk(KERN_CONT "FLOPPY");
 					type = ide_floppy;
 					break;
 				}
 #endif
-				printk ("CD/DVD-ROM");
+				printk(KERN_CONT "CD/DVD-ROM");
 				break;
 			case ide_tape:
-				printk ("TAPE");
+				printk(KERN_CONT "TAPE");
 				break;
 			case ide_optical:
-				printk ("OPTICAL");
+				printk(KERN_CONT "OPTICAL");
 				drive->removable = 1;
 				break;
 			default:
-				printk("UNKNOWN (type %d)", type);
+				printk(KERN_CONT "UNKNOWN (type %d)", type);
 				break;
 		}
-		printk (" drive\n");
+		printk(KERN_CONT " drive\n");
 		drive->media = type;
 		/* an ATAPI device ignores DRDY */
 		drive->ready_stat = 0;
@@ -224,7 +226,9 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 		drive->removable = 1;
 
 	drive->media = ide_disk;
-	printk("%s DISK drive\n", (id->config == 0x848a) ? "CFA" : "ATA" );
+
+	printk(KERN_CONT "%s DISK drive\n",
+		(id->config == 0x848a) ? "CFA" : "ATA");
 
 	return;
 
@@ -375,7 +379,7 @@ static int try_to_identify (ide_drive_t *drive, u8 cmd)
 				/* Mmmm.. multiple IRQs..
 				 * don't know which was ours
 				 */
-				printk("%s: IRQ probe failed (0x%lx)\n",
+				printk(KERN_ERR "%s: IRQ probe failed (0x%lx)\n",
 					drive->name, cookie);
 			}
 		}
@@ -444,7 +448,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
 			return 4;
 	}
 #ifdef DEBUG
-	printk("probing for %s: present=%d, media=%d, probetype=%s\n",
+	printk(KERN_INFO "probing for %s: present=%d, media=%d, probetype=%s\n",
 		drive->name, drive->present, drive->media,
 		(cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI");
 #endif
@@ -522,7 +526,8 @@ static void enable_nest (ide_drive_t *drive)
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	u8 stat;
 
-	printk("%s: enabling %s -- ", hwif->name, drive->id->model);
+	printk(KERN_INFO "%s: enabling %s -- ", hwif->name, drive->id->model);
+
 	SELECT_DRIVE(drive);
 	msleep(50);
 	tp_ops->exec_command(hwif, EXABYTE_ENABLE_NEST);
@@ -871,7 +876,7 @@ static void save_match(ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match)
 	if (m && m->hwgroup && m->hwgroup != new->hwgroup) {
 		if (!new->hwgroup)
 			return;
-		printk("%s: potential irq problem with %s and %s\n",
+		printk(KERN_WARNING "%s: potential IRQ problem with %s and %s\n",
 			hwif->name, new->name, m->name);
 	}
 	if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */
@@ -1130,17 +1135,17 @@ static int init_irq (ide_hwif_t *hwif)
 	}
 
 #if !defined(__mc68000__)
-	printk("%s at 0x%03lx-0x%03lx,0x%03lx on irq %d", hwif->name,
+	printk(KERN_INFO "%s at 0x%03lx-0x%03lx,0x%03lx on irq %d", hwif->name,
 		io_ports->data_addr, io_ports->status_addr,
 		io_ports->ctl_addr, hwif->irq);
 #else
-	printk("%s at 0x%08lx on irq %d", hwif->name,
+	printk(KERN_INFO "%s at 0x%08lx on irq %d", hwif->name,
 		io_ports->data_addr, hwif->irq);
 #endif /* __mc68000__ */
 	if (match)
-		printk(" (%sed with %s)",
+		printk(KERN_CONT " (%sed with %s)",
 			hwif->sharing_irq ? "shar" : "serializ", match->name);
-	printk("\n");
+	printk(KERN_CONT "\n");
 
 	mutex_unlock(&ide_cfg_mtx);
 	return 0;
@@ -1275,7 +1280,7 @@ static int hwif_init(ide_hwif_t *hwif)
 	if (!hwif->irq) {
 		hwif->irq = __ide_default_irq(hwif->io_ports.data_addr);
 		if (!hwif->irq) {
-			printk("%s: DISABLED, NO IRQ\n", hwif->name);
+			printk(KERN_ERR "%s: disabled, no IRQ\n", hwif->name);
 			return 0;
 		}
 	}
@@ -1305,16 +1310,16 @@ static int hwif_init(ide_hwif_t *hwif)
 	 */
 	hwif->irq = __ide_default_irq(hwif->io_ports.data_addr);
 	if (!hwif->irq) {
-		printk("%s: Disabled unable to get IRQ %d.\n",
+		printk(KERN_ERR "%s: disabled, unable to get IRQ %d\n",
 			hwif->name, old_irq);
 		goto out;
 	}
 	if (init_irq(hwif)) {
-		printk("%s: probed IRQ %d and default IRQ %d failed.\n",
+		printk(KERN_ERR "%s: probed IRQ %d and default IRQ %d failed\n",
 			hwif->name, old_irq, hwif->irq);
 		goto out;
 	}
-	printk("%s: probed IRQ %d failed, using default.\n",
+	printk(KERN_WARNING "%s: probed IRQ %d failed, using default\n",
 		hwif->name, hwif->irq);
 
 done:
-- 
GitLab


From b30f3ae50cd03ef2ff433a5030fbf88dd8323528 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 24 Jul 2008 15:43:44 -0700
Subject: [PATCH 515/853] x86-64: Clean up 'save/restore_i387()' usage

Suresh Siddha wants to fix a possible FPU leakage in error conditions,
but the fact that save/restore_i387() are inlines in a header file makes
that harder to do than necessary.  So start off with an obvious cleanup.

This just moves the x86-64 version of save/restore_i387() out of the
header file, and moves it to the only file that it is actually used in:
arch/x86/kernel/signal_64.c.  So exposing it in a header file was wrong
to begin with.

[ Side note: I'd like to fix up some of the games we play with the
  32-bit version of these functions too, but that's a separate
  matter.  The 32-bit versions are shared - under different names
  at that! - by both the native x86-32 code and the x86-64 32-bit
  compatibility code ]

Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/signal_64.c | 53 ++++++++++++++++++++++++++++++++++++
 include/asm-x86/i387.h      | 54 -------------------------------------
 2 files changed, 53 insertions(+), 54 deletions(-)

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 47c3d249e63..b45ef8ddd65 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -53,6 +53,59 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
 
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387(struct _fpstate __user *buf)
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+			sizeof(tsk->thread.xstate->fxsave));
+
+	if ((unsigned long)buf % 16)
+		printk("save_i387: bad fpstate %p\n", buf);
+
+	if (!used_math())
+		return 0;
+	clear_used_math(); /* trigger finit */
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		err = save_i387_checking((struct i387_fxsave_struct __user *)
+					 buf);
+		if (err)
+			return err;
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
+		stts();
+	} else {
+		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
+				   sizeof(struct i387_fxsave_struct)))
+			return -1;
+	}
+	return 1;
+}
+
+/*
+ * This restores directly out of user space. Exceptions are handled.
+ */
+static inline int restore_i387(struct _fpstate __user *buf)
+{
+	struct task_struct *tsk = current;
+	int err;
+
+	if (!used_math()) {
+		err = init_fpu(tsk);
+		if (err)
+			return err;
+	}
+
+	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+		clts();
+		task_thread_info(current)->status |= TS_USEDFPU;
+	}
+	return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+}
 
 /*
  * Do a signal return; undo the signal stack.
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 37672f79dcc..96fa8449ff1 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -137,60 +137,6 @@ static inline void __save_init_fpu(struct task_struct *tsk)
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
-/*
- * Signal frame handlers.
- */
-
-static inline int save_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err = 0;
-
-	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-			sizeof(tsk->thread.xstate->fxsave));
-
-	if ((unsigned long)buf % 16)
-		printk("save_i387: bad fpstate %p\n", buf);
-
-	if (!used_math())
-		return 0;
-	clear_used_math(); /* trigger finit */
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		err = save_i387_checking((struct i387_fxsave_struct __user *)
-					 buf);
-		if (err)
-			return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
-	} else {
-		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
-				   sizeof(struct i387_fxsave_struct)))
-			return -1;
-	}
-	return 1;
-}
-
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-static inline int restore_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err;
-
-	if (!used_math()) {
-		err = init_fpu(tsk);
-		if (err)
-			return err;
-	}
-
-	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
-		clts();
-		task_thread_info(current)->status |= TS_USEDFPU;
-	}
-	return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
-}
-
 #else  /* CONFIG_X86_32 */
 
 extern void finit(void);
-- 
GitLab


From 4b9f12a3779c548b68bc9af7d94030868ad3aa1b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 24 Jul 2008 17:29:00 -0700
Subject: [PATCH 516/853] x86/oprofile/nmi_int: add Nehalem to list of ppro
 cores

..otherwise oprofile will fall back on that poor timer interrupt.

Also replace the unreadable chain of if-statements with a "switch()"
statement instead. It generates better code, and is a lot clearer.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/oprofile/nmi_int.c | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 7f3329b55d2..3f90289410e 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -369,20 +369,34 @@ static int __init ppro_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
-	if (cpu_model == 14)
+	switch (cpu_model) {
+	case 0 ... 2:
+		*cpu_type = "i386/ppro";
+		break;
+	case 3 ... 5:
+		*cpu_type = "i386/pii";
+		break;
+	case 6 ... 8:
+		*cpu_type = "i386/piii";
+		break;
+	case 9:
+		*cpu_type = "i386/p6_mobile";
+		break;
+	case 10 ... 13:
+		*cpu_type = "i386/p6";
+		break;
+	case 14:
 		*cpu_type = "i386/core";
-	else if (cpu_model == 15 || cpu_model == 23)
+		break;
+	case 15: case 23:
+		*cpu_type = "i386/core_2";
+		break;
+	case 26:
 		*cpu_type = "i386/core_2";
-	else if (cpu_model > 0xd)
+		break;
+	default:
+		/* Unknown */
 		return 0;
-	else if (cpu_model == 9) {
-		*cpu_type = "i386/p6_mobile";
-	} else if (cpu_model > 5) {
-		*cpu_type = "i386/piii";
-	} else if (cpu_model > 2) {
-		*cpu_type = "i386/pii";
-	} else {
-		*cpu_type = "i386/ppro";
 	}
 
 	model = &op_ppro_spec;
-- 
GitLab


From fb2e405fc1fc8b20d9c78eaa1c7fd5a297efde43 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 02:55:49 +0300
Subject: [PATCH 517/853] fix fs/nfs/nfsroot.c compilation

This fixes the following compile error caused by commit
f9247273cb69ba101877e946d2d83044409cc8c5 ("UFS: add const to parser
token table"):

    CC      fs/nfs/nfsroot.o
  /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/nfs/nfsroot.c:130: error: tokens causes a section type conflict
  make[3]: *** [fs/nfs/nfsroot.o] Error 1

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/nfsroot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 46763d1cd39..8478fc25dae 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t __initdata tokens = {
+static match_table_t __initconst tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rsize, "rsize=%u"},
 	{Opt_wsize, "wsize=%u"},
-- 
GitLab


From 9953ca6cb757fb317bb7cdd2fcbf9b88312e241b Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Tue, 27 May 2008 12:06:26 +0100
Subject: [PATCH 518/853] virtio: fix virtio_net xmit of freed skb bug

On Mon, 2008-05-26 at 17:42 +1000, Rusty Russell wrote:
> If we fail to transmit a packet, we assume the queue is full and put
> the skb into last_xmit_skb.  However, if more space frees up before we
> xmit it, we loop, and the result can be transmitting the same skb twice.
>
> Fix is simple: set skb to NULL if we've used it in some way, and check
> before sending.
...
> diff -r 564237b31993 drivers/net/virtio_net.c
> --- a/drivers/net/virtio_net.c	Mon May 19 12:22:00 2008 +1000
> +++ b/drivers/net/virtio_net.c	Mon May 19 12:24:58 2008 +1000
> @@ -287,21 +287,25 @@ again:
>  	free_old_xmit_skbs(vi);
>
>  	/* If we has a buffer left over from last time, send it now. */
> -	if (vi->last_xmit_skb) {
> +	if (unlikely(vi->last_xmit_skb)) {
>  		if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
>  			/* Drop this skb: we only queue one. */
>  			vi->dev->stats.tx_dropped++;
>  			kfree_skb(skb);
> +			skb = NULL;
>  			goto stop_queue;
>  		}
>  		vi->last_xmit_skb = NULL;

With this, we may drop an skb and then later in the function discover that
we could have sent it after all. Poor wee skb :)

How about the incremental patch below?

Cheers,
Mark.

Subject: [PATCH] virtio_net: Delay dropping tx skbs

Currently we drop the skb in start_xmit() if we have a
queued buffer and fail to transmit it.

However, if we delay dropping it until we've stopped the
queue and enabled the tx notification callback, then there
is a chance space might become available for it.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c28d7cb2035..06d5c43bb20 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -335,16 +335,11 @@ again:
 	free_old_xmit_skbs(vi);
 
 	/* If we has a buffer left over from last time, send it now. */
-	if (unlikely(vi->last_xmit_skb)) {
-		if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
-			/* Drop this skb: we only queue one. */
-			vi->dev->stats.tx_dropped++;
-			kfree_skb(skb);
-			skb = NULL;
-			goto stop_queue;
-		}
-		vi->last_xmit_skb = NULL;
-	}
+	if (unlikely(vi->last_xmit_skb) &&
+	    xmit_skb(vi, vi->last_xmit_skb) != 0)
+		goto stop_queue;
+
+	vi->last_xmit_skb = NULL;
 
 	/* Put new one in send queue and do transmit */
 	if (likely(skb)) {
@@ -370,6 +365,11 @@ stop_queue:
 		netif_start_queue(dev);
 		goto again;
 	}
+	if (skb) {
+		/* Drop this skb: we only queue one. */
+		vi->dev->stats.tx_dropped++;
+		kfree_skb(skb);
+	}
 	goto done;
 }
 
-- 
GitLab


From a9ea3fc6f2654a7407864fec983d1671d775b5ee Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 18 Apr 2008 11:21:42 +0800
Subject: [PATCH 519/853] virtio net: Add ethtool ops for SG/GSO

This patch adds some basic ethtool operations to virtio_net so
I could test SG without GSO (which was really useful because TSO
turned out to be buggy :)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (remove MTU setting)
---
 drivers/net/virtio_net.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 06d5c43bb20..ce37a7e9541 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -19,6 +19,7 @@
 //#define DEBUG
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ethtool.h>
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
@@ -408,6 +409,22 @@ static int virtnet_close(struct net_device *dev)
 	return 0;
 }
 
+static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtio_device *vdev = vi->vdev;
+
+	if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
+		return -ENOSYS;
+
+	return ethtool_op_set_tx_hw_csum(dev, data);
+}
+
+static struct ethtool_ops virtnet_ethtool_ops = {
+	.set_tx_csum = virtnet_set_tx_csum,
+	.set_sg = ethtool_op_set_sg,
+};
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int err;
@@ -427,6 +444,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = virtnet_netpoll;
 #endif
+	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */
-- 
GitLab


From 97402b96f87c6e32f75f1bffdd91a5ee144b679d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 18 Apr 2008 11:24:27 +0800
Subject: [PATCH 520/853] virtio net: Allow receiving SG packets

Finally this patch lets virtio_net receive GSO packets in addition
to sending them.  This can definitely be optimised for the non-GSO
case.  For comparison the Xen approach stores one page in each skb
and uses subsequent skb's pages to construct an SG skb instead of
preallocating the maximum amount of pages per skb.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (added feature bits)
---
 drivers/net/virtio_net.c | 44 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ce37a7e9541..0886b8a2d92 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -55,6 +55,9 @@ struct virtnet_info
 	struct tasklet_struct tasklet;
 	bool free_in_tasklet;
 
+	/* I like... big packets and I cannot lie! */
+	bool big_packets;
+
 	/* Receive & send queues. */
 	struct sk_buff_head recv;
 	struct sk_buff_head send;
@@ -89,6 +92,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 			unsigned len)
 {
 	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
+	int err;
 
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
@@ -96,10 +100,14 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 		goto drop;
 	}
 	len -= sizeof(struct virtio_net_hdr);
-	BUG_ON(len > MAX_PACKET_LEN);
-
-	skb_trim(skb, len);
 
+	err = pskb_trim(skb, len);
+	if (err) {
+		pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
+		dev->stats.rx_dropped++;
+		goto drop;
+	}
+	skb->truesize += skb->data_len;
 	dev->stats.rx_bytes += skb->len;
 	dev->stats.rx_packets++;
 
@@ -161,7 +169,7 @@ static void try_fill_recv(struct virtnet_info *vi)
 {
 	struct sk_buff *skb;
 	struct scatterlist sg[2+MAX_SKB_FRAGS];
-	int num, err;
+	int num, err, i;
 
 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 	for (;;) {
@@ -171,6 +179,24 @@ static void try_fill_recv(struct virtnet_info *vi)
 
 		skb_put(skb, MAX_PACKET_LEN);
 		vnet_hdr_to_sg(sg, skb);
+
+		if (vi->big_packets) {
+			for (i = 0; i < MAX_SKB_FRAGS; i++) {
+				skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+				f->page = alloc_page(GFP_ATOMIC);
+				if (!f->page)
+					break;
+
+				f->page_offset = 0;
+				f->size = PAGE_SIZE;
+
+				skb->data_len += PAGE_SIZE;
+				skb->len += PAGE_SIZE;
+
+				skb_shinfo(skb)->nr_frags++;
+			}
+		}
+
 		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
 		skb_queue_head(&vi->recv, skb);
 
@@ -485,6 +511,12 @@ static int virtnet_probe(struct virtio_device *vdev)
 	 * the timer. */
 	vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
 
+	/* If we can receive ANY GSO packets, we must allocate large ones. */
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
+	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
+	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+		vi->big_packets = true;
+
 	/* We expect two virtqueues, receive then send. */
 	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
 	if (IS_ERR(vi->rvq)) {
@@ -571,7 +603,9 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
 	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
-	VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY,
+	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
+	VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
+	VIRTIO_F_NOTIFY_ON_EMPTY,
 };
 
 static struct virtio_driver virtio_net = {
-- 
GitLab


From fb6813f480806d62361719e84777c8e00d3e86a8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:01 -0500
Subject: [PATCH 521/853] virtio: Recycle unused recv buffer pages for large
 skbs in net driver

If we hack the virtio_net driver to always allocate full-sized (64k+)
skbuffs, the driver slows down (lguest numbers):

  Time to receive 1GB (small buffers): 10.85 seconds
  Time to receive 1GB (64k+ buffers): 24.75 seconds

Of course, large buffers use up more space in the ring, so we increase
that from 128 to 2048:

  Time to receive 1GB (64k+ buffers, 2k ring): 16.61 seconds

If we recycle pages rather than using alloc_page/free_page:

  Time to receive 1GB (64k+ buffers, 2k ring, recycle pages): 10.81 seconds

This demonstrates that with efficient allocation, we don't need to
have a separate "small buffer" queue.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0886b8a2d92..0196a0df902 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -61,6 +61,9 @@ struct virtnet_info
 	/* Receive & send queues. */
 	struct sk_buff_head recv;
 	struct sk_buff_head send;
+
+	/* Chain pages by the private ptr. */
+	struct page *pages;
 };
 
 static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
@@ -73,6 +76,23 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
 	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
 }
 
+static void give_a_page(struct virtnet_info *vi, struct page *page)
+{
+	page->private = (unsigned long)vi->pages;
+	vi->pages = page;
+}
+
+static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+{
+	struct page *p = vi->pages;
+
+	if (p)
+		vi->pages = (struct page *)p->private;
+	else
+		p = alloc_page(gfp_mask);
+	return p;
+}
+
 static void skb_xmit_done(struct virtqueue *svq)
 {
 	struct virtnet_info *vi = svq->vdev->priv;
@@ -101,6 +121,15 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 	}
 	len -= sizeof(struct virtio_net_hdr);
 
+	if (len <= MAX_PACKET_LEN) {
+		unsigned int i;
+
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page);
+		skb->data_len = 0;
+		skb_shinfo(skb)->nr_frags = 0;
+	}
+
 	err = pskb_trim(skb, len);
 	if (err) {
 		pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
@@ -183,7 +212,7 @@ static void try_fill_recv(struct virtnet_info *vi)
 		if (vi->big_packets) {
 			for (i = 0; i < MAX_SKB_FRAGS; i++) {
 				skb_frag_t *f = &skb_shinfo(skb)->frags[i];
-				f->page = alloc_page(GFP_ATOMIC);
+				f->page = get_a_page(vi, GFP_ATOMIC);
 				if (!f->page)
 					break;
 
@@ -506,6 +535,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	vi->dev = dev;
 	vi->vdev = vdev;
 	vdev->priv = vi;
+	vi->pages = NULL;
 
 	/* If they give us a callback when all buffers are done, we don't need
 	 * the timer. */
@@ -591,6 +621,10 @@ static void virtnet_remove(struct virtio_device *vdev)
 	vdev->config->del_vq(vi->svq);
 	vdev->config->del_vq(vi->rvq);
 	unregister_netdev(vi->dev);
+
+	while (vi->pages)
+		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
+
 	free_netdev(vi->dev);
 }
 
-- 
GitLab


From 674bfc23c585b34c42263d73fb51710d49762a23 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:03 -0500
Subject: [PATCH 522/853] virtio: clarify that ABI is usable by any
 implementations

We want others to implement and use virtio, so it makes sense to BSD
license the non-__KERNEL__ parts of the headers to make this crystal
clear.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Mark McLoughlin <markmc@redhat.com>
Acked-by: Ryan Harper <ryanh@us.ibm.com>
Acked-by: Eric Van Hensbergen <ericvh@gmail.com>
Acked-by: Anthony Liguori <aliguori@us.ibm.com>
---
 include/linux/virtio_9p.h      | 2 ++
 include/linux/virtio_balloon.h | 2 ++
 include/linux/virtio_blk.h     | 2 ++
 include/linux/virtio_config.h  | 3 +++
 include/linux/virtio_console.h | 2 ++
 include/linux/virtio_net.h     | 2 ++
 include/linux/virtio_pci.h     | 5 ++---
 include/linux/virtio_rng.h     | 2 ++
 8 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 8eff0b53910..b3c4a60ceeb 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_9P_H
 #define _LINUX_VIRTIO_9P_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio console */
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 979524ee75b..c30c7bfbf39 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_BALLOON_H
 #define _LINUX_VIRTIO_BALLOON_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio_balloon */
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 5f79a5f9de7..6a66c7f30bc 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_BLK_H
 #define _LINUX_VIRTIO_BLK_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio_block */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index f364bbf63c3..7eb4b34d13b 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -1,5 +1,8 @@
 #ifndef _LINUX_VIRTIO_CONFIG_H
 #define _LINUX_VIRTIO_CONFIG_H
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
+
 /* Virtio devices use a standardized configuration space to define their
  * features and pass configuration information, but each implementation can
  * store and access that space differently. */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index ed2d4ead7eb..19a0da0dba4 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_VIRTIO_CONSOLE_H
 #define _LINUX_VIRTIO_CONSOLE_H
 #include <linux/virtio_config.h>
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
 
 /* The ID for virtio console */
 #define VIRTIO_ID_CONSOLE	3
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 38c0571820f..5e33761b9b8 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_NET_H
 #define _LINUX_VIRTIO_NET_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio_net */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index b3151659cf4..cdef3574293 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -9,9 +9,8 @@
  * Authors:
  *  Anthony Liguori  <aliguori@us.ibm.com>
  *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
  */
 
 #ifndef _LINUX_VIRTIO_PCI_H
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h
index 331afb6c9f6..1a85dab8a94 100644
--- a/include/linux/virtio_rng.h
+++ b/include/linux/virtio_rng.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_RNG_H
 #define _LINUX_VIRTIO_RNG_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio_rng */
-- 
GitLab


From 44653eae1407f79dff6f52fcf594ae84cb165ec4 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:04 -0500
Subject: [PATCH 523/853] virtio: don't always force a notification when ring
 is full

We force notification when the ring is full, even if the host has
indicated it doesn't want to know.  This seemed like a good idea at
the time: if we fill the transmit ring, we should tell the host
immediately.

Unfortunately this logic also applies to the receiving ring, which is
refilled constantly.  We should introduce real notification thresholds
to replace this logic.  Meanwhile, removing the logic altogether breaks
the heuristics which KVM uses, so we use a hack: only notify if there are
outgoing parts of the new buffer.

Here are the number of exits with lguest's crappy network implementation:
Before:
	network xmit 7859051 recv 236420
After:
	network xmit 7858610 recv 118136

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 72bf8bc0901..21d9a62767a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -87,8 +87,11 @@ static int vring_add_buf(struct virtqueue *_vq,
 	if (vq->num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 out + in, vq->num_free);
-		/* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */
-		vq->notify(&vq->vq);
+		/* FIXME: for historical reasons, we force a notify here if
+		 * there are outgoing parts to the buffer.  Presumably the
+		 * host should service the ring ASAP. */
+		if (out)
+			vq->notify(&vq->vq);
 		END_USE(vq);
 		return -ENOSPC;
 	}
-- 
GitLab


From e962fa660d391fc9b90988e6538c94c858c099f9 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Fri, 13 Jun 2008 13:46:40 +0100
Subject: [PATCH 524/853] virtio: Use bus_type probe and remove methods

Hook up to the probe() and remove() methods in bus_type
rather than device_driver. The latter has been preferred
since 2.6.16.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 7084e7e146c..fc85cba6457 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -71,13 +71,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
 			      dev->id.device, dev->id.vendor);
 }
 
-static struct bus_type virtio_bus = {
-	.name  = "virtio",
-	.match = virtio_dev_match,
-	.dev_attrs = virtio_dev_attrs,
-	.uevent = virtio_uevent,
-};
-
 static void add_status(struct virtio_device *dev, unsigned status)
 {
 	dev->config->set_status(dev, dev->config->get_status(dev) | status);
@@ -147,13 +140,20 @@ static int virtio_dev_remove(struct device *_d)
 	return 0;
 }
 
+static struct bus_type virtio_bus = {
+	.name  = "virtio",
+	.match = virtio_dev_match,
+	.dev_attrs = virtio_dev_attrs,
+	.uevent = virtio_uevent,
+	.probe = virtio_dev_probe,
+	.remove = virtio_dev_remove,
+};
+
 int register_virtio_driver(struct virtio_driver *driver)
 {
 	/* Catch this early. */
 	BUG_ON(driver->feature_table_size && !driver->feature_table);
 	driver->driver.bus = &virtio_bus;
-	driver->driver.probe = virtio_dev_probe;
-	driver->driver.remove = virtio_dev_remove;
 	return driver_register(&driver->driver);
 }
 EXPORT_SYMBOL_GPL(register_virtio_driver);
-- 
GitLab


From 066f4d82a67f621ddd547bfa4b9c94631d8457b0 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 29 May 2008 11:08:26 +0200
Subject: [PATCH 525/853] virtio_blk: check for hardsector size from host

Currently virtio_blk assumes a 512 byte hard sector size. This can cause
trouble / performance issues if the backing has a different block size
(like a file on an ext3 file system formatted with 4k block size or a dasd).

Let's add a feature flag that tells the guest to use a different hard sector
size than 512 byte.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c | 10 +++++++++-
 include/linux/virtio_blk.h |  3 +++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index dd7ea203f94..42251095134 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	int err;
 	u64 cap;
 	u32 v;
+	u32 blk_size;
 
 	if (index_to_minor(index) >= 1 << MINORBITS)
 		return -ENOSPC;
@@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err)
 		blk_queue_max_hw_segments(vblk->disk->queue, v);
 
+	/* Host can optionally specify the block size of the device */
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
+				offsetof(struct virtio_blk_config, blk_size),
+				&blk_size);
+	if (!err)
+		blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+
 	add_disk(vblk->disk);
 	return 0;
 
@@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = {
 
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
-	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO,
+	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 6a66c7f30bc..c1aef85243b 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -13,6 +13,7 @@
 #define VIRTIO_BLK_F_SEG_MAX	2	/* Indicates maximum # of segments */
 #define VIRTIO_BLK_F_GEOMETRY	4	/* Legacy geometry available  */
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 
 struct virtio_blk_config
 {
@@ -28,6 +29,8 @@ struct virtio_blk_config
 		__u8 heads;
 		__u8 sectors;
 	} geometry;
+	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+	__u32 blk_size;
 } __attribute__((packed));
 
 /* These two define direction. */
-- 
GitLab


From 611e097d7707741a336a0677d9d69bec40f29f3d Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 20 Jun 2008 15:24:08 +0200
Subject: [PATCH 526/853] hvc_console: rework setup to replace irq functions
 with callbacks

This patch tries to change hvc_console to not use request_irq/free_irq if
the backend does not use irqs. This allows virtio_console to use hvc_console
without having a linker reference to request_irq/free_irq.

In addition, together with patch 2/3 it improves the performance for virtio
console input. (an earlier version of this patch was tested by Yajin on lguest)

The irq specific code is moved to hvc_irq.c and selected by the drivers that
use irqs (System p, System i, XEN).

I replaced "int irq" with the opaque "int data". The request_irq and
free_irq calls are replaced with notifier_add and notifier_del. I have also
changed the code a bit to call the notifier_add and notifier_del inside the
spinlock area as the callbacks are found via hp->ops.

Changes since last version:
o remove ifdef
o reintroduce "irq_requested" as "notified"
o cleanups, sparse..

I did not move the timer based polling into a separate polling scheme. I
played with several variants, but it seems we need to sleep/schedule in
a thread even for irq based consoles, as there are throttling and buffer
size constraints.

I also kept hvc_struct defined in hvc_console.h so that hvc_irq.c can access
the irq_requested element.

Feedback is appreciated. virtio_console is currently the only available console
for kvm on s390. I plan to push this change as soon as all affected parties
agree on it. I would love to get test results from System p, Xen etc.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/Kconfig       |  5 +++
 drivers/char/Makefile      |  1 +
 drivers/char/hvc_console.c | 81 +++++++++-----------------------------
 drivers/char/hvc_console.h | 35 ++++++++++++++--
 drivers/char/hvc_irq.c     | 44 +++++++++++++++++++++
 drivers/char/hvc_iseries.c |  2 +
 drivers/char/hvc_vio.c     |  2 +
 drivers/char/hvc_xen.c     |  2 +
 8 files changed, 105 insertions(+), 67 deletions(-)
 create mode 100644 drivers/char/hvc_irq.c

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 67b07576f8b..d825361a6ba 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -578,11 +578,14 @@ config HVC_DRIVER
 	  It will automatically be selected if one of the back-end console drivers
 	  is selected.
 
+config HVC_IRQ
+	bool
 
 config HVC_CONSOLE
 	bool "pSeries Hypervisor Virtual Console support"
 	depends on PPC_PSERIES
 	select HVC_DRIVER
+	select HVC_IRQ
 	help
 	  pSeries machines when partitioned support a hypervisor virtual
 	  console. This driver allows each pSeries partition to have a console
@@ -593,6 +596,7 @@ config HVC_ISERIES
 	depends on PPC_ISERIES
 	default y
 	select HVC_DRIVER
+	select HVC_IRQ
 	help
 	  iSeries machines support a hypervisor virtual console.
 
@@ -614,6 +618,7 @@ config HVC_XEN
 	bool "Xen Hypervisor Console support"
 	depends on XEN
 	select HVC_DRIVER
+	select HVC_IRQ
 	default y
 	help
 	  Xen virtual console device driver
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 4b6e736cfa0..eb02c350680 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES)	+= hvc_iseries.o
 obj-$(CONFIG_HVC_RTAS)		+= hvc_rtas.o
 obj-$(CONFIG_HVC_BEAT)		+= hvc_beat.o
 obj-$(CONFIG_HVC_DRIVER)	+= hvc_console.o
+obj-$(CONFIG_HVC_IRQ)		+= hvc_irq.o
 obj-$(CONFIG_HVC_XEN)		+= hvc_xen.o
 obj-$(CONFIG_VIRTIO_CONSOLE)	+= virtio_console.o
 obj-$(CONFIG_RAW_DRIVER)	+= raw.o
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 2f9759d625c..2f5b7fb6704 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/kbd_kern.h>
 #include <linux/kernel.h>
-#include <linux/kref.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
 #include <linux/module.h>
@@ -75,23 +74,6 @@ static int hvc_init(void);
 static int sysrq_pressed;
 #endif
 
-struct hvc_struct {
-	spinlock_t lock;
-	int index;
-	struct tty_struct *tty;
-	unsigned int count;
-	int do_wakeup;
-	char *outbuf;
-	int outbuf_size;
-	int n_outbuf;
-	uint32_t vtermno;
-	struct hv_ops *ops;
-	int irq_requested;
-	int irq;
-	struct list_head next;
-	struct kref kref; /* ref count & hvc_struct lifetime */
-};
-
 /* dynamic list of hvc_struct instances */
 static LIST_HEAD(hvc_structs);
 
@@ -300,26 +282,12 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops)
 }
 
 /* Wake the sleeping khvcd */
-static void hvc_kick(void)
+void hvc_kick(void)
 {
 	hvc_kicked = 1;
 	wake_up_process(hvc_task);
 }
 
-static int hvc_poll(struct hvc_struct *hp);
-
-/*
- * NOTE: This API isn't used if the console adapter doesn't support interrupts.
- * In this case the console is poll driven.
- */
-static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
-{
-	/* if hvc_poll request a repoll, then kick the hvcd thread */
-	if (hvc_poll(dev_instance))
-		hvc_kick();
-	return IRQ_HANDLED;
-}
-
 static void hvc_unthrottle(struct tty_struct *tty)
 {
 	hvc_kick();
@@ -333,7 +301,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 {
 	struct hvc_struct *hp;
 	unsigned long flags;
-	int irq = 0;
 	int rc = 0;
 
 	/* Auto increments kref reference if found. */
@@ -352,18 +319,15 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 	tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */
 
 	hp->tty = tty;
-	/* Save for request_irq outside of spin_lock. */
-	irq = hp->irq;
-	if (irq)
-		hp->irq_requested = 1;
+
+	if (hp->ops->notifier_add)
+		rc = hp->ops->notifier_add(hp, hp->data);
 
 	spin_unlock_irqrestore(&hp->lock, flags);
-	/* check error, fallback to non-irq */
-	if (irq)
-		rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp);
+
 
 	/*
-	 * If the request_irq() fails and we return an error.  The tty layer
+	 * If the notifier fails we return an error.  The tty layer
 	 * will call hvc_close() after a failed open but we don't want to clean
 	 * up there so we'll clean up here and clear out the previously set
 	 * tty fields and return the kref reference.
@@ -371,7 +335,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 	if (rc) {
 		spin_lock_irqsave(&hp->lock, flags);
 		hp->tty = NULL;
-		hp->irq_requested = 0;
 		spin_unlock_irqrestore(&hp->lock, flags);
 		tty->driver_data = NULL;
 		kref_put(&hp->kref, destroy_hvc_struct);
@@ -386,7 +349,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 static void hvc_close(struct tty_struct *tty, struct file * filp)
 {
 	struct hvc_struct *hp;
-	int irq = 0;
 	unsigned long flags;
 
 	if (tty_hung_up_p(filp))
@@ -404,9 +366,8 @@ static void hvc_close(struct tty_struct *tty, struct file * filp)
 	spin_lock_irqsave(&hp->lock, flags);
 
 	if (--hp->count == 0) {
-		if (hp->irq_requested)
-			irq = hp->irq;
-		hp->irq_requested = 0;
+		if (hp->ops->notifier_del)
+			hp->ops->notifier_del(hp, hp->data);
 
 		/* We are done with the tty pointer now. */
 		hp->tty = NULL;
@@ -418,10 +379,6 @@ static void hvc_close(struct tty_struct *tty, struct file * filp)
 		 * waking periodically to check chars_in_buffer().
 		 */
 		tty_wait_until_sent(tty, HVC_CLOSE_WAIT);
-
-		if (irq)
-			free_irq(irq, hp);
-
 	} else {
 		if (hp->count < 0)
 			printk(KERN_ERR "hvc_close %X: oops, count is %d\n",
@@ -436,7 +393,6 @@ static void hvc_hangup(struct tty_struct *tty)
 {
 	struct hvc_struct *hp = tty->driver_data;
 	unsigned long flags;
-	int irq = 0;
 	int temp_open_count;
 
 	if (!hp)
@@ -458,13 +414,12 @@ static void hvc_hangup(struct tty_struct *tty)
 	hp->count = 0;
 	hp->n_outbuf = 0;
 	hp->tty = NULL;
-	if (hp->irq_requested)
-		/* Saved for use outside of spin_lock. */
-		irq = hp->irq;
-	hp->irq_requested = 0;
+
+	if (hp->ops->notifier_del)
+			hp->ops->notifier_del(hp, hp->data);
+
 	spin_unlock_irqrestore(&hp->lock, flags);
-	if (irq)
-		free_irq(irq, hp);
+
 	while(temp_open_count) {
 		--temp_open_count;
 		kref_put(&hp->kref, destroy_hvc_struct);
@@ -575,7 +530,7 @@ static u32 timeout = MIN_TIMEOUT;
 #define HVC_POLL_READ	0x00000001
 #define HVC_POLL_WRITE	0x00000002
 
-static int hvc_poll(struct hvc_struct *hp)
+int hvc_poll(struct hvc_struct *hp)
 {
 	struct tty_struct *tty;
 	int i, n, poll_mask = 0;
@@ -602,10 +557,10 @@ static int hvc_poll(struct hvc_struct *hp)
 	if (test_bit(TTY_THROTTLED, &tty->flags))
 		goto throttled;
 
-	/* If we aren't interrupt driven and aren't throttled, we always
+	/* If we aren't notifier driven and aren't throttled, we always
 	 * request a reschedule
 	 */
-	if (hp->irq == 0)
+	if (!hp->irq_requested)
 		poll_mask |= HVC_POLL_READ;
 
 	/* Read data if any */
@@ -733,7 +688,7 @@ static const struct tty_operations hvc_ops = {
 	.chars_in_buffer = hvc_chars_in_buffer,
 };
 
-struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
+struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data,
 					struct hv_ops *ops, int outbuf_size)
 {
 	struct hvc_struct *hp;
@@ -754,7 +709,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
 	memset(hp, 0x00, sizeof(*hp));
 
 	hp->vtermno = vtermno;
-	hp->irq = irq;
+	hp->data = data;
 	hp->ops = ops;
 	hp->outbuf_size = outbuf_size;
 	hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))];
diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h
index 42ffb17e15d..d9ce1091562 100644
--- a/drivers/char/hvc_console.h
+++ b/drivers/char/hvc_console.h
@@ -26,6 +26,7 @@
 
 #ifndef HVC_CONSOLE_H
 #define HVC_CONSOLE_H
+#include <linux/kref.h>
 
 /*
  * This is the max number of console adapters that can/will be found as
@@ -42,24 +43,50 @@
  */
 #define HVC_ALLOC_TTY_ADAPTERS	8
 
+struct hvc_struct {
+	spinlock_t lock;
+	int index;
+	struct tty_struct *tty;
+	unsigned int count;
+	int do_wakeup;
+	char *outbuf;
+	int outbuf_size;
+	int n_outbuf;
+	uint32_t vtermno;
+	struct hv_ops *ops;
+	int irq_requested;
+	int data;
+	struct list_head next;
+	struct kref kref; /* ref count & hvc_struct lifetime */
+};
 
 /* implemented by a low level driver */
 struct hv_ops {
 	int (*get_chars)(uint32_t vtermno, char *buf, int count);
 	int (*put_chars)(uint32_t vtermno, const char *buf, int count);
-};
 
-struct hvc_struct;
+	/* Callbacks for notification. Called in open and close */
+	int (*notifier_add)(struct hvc_struct *hp, int irq);
+	void (*notifier_del)(struct hvc_struct *hp, int irq);
+};
 
 /* Register a vterm and a slot index for use as a console (console_init) */
 extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
 
 /* register a vterm for hvc tty operation (module_init or hotplug add) */
-extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
+extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data,
 				struct hv_ops *ops, int outbuf_size);
-/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
+/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */
 extern int __devexit hvc_remove(struct hvc_struct *hp);
 
+/* data available */
+int hvc_poll(struct hvc_struct *hp);
+void hvc_kick(void);
+
+/* default notifier for irq based notification */
+extern int notifier_add_irq(struct hvc_struct *hp, int data);
+extern void notifier_del_irq(struct hvc_struct *hp, int data);
+
 
 #if defined(CONFIG_XMON) && defined(CONFIG_SMP)
 #include <asm/xmon.h>
diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c
new file mode 100644
index 00000000000..73a59cdb894
--- /dev/null
+++ b/drivers/char/hvc_irq.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright IBM Corp. 2001,2008
+ *
+ * This file contains the IRQ specific code for hvc_console
+ *
+ */
+
+#include <linux/interrupt.h>
+
+#include "hvc_console.h"
+
+static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
+{
+	/* if hvc_poll request a repoll, then kick the hvcd thread */
+	if (hvc_poll(dev_instance))
+		hvc_kick();
+	return IRQ_HANDLED;
+}
+
+/*
+ * For IRQ based systems these callbacks can be used
+ */
+int notifier_add_irq(struct hvc_struct *hp, int irq)
+{
+	int rc;
+
+	if (!irq) {
+		hp->irq_requested = 0;
+		return 0;
+	}
+	rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED,
+			   "hvc_console", hp);
+	if (!rc)
+		hp->irq_requested = 1;
+	return rc;
+}
+
+void notifier_del_irq(struct hvc_struct *hp, int irq)
+{
+	if (!irq)
+		return;
+	free_irq(irq, hp);
+	hp->irq_requested = 0;
+}
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c
index a08f8f981c1..b71c610fe5a 100644
--- a/drivers/char/hvc_iseries.c
+++ b/drivers/char/hvc_iseries.c
@@ -200,6 +200,8 @@ done:
 static struct hv_ops hvc_get_put_ops = {
 	.get_chars = get_chars,
 	.put_chars = put_chars,
+	.notifier_add = notifier_add_irq,
+	.notifier_del = notifier_del_irq,
 };
 
 static int __devinit hvc_vio_probe(struct vio_dev *vdev,
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c
index 79711aa4b41..93f3840c168 100644
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -80,6 +80,8 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count)
 static struct hv_ops hvc_get_put_ops = {
 	.get_chars = filtered_get_chars,
 	.put_chars = hvc_put_chars,
+	.notifier_add = notifier_add_irq,
+	.notifier_del = notifier_del_irq,
 };
 
 static int __devinit hvc_vio_probe(struct vio_dev *vdev,
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index db2ae421627..6b70aa66a58 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -100,6 +100,8 @@ static int read_console(uint32_t vtermno, char *buf, int len)
 static struct hv_ops hvc_ops = {
 	.get_chars = read_console,
 	.put_chars = write_console,
+	.notifier_add = notifier_add_irq,
+	.notifier_del = notifier_del_irq,
 };
 
 static int __init xen_init(void)
-- 
GitLab


From 91fcad19d03ed67cb50fd0e1913a8b89cc3ed3ec Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 20 Jun 2008 15:24:15 +0200
Subject: [PATCH 527/853] virtio_console: use virtqueue notification for
 hvc_console

This patch exploits the new notifier callbacks of the hvc_console. We can
use the virtio callbacks instead of the polling code.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/virtio_console.c | 40 +++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index dc17fe3a88b..d0f4eb6fdb7 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -46,6 +46,9 @@ static char *in, *inbuf;
 /* The operations for our console. */
 static struct hv_ops virtio_cons;
 
+/* The hvc device */
+static struct hvc_struct *hvc;
+
 /*D:310 The put_chars() callback is pretty straightforward.
  *
  * We turn the characters into a scatter-gather list, add it to the output
@@ -134,6 +137,27 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
 	return hvc_instantiate(0, 0, &virtio_cons);
 }
 
+/*
+ * we support only one console, the hvc struct is a global var
+ * There is no need to do anything
+ */
+static int notifier_add_vio(struct hvc_struct *hp, int data)
+{
+	hp->irq_requested = 1;
+	return 0;
+}
+
+static void notifier_del_vio(struct hvc_struct *hp, int data)
+{
+	hp->irq_requested = 0;
+}
+
+static void hvc_handle_input(struct virtqueue *vq)
+{
+	if (hvc_poll(hvc))
+		hvc_kick();
+}
+
 /*D:370 Once we're further in boot, we get probed like any other virtio device.
  * At this stage we set up the output virtqueue.
  *
@@ -144,7 +168,6 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
 static int __devinit virtcons_probe(struct virtio_device *dev)
 {
 	int err;
-	struct hvc_struct *hvc;
 
 	vdev = dev;
 
@@ -158,7 +181,7 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	/* Find the input queue. */
 	/* FIXME: This is why we want to wean off hvc: we do nothing
 	 * when input comes in. */
-	in_vq = vdev->config->find_vq(vdev, 0, NULL);
+	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
 	if (IS_ERR(in_vq)) {
 		err = PTR_ERR(in_vq);
 		goto free;
@@ -173,15 +196,18 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	/* Start using the new console output. */
 	virtio_cons.get_chars = get_chars;
 	virtio_cons.put_chars = put_chars;
+	virtio_cons.notifier_add = notifier_add_vio;
+	virtio_cons.notifier_del = notifier_del_vio;
 
 	/* The first argument of hvc_alloc() is the virtual console number, so
-	 * we use zero.  The second argument is the interrupt number; we
-	 * currently leave this as zero: it would be better not to use the
-	 * hvc mechanism and fix this (FIXME!).
+	 * we use zero.  The second argument is the parameter for the
+	 * notification mechanism (like irq number). We currently leave this
+	 * as zero, virtqueues have implicit notifications.
 	 *
 	 * The third argument is a "struct hv_ops" containing the put_chars()
-	 * and get_chars() pointers.  The final argument is the output buffer
-	 * size: we can do any size, so we put PAGE_SIZE here. */
+	 * get_chars(), notifier_add() and notifier_del() pointers.
+	 * The final argument is the output buffer size: we can do any size,
+	 * so we put PAGE_SIZE here. */
 	hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
 	if (IS_ERR(hvc)) {
 		err = PTR_ERR(hvc);
-- 
GitLab


From 7721c494a28e06543a3d6aa412957aa783a4a531 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 25 Jul 2008 12:06:06 -0500
Subject: [PATCH 528/853] virtio: console as a config option

I also added a small Kconfig change that allows the user to specify the
virtio console in menuconfig.

(Fixes to export symbols from Stephen Rothwell <sfr@canb.auug.org.au>)
(Fixes for CONFIG_VIRTIO_CONSOLE=y vs CONFIG_VIRTIO=m from Christian himself)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
---
 drivers/char/Kconfig       | 6 +++++-
 drivers/char/hvc_console.c | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index d825361a6ba..6c070dc5f2d 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -624,8 +624,12 @@ config HVC_XEN
 	  Xen virtual console device driver
 
 config VIRTIO_CONSOLE
-	bool
+	tristate "Virtio console"
+	depends on VIRTIO
 	select HVC_DRIVER
+	help
+	  Virtio console for use with lguest and other hypervisors.
+
 
 config HVCS
 	tristate "IBM Hypervisor Virtual Console Server support"
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 2f5b7fb6704..02aac104842 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -280,6 +280,7 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(hvc_instantiate);
 
 /* Wake the sleeping khvcd */
 void hvc_kick(void)
@@ -287,6 +288,7 @@ void hvc_kick(void)
 	hvc_kicked = 1;
 	wake_up_process(hvc_task);
 }
+EXPORT_SYMBOL_GPL(hvc_kick);
 
 static void hvc_unthrottle(struct tty_struct *tty)
 {
@@ -629,6 +631,7 @@ int hvc_poll(struct hvc_struct *hp)
 
 	return poll_mask;
 }
+EXPORT_SYMBOL_GPL(hvc_poll);
 
 /*
  * This kthread is either polling or interrupt driven.  This is determined by
@@ -739,6 +742,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data,
 
 	return hp;
 }
+EXPORT_SYMBOL_GPL(hvc_alloc);
 
 int __devexit hvc_remove(struct hvc_struct *hp)
 {
-- 
GitLab


From faeba830b086bc9e58748869054e994cb09693cd Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 20 Jun 2008 15:24:18 +0200
Subject: [PATCH 529/853] s390: use virtio_console for KVM on s390

This patch enables virtio_console as the default console on kvm for
s390. We currently use the same notify hack as lguest for early
console output. I will try to address this for lguest and s390 later.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/s390/Kconfig             |  1 +
 arch/s390/kernel/setup.c      |  4 +++-
 drivers/s390/kvm/kvm_virtio.c | 20 ++++++++++++++++++++
 include/asm-s390/kvm_virtio.h | 10 ++++++++++
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index eb530b4128b..2ed88122be9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -565,6 +565,7 @@ bool "s390 guest support (EXPERIMENTAL)"
 	depends on 64BIT && EXPERIMENTAL
 	select VIRTIO
 	select VIRTIO_RING
+	select VIRTIO_CONSOLE
 	help
 	  Select this option if you want to run the kernel under s390 linux
 endmenu
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b358e18273b..62122bad1e3 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -54,6 +54,7 @@
 #include <asm/sections.h>
 #include <asm/ebcdic.h>
 #include <asm/compat.h>
+#include <asm/kvm_virtio.h>
 
 long psw_kernel_bits	= (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
 			   PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
@@ -766,7 +767,8 @@ setup_arch(char **cmdline_p)
 		printk("We are running under VM (64 bit mode)\n");
 	else if (MACHINE_IS_KVM) {
 		printk("We are running under KVM (64 bit mode)\n");
-		add_preferred_console("ttyS", 1, NULL);
+		add_preferred_console("hvc", 0, NULL);
+		s390_virtio_console_init();
 	} else
 		printk("We are running native (64 bit mode)\n");
 #endif /* CONFIG_64BIT */
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 5ab34340919..d41f234bb2c 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -15,6 +15,7 @@
 #include <linux/err.h>
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
 #include <linux/interrupt.h>
 #include <linux/virtio_ring.h>
 #include <linux/pfn.h>
@@ -333,6 +334,25 @@ static int __init kvm_devices_init(void)
 	return 0;
 }
 
+/* code for early console output with virtio_console */
+static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+{
+	char scratch[17];
+	unsigned int len = count;
+
+	if (len > sizeof(scratch) - 1)
+		len = sizeof(scratch) - 1;
+	scratch[len] = '\0';
+	memcpy(scratch, buf, len);
+	kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, __pa(scratch));
+	return len;
+}
+
+void s390_virtio_console_init(void)
+{
+	virtio_cons_early_init(early_put_chars);
+}
+
 /*
  * We do this after core stuff, but before the drivers.
  */
diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h
index 5c871a990c2..146100224de 100644
--- a/include/asm-s390/kvm_virtio.h
+++ b/include/asm-s390/kvm_virtio.h
@@ -50,4 +50,14 @@ struct kvm_vqconfig {
 #define KVM_S390_VIRTIO_RESET		1
 #define KVM_S390_VIRTIO_SET_STATUS	2
 
+#ifdef __KERNEL__
+/* early virtio console setup */
+#ifdef CONFIG_VIRTIO_CONSOLE
+extern void s390_virtio_console_init(void);
+#else
+static inline void s390_virtio_console_init(void)
+{
+}
+#endif /* CONFIG_VIRTIO_CONSOLE */
+#endif /* __KERNEL__ */
 #endif
-- 
GitLab


From dd7c7bc46211785a1aa7d70feb15830f62682b3c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:07 -0500
Subject: [PATCH 530/853] virtio: Formally reserve bits 28-31 to be 'transport'
 features.

We assign feature bits as required, but it makes sense to reserve some
for the particular transport, rather than the particular device.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio.c       | 5 +++++
 include/linux/virtio_config.h | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index fc85cba6457..baf103361e3 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -113,6 +113,11 @@ static int virtio_dev_probe(struct device *_d)
 			set_bit(f, dev->features);
 	}
 
+	/* Transport features are always preserved to pass to set_features. */
+	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
+		if (device_features & (1 << i))
+			set_bit(i, dev->features);
+
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7eb4b34d13b..5a30cfb7934 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -18,6 +18,12 @@
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
+/* Some virtio feature bits (currently bits 28 through 31) are reserved for the
+ * transport being used (eg. virtio_ring), the rest are per-device feature
+ * bits. */
+#define VIRTIO_TRANSPORT_F_START	28
+#define VIRTIO_TRANSPORT_F_END		32
+
 /* Do we get callbacks when the ring is completely used, even if we've
  * suppressed them? */
 #define VIRTIO_F_NOTIFY_ON_EMPTY	24
-- 
GitLab


From c624896e488ba2bff5ae497782cfb265c8b00646 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:07 -0500
Subject: [PATCH 531/853] virtio: Rename set_features to finalize_features

Rather than explicitly handing the features to the lower-level, we just
hand the virtio_device and have it set the features.  This makes it clear
that it has the chance to manipulate the features of the device at this
point (and that all feature negotiation is already done).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c | 11 ++++++-----
 drivers/s390/kvm/kvm_virtio.c  | 11 ++++++-----
 drivers/virtio/virtio.c        |  5 ++---
 drivers/virtio/virtio_pci.c    | 10 ++++++----
 include/linux/virtio_config.h  |  7 ++++---
 5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 1a8de57289e..54fdc2aa480 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -98,16 +98,17 @@ static u32 lg_get_features(struct virtio_device *vdev)
 	return features;
 }
 
-static void lg_set_features(struct virtio_device *vdev, u32 features)
+static void lg_finalize_features(struct virtio_device *vdev)
 {
-	unsigned int i;
+	unsigned int i, bits;
 	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = lg_features(desc) + desc->feature_len;
 
 	memset(out_features, 0, desc->feature_len);
-	for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
-		if (features & (1 << i))
+	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++) {
+		if (test_bit(i, vdev->features))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 }
@@ -297,7 +298,7 @@ static void lg_del_vq(struct virtqueue *vq)
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
-	.set_features = lg_set_features,
+	.finalize_features = lg_finalize_features,
 	.get = lg_get,
 	.set = lg_set,
 	.get_status = lg_get_status,
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index d41f234bb2c..5953510e7d5 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -88,16 +88,17 @@ static u32 kvm_get_features(struct virtio_device *vdev)
 	return features;
 }
 
-static void kvm_set_features(struct virtio_device *vdev, u32 features)
+static void kvm_finalize_features(struct virtio_device *vdev)
 {
-	unsigned int i;
+	unsigned int i, bits;
 	struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = kvm_vq_features(desc) + desc->feature_len;
 
 	memset(out_features, 0, desc->feature_len);
-	for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
-		if (features & (1 << i))
+	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++) {
+		if (test_bit(i, vdev->features))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 }
@@ -223,7 +224,7 @@ static void kvm_del_vq(struct virtqueue *vq)
  */
 static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.get_features = kvm_get_features,
-	.set_features = kvm_set_features,
+	.finalize_features = kvm_finalize_features,
 	.get = kvm_get,
 	.set = kvm_set,
 	.get_status = kvm_get_status,
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index baf103361e3..5b78fd0aff0 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -113,7 +113,7 @@ static int virtio_dev_probe(struct device *_d)
 			set_bit(f, dev->features);
 	}
 
-	/* Transport features are always preserved to pass to set_features. */
+	/* Transport features always preserved to pass to finalize_features. */
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
 		if (device_features & (1 << i))
 			set_bit(i, dev->features);
@@ -122,8 +122,7 @@ static int virtio_dev_probe(struct device *_d)
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	else {
-		/* They should never have set feature bits beyond 32 */
-		dev->config->set_features(dev, dev->features[0]);
+		dev->config->finalize_features(dev);
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 	}
 	return err;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index eae7236310e..9855975a72a 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -94,12 +94,14 @@ static u32 vp_get_features(struct virtio_device *vdev)
 	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
 }
 
-/* virtio config->set_features() implementation */
-static void vp_set_features(struct virtio_device *vdev, u32 features)
+/* virtio config->finalize_features() implementation */
+static void vp_finalize_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+	/* We only support 32 feature bits. */
+	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
+	iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
 }
 
 /* virtio config->get() implementation */
@@ -297,7 +299,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.find_vq	= vp_find_vq,
 	.del_vq		= vp_del_vq,
 	.get_features	= vp_get_features,
-	.set_features	= vp_set_features,
+	.finalize_features = vp_finalize_features,
 };
 
 /* the PCI probing function */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 5a30cfb7934..bf8ec283b23 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -61,9 +61,10 @@
  * @get_features: get the array of feature bits for this device.
  *	vdev: the virtio_device
  *	Returns the first 32 feature bits (all we currently need).
- * @set_features: confirm what device features we'll be using.
+ * @finalize_features: confirm what device features we'll be using.
  *	vdev: the virtio_device
- *	feature: the first 32 feature bits
+ *	This gives the final feature bits for the device: it can change
+ *	the dev->feature bits if it wants.
  */
 struct virtio_config_ops
 {
@@ -79,7 +80,7 @@ struct virtio_config_ops
 				     void (*callback)(struct virtqueue *));
 	void (*del_vq)(struct virtqueue *vq);
 	u32 (*get_features)(struct virtio_device *vdev);
-	void (*set_features)(struct virtio_device *vdev, u32 features);
+	void (*finalize_features)(struct virtio_device *vdev);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
-- 
GitLab


From e34f87256794b87e7f4a8f1812538be7b7b5214c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:13 -0500
Subject: [PATCH 532/853] virtio: Add transport feature handling stub for
 virtio_ring.

To prepare for virtio_ring transport feature bits, hook in a call in
all the users to manipulate them.  This currently just clears all the
bits, since it doesn't understand any features.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c |  3 +++
 drivers/s390/kvm/kvm_virtio.c  |  3 +++
 drivers/virtio/virtio_pci.c    |  3 +++
 drivers/virtio/virtio_ring.c   | 16 ++++++++++++++++
 include/linux/virtio_ring.h    |  2 ++
 5 files changed, 27 insertions(+)

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 54fdc2aa480..37344aaee22 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -105,6 +105,9 @@ static void lg_finalize_features(struct virtio_device *vdev)
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = lg_features(desc) + desc->feature_len;
 
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
 	memset(out_features, 0, desc->feature_len);
 	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++) {
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 5953510e7d5..79954bd6bfa 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -95,6 +95,9 @@ static void kvm_finalize_features(struct virtio_device *vdev)
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = kvm_vq_features(desc) + desc->feature_len;
 
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
 	memset(out_features, 0, desc->feature_len);
 	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++) {
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 9855975a72a..c7dc37c7cce 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -99,6 +99,9 @@ static void vp_finalize_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
 	/* We only support 32 feature bits. */
 	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
 	iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 21d9a62767a..6eb5303fed1 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -18,6 +18,7 @@
  */
 #include <linux/virtio.h>
 #include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
 #include <linux/device.h>
 
 #ifdef DEBUG
@@ -323,4 +324,19 @@ void vring_del_virtqueue(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
 
+/* Manipulates transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev)
+{
+	unsigned int i;
+
+	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
+		switch (i) {
+		default:
+			/* We don't understand this bit. */
+			clear_bit(i, vdev->features);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(vring_transport_features);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index abe481ed990..c4a598fb382 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -120,6 +120,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq));
 void vring_del_virtqueue(struct virtqueue *vq);
+/* Filter out transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev);
 
 irqreturn_t vring_interrupt(int irq, void *_vq);
 #endif /* __KERNEL__ */
-- 
GitLab


From ed9559d38a87a44e3bda87d73a50aab92471d7dc Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:11:09 +1000
Subject: [PATCH 533/853] Label kthread_create() with printf attribute tag.

Obvious misc patch that has been in my queue (& linux-next) for over a cycle.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kthread.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 00dd957e245..aabc8a13ba7 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -6,7 +6,8 @@
 
 struct task_struct *kthread_create(int (*threadfn)(void *data),
 				   void *data,
-				   const char namefmt[], ...);
+				   const char namefmt[], ...)
+	__attribute__((format(printf, 3, 4)));
 
 /**
  * kthread_run - create and wake a thread.
-- 
GitLab


From 483fad1c3fa1060d7e6710e84a065ad514571739 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Tue, 22 Jul 2008 04:48:46 +1000
Subject: [PATCH 534/853] ELF loader support for auxvec base platform string

Some IBM POWER-based platforms have the ability to run in a
mode which mostly appears to the OS as a different processor from the
actual hardware.  For example, a Power6 system may appear to be a
Power5+, which makes the AT_PLATFORM value "power5+".  This means that
programs are restricted to the ISA supported by Power5+;
Power6-specific instructions are treated as illegal.

However, some applications (virtual machines, optimized libraries) can
benefit from knowledge of the underlying CPU model.  A new aux vector
entry, AT_BASE_PLATFORM, will denote the actual hardware.  For
example, on a Power6 system in Power5+ compatibility mode, AT_PLATFORM
will be "power5+" and AT_BASE_PLATFORM will be "power6".  The idea is
that AT_PLATFORM indicates the instruction set supported, while
AT_BASE_PLATFORM indicates the underlying microarchitecture.

If the architecture has defined ELF_BASE_PLATFORM, copy that value to
the user stack in the same manner as ELF_PLATFORM.

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 fs/binfmt_elf.c        | 28 ++++++++++++++++++++++++++++
 include/linux/auxvec.h |  6 +++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 639d2d8b571..742c8f53048 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
 #endif
 
+#ifndef ELF_BASE_PLATFORM
+/*
+ * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
+ * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
+ * will be copied to the user stack in the same manner as AT_PLATFORM.
+ */
+#define ELF_BASE_PLATFORM NULL
+#endif
+
 static int
 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 		unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	elf_addr_t __user *envp;
 	elf_addr_t __user *sp;
 	elf_addr_t __user *u_platform;
+	elf_addr_t __user *u_base_platform;
 	const char *k_platform = ELF_PLATFORM;
+	const char *k_base_platform = ELF_BASE_PLATFORM;
 	int items;
 	elf_addr_t *elf_info;
 	int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 			return -EFAULT;
 	}
 
+	/*
+	 * If this architecture has a "base" platform capability
+	 * string, copy it to userspace.
+	 */
+	u_base_platform = NULL;
+	if (k_base_platform) {
+		size_t len = strlen(k_base_platform) + 1;
+
+		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
+		if (__copy_to_user(u_base_platform, k_base_platform, len))
+			return -EFAULT;
+	}
+
 	/* Create the ELF interpreter info */
 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 		NEW_AUX_ENT(AT_PLATFORM,
 			    (elf_addr_t)(unsigned long)u_platform);
 	}
+	if (k_base_platform) {
+		NEW_AUX_ENT(AT_BASE_PLATFORM,
+			    (elf_addr_t)(unsigned long)u_base_platform);
+	}
 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 	}
diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index 0da17d14fd1..d7afa9dd663 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -26,9 +26,13 @@
 
 #define AT_SECURE 23   /* secure mode boolean */
 
+#define AT_BASE_PLATFORM 24	/* string identifying real platform, may
+				 * differ from AT_PLATFORM. */
+
 #define AT_EXECFN  31	/* filename of program */
+
 #ifdef __KERNEL__
-#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
   /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
 #endif
 
-- 
GitLab


From 9115d13453dee22473a1e8cacc90a8d64a9c4bc9 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Wed, 16 Jul 2008 09:58:51 +1000
Subject: [PATCH 535/853] powerpc: Enable AT_BASE_PLATFORM aux vector

Stash the first platform string matched by identify_cpu() in
powerpc_base_platform, and supply that to the ELF loader for the value
of AT_BASE_PLATFORM.

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/cputable.c | 11 +++++++++++
 include/asm-powerpc/cputable.h |  2 ++
 include/asm-powerpc/elf.h      |  8 ++++++++
 3 files changed, 21 insertions(+)

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b936a1dd0a5..25a052c1675 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,6 +23,9 @@
 struct cpu_spec* cur_cpu_spec = NULL;
 EXPORT_SYMBOL(cur_cpu_spec);
 
+/* The platform string corresponding to the real PVR */
+const char *powerpc_base_platform;
+
 /* NOTE:
  * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
  * the responsibility of the appropriate CPU save/restore functions to
@@ -1652,6 +1655,14 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
 			} else
 				*t = *s;
 			*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+
+			/*
+			 * Set the base platform string once; assumes
+			 * we're called with real pvr first.
+			 */
+			if (powerpc_base_platform == NULL)
+				powerpc_base_platform = t->platform;
+
 #if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
 			/* ppc64 and booke expect identify_cpu to also call
 			 * setup_cpu for that processor. I will consolidate
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 2a3e9075a5a..ef8a248dfd5 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -127,6 +127,8 @@ extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
 extern void do_feature_fixups(unsigned long value, void *fixup_start,
 			      void *fixup_end);
 
+extern const char *powerpc_base_platform;
+
 #endif /* __ASSEMBLY__ */
 
 /* CPU kernel features */
diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h
index 89664675b46..80d1f399ee5 100644
--- a/include/asm-powerpc/elf.h
+++ b/include/asm-powerpc/elf.h
@@ -217,6 +217,14 @@ typedef elf_vrregset_t elf_fpxregset_t;
 
 #define ELF_PLATFORM	(cur_cpu_spec->platform)
 
+/* While ELF_PLATFORM indicates the ISA supported by the platform, it
+ * may not accurately reflect the underlying behavior of the hardware
+ * (as in the case of running in Power5+ compatibility mode on a
+ * Power6 machine).  ELF_BASE_PLATFORM allows ld.so to load libraries
+ * that are tuned for the real hardware.
+ */
+#define ELF_BASE_PLATFORM (powerpc_base_platform)
+
 #ifdef __powerpc64__
 # define ELF_PLAT_INIT(_r, load_addr)	do {	\
 	_r->gpr[2] = load_addr; 		\
-- 
GitLab


From 00bf6e906156b07cd641fe154ad0efe78f989692 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 23 Jul 2008 10:44:58 +1000
Subject: [PATCH 536/853] powerpc: Fallout from sysdev API changes

A struct sysdev_attribute * parameter was added to the show routine by
commit 4a0b2b4dbe1335b8b9886ba3dc85a145d5d938ed "sysdev: Pass the
attribute to the low level sysdev show/store function".

This eliminates a warning:

arch/powerpc/kernel/sysfs.c:538: warning: initialization from incompatible pointer type

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/sysfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index aba0ba95f06..800e5e9a087 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -529,7 +529,8 @@ static void register_nodes(void)
 #endif
 
 /* Only valid if CPU is present. */
-static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+static ssize_t show_physical_id(struct sys_device *dev,
+				struct sysdev_attribute *attr, char *buf)
 {
 	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
 
-- 
GitLab


From d6a61bfc06d6f2248f3e75f208d64e794082013c Mon Sep 17 00:00:00 2001
From: Luis Machado <luisgpm@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 02:10:41 +1000
Subject: [PATCH 537/853] powerpc: BookE hardware watchpoint support

This patch implements support for HW based watchpoint via the
DBSR_DAC (Data Address Compare) facility of the BookE processors.

It does so by interfacing with the existing DABR breakpoint code
and adding the necessary bits and pieces for the new bits to
be properly set or cleared.

Signed-off-by: Luis Machado <luisgpm@br.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/entry_32.S |  6 +--
 arch/powerpc/kernel/process.c  | 46 ++++++++++++++++++++++
 arch/powerpc/kernel/ptrace.c   | 72 +++++++++++++++++++++++++++++++---
 arch/powerpc/kernel/signal.c   |  6 ++-
 arch/powerpc/kernel/traps.c    | 16 ++++++++
 arch/powerpc/mm/fault.c        | 25 ------------
 include/asm-powerpc/system.h   |  2 +
 7 files changed, 138 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index da52269aec1..81c8324a4a3 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -148,7 +148,7 @@ transfer_to_handler:
 	/* Check to see if the dbcr0 register is set up to debug.  Use the
 	   internal debug mode bit to do this. */
 	lwz	r12,THREAD_DBCR0(r12)
-	andis.	r12,r12,DBCR0_IDM@h
+	andis.	r12,r12,(DBCR0_IDM  | DBSR_DAC1R | DBSR_DAC1W)@h
 	beq+	3f
 	/* From user and task is ptraced - load up global dbcr0 */
 	li	r12,-1			/* clear all pending debug events */
@@ -292,7 +292,7 @@ syscall_exit_cont:
 	/* If the process has its own DBCR0 value, load it up.  The internal
 	   debug mode bit tells us that dbcr0 should be loaded. */
 	lwz	r0,THREAD+THREAD_DBCR0(r2)
-	andis.	r10,r0,DBCR0_IDM@h
+	andis.	r10,r0,(DBCR0_IDM  | DBSR_DAC1R | DBSR_DAC1W)@h
 	bnel-	load_dbcr0
 #endif
 #ifdef CONFIG_44x
@@ -720,7 +720,7 @@ restore_user:
 	/* Check whether this process has its own DBCR0 value.  The internal
 	   debug mode bit tells us that dbcr0 should be loaded. */
 	lwz	r0,THREAD+THREAD_DBCR0(r2)
-	andis.	r10,r0,DBCR0_IDM@h
+	andis.	r10,r0,(DBCR0_IDM  | DBSR_DAC1R | DBSR_DAC1W)@h
 	bnel-	load_dbcr0
 #endif
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 219f3634115..db2497ccc11 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -47,6 +47,8 @@
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
 #endif
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
 
 extern unsigned long _get_SP(void);
 
@@ -239,6 +241,35 @@ void discard_lazy_cpu_state(void)
 }
 #endif /* CONFIG_SMP */
 
+void do_dabr(struct pt_regs *regs, unsigned long address,
+		    unsigned long error_code)
+{
+	siginfo_t info;
+
+	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+			11, SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (debugger_dabr_match(regs))
+		return;
+
+	/* Clear the DAC and struct entries.  One shot trigger */
+#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
+	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
+							| DBCR0_IDM));
+#endif
+
+	/* Clear the DABR */
+	set_dabr(0);
+
+	/* Deliver the signal to userspace */
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code = TRAP_HWBKPT;
+	info.si_addr = (void __user *)address;
+	force_sig_info(SIGTRAP, &info, current);
+}
+
 static DEFINE_PER_CPU(unsigned long, current_dabr);
 
 int set_dabr(unsigned long dabr)
@@ -254,6 +285,11 @@ int set_dabr(unsigned long dabr)
 #if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
 	mtspr(SPRN_DABR, dabr);
 #endif
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+	mtspr(SPRN_DAC1, dabr);
+#endif
+
 	return 0;
 }
 
@@ -337,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
 		set_dabr(new->thread.dabr);
 
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+	/* Reload the DAC (HW breakpoint) if the new thread has one set */
+	if (new->thread.dabr)
+		set_dabr(new->thread.dabr);
+#endif
+
 	new_thread = &new->thread;
 	old_thread = &current->thread;
 
@@ -525,6 +567,10 @@ void flush_thread(void)
 	if (current->thread.dabr) {
 		current->thread.dabr = 0;
 		set_dabr(0);
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+		current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
+#endif
 	}
 }
 
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8feb93e7890..a5d0e78779c 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task)
 
 	if (regs != NULL) {
 #if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-		task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC;
+		task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
 		regs->msr |= MSR_DE;
 #else
 		regs->msr |= MSR_SE;
@@ -716,9 +716,16 @@ void user_disable_single_step(struct task_struct *task)
 {
 	struct pt_regs *regs = task->thread.regs;
 
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+	/* If DAC then do not single step, skip */
+	if (task->thread.dabr)
+		return;
+#endif
+
 	if (regs != NULL) {
 #if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-		task->thread.dbcr0 = 0;
+		task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM);
 		regs->msr &= ~MSR_DE;
 #else
 		regs->msr &= ~MSR_SE;
@@ -727,22 +734,75 @@ void user_disable_single_step(struct task_struct *task)
 	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
 
-static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 			       unsigned long data)
 {
-	/* We only support one DABR and no IABRS at the moment */
+	/* For ppc64 we support one DABR and no IABR's at the moment.
+	 *  For embedded processors we support one DAC and no IAC's at the
+	 *  moment.
+	 */
 	if (addr > 0)
 		return -EINVAL;
 
-	/* The bottom 3 bits are flags */
 	if ((data & ~0x7UL) >= TASK_SIZE)
 		return -EIO;
 
-	/* Ensure translation is on */
+#ifdef CONFIG_PPC64
+
+	/* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+	 *  It was assumed, on previous implementations, that 3 bits were
+	 *  passed together with the data address, fitting the design of the
+	 *  DABR register, as follows:
+	 *
+	 *  bit 0: Read flag
+	 *  bit 1: Write flag
+	 *  bit 2: Breakpoint translation
+	 *
+	 *  Thus, we use them here as so.
+	 */
+
+	/* Ensure breakpoint translation bit is set */
 	if (data && !(data & DABR_TRANSLATION))
 		return -EIO;
 
+	/* Move contents to the DABR register */
 	task->thread.dabr = data;
+
+#endif
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+
+	/* As described above, it was assumed 3 bits were passed with the data
+	 *  address, but we will assume only the mode bits will be passed
+	 *  as to not cause alignment restrictions for DAC-based processors.
+	 */
+
+	/* DAC's hold the whole address without any mode flags */
+	task->thread.dabr = data & ~0x3UL;
+
+	if (task->thread.dabr == 0) {
+		task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM);
+		task->thread.regs->msr &= ~MSR_DE;
+		return 0;
+	}
+
+	/* Read or Write bits must be set */
+
+	if (!(data & 0x3UL))
+		return -EINVAL;
+
+	/* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
+	   register */
+	task->thread.dbcr0 = DBCR0_IDM;
+
+	/* Check for write and read flags and set DBCR0
+	   accordingly */
+	if (data & 0x1UL)
+		task->thread.dbcr0 |= DBSR_DAC1R;
+	if (data & 0x2UL)
+		task->thread.dbcr0 |= DBSR_DAC1W;
+
+	task->thread.regs->msr |= MSR_DE;
+#endif
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index ad55488939c..7aada783ec6 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -145,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
 	 * user space. The DABR will have been cleared if it
 	 * triggered inside the kernel.
 	 */
-	if (current->thread.dabr)
+	if (current->thread.dabr) {
 		set_dabr(current->thread.dabr);
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+		mtspr(SPRN_DBCR0, current->thread.dbcr0);
+#endif
+	}
 
 	if (is32) {
         	if (ka.sa.sa_flags & SA_SIGINFO)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 878fbddb6ae..81ccb8dd1a5 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1067,6 +1067,22 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 		}
 
 		_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+	} else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
+		regs->msr &= ~MSR_DE;
+
+		if (user_mode(regs)) {
+			current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W |
+								DBCR0_IDM);
+		} else {
+			/* Disable DAC interrupts */
+			mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R |
+						DBSR_DAC1W | DBCR0_IDM));
+
+			/* Clear the DAC event */
+			mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W));
+		}
+		/* Setup and send the trap to the handler */
+		do_dabr(regs, mfspr(SPRN_DAC1), debug_status);
 	}
 }
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1707d00331f..565b7a237c8 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs)
 	return 0;
 }
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-static void do_dabr(struct pt_regs *regs, unsigned long address,
-		    unsigned long error_code)
-{
-	siginfo_t info;
-
-	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
-			11, SIGSEGV) == NOTIFY_STOP)
-		return;
-
-	if (debugger_dabr_match(regs))
-		return;
-
-	/* Clear the DABR */
-	set_dabr(0);
-
-	/* Deliver the signal to userspace */
-	info.si_signo = SIGTRAP;
-	info.si_errno = 0;
-	info.si_code = TRAP_HWBKPT;
-	info.si_addr = (void __user *)address;
-	force_sig_info(SIGTRAP, &info, current);
-}
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
-
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
  * for a data fault, SRR1 for an instruction fault. For 400-family processors
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index e6e25e2364e..d6648c14332 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -110,6 +110,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
 extern int set_dabr(unsigned long dabr);
+extern void do_dabr(struct pt_regs *regs, unsigned long address,
+		    unsigned long error_code);
 extern void print_backtrace(unsigned long *);
 extern void show_regs(struct pt_regs * regs);
 extern void flush_instruction_cache(void);
-- 
GitLab


From 7886250e9d71b24d0205ac6798ee855fb3836318 Mon Sep 17 00:00:00 2001
From: Mark Nelson <markn@au1.ibm.com>
Date: Thu, 24 Jul 2008 14:28:48 +1000
Subject: [PATCH 538/853] powerpc/cell: Fixed IOMMU mapping uses weak ordering
 for a pcie endpoint

At the moment the fixed mapping is by default strongly ordered (the
iommu_fixed=weak boot option must be used to make the fixed mapping weakly
ordered). If we're on a setup where the southbridge is being used in
endpoint mode (triblade and CAB boards) the default should be a weakly
ordered fixed mapping.

This adds a check so that if a node of type pcie-endpoint can be found in
the device tree the fixed mapping is set to be weak by default (but can be
overridden using iommu_fixed=strong).

Signed-off-by: Mark Nelson <markn@au1.ibm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/cell/iommu.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 208005ca262..031124a8e37 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -1150,12 +1150,23 @@ static int iommu_fixed_disabled;
 
 static int __init setup_iommu_fixed(char *str)
 {
+	struct device_node *pciep;
+
 	if (strcmp(str, "off") == 0)
 		iommu_fixed_disabled = 1;
 
-	else if (strcmp(str, "weak") == 0)
+	/* If we can find a pcie-endpoint in the device tree assume that
+	 * we're on a triblade or a CAB so by default the fixed mapping
+	 * should be set to be weakly ordered; but only if the boot
+	 * option WASN'T set for strong ordering
+	 */
+	pciep = of_find_node_by_type(NULL, "pcie-endpoint");
+
+	if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
 		iommu_fixed_is_weak = 1;
 
+	of_node_put(pciep);
+
 	return 1;
 }
 __setup("iommu_fixed=", setup_iommu_fixed);
-- 
GitLab


From 80c60bf9b96f6108c630d90efc073cd520801e6c Mon Sep 17 00:00:00 2001
From: Segher Boessenkool <segher@kernel.crashing.org>
Date: Fri, 25 Jul 2008 10:08:41 +1000
Subject: [PATCH 539/853] powerpc: Fix compile error with binutils 2.15

My previous patch to fix compilation with binutils-2.17 causes
a "file truncated" build error from ld with binutils 2.15 (and
possibly older), and a warning with 2.16 and 2.17.

This fixes it.

Signed-off-by: Segher Boessenkool <segher@kernel.crashing.org>
Acked-by: Chuck Meade <chuckmeade@mindspring.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/vmlinux.lds.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index a914411bced..4a8ce62fe11 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
 
 	/* The dummy segment contents for the bug workaround mentioned above
 	   near PHDRS.  */
-	.dummy : {
+	.dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
 		LONG(0xf177)
 	} :kernel :dummy
 
-- 
GitLab


From 545500b307658ad5783e0f3a52a32b97b2dfaed2 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@austin.ibm.com>
Date: Thu, 24 Jul 2008 04:25:00 +1000
Subject: [PATCH 540/853] powerpc/pseries: Remove extraneous error reporting
 for hcall failures in lparcfg

Remove the extraneous error reporting used when a hcall made from lparcfg fails.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/lparcfg.c | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 827a5726a03..20278ece31d 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -129,33 +129,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
 /*
  * Methods used to fetch LPAR data when running on a pSeries platform.
  */
-static void log_plpar_hcall_return(unsigned long rc, char *tag)
-{
-	switch(rc) {
-	case 0:
-		return;
-	case H_HARDWARE:
-		printk(KERN_INFO "plpar-hcall (%s) "
-				"Hardware fault\n", tag);
-		return;
-	case H_FUNCTION:
-		printk(KERN_INFO "plpar-hcall (%s) "
-				"Function not allowed\n", tag);
-		return;
-	case H_AUTHORITY:
-		printk(KERN_INFO "plpar-hcall (%s) "
-				"Not authorized to this function\n", tag);
-		return;
-	case H_PARAMETER:
-		printk(KERN_INFO "plpar-hcall (%s) "
-				"Bad parameter(s)\n",tag);
-		return;
-	default:
-		printk(KERN_INFO "plpar-hcall (%s) "
-				"Unexpected rc(0x%lx)\n", tag, rc);
-	}
-}
-
 /*
  * H_GET_PPP hcall returns info in 4 parms.
  *  entitled_capacity,unallocated_capacity,
@@ -191,8 +164,6 @@ static unsigned int h_get_ppp(unsigned long *entitled,
 	*aggregation = retbuf[2];
 	*resource = retbuf[3];
 
-	log_plpar_hcall_return(rc, "H_GET_PPP");
-
 	return rc;
 }
 
@@ -205,9 +176,6 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
 
 	*pool_idle_time = retbuf[0];
 	*num_procs = retbuf[1];
-
-	if (rc != H_AUTHORITY)
-		log_plpar_hcall_return(rc, "H_PIC");
 }
 
 #define SPLPAR_CHARACTERISTICS_TOKEN 20
-- 
GitLab


From 11529396ea3190113173f7a15e59a58dbcaa36c8 Mon Sep 17 00:00:00 2001
From: Nathan Fotenot <nfont@austin.ibm.com>
Date: Thu, 24 Jul 2008 04:25:16 +1000
Subject: [PATCH 541/853] powerpc/pseries: Split processor entitlement
 retrieval and gathering to helper routines

Split the retrieval and setting of processor entitlement and weight into
helper routines.  This also removes the printing of the raw values
returned from h_get_ppp, the values are already parsed and printed.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/lparcfg.c | 166 ++++++++++++++++++----------------
 1 file changed, 88 insertions(+), 78 deletions(-)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 20278ece31d..a0ca90ab5e3 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -167,7 +167,8 @@ static unsigned int h_get_ppp(unsigned long *entitled,
 	return rc;
 }
 
-static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+static unsigned h_pic(unsigned long *pool_idle_time,
+		      unsigned long *num_procs)
 {
 	unsigned long rc;
 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -176,6 +177,51 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
 
 	*pool_idle_time = retbuf[0];
 	*num_procs = retbuf[1];
+
+	return rc;
+}
+
+/*
+ * parse_ppp_data
+ * Parse out the data returned from h_get_ppp and h_pic
+ */
+static void parse_ppp_data(struct seq_file *m)
+{
+	unsigned long h_entitled, h_unallocated;
+	unsigned long h_aggregation, h_resource;
+	int rc;
+
+	rc = h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
+		       &h_resource);
+	if (rc)
+		return;
+
+	seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
+	seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
+	seq_printf(m, "system_active_processors=%ld\n",
+		   (h_resource >> 0 * 8) & 0xffff);
+
+	/* pool related entries are appropriate for shared configs */
+	if (lppaca[0].shared_proc) {
+		unsigned long pool_idle_time, pool_procs;
+
+		seq_printf(m, "pool=%ld\n", (h_aggregation >> 0 * 8) & 0xffff);
+
+		/* report pool_capacity in percentage */
+		seq_printf(m, "pool_capacity=%ld\n",
+			   ((h_resource >> 2 * 8) & 0xffff) * 100);
+
+		h_pic(&pool_idle_time, &pool_procs);
+		seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+		seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+	}
+
+	seq_printf(m, "unallocated_capacity_weight=%ld\n",
+		   (h_resource >> 4 * 8) & 0xFF);
+
+	seq_printf(m, "capacity_weight=%ld\n", (h_resource >> 5 * 8) & 0xFF);
+	seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
+	seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
 }
 
 #define SPLPAR_CHARACTERISTICS_TOKEN 20
@@ -302,60 +348,11 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 	partition_active_processors = lparcfg_count_active_processors();
 
 	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-		unsigned long h_entitled, h_unallocated;
-		unsigned long h_aggregation, h_resource;
-		unsigned long pool_idle_time, pool_procs;
-		unsigned long purr;
-
-		h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
-			  &h_resource);
-
-		seq_printf(m, "R4=0x%lx\n", h_entitled);
-		seq_printf(m, "R5=0x%lx\n", h_unallocated);
-		seq_printf(m, "R6=0x%lx\n", h_aggregation);
-		seq_printf(m, "R7=0x%lx\n", h_resource);
-
-		purr = get_purr();
-
 		/* this call handles the ibm,get-system-parameter contents */
 		parse_system_parameter_string(m);
+		parse_ppp_data(m);
 
-		seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
-
-		seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
-
-		seq_printf(m, "system_active_processors=%ld\n",
-			   (h_resource >> 0 * 8) & 0xffff);
-
-		/* pool related entries are apropriate for shared configs */
-		if (lppaca[0].shared_proc) {
-
-			h_pic(&pool_idle_time, &pool_procs);
-
-			seq_printf(m, "pool=%ld\n",
-				   (h_aggregation >> 0 * 8) & 0xffff);
-
-			/* report pool_capacity in percentage */
-			seq_printf(m, "pool_capacity=%ld\n",
-				   ((h_resource >> 2 * 8) & 0xffff) * 100);
-
-			seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
-
-			seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
-		}
-
-		seq_printf(m, "unallocated_capacity_weight=%ld\n",
-			   (h_resource >> 4 * 8) & 0xFF);
-
-		seq_printf(m, "capacity_weight=%ld\n",
-			   (h_resource >> 5 * 8) & 0xFF);
-
-		seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
-
-		seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
-
-		seq_printf(m, "purr=%ld\n", purr);
-
+		seq_printf(m, "purr=%ld\n", get_purr());
 	} else {		/* non SPLPAR case */
 
 		seq_printf(m, "system_active_processors=%d\n",
@@ -382,6 +379,41 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 	return 0;
 }
 
+static ssize_t update_ppp(u64 *entitlement, u8 *weight)
+{
+	unsigned long current_entitled;
+	unsigned long dummy;
+	unsigned long resource;
+	u8 current_weight, new_weight;
+	u64 new_entitled;
+	ssize_t retval;
+
+	/* Get our current parameters */
+	retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
+	if (retval)
+		return retval;
+
+	current_weight = (resource >> 5 * 8) & 0xFF;
+
+	if (entitlement) {
+		new_weight = current_weight;
+		new_entitled = *entitlement;
+	} else if (weight) {
+		new_weight = *weight;
+		new_entitled = current_entitled;
+	} else
+		return -EINVAL;
+
+	pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+		 __FUNCTION__, current_entitled, current_weight);
+
+	pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
+		 __FUNCTION__, new_entitled, new_weight);
+
+	retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
+	return retval;
+}
+
 /*
  * Interface for changing system parameters (variable capacity weight
  * and entitled capacity).  Format of input is "param_name=value";
@@ -399,12 +431,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
 	char *tmp;
 	u64 new_entitled, *new_entitled_ptr = &new_entitled;
 	u8 new_weight, *new_weight_ptr = &new_weight;
-
-	unsigned long current_entitled;	/* parameters for h_get_ppp */
-	unsigned long dummy;
-	unsigned long resource;
-	u8 current_weight;
-
 	ssize_t retval = -ENOMEM;
 
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
@@ -432,33 +458,17 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
 		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
 		if (endp == tmp)
 			goto out;
-		new_weight_ptr = &current_weight;
+
+		retval = update_ppp(new_entitled_ptr, NULL);
 	} else if (!strcmp(kbuf, "capacity_weight")) {
 		char *endp;
 		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
 		if (endp == tmp)
 			goto out;
-		new_entitled_ptr = &current_entitled;
-	} else
-		goto out;
 
-	/* Get our current parameters */
-	retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
-	if (retval) {
-		retval = -EIO;
+		retval = update_ppp(NULL, new_weight_ptr);
+	} else
 		goto out;
-	}
-
-	current_weight = (resource >> 5 * 8) & 0xFF;
-
-	pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
-		 __func__, current_entitled, current_weight);
-
-	pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
-		 __func__, *new_entitled_ptr, *new_weight_ptr);
-
-	retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
-				    *new_weight_ptr);
 
 	if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
 		retval = count;
-- 
GitLab


From dfc3403f0e5ffb94ee29942f313b87d4061d951b Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@austin.ibm.com>
Date: Thu, 24 Jul 2008 04:27:30 +1000
Subject: [PATCH 542/853] powerpc/pseries: Add memory entitlement capabilities
 to /proc/ppc64/lparcfg

Update /proc/ppc64/lparcfg to display Cooperative Memory
Overcommitment statistics as reported by the H_GET_MPP hcall.  This
also updates the lparcfg interface to allow setting memory entitlement
and weight.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/lparcfg.c | 121 +++++++++++++++++++++++++++++++++-
 include/asm-powerpc/hvcall.h  |  18 ++++-
 2 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index a0ca90ab5e3..86e5b3ed10d 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -35,7 +35,7 @@
 #include <asm/prom.h>
 #include <asm/vdso_datapage.h>
 
-#define MODULE_VERS "1.7"
+#define MODULE_VERS "1.8"
 #define MODULE_NAME "lparcfg"
 
 /* #define LPARCFG_DEBUG */
@@ -129,6 +129,35 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
 /*
  * Methods used to fetch LPAR data when running on a pSeries platform.
  */
+/**
+ * h_get_mpp
+ * H_GET_MPP hcall returns info in 7 parms
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+	mpp_data->entitled_mem = retbuf[0];
+	mpp_data->mapped_mem = retbuf[1];
+
+	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	mpp_data->pool_num = retbuf[2] & 0xffff;
+
+	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+	mpp_data->pool_size = retbuf[4];
+	mpp_data->loan_request = retbuf[5];
+	mpp_data->backing_mem = retbuf[6];
+
+	return rc;
+}
+EXPORT_SYMBOL(h_get_mpp);
+
 /*
  * H_GET_PPP hcall returns info in 4 parms.
  *  entitled_capacity,unallocated_capacity,
@@ -224,6 +253,44 @@ static void parse_ppp_data(struct seq_file *m)
 	seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
 }
 
+/**
+ * parse_mpp_data
+ * Parse out data returned from h_get_mpp
+ */
+static void parse_mpp_data(struct seq_file *m)
+{
+	struct hvcall_mpp_data mpp_data;
+	int rc;
+
+	rc = h_get_mpp(&mpp_data);
+	if (rc)
+		return;
+
+	seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
+
+	if (mpp_data.mapped_mem != -1)
+		seq_printf(m, "mapped_entitled_memory=%ld\n",
+		           mpp_data.mapped_mem);
+
+	seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
+	seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
+
+	seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
+	seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
+	           mpp_data.unallocated_mem_weight);
+	seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
+	           mpp_data.unallocated_entitlement);
+
+	if (mpp_data.pool_size != -1)
+		seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
+		           mpp_data.pool_size);
+
+	seq_printf(m, "entitled_memory_loan_request=%ld\n",
+	           mpp_data.loan_request);
+
+	seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
+}
+
 #define SPLPAR_CHARACTERISTICS_TOKEN 20
 #define SPLPAR_MAXLENGTH 1026*(sizeof(char))
 
@@ -351,6 +418,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 		/* this call handles the ibm,get-system-parameter contents */
 		parse_system_parameter_string(m);
 		parse_ppp_data(m);
+		parse_mpp_data(m);
 
 		seq_printf(m, "purr=%ld\n", get_purr());
 	} else {		/* non SPLPAR case */
@@ -414,6 +482,43 @@ static ssize_t update_ppp(u64 *entitlement, u8 *weight)
 	return retval;
 }
 
+/**
+ * update_mpp
+ *
+ * Update the memory entitlement and weight for the partition.  Caller must
+ * specify either a new entitlement or weight, not both, to be updated
+ * since the h_set_mpp call takes both entitlement and weight as parameters.
+ */
+static ssize_t update_mpp(u64 *entitlement, u8 *weight)
+{
+	struct hvcall_mpp_data mpp_data;
+	u64 new_entitled;
+	u8 new_weight;
+	ssize_t rc;
+
+	rc = h_get_mpp(&mpp_data);
+	if (rc)
+		return rc;
+
+	if (entitlement) {
+		new_weight = mpp_data.mem_weight;
+		new_entitled = *entitlement;
+	} else if (weight) {
+		new_weight = *weight;
+		new_entitled = mpp_data.entitled_mem;
+	} else
+		return -EINVAL;
+
+	pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+	         __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight);
+
+	pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
+	         __FUNCTION__, new_entitled, new_weight);
+
+	rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
+	return rc;
+}
+
 /*
  * Interface for changing system parameters (variable capacity weight
  * and entitled capacity).  Format of input is "param_name=value";
@@ -467,6 +572,20 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
 			goto out;
 
 		retval = update_ppp(NULL, new_weight_ptr);
+	} else if (!strcmp(kbuf, "entitled_memory")) {
+		char *endp;
+		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			goto out;
+
+		retval = update_mpp(new_entitled_ptr, NULL);
+	} else if (!strcmp(kbuf, "entitled_memory_weight")) {
+		char *endp;
+		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			goto out;
+
+		retval = update_mpp(NULL, new_weight_ptr);
 	} else
 		goto out;
 
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index bf6cd7cb996..46e76456cbb 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -210,7 +210,9 @@
 #define H_JOIN			0x298
 #define H_VASI_STATE            0x2A4
 #define H_ENABLE_CRQ		0x2B0
-#define MAX_HCALL_OPCODE	H_ENABLE_CRQ
+#define H_SET_MPP		0x2D0
+#define H_GET_MPP		0x2D4
+#define MAX_HCALL_OPCODE	H_GET_MPP
 
 #ifndef __ASSEMBLY__
 
@@ -270,6 +272,20 @@ struct hcall_stats {
 };
 #define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
+struct hvcall_mpp_data {
+	unsigned long entitled_mem;
+	unsigned long mapped_mem;
+	unsigned short group_num;
+	unsigned short pool_num;
+	unsigned char mem_weight;
+	unsigned char unallocated_mem_weight;
+	unsigned long unallocated_entitlement;  /* value in bytes */
+	unsigned long pool_size;
+	signed long loan_request;
+	unsigned long backing_mem;
+};
+
+int h_get_mpp(struct hvcall_mpp_data *);
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
-- 
GitLab


From 398778f78b76fb72cb18411487af01dea202709e Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:28:05 +1000
Subject: [PATCH 543/853] powerpc/pseries: Split retrieval of processor
 entitlement data into a helper routine

Split the retrieval of processor entitlement data returned in the H_GET_PPP
hcall into its own helper routine.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/lparcfg.c | 81 ++++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 86e5b3ed10d..d82e1fa5ce2 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -158,6 +158,18 @@ int h_get_mpp(struct hvcall_mpp_data *mpp_data)
 }
 EXPORT_SYMBOL(h_get_mpp);
 
+struct hvcall_ppp_data {
+	u64	entitlement;
+	u64	unallocated_entitlement;
+	u16	group_num;
+	u16	pool_num;
+	u8	capped;
+	u8	weight;
+	u8	unallocated_weight;
+	u16	active_procs_in_pool;
+	u16	active_system_procs;
+};
+
 /*
  * H_GET_PPP hcall returns info in 4 parms.
  *  entitled_capacity,unallocated_capacity,
@@ -178,20 +190,24 @@ EXPORT_SYMBOL(h_get_mpp);
  *              XXXX - Active processors in Physical Processor Pool.
  *                  XXXX  - Processors active on platform.
  */
-static unsigned int h_get_ppp(unsigned long *entitled,
-			      unsigned long *unallocated,
-			      unsigned long *aggregation,
-			      unsigned long *resource)
+static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
 {
 	unsigned long rc;
 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 
 	rc = plpar_hcall(H_GET_PPP, retbuf);
 
-	*entitled = retbuf[0];
-	*unallocated = retbuf[1];
-	*aggregation = retbuf[2];
-	*resource = retbuf[3];
+	ppp_data->entitlement = retbuf[0];
+	ppp_data->unallocated_entitlement = retbuf[1];
+
+	ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	ppp_data->pool_num = retbuf[2] & 0xffff;
+
+	ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
+	ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
+	ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
+	ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
+	ppp_data->active_system_procs = retbuf[3] & 0xffff;
 
 	return rc;
 }
@@ -216,41 +232,40 @@ static unsigned h_pic(unsigned long *pool_idle_time,
  */
 static void parse_ppp_data(struct seq_file *m)
 {
-	unsigned long h_entitled, h_unallocated;
-	unsigned long h_aggregation, h_resource;
+	struct hvcall_ppp_data ppp_data;
 	int rc;
 
-	rc = h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
-		       &h_resource);
+	rc = h_get_ppp(&ppp_data);
 	if (rc)
 		return;
 
-	seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
-	seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
-	seq_printf(m, "system_active_processors=%ld\n",
-		   (h_resource >> 0 * 8) & 0xffff);
+	seq_printf(m, "partition_entitled_capacity=%ld\n",
+	           ppp_data.entitlement);
+	seq_printf(m, "group=%d\n", ppp_data.group_num);
+	seq_printf(m, "system_active_processors=%d\n",
+	           ppp_data.active_system_procs);
 
 	/* pool related entries are apropriate for shared configs */
 	if (lppaca[0].shared_proc) {
 		unsigned long pool_idle_time, pool_procs;
 
-		seq_printf(m, "pool=%ld\n", (h_aggregation >> 0 * 8) & 0xffff);
+		seq_printf(m, "pool=%d\n", ppp_data.pool_num);
 
 		/* report pool_capacity in percentage */
-		seq_printf(m, "pool_capacity=%ld\n",
-			   ((h_resource >> 2 * 8) & 0xffff) * 100);
+		seq_printf(m, "pool_capacity=%d\n",
+			   ppp_data.active_procs_in_pool * 100);
 
 		h_pic(&pool_idle_time, &pool_procs);
 		seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
 		seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
 	}
 
-	seq_printf(m, "unallocated_capacity_weight=%ld\n",
-		   (h_resource >> 4 * 8) & 0xFF);
-
-	seq_printf(m, "capacity_weight=%ld\n", (h_resource >> 5 * 8) & 0xFF);
-	seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
-	seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
+	seq_printf(m, "unallocated_capacity_weight=%d\n",
+		   ppp_data.unallocated_weight);
+	seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
+	seq_printf(m, "capped=%d\n", ppp_data.capped);
+	seq_printf(m, "unallocated_capacity=%ld\n",
+		   ppp_data.unallocated_entitlement);
 }
 
 /**
@@ -449,31 +464,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 
 static ssize_t update_ppp(u64 *entitlement, u8 *weight)
 {
-	unsigned long current_entitled;
-	unsigned long dummy;
-	unsigned long resource;
-	u8 current_weight, new_weight;
+	struct hvcall_ppp_data ppp_data;
+	u8 new_weight;
 	u64 new_entitled;
 	ssize_t retval;
 
 	/* Get our current parameters */
-	retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
+	retval = h_get_ppp(&ppp_data);
 	if (retval)
 		return retval;
 
-	current_weight = (resource >> 5 * 8) & 0xFF;
-
 	if (entitlement) {
-		new_weight = current_weight;
+		new_weight = ppp_data.weight;
 		new_entitled = *entitlement;
 	} else if (weight) {
 		new_weight = *weight;
-		new_entitled = current_entitled;
+		new_entitled = ppp_data.entitlement;
 	} else
 		return -EINVAL;
 
 	pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
-		 __FUNCTION__, current_entitled, current_weight);
+	         __FUNCTION__, ppp_data.entitlement, ppp_data.weight);
 
 	pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
 		 __FUNCTION__, new_entitled, new_weight);
-- 
GitLab


From e46de429cb954d30a5642fba81d516ede518c65e Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:29:03 +1000
Subject: [PATCH 544/853] powerpc/pseries: Enable CMO feature during platform
 setup

For Cooperative Memory Overcommitment (CMO), set the FW_FEATURE_CMO
flag in powerpc_firmware_features from the RTAS ibm,get-system-parameter
table prior to calling iommu_init_early_pSeries.

With this, any CMO specific functionality can be controlled by checking:
 firmware_has_feature(FW_FEATURE_CMO)

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/setup.c | 71 ++++++++++++++++++++++++++
 include/asm-powerpc/firmware.h         |  3 +-
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 90beb444e1d..063a0d2fba3 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr)
 			H_DABRX_KERNEL | H_DABRX_USER);
 }
 
+#define CMO_CHARACTERISTICS_TOKEN 44
+#define CMO_MAXLENGTH 1026
+
+/**
+ * pSeries_cmo_feature_init - FW_FEATURE_CMO is not in ibm,hypertas-functions,
+ * so handle that here. (Stolen from parse_system_parameter_string)
+ */
+void pSeries_cmo_feature_init(void)
+{
+	char *ptr, *key, *value, *end;
+	int call_status;
+	int PrPSP = -1;
+	int SecPSP = -1;
+
+	pr_debug(" -> fw_cmo_feature_init()\n");
+	spin_lock(&rtas_data_buf_lock);
+	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				CMO_CHARACTERISTICS_TOKEN,
+				__pa(rtas_data_buf),
+				RTAS_DATA_BUF_SIZE);
+
+	if (call_status != 0) {
+		spin_unlock(&rtas_data_buf_lock);
+		pr_debug("CMO not available\n");
+		pr_debug(" <- fw_cmo_feature_init()\n");
+		return;
+	}
+
+	end = rtas_data_buf + CMO_MAXLENGTH - 2;
+	ptr = rtas_data_buf + 2;	/* step over strlen value */
+	key = value = ptr;
+
+	while (*ptr && (ptr <= end)) {
+		/* Separate the key and value by replacing '=' with '\0' and
+		 * point the value at the string after the '='
+		 */
+		if (ptr[0] == '=') {
+			ptr[0] = '\0';
+			value = ptr + 1;
+		} else if (ptr[0] == '\0' || ptr[0] == ',') {
+			/* Terminate the string containing the key/value pair */
+			ptr[0] = '\0';
+
+			if (key == value) {
+				pr_debug("Malformed key/value pair\n");
+				/* Never found a '=', end processing */
+				break;
+			}
+
+			if (0 == strcmp(key, "PrPSP"))
+				PrPSP = simple_strtol(value, NULL, 10);
+			else if (0 == strcmp(key, "SecPSP"))
+				SecPSP = simple_strtol(value, NULL, 10);
+			value = key = ptr + 1;
+		}
+		ptr++;
+	}
+
+	if (PrPSP != -1 || SecPSP != -1) {
+		pr_info("CMO enabled\n");
+		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
+		powerpc_firmware_features |= FW_FEATURE_CMO;
+	} else
+		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
+	spin_unlock(&rtas_data_buf_lock);
+	pr_debug(" <- fw_cmo_feature_init()\n");
+}
+
 /*
  * Early initialization.  Relocation is on but do not reference unbolted pages
  */
@@ -329,6 +399,7 @@ static void __init pSeries_init_early(void)
 	else if (firmware_has_feature(FW_FEATURE_XDABR))
 		ppc_md.set_dabr = pseries_set_xdabr;
 
+	pSeries_cmo_feature_init();
 	iommu_init_early_pSeries();
 
 	pr_debug(" <- pSeries_init_early()\n");
diff --git a/include/asm-powerpc/firmware.h b/include/asm-powerpc/firmware.h
index ef328995ba9..3a179827528 100644
--- a/include/asm-powerpc/firmware.h
+++ b/include/asm-powerpc/firmware.h
@@ -46,6 +46,7 @@
 #define FW_FEATURE_PS3_LV1	ASM_CONST(0x0000000000800000)
 #define FW_FEATURE_BEAT		ASM_CONST(0x0000000001000000)
 #define FW_FEATURE_BULK_REMOVE	ASM_CONST(0x0000000002000000)
+#define FW_FEATURE_CMO		ASM_CONST(0x0000000004000000)
 
 #ifndef __ASSEMBLY__
 
@@ -58,7 +59,7 @@ enum {
 		FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
 		FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
 		FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE |
-		FW_FEATURE_SPLPAR | FW_FEATURE_LPAR,
+		FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
 	FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
-- 
GitLab


From 86630a32320f83736c4c24e2c8bae218e4c56c7c Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:29:16 +1000
Subject: [PATCH 545/853] powerpc/pseries: Utilities to set firmware page state

Newer versions of firmware support page states, which are used by the
collaborative memory manager (future patch) to "loan" pages to the
hypervisor for use by other partitions.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/plpar_wrappers.h | 10 ++++++++++
 include/asm-powerpc/hvcall.h                    |  5 +++++
 2 files changed, 15 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d8680b589dc..a437267c6bf 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
 	return vpa_call(0x3, cpu, vpa);
 }
 
+static inline long plpar_page_set_loaned(unsigned long vpa)
+{
+	return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0);
+}
+
+static inline long plpar_page_set_active(unsigned long vpa)
+{
+	return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0);
+}
+
 extern void vpa_init(int cpu);
 
 static inline long plpar_pte_enter(unsigned long flags,
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index 46e76456cbb..fbe2932fa9e 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -92,6 +92,11 @@
 #define H_EXACT			(1UL<<(63-24))	/* Use exact PTE or return H_PTEG_FULL */
 #define H_R_XLATE		(1UL<<(63-25))	/* include a valid logical page num in the pte if the valid bit is set */
 #define H_READ_4		(1UL<<(63-26))	/* Return 4 PTEs */
+#define H_PAGE_STATE_CHANGE	(1UL<<(63-28))
+#define H_PAGE_UNUSED		((1UL<<(63-29)) | (1UL<<(63-30)))
+#define H_PAGE_SET_UNUSED	(H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
+#define H_PAGE_SET_LOANED	(H_PAGE_SET_UNUSED | (1UL<<(63-31)))
+#define H_PAGE_SET_ACTIVE	H_PAGE_STATE_CHANGE
 #define H_AVPN			(1UL<<(63-32))	/* An avpn is provided as a sanity test */
 #define H_ANDCOND		(1UL<<(63-33))
 #define H_ICACHE_INVALIDATE	(1UL<<(63-40))	/* icbi, etc.  (ignored for IO pages) */
-- 
GitLab


From 84af458bb23bf5f0ba1af4320dd2a57f7c4363e5 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:30:29 +1000
Subject: [PATCH 546/853] powerpc/pseries: Add collaborative memory manager

Adds a collaborative memory manager, which acts as a simple balloon driver
for System p machines that support cooperative memory overcommitment
(CMO).

Adds a platform configuration option for CMO called PPC_SMLPAR.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/Kconfig  |  23 ++
 arch/powerpc/platforms/pseries/Makefile |   1 +
 arch/powerpc/platforms/pseries/cmm.c    | 468 ++++++++++++++++++++++++
 3 files changed, 492 insertions(+)
 create mode 100644 arch/powerpc/platforms/pseries/cmm.c

diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 757c0296e0b..97619fd51e3 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG
 	depends on PPC_PSERIES && PPC_EARLY_DEBUG
 	bool "Enable extra debug logging in platforms/pseries"
 	default y
+
+config PPC_SMLPAR
+	bool "Support for shared-memory logical partitions"
+	depends on PPC_PSERIES
+	select LPARCFG
+	default n
+	help
+	  Select this option to enable shared memory partition support.
+	  With this option a system running in an LPAR can be given more
+	  memory than physically available and will allow firmware to
+	  balance memory across many LPARs.
+
+config CMM
+	tristate "Collaborative memory management"
+	depends on PPC_SMLPAR
+	default y
+	help
+	  Select this option, if you want to enable the kernel interface
+	  to reduce the memory size of the system. This is accomplished
+	  by allocating pages of memory and putting them "on hold". This only
+	  makes sense for a system running in an LPAR where the unused pages
+	  will be reused for other LPARs. The interface allows firmware to
+	  balance memory across many LPARs.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 554c6e42ef2..dfe574af2dc 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
 obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
 obj-$(CONFIG_PHYP_DUMP)	+= phyp_dump.o
+obj-$(CONFIG_CMM)		+= cmm.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
new file mode 100644
index 00000000000..c6b3be03168
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -0,0 +1,468 @@
+/*
+ * Collaborative memory management interface.
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * Author(s): Brian King (brking@linux.vnet.ibm.com),
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/oom.h>
+#include <linux/sched.h>
+#include <linux/stringify.h>
+#include <linux/swap.h>
+#include <linux/sysdev.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+
+#include "plpar_wrappers.h"
+
+#define CMM_DRIVER_VERSION	"1.0.0"
+#define CMM_DEFAULT_DELAY	1
+#define CMM_DEBUG			0
+#define CMM_DISABLE		0
+#define CMM_OOM_KB		1024
+#define CMM_MIN_MEM_MB		256
+#define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
+#define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))
+
+static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int oom_kb = CMM_OOM_KB;
+static unsigned int cmm_debug = CMM_DEBUG;
+static unsigned int cmm_disabled = CMM_DISABLE;
+static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static struct sys_device cmm_sysdev;
+
+MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(CMM_DRIVER_VERSION);
+
+module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
+		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
+		 "[Default=" __stringify(CMM_OOM_KB) "]");
+module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
+		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
+module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
+		 "[Default=" __stringify(CMM_DEBUG) "]");
+
+#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+/* Debug printk gated on cmm_debug; do/while (0) makes it one safe statement. */
+#define cmm_dbg(...) do { if (cmm_debug) printk(KERN_INFO "cmm: "__VA_ARGS__); } while (0)
+
+struct cmm_page_array {
+	struct cmm_page_array *next;
+	unsigned long index;
+	unsigned long page[CMM_NR_PAGES];
+};
+
+static unsigned long loaned_pages;
+static unsigned long loaned_pages_target;
+static unsigned long oom_freed_pages;
+
+static struct cmm_page_array *cmm_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
+
+static struct task_struct *cmm_thread_ptr;
+
+/**
+ * cmm_alloc_pages - Allocate pages and mark them as loaned
+ * @nr:	number of pages to allocate
+ *
+ * Return value:
+ * 	number of requested pages that could not be allocated
+ **/
+static long cmm_alloc_pages(long nr)
+{
+	struct cmm_page_array *pa, *npa;
+	unsigned long addr;
+	long rc;
+
+	cmm_dbg("Begin request for %ld pages\n", nr);
+
+	while (nr) {
+		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
+				       __GFP_NORETRY | __GFP_NOMEMALLOC);
+		if (!addr)
+			break;
+		spin_lock(&cmm_lock);
+		pa = cmm_page_list;
+		if (!pa || pa->index >= CMM_NR_PAGES) {
+			/* Need a new page for the page list. */
+			spin_unlock(&cmm_lock);
+			npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
+								       __GFP_NORETRY | __GFP_NOMEMALLOC);
+			if (!npa) {
+				pr_info("%s: Can not allocate new page list\n", __FUNCTION__);
+				free_page(addr);
+				break;
+			}
+			spin_lock(&cmm_lock);
+			pa = cmm_page_list;
+
+			if (!pa || pa->index >= CMM_NR_PAGES) {
+				npa->next = pa;
+				npa->index = 0;
+				pa = npa;
+				cmm_page_list = pa;
+			} else
+				free_page((unsigned long) npa);
+		}
+
+		if ((rc = plpar_page_set_loaned(__pa(addr)))) {
+			pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, rc);
+			spin_unlock(&cmm_lock);
+			free_page(addr);
+			break;
+		}
+
+		pa->page[pa->index++] = addr;
+		loaned_pages++;
+		totalram_pages--;
+		spin_unlock(&cmm_lock);
+		nr--;
+	}
+
+	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+	return nr;
+}
+
+/**
+ * cmm_free_pages - Free pages and mark them as active
+ * @nr:	number of pages to free
+ *
+ * Return value:
+ * 	number of requested pages that could not be freed
+ **/
+static long cmm_free_pages(long nr)
+{
+	struct cmm_page_array *pa;
+	unsigned long addr;
+
+	cmm_dbg("Begin free of %ld pages.\n", nr);
+	spin_lock(&cmm_lock);
+	pa = cmm_page_list;
+	while (nr) {
+		if (!pa || pa->index <= 0)
+			break;
+		addr = pa->page[--pa->index];
+
+		if (pa->index == 0) {
+			pa = pa->next;
+			free_page((unsigned long) cmm_page_list);
+			cmm_page_list = pa;
+		}
+
+		plpar_page_set_active(__pa(addr));
+		free_page(addr);
+		loaned_pages--;
+		nr--;
+		totalram_pages++;
+	}
+	spin_unlock(&cmm_lock);
+	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+	return nr;
+}
+
+/**
+ * cmm_oom_notify - OOM notifier
+ * @self:	notifier block struct
+ * @dummy:	not used
+ * @parm:	returned - number of pages freed
+ *
+ * Return value:
+ * 	NOTIFY_OK
+ **/
+static int cmm_oom_notify(struct notifier_block *self,
+			  unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+	long nr = KB2PAGES(oom_kb);
+
+	cmm_dbg("OOM processing started\n");
+	nr = cmm_free_pages(nr);
+	loaned_pages_target = loaned_pages;
+	*freed += KB2PAGES(oom_kb) - nr;
+	oom_freed_pages += KB2PAGES(oom_kb) - nr;
+	cmm_dbg("OOM processing complete\n");
+	return NOTIFY_OK;
+}
+
+/**
+ * cmm_get_mpp - Read memory performance parameters
+ *
+ * Makes hcall to query the current page loan request from the hypervisor.
+ *
+ * Return value:
+ * 	nothing
+ **/
+static void cmm_get_mpp(void)
+{
+	int rc;
+	struct hvcall_mpp_data mpp_data;
+	unsigned long active_pages_target;
+	signed long page_loan_request;
+
+	rc = h_get_mpp(&mpp_data);
+
+	if (rc != H_SUCCESS)
+		return;
+
+	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
+	loaned_pages_target = page_loan_request + loaned_pages;
+	if (loaned_pages_target > oom_freed_pages)
+		loaned_pages_target -= oom_freed_pages;
+	else
+		loaned_pages_target = 0;
+
+	active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
+
+	if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
+		loaned_pages_target = totalram_pages + loaned_pages -
+			((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
+
+	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
+		page_loan_request, loaned_pages, loaned_pages_target,
+		oom_freed_pages, totalram_pages);
+}
+
+static struct notifier_block cmm_oom_nb = {
+	.notifier_call = cmm_oom_notify
+};
+
+/**
+ * cmm_thread - CMM task thread
+ * @dummy:	not used
+ *
+ * Return value:
+ * 	0
+ **/
+static int cmm_thread(void *dummy)
+{
+	unsigned long timeleft;
+
+	while (1) {
+		timeleft = msleep_interruptible(delay * 1000);
+
+		if (kthread_should_stop() || timeleft) {
+			loaned_pages_target = loaned_pages;
+			break;
+		}
+
+		cmm_get_mpp();
+
+		if (loaned_pages_target > loaned_pages) {
+			if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
+				loaned_pages_target = loaned_pages;
+		} else if (loaned_pages_target < loaned_pages)
+			cmm_free_pages(loaned_pages - loaned_pages_target);
+	}
+	return 0;
+}
+
+#define CMM_SHOW(name, format, args...)			\
+	static ssize_t show_##name(struct sys_device *dev, char *buf)	\
+	{							\
+		return sprintf(buf, format, ##args);		\
+	}							\
+	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
+
+static ssize_t show_oom_pages(struct sys_device *dev, char *buf)
+{
+	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
+}
+
+static ssize_t store_oom_pages(struct sys_device *dev,
+			       const char *buf, size_t count)
+{
+	unsigned long val = simple_strtoul (buf, NULL, 10);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (val != 0)
+		return -EBADMSG;
+
+	oom_freed_pages = 0;
+	return count;
+}
+
+static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
+		   show_oom_pages, store_oom_pages);
+
+static struct sysdev_attribute *cmm_attrs[] = {
+	&attr_loaned_kb,
+	&attr_loaned_target_kb,
+	&attr_oom_freed_kb,
+};
+
+static struct sysdev_class cmm_sysdev_class = {
+	.name = "cmm",
+};
+
+/**
+ * cmm_sysfs_register - Register with sysfs
+ *
+ * Return value:
+ * 	0 on success / other on failure
+ **/
+static int cmm_sysfs_register(struct sys_device *sysdev)
+{
+	int i, rc;
+
+	if ((rc = sysdev_class_register(&cmm_sysdev_class)))
+		return rc;
+
+	sysdev->id = 0;
+	sysdev->cls = &cmm_sysdev_class;
+
+	if ((rc = sysdev_register(sysdev)))
+		goto class_unregister;
+
+	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
+		if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	while (--i >= 0)
+		sysdev_remove_file(sysdev, cmm_attrs[i]);
+	sysdev_unregister(sysdev);
+class_unregister:
+	sysdev_class_unregister(&cmm_sysdev_class);
+	return rc;
+}
+
+/**
+ * cmm_unregister_sysfs - Unregister from sysfs
+ *
+ **/
+static void cmm_unregister_sysfs(struct sys_device *sysdev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
+		sysdev_remove_file(sysdev, cmm_attrs[i]);
+	sysdev_unregister(sysdev);
+	sysdev_class_unregister(&cmm_sysdev_class);
+}
+
+/**
+ * cmm_init - Module initialization
+ *
+ * Return value:
+ * 	0 on success / other on failure
+ **/
+static int cmm_init(void)
+{
+	int rc = -ENOMEM;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return -EOPNOTSUPP;
+
+	if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+		return rc;
+
+	if ((rc = cmm_sysfs_register(&cmm_sysdev)))
+		goto out_oom_notifier;
+
+	if (cmm_disabled)
+		return rc;
+
+	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+	if (IS_ERR(cmm_thread_ptr)) {
+		rc = PTR_ERR(cmm_thread_ptr);
+		goto out_unregister_sysfs;
+	}
+
+	return rc;
+
+out_unregister_sysfs:
+	cmm_unregister_sysfs(&cmm_sysdev);
+out_oom_notifier:
+	unregister_oom_notifier(&cmm_oom_nb);
+	return rc;
+}
+
+/**
+ * cmm_exit - Module exit
+ *
+ * Return value:
+ * 	nothing
+ **/
+static void cmm_exit(void)
+{
+	if (cmm_thread_ptr)
+		kthread_stop(cmm_thread_ptr);
+	unregister_oom_notifier(&cmm_oom_nb);
+	cmm_free_pages(loaned_pages);
+	cmm_unregister_sysfs(&cmm_sysdev);
+}
+
+/**
+ * cmm_set_disable - Disable/Enable CMM
+ * @val:	decimal string; 1 stops the thread and frees loaned pages, 0 starts it
+ * @kp:	not used
+ * Return value: 0 on success / -EINVAL on bad @val / kthread_run() error
+ **/
+static int cmm_set_disable(const char *val, struct kernel_param *kp)
+{
+	int disable = simple_strtoul(val, NULL, 10);
+	struct task_struct *thread;
+
+	if (disable != 0 && disable != 1)
+		return -EINVAL;
+	if (disable && !cmm_disabled) {
+		if (cmm_thread_ptr)
+			kthread_stop(cmm_thread_ptr);
+		cmm_thread_ptr = NULL;
+		cmm_free_pages(loaned_pages);
+	} else if (!disable && cmm_disabled) {
+		thread = kthread_run(cmm_thread, NULL, "cmmthread");
+		if (IS_ERR(thread))	/* keep ERR_PTR out of cmm_thread_ptr */
+			return PTR_ERR(thread);
+		cmm_thread_ptr = thread;
+	}
+	cmm_disabled = disable;
+	return 0;
+}
+
+module_param_call(disable, cmm_set_disable, param_get_uint,
+		  &cmm_disabled, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
+		 "[Default=" __stringify(CMM_DISABLE) "]");
+
+module_init(cmm_init);
+module_exit(cmm_exit);
-- 
GitLab


From ffa5abbd0c399b32fc13a1b4718d87ee7a716999 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:30:58 +1000
Subject: [PATCH 547/853] powerpc/pseries: Add CMO paging statistics

With the addition of Cooperative Memory Overcommitment (CMO) support
for IBM Power Systems, two fields have been added to the VPA to report
paging statistics.  Add support in lparcfg to report them to userspace.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/lparcfg.c | 20 ++++++++++++++++++++
 include/asm-powerpc/lppaca.h  |  5 ++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index d82e1fa5ce2..848c3e5a637 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -409,6 +409,25 @@ static int lparcfg_count_active_processors(void)
 	return count;
 }
 
+static void pseries_cmo_data(struct seq_file *m)
+{
+	int cpu;
+	unsigned long cmo_faults = 0;
+	unsigned long cmo_fault_time = 0;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		cmo_faults += lppaca[cpu].cmo_faults;
+		cmo_fault_time += lppaca[cpu].cmo_fault_time;
+	}
+
+	seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
+	seq_printf(m, "cmo_fault_time_usec=%lu\n",
+		   cmo_fault_time / tb_ticks_per_usec);
+}
+
 static int pseries_lparcfg_data(struct seq_file *m, void *v)
 {
 	int partition_potential_processors;
@@ -434,6 +453,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 		parse_system_parameter_string(m);
 		parse_ppp_data(m);
 		parse_mpp_data(m);
+		pseries_cmo_data(m);
 
 		seq_printf(m, "purr=%ld\n", get_purr());
 	} else {		/* non SPLPAR case */
diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h
index 567ed92cd91..2fe268b1033 100644
--- a/include/asm-powerpc/lppaca.h
+++ b/include/asm-powerpc/lppaca.h
@@ -125,7 +125,10 @@ struct lppaca {
 	// NOTE: This value will ALWAYS be zero for dedicated processors and
 	// will NEVER be zero for shared processors (ie, initialized to a 1).
 	volatile u32 yield_count;	// PLIC increments each dispatchx00-x03
-	u8	reserved6[124];		// Reserved                     x04-x7F
+	u32 reserved6;
+	volatile u64 cmo_faults;	// CMO page fault count         x08-x0F
+	volatile u64 cmo_fault_time;	// CMO page fault time          x10-x17
+	u8	reserved7[104];		// Reserved                     x18-x7F
 
 //=============================================================================
 // CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
-- 
GitLab


From 6490c4903d12f242bec4454301f76f6a7520e399 Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:31:16 +1000
Subject: [PATCH 548/853] powerpc/pseries: iommu enablement for CMO

To support Cooperative Memory Overcommitment (CMO), we need to check
for failure from some of the tce hcalls.

These changes for the pseries platform affect the powerpc architecture;
patches for the other affected platforms are included in this patch.

pSeries platform IOMMU code changes:
 * platform TCE functions must handle H_NOT_ENOUGH_RESOURCES errors and
   return an error.

Architecture IOMMU code changes:
 * Calls to ppc_md.tce_build need to check return values and return
   DMA_ERROR_CODE for transient errors.

Architecture changes:
 * struct machdep_calls for tce_build*_pSeriesLP functions need to change
   to indicate failure.
 * all other platforms will need updates to iommu functions to match the new
   calling semantics; they will return 0 on success.  The other platforms
   default configs have been built, but no further testing was performed.

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/iommu.c            | 28 ++++++++++++++---
 arch/powerpc/platforms/cell/iommu.c    |  3 +-
 arch/powerpc/platforms/iseries/iommu.c |  3 +-
 arch/powerpc/platforms/pasemi/iommu.c  |  3 +-
 arch/powerpc/platforms/pseries/iommu.c | 42 ++++++++++++++++++++------
 arch/powerpc/sysdev/dart_iommu.c       |  3 +-
 include/asm-powerpc/machdep.h          |  2 +-
 7 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 2385f68c175..550a19399bf 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -49,6 +49,8 @@ static int novmerge = 1;
 
 static int protect4gb = 1;
 
+static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
+
 static inline unsigned long iommu_num_pages(unsigned long vaddr,
 					    unsigned long slen)
 {
@@ -191,6 +193,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 {
 	unsigned long entry, flags;
 	dma_addr_t ret = DMA_ERROR_CODE;
+	int build_fail;
 
 	spin_lock_irqsave(&(tbl->it_lock), flags);
 
@@ -205,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 	ret = entry << IOMMU_PAGE_SHIFT;	/* Set the return dma address */
 
 	/* Put the TCEs in the HW table */
-	ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK,
-			 direction, attrs);
+	build_fail = ppc_md.tce_build(tbl, entry, npages,
+	                              (unsigned long)page & IOMMU_PAGE_MASK,
+	                              direction, attrs);
+
+	/* ppc_md.tce_build() only returns non-zero for transient errors.
+	 * Clean up the table bitmap in this case and return
+	 * DMA_ERROR_CODE. For all other errors the functionality is
+	 * not altered.
+	 */
+	if (unlikely(build_fail)) {
+		__iommu_free(tbl, ret, npages);
 
+		spin_unlock_irqrestore(&(tbl->it_lock), flags);
+		return DMA_ERROR_CODE;
+	}
 
 	/* Flush/invalidate TLB caches if necessary */
 	if (ppc_md.tce_flush)
@@ -276,7 +291,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 	dma_addr_t dma_next = 0, dma_addr;
 	unsigned long flags;
 	struct scatterlist *s, *outs, *segstart;
-	int outcount, incount, i;
+	int outcount, incount, i, build_fail = 0;
 	unsigned int align;
 	unsigned long handle;
 	unsigned int max_seg_size;
@@ -337,8 +352,11 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 			    npages, entry, dma_addr);
 
 		/* Insert into HW table */
-		ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK,
-				 direction, attrs);
+		build_fail = ppc_md.tce_build(tbl, entry, npages,
+		                              vaddr & IOMMU_PAGE_MASK,
+		                              direction, attrs);
+		if(unlikely(build_fail))
+			goto failure;
 
 		/* If we are in an open segment, try merging */
 		if (segstart != s) {
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 031124a8e37..e06420af5fe 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
 	}
 }
 
-static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
 		unsigned long uaddr, enum dma_data_direction direction,
 		struct dma_attrs *attrs)
 {
@@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
 
 	pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
 		 index, npages, direction, base_pte);
+	return 0;
 }
 
 static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index bc818e4e203..bb464d1211b 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -41,7 +41,7 @@
 #include <asm/iseries/hv_call_event.h>
 #include <asm/iseries/iommu.h>
 
-static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
+static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
 		unsigned long uaddr, enum dma_data_direction direction,
 		struct dma_attrs *attrs)
 {
@@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
 		index++;
 		uaddr += TCE_PAGE_SIZE;
 	}
+	return 0;
 }
 
 static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 70541b7a501..a0ff03a3d8d 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
 static struct iommu_table iommu_table_iobmap;
 static int iommu_table_iobmap_inited;
 
-static void iobmap_build(struct iommu_table *tbl, long index,
+static int iobmap_build(struct iommu_table *tbl, long index,
 			 long npages, unsigned long uaddr,
 			 enum dma_data_direction direction,
 			 struct dma_attrs *attrs)
@@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index,
 		uaddr += IOBMAP_PAGE_SIZE;
 		bus_addr += IOBMAP_PAGE_SIZE;
 	}
+	return 0;
 }
 
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 5377dd4b849..a8c446697f9 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -48,7 +48,7 @@
 #include "plpar_wrappers.h"
 
 
-static void tce_build_pSeries(struct iommu_table *tbl, long index,
+static int tce_build_pSeries(struct iommu_table *tbl, long index,
 			      long npages, unsigned long uaddr,
 			      enum dma_data_direction direction,
 			      struct dma_attrs *attrs)
@@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index,
 		uaddr += TCE_PAGE_SIZE;
 		tcep++;
 	}
+	return 0;
 }
 
 
@@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
 	return *tcep;
 }
 
-static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
+
+static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				long npages, unsigned long uaddr,
 				enum dma_data_direction direction,
 				struct dma_attrs *attrs)
 {
-	u64 rc;
+	u64 rc = 0;
 	u64 proto_tce, tce;
 	u64 rpn;
+	int ret = 0;
+	long tcenum_start = tcenum, npages_start = npages;
 
 	rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
 	proto_tce = TCE_PCI_READ;
@@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
 		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
 
+		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+			ret = (int)rc;
+			tce_free_pSeriesLP(tbl, tcenum_start,
+			                   (npages_start - (npages + 1)));
+			break;
+		}
+
 		if (rc && printk_ratelimit()) {
 			printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
 			printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
@@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		tcenum++;
 		rpn++;
 	}
+	return ret;
 }
 
 static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
 
-static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				     long npages, unsigned long uaddr,
 				     enum dma_data_direction direction,
 				     struct dma_attrs *attrs)
 {
-	u64 rc;
+	u64 rc = 0;
 	u64 proto_tce;
 	u64 *tcep;
 	u64 rpn;
 	long l, limit;
+	long tcenum_start = tcenum, npages_start = npages;
+	int ret = 0;
 
 	if (npages == 1) {
-		tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
-				    direction, attrs);
-		return;
+		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+		                           direction, attrs);
 	}
 
 	tcep = __get_cpu_var(tce_page);
@@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
 		/* If allocation fails, fall back to the loop implementation */
 		if (!tcep) {
-			tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+			return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
 					    direction, attrs);
-			return;
 		}
 		__get_cpu_var(tce_page) = tcep;
 	}
@@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		tcenum += limit;
 	} while (npages > 0 && !rc);
 
+	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+		ret = (int)rc;
+		tce_freemulti_pSeriesLP(tbl, tcenum_start,
+		                        (npages_start - (npages + limit)));
+		return ret;
+	}
+
 	if (rc && printk_ratelimit()) {
 		printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
 		printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
@@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		printk("\ttce[0] val = 0x%lx\n", tcep[0]);
 		show_stack(current, (unsigned long *)__get_SP());
 	}
+	return ret;
 }
 
 static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index de8c8b542cf..89639ecbf38 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -147,7 +147,7 @@ static void dart_flush(struct iommu_table *tbl)
 	}
 }
 
-static void dart_build(struct iommu_table *tbl, long index,
+static int dart_build(struct iommu_table *tbl, long index,
 		       long npages, unsigned long uaddr,
 		       enum dma_data_direction direction,
 		       struct dma_attrs *attrs)
@@ -184,6 +184,7 @@ static void dart_build(struct iommu_table *tbl, long index,
 	} else {
 		dart_dirty = 1;
 	}
+	return 0;
 }
 
 
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index 1233d735fd2..893aafd87fd 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -76,7 +76,7 @@ struct machdep_calls {
 	 * destroyed as well */
 	void		(*hpte_clear_all)(void);
 
-	void		(*tce_build)(struct iommu_table * tbl,
+	int		(*tce_build)(struct iommu_table *tbl,
 				     long index,
 				     long npages,
 				     unsigned long uaddr,
-- 
GitLab


From a90ab95a9576d35de0d05f9f4fc435edcccafaa9 Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:31:33 +1000
Subject: [PATCH 549/853] powerpc/pseries: vio bus support for CMO

This is a large patch but the normal code path is not affected.  For
non-pSeries platforms the code is ifdef'ed out and for non-CMO enabled
pSeries systems this does not affect the normal code path.  Devices that
do not perform DMA operations do not need modification with this patch.
The function get_desired_dma was renamed from get_io_entitlement for
clarity.

Overview

Cooperative Memory Overcommitment (CMO) allows for a set of OS partitions
to be run with less RAM than the aggregate needs of the group of
partitions.  The firmware will balance memory between the partitions
and page in/out memory as needed.  Based on the number and type of IO
adapters present, each partition is allocated an amount of memory for
DMA operations and this allocation will be guaranteed to the partition;
this is referred to as the partition's 'entitlement'.

Partitions running in a CMO environment can only have virtual IO devices
present.  The VIO bus layer will manage the IO entitlement for the system.
Accounting, at a system and per-device level, is tracked in the VIO bus
code and exposed via sysfs.  A set of dma_ops functions are added to
the bus to allow for this accounting.

Bus initialization

At initialization, the bus will calculate the minimum needs of the system
based on providing each device present with a standard minimum entitlement
along with a spare allocation for the bus to handle hotplug events.
If the minimum needs can not be met the system boot will be halted.

Device changes

The significant changes for devices while running under CMO are that the
devices must specify how much dedicated IO entitlement they desire and
must also handle DMA mapping errors that can occur due to constrained
IO memory.  The virtual IO drivers are modified to silence errors when
DMA mappings fail for CMO and handle these failures gracefully.

Each device will be guaranteed a minimum entitlement that can always
be mapped.  Devices will specify how much entitlement they desire and
the VIO bus will attempt to provide for this.  Devices can change their
desired entitlement level at any point in time to address particular needs
(via vio_cmo_set_dev_desired()), not just at device probe time.

VIO bus changes

The system will have a particular entitlement level available from which
it can provide memory to the devices.  The bus defines two pools of memory
within this entitlement, the reserve and excess pools.  Each device is
provided with its own entitlement no less than a system defined minimum
entitlement and no greater than what the device has specified as its
desired entitlement.  The entitlement provided to devices comes from the
reserve pool.  The reserve pool can also contain a spare allocation as
large as the system defined minimum entitlement which is used for device
hotplug events.  Any entitlement not needed to fulfill the needs of the
reserve pool is placed in the excess pool.  Each device is guaranteed
that it can map up to its entitled level; additional mappings are possible
as long as there is unmapped memory in the excess pool.

Bus probe

As the system starts, each device is given an entitlement equal only
to the system defined minimum entitlement.  The reserve pool is equal
to the sum of these entitlements, plus a spare allocation.  The VIO bus
also tracks the aggregate desired entitlement of all the devices.  If the
system desired entitlement is greater than the size of the reserve pool,
when devices unmap IO memory it will be reserved and a balance operation
will be scheduled for some time in the future.

Entitlement balancing

The balance function tries to fairly distribute entitlement between the
devices in the system with the goal of providing each device with its
desired amount of entitlement.  Devices using more than what would be
ideal will have their entitled set-point adjusted; this will effectively
set a goal for lower IO memory usage as future mappings can fail and
deallocations will trigger a balance operation to distribute the newly
unmapped memory.  A fair distribution of entitlement can take several
balance operations to achieve.  Entitlement changes and device DLPAR
events will alter the state of CMO and will trigger balance operations.

Hotplug events

The VIO bus allows for changes in system entitlement at run-time via
'vio_cmo_entitlement_update()'.  When devices are added the hotplug
device event will be preceded by a system entitlement increase and this
is reversed when devices are removed.

The following changes are made to the VIO bus layer for CMO:
 * add IO memory accounting per device structure.
 * add IO memory entitlement query function to driver structure.
 * during vio bus probe, if CMO is enabled, check that driver has
   memory entitlement query function defined.  Fail if function not defined.
 * fail to register driver if io entitlement function not defined.
 * create set of dma_ops at vio level for CMO that will track allocations
   and return DMA failures once entitlement is reached.  Entitlement will
   be limited by overall system entitlement.  Devices will have a reserved
   quantity of memory that is guaranteed, the rest can be used as available.
 * expose entitlement, current allocation, desired allocation, and the
   allocation error counter for devices to the user through sysfs
 * provide mechanism for changing a device's desired entitlement at run time
   for devices as an exported function and sysfs tunable
 * track any DMA failures for entitled IO memory for each vio device.
 * check entitlement against available system entitlement on device add
 * track entitlement metrics (high water mark, current usage)
 * provide function to reset high water mark
 * provide minimum and desired entitlement numbers at a bus level
 * provide drivers with a minimum guaranteed entitlement
 * balance available entitlement between devices to satisfy their needs
 * handle system entitlement changes and device hotplug

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/vio.c | 1033 ++++++++++++++++++++++++++++++++++++-
 include/asm-powerpc/vio.h |   27 +-
 2 files changed, 1052 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index b77f8af7ddd..ade8aeaa2e7 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1,11 +1,12 @@
 /*
  * IBM PowerPC Virtual I/O Infrastructure Support.
  *
- *    Copyright (c) 2003-2005 IBM Corp.
+ *    Copyright (c) 2003,2008 IBM Corp.
  *     Dave Engebretsen engebret@us.ibm.com
  *     Santiago Leon santil@us.ibm.com
  *     Hollis Blanchard <hollisb@us.ibm.com>
  *     Stephen Rothwell
+ *     Robert Jennings <rcjenn@us.ibm.com>
  *
  *      This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device  = { /* fake "parent" device */
 	.dev.bus = &vio_bus_type,
 };
 
+#ifdef CONFIG_PPC_SMLPAR
+/**
+ * vio_cmo_pool - A pool of IO memory for CMO use
+ *
+ * @size: The size of the pool in bytes
+ * @free: The amount of free memory in the pool
+ */
+struct vio_cmo_pool {
+	size_t size;
+	size_t free;
+};
+
+/* How many ms to delay queued balance work */
+#define VIO_CMO_BALANCE_DELAY 100
+
+/* Portion out IO memory to CMO devices by this chunk size */
+#define VIO_CMO_BALANCE_CHUNK 131072
+
+/**
+ * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
+ *
+ * @viodev: struct vio_dev pointer
+ * @list: pointer to other devices on bus that are being tracked
+ */
+struct vio_cmo_dev_entry {
+	struct vio_dev *viodev;
+	struct list_head list;
+};
+
+/**
+ * vio_cmo - VIO bus accounting structure for CMO entitlement
+ *
+ * @lock: spinlock for entire structure
+ * @balance_q: work queue for balancing system entitlement
+ * @device_list: list of CMO-enabled devices requiring entitlement
+ * @entitled: total system entitlement in bytes
+ * @reserve: pool of memory from which devices reserve entitlement, incl. spare
+ * @excess: pool of excess entitlement not needed for device reserves or spare
+ * @spare: IO memory for device hotplug functionality
+ * @min: minimum necessary for system operation
+ * @desired: desired memory for system operation
+ * @curr: bytes currently allocated
+ * @high: high water mark for IO data usage
+ */
+struct vio_cmo {
+	spinlock_t lock;
+	struct delayed_work balance_q;
+	struct list_head device_list;
+	size_t entitled;
+	struct vio_cmo_pool reserve;
+	struct vio_cmo_pool excess;
+	size_t spare;
+	size_t min;
+	size_t desired;
+	size_t curr;
+	size_t high;
+} vio_cmo;
+
+/**
+ * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
+ */
+static int vio_cmo_num_OF_devs(void)
+{
+	struct device_node *node_vroot;
+	int count = 0;
+
+	/*
+	 * Count the number of vdevice entries with an
+	 * ibm,my-dma-window OF property
+	 */
+	node_vroot = of_find_node_by_name(NULL, "vdevice");
+	if (node_vroot) {
+		struct device_node *of_node;
+		struct property *prop;
+
+		for_each_child_of_node(node_vroot, of_node) {
+			prop = of_find_property(of_node, "ibm,my-dma-window",
+			                       NULL);
+			if (prop)
+				count++;
+		}
+	}
+	of_node_put(node_vroot);
+	return count;
+}
+
+/**
+ * vio_cmo_alloc - allocate IO memory for CMO-enable devices
+ *
+ * @viodev: VIO device requesting IO memory
+ * @size: size of allocation requested
+ *
+ * Allocations come from memory reserved for the devices and any excess
+ * IO memory available to all devices.  The spare pool used to service
+ * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
+ * made available.
+ *
+ * Return codes:
+ *  0 for successful allocation and -ENOMEM for a failure
+ */
+static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
+{
+	unsigned long flags;
+	size_t reserve_free = 0;
+	size_t excess_free = 0;
+	int ret = -ENOMEM;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+
+	/* Determine the amount of free entitlement available in reserve */
+	if (viodev->cmo.entitled > viodev->cmo.allocated)
+		reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
+
+	/* If spare is not fulfilled, the excess pool can not be used. */
+	if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
+		excess_free = vio_cmo.excess.free;
+
+	/* The request can be satisfied */
+	if ((reserve_free + excess_free) >= size) {
+		vio_cmo.curr += size;
+		if (vio_cmo.curr > vio_cmo.high)
+			vio_cmo.high = vio_cmo.curr;
+		viodev->cmo.allocated += size;
+		size -= min(reserve_free, size);
+		vio_cmo.excess.free -= size;
+		ret = 0;
+	}
+
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+	return ret;
+}
+
+/**
+ * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
+ * @viodev: VIO device freeing IO memory
+ * @size: size of deallocation
+ *
+ * IO memory is freed by the device back to the correct memory pools.
+ * The spare pool is replenished first from either memory pool, then
+ * the reserve pool is used to reduce device entitlement, the excess
+ * pool is used to increase the reserve pool toward the desired entitlement
+ * target, and then the remaining memory is returned to the pools.
+ *
+ */
+static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
+{
+	unsigned long flags;
+	size_t spare_needed = 0;
+	size_t excess_freed = 0;
+	size_t reserve_freed = size;
+	size_t tmp;
+	int balance = 0;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+	vio_cmo.curr -= size;
+
+	/* Amount of memory freed from the excess pool */
+	if (viodev->cmo.allocated > viodev->cmo.entitled) {
+		excess_freed = min(reserve_freed, (viodev->cmo.allocated -
+		                                   viodev->cmo.entitled));
+		reserve_freed -= excess_freed;
+	}
+
+	/* Remove allocation from device */
+	viodev->cmo.allocated -= (reserve_freed + excess_freed);
+
+	/* Spare is a subset of the reserve pool, replenish it first. */
+	spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
+
+	/*
+	 * Replenish the spare in the reserve pool from the excess pool.
+	 * This moves entitlement into the reserve pool.
+	 */
+	if (spare_needed && excess_freed) {
+		tmp = min(excess_freed, spare_needed);
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+		vio_cmo.spare += tmp;
+		excess_freed -= tmp;
+		spare_needed -= tmp;
+		balance = 1;
+	}
+
+	/*
+	 * Replenish the spare in the reserve pool from the reserve pool.
+	 * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
+	 * if needed, and gives it to the spare pool. The amount of used
+	 * memory in this pool does not change.
+	 */
+	if (spare_needed && reserve_freed) {
+		tmp = min(spare_needed, min(reserve_freed,
+		                            (viodev->cmo.entitled -
+		                             VIO_CMO_MIN_ENT)));
+
+		vio_cmo.spare += tmp;
+		viodev->cmo.entitled -= tmp;
+		reserve_freed -= tmp;
+		spare_needed -= tmp;
+		balance = 1;
+	}
+
+	/*
+	 * Increase the reserve pool until the desired allocation is met.
+	 * Move an allocation freed from the excess pool into the reserve
+	 * pool and schedule a balance operation.
+	 */
+	if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
+		tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
+
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+		excess_freed -= tmp;
+		balance = 1;
+	}
+
+	/* Return memory from the excess pool to that pool */
+	if (excess_freed)
+		vio_cmo.excess.free += excess_freed;
+
+	if (balance)
+		schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_entitlement_update - Manage system entitlement changes
+ *
+ * @new_entitlement: new system entitlement to attempt to accommodate
+ *
+ * Increases in entitlement will be used to fulfill the spare entitlement
+ * and the rest is given to the excess pool.  Decreases, if they are
+ * possible, come from the excess pool and from unused device entitlement
+ *
+ * Returns: 0 on success, -ENOMEM when change can not be made
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement)
+{
+	struct vio_dev *viodev;
+	struct vio_cmo_dev_entry *dev_ent;
+	unsigned long flags;
+	size_t avail, delta, tmp;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+
+	/* Entitlement increases */
+	if (new_entitlement > vio_cmo.entitled) {
+		delta = new_entitlement - vio_cmo.entitled;
+
+		/* Fulfill spare allocation */
+		if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
+			tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
+			vio_cmo.spare += tmp;
+			vio_cmo.reserve.size += tmp;
+			delta -= tmp;
+		}
+
+		/* Remaining new allocation goes to the excess pool */
+		vio_cmo.entitled += delta;
+		vio_cmo.excess.size += delta;
+		vio_cmo.excess.free += delta;
+
+		goto out;
+	}
+
+	/* Entitlement decreases */
+	delta = vio_cmo.entitled - new_entitlement;
+	avail = vio_cmo.excess.free;
+
+	/*
+	 * Need to check how much unused entitlement each device can
+	 * sacrifice to fulfill entitlement change.
+	 */
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+		if (avail >= delta)
+			break;
+
+		viodev = dev_ent->viodev;
+		if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+		    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+				avail += viodev->cmo.entitled -
+				         max_t(size_t, viodev->cmo.allocated,
+				               VIO_CMO_MIN_ENT);
+	}
+
+	if (delta <= avail) {
+		vio_cmo.entitled -= delta;
+
+		/* Take entitlement from the excess pool first */
+		tmp = min(vio_cmo.excess.free, delta);
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.excess.free -= tmp;
+		delta -= tmp;
+
+		/*
+		 * Remove all but VIO_CMO_MIN_ENT bytes from devices
+		 * until entitlement change is served
+		 */
+		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+			if (!delta)
+				break;
+
+			viodev = dev_ent->viodev;
+			tmp = 0;
+			if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+			    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+				tmp = viodev->cmo.entitled -
+				      max_t(size_t, viodev->cmo.allocated,
+				            VIO_CMO_MIN_ENT);
+			viodev->cmo.entitled -= min(tmp, delta);
+			delta -= min(tmp, delta);
+		}
+	} else {
+		spin_unlock_irqrestore(&vio_cmo.lock, flags);
+		return -ENOMEM;
+	}
+
+out:
+	schedule_delayed_work(&vio_cmo.balance_q, 0);
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+	return 0;
+}
+
+/**
+ * vio_cmo_balance - Balance entitlement among devices
+ *
+ * @work: work queue structure for this operation
+ *
+ * Any system entitlement above the minimum needed for devices, or
+ * already allocated to devices, can be distributed to the devices.
+ * The list of devices is iterated through to recalculate the desired
+ * entitlement level and to determine how much entitlement above the
+ * minimum entitlement is allocated to devices.
+ *
+ * Small chunks of the available entitlement are given to devices until
+ * their requirements are fulfilled or there is no entitlement left to give.
+ * Upon completion sizes of the reserve and excess pools are calculated.
+ *
+ * The system minimum entitlement level is also recalculated here.
+ * Entitlement will be reserved for devices even after vio_bus_remove to
+ * accommodate reloading the driver.  The OF tree is walked to count the
+ * number of devices present and this will remove entitlement for devices
+ * that have actually left the system after having vio_bus_remove called.
+ */
+static void vio_cmo_balance(struct work_struct *work)
+{
+	struct vio_cmo *cmo;
+	struct vio_dev *viodev;
+	struct vio_cmo_dev_entry *dev_ent;
+	unsigned long flags;
+	size_t avail = 0, level, chunk, need;
+	int devcount = 0, fulfilled;
+
+	cmo = container_of(work, struct vio_cmo, balance_q.work);
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+
+	/* Calculate minimum entitlement and fulfill spare */
+	cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
+	BUG_ON(cmo->min > cmo->entitled);
+	cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
+	cmo->min += cmo->spare;
+	cmo->desired = cmo->min;
+
+	/*
+	 * Determine how much entitlement is available and reset device
+	 * entitlements
+	 */
+	avail = cmo->entitled - cmo->spare;
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+		viodev = dev_ent->viodev;
+		devcount++;
+		viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+		cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+		avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
+	}
+
+	/*
+	 * Having provided each device with the minimum entitlement, loop
+	 * over the devices portioning out the remaining entitlement
+	 * until there is nothing left.
+	 */
+	level = VIO_CMO_MIN_ENT;
+	while (avail) {
+		fulfilled = 0;
+		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+			viodev = dev_ent->viodev;
+
+			if (viodev->cmo.desired <= level) {
+				fulfilled++;
+				continue;
+			}
+
+			/*
+			 * Give the device up to VIO_CMO_BALANCE_CHUNK
+			 * bytes of entitlement, but do not exceed the
+			 * desired level of entitlement for the device.
+			 */
+			chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
+			chunk = min(chunk, (viodev->cmo.desired -
+			                    viodev->cmo.entitled));
+			viodev->cmo.entitled += chunk;
+
+			/*
+			 * If the memory for this entitlement increase was
+			 * already allocated to the device it does not come
+			 * from the available pool being portioned out.
+			 */
+			need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
+			       max(viodev->cmo.allocated, level);
+			avail -= need;
+
+		}
+		if (fulfilled == devcount)
+			break;
+		level += VIO_CMO_BALANCE_CHUNK;
+	}
+
+	/* Calculate new reserve and excess pool sizes */
+	cmo->reserve.size = cmo->min;
+	cmo->excess.free = 0;
+	cmo->excess.size = 0;
+	need = 0;
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+		viodev = dev_ent->viodev;
+		/* Calculated reserve size above the minimum entitlement */
+		if (viodev->cmo.entitled)
+			cmo->reserve.size += (viodev->cmo.entitled -
+			                      VIO_CMO_MIN_ENT);
+		/* Calculated used excess entitlement */
+		if (viodev->cmo.allocated > viodev->cmo.entitled)
+			need += viodev->cmo.allocated - viodev->cmo.entitled;
+	}
+	cmo->excess.size = cmo->entitled - cmo->reserve.size;
+	cmo->excess.free = cmo->excess.size - need;
+
+	cancel_delayed_work(container_of(work, struct delayed_work, work));
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
+                                          dma_addr_t *dma_handle, gfp_t flag)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	void *ret;
+
+	if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
+		atomic_inc(&viodev->cmo.allocs_failed);
+		return NULL;
+	}
+
+	ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag);
+	if (unlikely(ret == NULL)) {
+		vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+		atomic_inc(&viodev->cmo.allocs_failed);
+	}
+
+	return ret;
+}
+
+static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
+                                        void *vaddr, dma_addr_t dma_handle)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+
+	dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);
+
+	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+}
+
+static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
+                                           size_t size,
+                                           enum dma_data_direction direction,
+                                           struct dma_attrs *attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	dma_addr_t ret = DMA_ERROR_CODE;
+
+	if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
+		atomic_inc(&viodev->cmo.allocs_failed);
+		return ret;
+	}
+
+	ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
+	if (unlikely(dma_mapping_error(ret))) {
+		vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+		atomic_inc(&viodev->cmo.allocs_failed);
+	}
+
+	return ret;
+}
+
+static void vio_dma_iommu_unmap_single(struct device *dev,
+		dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+
+	dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
+
+	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+}
+
+static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+                                int nelems, enum dma_data_direction direction,
+                                struct dma_attrs *attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct scatterlist *sgl;
+	int ret, count = 0;
+	size_t alloc_size = 0;
+
+	for (sgl = sglist; count < nelems; count++, sgl++)
+		alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
+	if (vio_cmo_alloc(viodev, alloc_size)) {
+		atomic_inc(&viodev->cmo.allocs_failed);
+		return 0;
+	}
+
+	ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
+	if (unlikely(!ret)) {
+		/* Nothing mapped: give back the whole reservation only once */
+		vio_cmo_dealloc(viodev, alloc_size);
+		atomic_inc(&viodev->cmo.allocs_failed);
+		return ret;
+	}
+
+	for (sgl = sglist, count = 0; count < ret; count++, sgl++)
+		alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+	if (alloc_size)
+		vio_cmo_dealloc(viodev, alloc_size);
+
+	return ret;
+}
+
+static void vio_dma_iommu_unmap_sg(struct device *dev,
+		struct scatterlist *sglist, int nelems,
+		enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct scatterlist *sgl;
+	size_t alloc_size = 0;
+	int count = 0;
+
+	for (sgl = sglist; count < nelems; count++, sgl++)
+		alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+
+	dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
+
+	vio_cmo_dealloc(viodev, alloc_size);
+}
+
+struct dma_mapping_ops vio_dma_mapping_ops = {
+	.alloc_coherent = vio_dma_iommu_alloc_coherent,
+	.free_coherent  = vio_dma_iommu_free_coherent,
+	.map_single     = vio_dma_iommu_map_single,
+	.unmap_single   = vio_dma_iommu_unmap_single,
+	.map_sg         = vio_dma_iommu_map_sg,
+	.unmap_sg       = vio_dma_iommu_unmap_sg,
+};
+
+/**
+ * vio_cmo_set_dev_desired - Set desired entitlement for a device
+ *
+ * @viodev: struct vio_dev for device to alter
+ * @desired: new desired entitlement level in bytes
+ *
+ * For use by devices to request a change to their entitlement at runtime or
+ * through sysfs.  The desired entitlement level is changed and a balancing
+ * of system resources is scheduled to run in the future.
+ */
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
+{
+	unsigned long flags;
+	struct vio_cmo_dev_entry *dev_ent;
+	int found = 0;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+	desired = max_t(size_t, desired, VIO_CMO_MIN_ENT);
+
+	/*
+	 * Only devices on the device list have a driver bound and can
+	 * receive entitlement; for any other device make no changes, but
+	 * still leave through 'out' so that the lock is released.
+	 */
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+		if (viodev == dev_ent->viodev) {
+			found = 1;
+			break;
+		}
+	if (!found)
+		goto out;
+
+	/* Increase/decrease in desired device entitlement */
+	if (desired >= viodev->cmo.desired) {
+		/* Just bump the bus and device values prior to a balance*/
+		vio_cmo.desired += desired - viodev->cmo.desired;
+		viodev->cmo.desired = desired;
+	} else {
+		/* Decrease bus and device values for desired entitlement */
+		vio_cmo.desired -= viodev->cmo.desired - desired;
+		viodev->cmo.desired = desired;
+		/*
+		 * If less entitlement is desired than current entitlement, move
+		 * any reserve memory in the change region to the excess pool.
+		 */
+		if (viodev->cmo.entitled > desired) {
+			vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
+			vio_cmo.excess.size += viodev->cmo.entitled - desired;
+			/*
+			 * If entitlement moving from the reserve pool to the
+			 * excess pool is currently unused, add to the excess
+			 * free counter.
+			 */
+			if (viodev->cmo.allocated < viodev->cmo.entitled)
+				vio_cmo.excess.free += viodev->cmo.entitled -
+				                       max(viodev->cmo.allocated, desired);
+			viodev->cmo.entitled = desired;
+		}
+	}
+	schedule_delayed_work(&vio_cmo.balance_q, 0);
+out:
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_bus_probe - Handle CMO specific bus probe activities
+ *
+ * @viodev: Pointer to struct vio_dev for device
+ *
+ * Determine the devices IO memory entitlement needs, attempting
+ * to satisfy the system minimum entitlement at first and leaving
+ * the rest to be handled by a later balance operation.
+ *
+ * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
+ *          -ENOMEM when entitlement is not available for device or
+ *          device entry.
+ */
+static int vio_cmo_bus_probe(struct vio_dev *viodev)
+{
+	struct vio_cmo_dev_entry *dev_ent = NULL;
+	struct device *dev = &viodev->dev;
+	struct vio_driver *viodrv = to_vio_driver(dev->driver);
+	unsigned long flags;
+	size_t size;
+
+	/*
+	 * Check to see that device has a DMA window and configure
+	 * entitlement for the device.
+	 */
+	if (of_get_property(viodev->dev.archdata.of_node,
+	                    "ibm,my-dma-window", NULL)) {
+		/* Check that the driver is CMO enabled and get desired DMA */
+		if (!viodrv->get_desired_dma) {
+			dev_err(dev, "%s: device driver does not support CMO\n",
+			        __func__);
+			return -EINVAL;
+		}
+
+		viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
+		if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
+			viodev->cmo.desired = VIO_CMO_MIN_ENT;
+		size = VIO_CMO_MIN_ENT;
+
+		dev_ent = kmalloc(sizeof(*dev_ent), GFP_KERNEL);
+		if (!dev_ent)
+			return -ENOMEM;
+
+		dev_ent->viodev = viodev;
+		spin_lock_irqsave(&vio_cmo.lock, flags);
+		list_add(&dev_ent->list, &vio_cmo.device_list);
+	} else {
+		viodev->cmo.desired = 0;
+		size = 0;
+		spin_lock_irqsave(&vio_cmo.lock, flags);
+	}
+
+	/*
+	 * If the needs for vio_cmo.min have not changed since they
+	 * were last set, the number of devices in the OF tree has
+	 * been constant and the IO memory for this is already in
+	 * the reserve pool.
+	 */
+	if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
+	                    VIO_CMO_MIN_ENT)) {
+		/* Updated desired entitlement if device requires it */
+		if (size)
+			vio_cmo.desired += (viodev->cmo.desired -
+		                        VIO_CMO_MIN_ENT);
+	} else {
+		size_t tmp;
+
+		tmp = vio_cmo.spare + vio_cmo.excess.free;
+		if (tmp < size) {
+			dev_err(dev, "%s: insufficient free entitlement to "
+			        "add device. Need %lu, have %lu\n",
+			        __func__, size, tmp);
+			/* size != 0, so dev_ent was queued above: unqueue it */
+			list_del(&dev_ent->list);
+			kfree(dev_ent);
+			spin_unlock_irqrestore(&vio_cmo.lock, flags);
+			return -ENOMEM;
+		}
+
+		/* Use excess pool first to fulfill request */
+		tmp = min(size, vio_cmo.excess.free);
+		vio_cmo.excess.free -= tmp;
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+
+		/* Use spare if excess pool was insufficient */
+		vio_cmo.spare -= size - tmp;
+
+		/* Update bus accounting */
+		vio_cmo.min += size;
+		vio_cmo.desired += viodev->cmo.desired;
+	}
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+	return 0;
+}
+
+/**
+ * vio_cmo_bus_remove - Handle CMO specific bus removal activities
+ *
+ * @viodev - Pointer to struct vio_dev for device
+ *
+ * Remove the device from the cmo device list.  The minimum entitlement
+ * will be reserved for the device as long as it is in the system.  The
+ * rest of the entitlement the device had been allocated will be returned
+ * to the system.
+ */
+static void vio_cmo_bus_remove(struct vio_dev *viodev)
+{
+	struct vio_cmo_dev_entry *dev_ent;
+	unsigned long flags;
+	size_t tmp;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+	if (viodev->cmo.allocated) {
+		dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
+		        "allocated after remove operation.\n",
+		        __func__, viodev->cmo.allocated);
+		BUG();
+	}
+
+	/*
+	 * Remove the device from the device list being maintained for
+	 * CMO enabled devices.
+	 */
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+		if (viodev == dev_ent->viodev) {
+			list_del(&dev_ent->list);
+			kfree(dev_ent);
+			break;
+		}
+
+	/*
+	 * Devices may not require any entitlement and they do not need
+	 * to be processed.  Otherwise, return the device's entitlement
+	 * back to the pools.
+	 */
+	if (viodev->cmo.entitled) {
+		/*
+		 * This device has not yet left the OF tree, it's
+		 * minimum entitlement remains in vio_cmo.min and
+		 * vio_cmo.desired
+		 */
+		vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+
+		/*
+		 * Save min allocation for device in reserve as long
+		 * as it exists in OF tree as determined by later
+		 * balance operation
+		 */
+		viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
+
+		/* Replenish spare from freed reserve pool */
+		if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
+			tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
+			                                 vio_cmo.spare));
+			vio_cmo.spare += tmp;
+			viodev->cmo.entitled -= tmp;
+		}
+
+		/* Remaining reserve goes to excess pool */
+		vio_cmo.excess.size += viodev->cmo.entitled;
+		vio_cmo.excess.free += viodev->cmo.entitled;
+		vio_cmo.reserve.size -= viodev->cmo.entitled;
+
+		/*
+		 * Until the device is removed it will keep a
+		 * minimum entitlement; this will guarantee that
+		 * a module unload/load will result in a success.
+		 */
+		viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+		viodev->cmo.desired = VIO_CMO_MIN_ENT;
+		atomic_set(&viodev->cmo.allocs_failed, 0);
+	}
+
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
+{
+	vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
+	viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
+}
+
+/**
+ * vio_cmo_bus_init - CMO entitlement initialization at bus init time
+ *
+ * Set up the reserve and excess entitlement pools based on available
+ * system entitlement and the number of devices in the OF tree that
+ * require entitlement in the reserve pool.
+ */
+static void vio_cmo_bus_init(void)
+{
+	struct hvcall_mpp_data mpp_data;
+	int err;
+
+	memset(&vio_cmo, 0, sizeof(struct vio_cmo));
+	spin_lock_init(&vio_cmo.lock);
+	INIT_LIST_HEAD(&vio_cmo.device_list);
+	INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
+
+	/* Get current system entitlement */
+	err = h_get_mpp(&mpp_data);
+
+	/*
+	 * On failure, continue with entitlement set to 0, will panic()
+	 * later when spare is reserved.
+	 */
+	if (err != H_SUCCESS) {
+		printk(KERN_ERR "%s: unable to determine system IO "\
+		       "entitlement. (%d)\n", __func__, err);
+		vio_cmo.entitled = 0;
+	} else {
+		vio_cmo.entitled = mpp_data.entitled_mem;
+	}
+
+	/* Set reservation and check against entitlement */
+	vio_cmo.spare = VIO_CMO_MIN_ENT;
+	vio_cmo.reserve.size = vio_cmo.spare;
+	vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
+	                         VIO_CMO_MIN_ENT);
+	if (vio_cmo.reserve.size > vio_cmo.entitled) {
+		printk(KERN_ERR "%s: insufficient system entitlement\n",
+		       __func__);
+		panic("%s: Insufficient system entitlement", __func__);
+	}
+
+	/* Set the remaining accounting variables */
+	vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
+	vio_cmo.excess.free = vio_cmo.excess.size;
+	vio_cmo.min = vio_cmo.reserve.size;
+	vio_cmo.desired = vio_cmo.reserve.size;
+}
+
+/* sysfs device functions and data structures for CMO */
+
+#define viodev_cmo_rd_attr(name)                                        \
+static ssize_t viodev_cmo_##name##_show(struct device *dev,             \
+                                        struct device_attribute *attr,  \
+                                         char *buf)                     \
+{                                                                       \
+	return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name);        \
+}
+
+static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
+}
+
+static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	atomic_set(&viodev->cmo.allocs_failed, 0);
+	return count;
+}
+
+static ssize_t viodev_cmo_desired_set(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	size_t new_desired;
+	int ret;
+
+	ret = strict_strtoul(buf, 10, &new_desired);
+	if (ret)
+		return ret;
+
+	vio_cmo_set_dev_desired(viodev, new_desired);
+	return count;
+}
+
+viodev_cmo_rd_attr(desired);
+viodev_cmo_rd_attr(entitled);
+viodev_cmo_rd_attr(allocated);
+
+static ssize_t name_show(struct device *, struct device_attribute *, char *);
+static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
+static struct device_attribute vio_cmo_dev_attrs[] = {
+	__ATTR_RO(name),
+	__ATTR_RO(devspec),
+	__ATTR(cmo_desired,       S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+	       viodev_cmo_desired_show, viodev_cmo_desired_set),
+	__ATTR(cmo_entitled,      S_IRUGO, viodev_cmo_entitled_show,      NULL),
+	__ATTR(cmo_allocated,     S_IRUGO, viodev_cmo_allocated_show,     NULL),
+	__ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+	       viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
+	__ATTR_NULL
+};
+
+/* sysfs bus functions and data structures for CMO */
+
+#define viobus_cmo_rd_attr(name)                                        \
+static ssize_t                                                          \
+viobus_cmo_##name##_show(struct bus_type *bt, char *buf)                \
+{                                                                       \
+	return sprintf(buf, "%lu\n", vio_cmo.name);                     \
+}
+
+#define viobus_cmo_pool_rd_attr(name, var)                              \
+static ssize_t                                                          \
+viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf)     \
+{                                                                       \
+	return sprintf(buf, "%lu\n", vio_cmo.name.var);                 \
+}
+
+static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
+                                     size_t count)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+	vio_cmo.high = vio_cmo.curr;
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+
+	return count;
+}
+
+viobus_cmo_rd_attr(entitled);
+viobus_cmo_pool_rd_attr(reserve, size);
+viobus_cmo_pool_rd_attr(excess, size);
+viobus_cmo_pool_rd_attr(excess, free);
+viobus_cmo_rd_attr(spare);
+viobus_cmo_rd_attr(min);
+viobus_cmo_rd_attr(desired);
+viobus_cmo_rd_attr(curr);
+viobus_cmo_rd_attr(high);
+
+static struct bus_attribute vio_cmo_bus_attrs[] = {
+	__ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
+	__ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
+	__ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
+	__ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
+	__ATTR(cmo_spare,   S_IRUGO, viobus_cmo_spare_show,   NULL),
+	__ATTR(cmo_min,     S_IRUGO, viobus_cmo_min_show,     NULL),
+	__ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
+	__ATTR(cmo_curr,    S_IRUGO, viobus_cmo_curr_show,    NULL),
+	__ATTR(cmo_high,    S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+	       viobus_cmo_high_show, viobus_cmo_high_reset),
+	__ATTR_NULL
+};
+
+static void vio_cmo_sysfs_init(void)
+{
+	vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
+	vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
+}
+#else /* CONFIG_PPC_SMLPAR */
+/* No-op CMO stubs for builds without CONFIG_PPC_SMLPAR (e.g. iSeries) */
+int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
+static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
+static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
+static void vio_cmo_bus_init(void) {}
+static void vio_cmo_sysfs_init(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+EXPORT_SYMBOL(vio_cmo_entitlement_update);
+EXPORT_SYMBOL(vio_cmo_set_dev_desired);
+
 static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
 {
 	const unsigned char *dma_window;
@@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev)
 		return error;
 
 	id = vio_match_device(viodrv->id_table, viodev);
-	if (id)
+	if (id) {
+		memset(&viodev->cmo, 0, sizeof(viodev->cmo));
+		if (firmware_has_feature(FW_FEATURE_CMO)) {
+			error = vio_cmo_bus_probe(viodev);
+			if (error)
+				return error;
+		}
 		error = viodrv->probe(viodev, id);
+		if (error)
+			vio_cmo_bus_remove(viodev);
+	}
 
 	return error;
 }
@@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 	struct vio_driver *viodrv = to_vio_driver(dev->driver);
+	struct device *devptr;
+	int ret = 1;
+
+	/*
+	 * Hold a reference to the device after the remove function is called
+	 * to allow for CMO accounting cleanup for the device.
+	 */
+	devptr = get_device(dev);
 
 	if (viodrv->remove)
-		return viodrv->remove(viodev);
+		ret = viodrv->remove(viodev);
+
+	if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_bus_remove(viodev);
 
-	/* driver can't remove */
-	return 1;
+	put_device(devptr);
+	return ret;
 }
 
 /**
@@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
 			viodev->unit_address = *unit_address;
 	}
 	viodev->dev.archdata.of_node = of_node_get(of_node);
-	viodev->dev.archdata.dma_ops = &dma_iommu_ops;
+
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_set_dma_ops(viodev);
+	else
+		viodev->dev.archdata.dma_ops = &dma_iommu_ops;
 	viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
 	viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
 
@@ -245,6 +1260,9 @@ static int __init vio_bus_init(void)
 	int err;
 	struct device_node *node_vroot;
 
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_sysfs_init();
+
 	err = bus_register(&vio_bus_type);
 	if (err) {
 		printk(KERN_ERR "failed to register VIO bus\n");
@@ -262,6 +1280,9 @@ static int __init vio_bus_init(void)
 		return err;
 	}
 
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_bus_init();
+
 	node_vroot = of_find_node_by_name(NULL, "vdevice");
 	if (node_vroot) {
 		struct device_node *of_node;
diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h
index 56512a968da..0a290a19594 100644
--- a/include/asm-powerpc/vio.h
+++ b/include/asm-powerpc/vio.h
@@ -39,16 +39,32 @@
 #define VIO_IRQ_DISABLE		0UL
 #define VIO_IRQ_ENABLE		1UL
 
+/*
+ * VIO CMO minimum entitlement for all devices and spare entitlement
+ */
+#define VIO_CMO_MIN_ENT 1562624
+
 struct iommu_table;
 
-/*
- * The vio_dev structure is used to describe virtual I/O devices.
+/**
+ * vio_dev - This structure is used to describe virtual I/O devices.
+ *
+ * @desired: set from return of driver's get_desired_dma() function
+ * @entitled: bytes of IO data that has been reserved for this device.
+ * @allocated: bytes of IO data currently in use by the device.
+ * @allocs_failed: number of DMA failures due to insufficient entitlement.
  */
 struct vio_dev {
 	const char *name;
 	const char *type;
 	uint32_t unit_address;
 	unsigned int irq;
+	struct {
+		size_t desired;
+		size_t entitled;
+		size_t allocated;
+		atomic_t allocs_failed;
+	} cmo;
 	struct device dev;
 };
 
@@ -56,12 +72,19 @@ struct vio_driver {
 	const struct vio_device_id *id_table;
 	int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
 	int (*remove)(struct vio_dev *dev);
+	/* A driver must have a get_desired_dma() function to
+	 * be loaded in a CMO environment if it uses DMA.
+	 */
+	unsigned long (*get_desired_dma)(struct vio_dev *dev);
 	struct device_driver driver;
 };
 
 extern int vio_register_driver(struct vio_driver *drv);
 extern void vio_unregister_driver(struct vio_driver *drv);
 
+extern int vio_cmo_entitlement_update(size_t);
+extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired);
+
 extern void __devinit vio_unregister_device(struct vio_dev *dev);
 
 struct device_node;
-- 
GitLab


From 22e1a4dd3f2a9009d1d8896a5e833b6094877008 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@austin.ibm.com>
Date: Thu, 24 Jul 2008 04:31:52 +1000
Subject: [PATCH 550/853] powerpc/pseries: Verify CMO memory entitlement
 updates with virtual I/O

Verify memory entitlement updates can be handled by vio.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/lparcfg.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 848c3e5a637..64381a204a5 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -34,6 +34,7 @@
 #include <asm/time.h>
 #include <asm/prom.h>
 #include <asm/vdso_datapage.h>
+#include <asm/vio.h>
 
 #define MODULE_VERS "1.8"
 #define MODULE_NAME "lparcfg"
@@ -527,6 +528,15 @@ static ssize_t update_mpp(u64 *entitlement, u8 *weight)
 	u8 new_weight;
 	ssize_t rc;
 
+	if (entitlement) {
+		/* Check with vio to ensure the new memory entitlement
+		 * can be handled.
+		 */
+		rc = vio_cmo_entitlement_update(*entitlement);
+		if (rc)
+			return rc;
+	}
+
 	rc = h_get_mpp(&mpp_data);
 	if (rc)
 		return rc;
-- 
GitLab


From ea866e6526b8a2ead92875732d41b26fdb470312 Mon Sep 17 00:00:00 2001
From: Santiago Leon <santil@us.ibm.com>
Date: Thu, 24 Jul 2008 04:34:23 +1000
Subject: [PATCH 551/853] ibmveth: Automatically enable larger rx buffer pools
 for larger mtu

Activates larger rx buffer pools when the MTU is changed to a larger
value, and de-activates the large rx buffer pools again when the MTU
changes to a smaller value.

Signed-off-by: Santiago Leon <santil@us.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ibmveth.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 00527805e4f..007ca8735a9 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -1054,7 +1054,6 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ibmveth_adapter *adapter = dev->priv;
 	int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
-	int reinit = 0;
 	int i, rc;
 
 	if (new_mtu < IBMVETH_MAX_MTU)
@@ -1067,15 +1066,21 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
 	if (i == IbmVethNumBufferPools)
 		return -EINVAL;
 
+	/* Deactivate all the buffer pools so that the next loop can activate
+	   only the buffer pools necessary to hold the new MTU */
+	for (i = 0; i < IbmVethNumBufferPools; i++)
+		if (adapter->rx_buff_pool[i].active) {
+			ibmveth_free_buffer_pool(adapter,
+						 &adapter->rx_buff_pool[i]);
+			adapter->rx_buff_pool[i].active = 0;
+		}
+
 	/* Look for an active buffer pool that can hold the new MTU */
 	for(i = 0; i<IbmVethNumBufferPools; i++) {
-		if (!adapter->rx_buff_pool[i].active) {
-			adapter->rx_buff_pool[i].active = 1;
-			reinit = 1;
-		}
+		adapter->rx_buff_pool[i].active = 1;
 
 		if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
-			if (reinit && netif_running(adapter->netdev)) {
+			if (netif_running(adapter->netdev)) {
 				adapter->pool_config = 1;
 				ibmveth_close(adapter->netdev);
 				adapter->pool_config = 0;
@@ -1402,14 +1407,15 @@ const char * buf, size_t count)
 				return -EPERM;
 			}
 
-			pool->active = 0;
 			if (netif_running(netdev)) {
 				adapter->pool_config = 1;
 				ibmveth_close(netdev);
+				pool->active = 0;
 				adapter->pool_config = 0;
 				if ((rc = ibmveth_open(netdev)))
 					return rc;
 			}
+			pool->active = 0;
 		}
 	} else if (attr == &veth_num_attr) {
 		if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
-- 
GitLab


From 1096d63d8e7d226630706e15648705d0187787e4 Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:34:52 +1000
Subject: [PATCH 552/853] ibmveth: enable driver for CMO

Enable ibmveth for Cooperative Memory Overcommitment (CMO).  For this driver
this means calculating a desired amount of IO memory based on the current MTU
and updating this value with the bus when MTU changes occur.  Because DMA
mappings can fail, we have added a bounce buffer for temporary cases where
the driver cannot map IO memory for the buffer pool.

The following changes are made to enable the driver for CMO:
 * DMA mapping errors will not result in error messages if entitlement has
   been exceeded and resources were not available.
 * DMA mapping errors are handled gracefully: ibmveth_replenish_buffer_pool()
   is corrected to check the return from dma_map_single() and back out
   cleanly on failure.
 * The driver defines a get_desired_dma function, which it needs in order
   to operate in a CMO environment.
 * When the MTU is changed, the driver will update the device IO entitlement.

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Santiago Leon <santil@us.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ibmveth.c | 169 +++++++++++++++++++++++++++++++++---------
 drivers/net/ibmveth.h |   5 ++
 2 files changed, 140 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 007ca8735a9..e5a6e2e8454 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -33,6 +33,7 @@
 */
 
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
@@ -52,7 +53,9 @@
 #include <asm/hvcall.h>
 #include <asm/atomic.h>
 #include <asm/vio.h>
+#include <asm/iommu.h>
 #include <asm/uaccess.h>
+#include <asm/firmware.h>
 #include <linux/seq_file.h>
 
 #include "ibmveth.h"
@@ -94,8 +97,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter);
 static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter);
 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
 static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
+static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
 static struct kobj_type ktype_veth_pool;
 
+
 #ifdef CONFIG_PROC_FS
 #define IBMVETH_PROC_DIR "ibmveth"
 static struct proc_dir_entry *ibmveth_proc_dir;
@@ -226,16 +231,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
 	u32 i;
 	u32 count = pool->size - atomic_read(&pool->available);
 	u32 buffers_added = 0;
+	struct sk_buff *skb;
+	unsigned int free_index, index;
+	u64 correlator;
+	unsigned long lpar_rc;
+	dma_addr_t dma_addr;
 
 	mb();
 
 	for(i = 0; i < count; ++i) {
-		struct sk_buff *skb;
-		unsigned int free_index, index;
-		u64 correlator;
 		union ibmveth_buf_desc desc;
-		unsigned long lpar_rc;
-		dma_addr_t dma_addr;
 
 		skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
 
@@ -255,6 +260,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
 		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
 				pool->buff_size, DMA_FROM_DEVICE);
 
+		if (dma_mapping_error(dma_addr))
+			goto failure;
+
 		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
 		pool->dma_addr[index] = dma_addr;
 		pool->skbuff[index] = skb;
@@ -267,25 +275,32 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
 
 		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
 
-		if(lpar_rc != H_SUCCESS) {
-			pool->free_map[free_index] = index;
-			pool->skbuff[index] = NULL;
-			if (pool->consumer_index == 0)
-				pool->consumer_index = pool->size - 1;
-			else
-				pool->consumer_index--;
-			dma_unmap_single(&adapter->vdev->dev,
-					pool->dma_addr[index], pool->buff_size,
-					DMA_FROM_DEVICE);
-			dev_kfree_skb_any(skb);
-			adapter->replenish_add_buff_failure++;
-			break;
-		} else {
+		if (lpar_rc != H_SUCCESS)
+			goto failure;
+		else {
 			buffers_added++;
 			adapter->replenish_add_buff_success++;
 		}
 	}
 
+	mb();
+	atomic_add(buffers_added, &(pool->available));
+	return;
+
+failure:
+	pool->free_map[free_index] = index;
+	pool->skbuff[index] = NULL;
+	if (pool->consumer_index == 0)
+		pool->consumer_index = pool->size - 1;
+	else
+		pool->consumer_index--;
+	if (!dma_mapping_error(dma_addr))
+		dma_unmap_single(&adapter->vdev->dev,
+		                 pool->dma_addr[index], pool->buff_size,
+		                 DMA_FROM_DEVICE);
+	dev_kfree_skb_any(skb);
+	adapter->replenish_add_buff_failure++;
+
 	mb();
 	atomic_add(buffers_added, &(pool->available));
 }
@@ -297,7 +312,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
 
 	adapter->replenish_task_cycles++;
 
-	for(i = 0; i < IbmVethNumBufferPools; i++)
+	for (i = (IbmVethNumBufferPools - 1); i >= 0; i--)
 		if(adapter->rx_buff_pool[i].active)
 			ibmveth_replenish_buffer_pool(adapter,
 						     &adapter->rx_buff_pool[i]);
@@ -472,6 +487,18 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
 		if (adapter->rx_buff_pool[i].active)
 			ibmveth_free_buffer_pool(adapter,
 						 &adapter->rx_buff_pool[i]);
+
+	if (adapter->bounce_buffer != NULL) {
+		if (!dma_mapping_error(adapter->bounce_buffer_dma)) {
+			dma_unmap_single(&adapter->vdev->dev,
+					adapter->bounce_buffer_dma,
+					adapter->netdev->mtu + IBMVETH_BUFF_OH,
+					DMA_BIDIRECTIONAL);
+			adapter->bounce_buffer_dma = DMA_ERROR_CODE;
+		}
+		kfree(adapter->bounce_buffer);
+		adapter->bounce_buffer = NULL;
+	}
 }
 
 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
@@ -607,6 +634,24 @@ static int ibmveth_open(struct net_device *netdev)
 		return rc;
 	}
 
+	adapter->bounce_buffer =
+	    kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
+	if (!adapter->bounce_buffer) {
+		ibmveth_error_printk("unable to allocate bounce buffer\n");
+		ibmveth_cleanup(adapter);
+		napi_disable(&adapter->napi);
+		return -ENOMEM;
+	}
+	adapter->bounce_buffer_dma =
+	    dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
+			   netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(adapter->bounce_buffer_dma)) {
+		ibmveth_error_printk("unable to map bounce buffer\n");
+		ibmveth_cleanup(adapter);
+		napi_disable(&adapter->napi);
+		return -ENOMEM;
+	}
+
 	ibmveth_debug_printk("initial replenish cycle\n");
 	ibmveth_interrupt(netdev->irq, netdev);
 
@@ -853,10 +898,12 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 	unsigned int tx_packets = 0;
 	unsigned int tx_send_failed = 0;
 	unsigned int tx_map_failed = 0;
+	int used_bounce = 0;
+	unsigned long data_dma_addr;
 
 	desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len;
-	desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data,
-					     skb->len, DMA_TO_DEVICE);
+	data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
+				       skb->len, DMA_TO_DEVICE);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) {
@@ -875,12 +922,16 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 		buf[1] = 0;
 	}
 
-	if (dma_mapping_error(desc.fields.address)) {
-		ibmveth_error_printk("tx: unable to map xmit buffer\n");
+	if (dma_mapping_error(data_dma_addr)) {
+		if (!firmware_has_feature(FW_FEATURE_CMO))
+			ibmveth_error_printk("tx: unable to map xmit buffer\n");
+		skb_copy_from_linear_data(skb, adapter->bounce_buffer,
+					  skb->len);
+		desc.fields.address = adapter->bounce_buffer_dma;
 		tx_map_failed++;
-		tx_dropped++;
-		goto out;
-	}
+		used_bounce = 1;
+	} else
+		desc.fields.address = data_dma_addr;
 
 	/* send the frame. Arbitrarily set retrycount to 1024 */
 	correlator = 0;
@@ -904,8 +955,9 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 		netdev->trans_start = jiffies;
 	}
 
-	dma_unmap_single(&adapter->vdev->dev, desc.fields.address,
-			 skb->len, DMA_TO_DEVICE);
+	if (!used_bounce)
+		dma_unmap_single(&adapter->vdev->dev, data_dma_addr,
+				 skb->len, DMA_TO_DEVICE);
 
 out:	spin_lock_irqsave(&adapter->stats_lock, flags);
 	netdev->stats.tx_dropped += tx_dropped;
@@ -1053,8 +1105,9 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
 static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ibmveth_adapter *adapter = dev->priv;
+	struct vio_dev *viodev = adapter->vdev;
 	int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
-	int i, rc;
+	int i;
 
 	if (new_mtu < IBMVETH_MAX_MTU)
 		return -EINVAL;
@@ -1085,10 +1138,15 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
 				ibmveth_close(adapter->netdev);
 				adapter->pool_config = 0;
 				dev->mtu = new_mtu;
-				if ((rc = ibmveth_open(adapter->netdev)))
-					return rc;
-			} else
-				dev->mtu = new_mtu;
+				vio_cmo_set_dev_desired(viodev,
+						ibmveth_get_desired_dma
+						(viodev));
+				return ibmveth_open(adapter->netdev);
+			}
+			dev->mtu = new_mtu;
+			vio_cmo_set_dev_desired(viodev,
+						ibmveth_get_desired_dma
+						(viodev));
 			return 0;
 		}
 	}
@@ -1103,6 +1161,46 @@ static void ibmveth_poll_controller(struct net_device *dev)
 }
 #endif
 
+/**
+ * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
+ *
+ * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
+ *
+ * Return value:
+ *	Number of bytes of IO data the driver will need to perform well.
+ */
+static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
+{
+	struct net_device *netdev = dev_get_drvdata(&vdev->dev);
+	struct ibmveth_adapter *adapter;
+	unsigned long ret;
+	int i;
+	int rxqentries = 1;
+
+	/* netdev inits at probe time along with the structures we need below*/
+	if (netdev == NULL)
+		return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT);
+
+	adapter = netdev_priv(netdev);
+
+	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
+	ret += IOMMU_PAGE_ALIGN(netdev->mtu);
+
+	for (i = 0; i < IbmVethNumBufferPools; i++) {
+		/* add the size of the active receive buffers */
+		if (adapter->rx_buff_pool[i].active)
+			ret +=
+			    adapter->rx_buff_pool[i].size *
+			    IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
+			            buff_size);
+		rxqentries += adapter->rx_buff_pool[i].size;
+	}
+	/* add the size of the receive queue entries */
+	ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry));
+
+	return ret;
+}
+
 static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 {
 	int rc, i;
@@ -1247,6 +1345,8 @@ static int __devexit ibmveth_remove(struct vio_dev *dev)
 	ibmveth_proc_unregister_adapter(adapter);
 
 	free_netdev(netdev);
+	dev_set_drvdata(&dev->dev, NULL);
+
 	return 0;
 }
 
@@ -1491,6 +1591,7 @@ static struct vio_driver ibmveth_driver = {
 	.id_table	= ibmveth_device_table,
 	.probe		= ibmveth_probe,
 	.remove		= ibmveth_remove,
+	.get_desired_dma = ibmveth_get_desired_dma,
 	.driver		= {
 		.name	= ibmveth_driver_name,
 		.owner	= THIS_MODULE,
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index 41f61cd1885..d2818694875 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -93,9 +93,12 @@ static inline long h_illan_attributes(unsigned long unit_address,
   plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac)
 
 #define IbmVethNumBufferPools 5
+#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */
 #define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
 #define IBMVETH_MAX_MTU 68
 #define IBMVETH_MAX_POOL_COUNT 4096
+#define IBMVETH_BUFF_LIST_SIZE 4096
+#define IBMVETH_FILT_LIST_SIZE 4096
 #define IBMVETH_MAX_BUF_SIZE (1024 * 128)
 
 static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
@@ -143,6 +146,8 @@ struct ibmveth_adapter {
     struct ibmveth_rx_q rx_queue;
     int pool_config;
     int rx_csum;
+    void *bounce_buffer;
+    dma_addr_t bounce_buffer_dma;
 
     /* adapter specific stats */
     u64 replenish_task_cycles;
-- 
GitLab


From 7912a0ac5907df1f8b214b3ca15ccf96129daae0 Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:35:27 +1000
Subject: [PATCH 553/853] ibmvscsi: driver enablement for CMO

Enable the driver to function in a Cooperative Memory Overcommitment (CMO)
environment.

The following changes are made to enable the driver for CMO:
 * DMA mapping errors will not result in error messages if entitlement has
   been exceeded and resources were not available.
 * The driver defines a get_desired_dma function, which it needs in order
   to operate in a CMO environment.  It reports how much IO memory the
   driver would like to have in order to perform well.

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Brian King <brking@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Acked-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/scsi/ibmvscsi/ibmvscsi.c | 45 +++++++++++++++++++++++++++-----
 drivers/scsi/ibmvscsi/ibmvscsi.h |  2 ++
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 5d23368a1bc..20000ec79b0 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -72,6 +72,7 @@
 #include <linux/delay.h>
 #include <asm/firmware.h>
 #include <asm/vio.h>
+#include <asm/firmware.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
@@ -426,8 +427,10 @@ static int map_sg_data(struct scsi_cmnd *cmd,
 					   SG_ALL * sizeof(struct srp_direct_buf),
 					   &evt_struct->ext_list_token, 0);
 		if (!evt_struct->ext_list) {
-			sdev_printk(KERN_ERR, cmd->device,
-				    "Can't allocate memory for indirect table\n");
+			if (!firmware_has_feature(FW_FEATURE_CMO))
+				sdev_printk(KERN_ERR, cmd->device,
+				            "Can't allocate memory "
+				            "for indirect table\n");
 			return 0;
 		}
 	}
@@ -743,7 +746,9 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
 	srp_cmd->lun = ((u64) lun) << 48;
 
 	if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
-		sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n");
+		if (!firmware_has_feature(FW_FEATURE_CMO))
+			sdev_printk(KERN_ERR, cmnd->device,
+			            "couldn't convert cmd to srp_cmd\n");
 		free_event_struct(&hostdata->pool, evt_struct);
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
@@ -855,7 +860,10 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 					    DMA_BIDIRECTIONAL);
 
 	if (dma_mapping_error(req->buffer)) {
-		dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n");
+		if (!firmware_has_feature(FW_FEATURE_CMO))
+			dev_err(hostdata->dev,
+			        "Unable to map request_buffer for "
+			        "adapter_info!\n");
 		free_event_struct(&hostdata->pool, evt_struct);
 		return;
 	}
@@ -1400,7 +1408,9 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
 						    DMA_BIDIRECTIONAL);
 
 	if (dma_mapping_error(host_config->buffer)) {
-		dev_err(hostdata->dev, "dma_mapping error getting host config\n");
+		if (!firmware_has_feature(FW_FEATURE_CMO))
+			dev_err(hostdata->dev,
+			        "dma_mapping error getting host config\n");
 		free_event_struct(&hostdata->pool, evt_struct);
 		return -1;
 	}
@@ -1604,7 +1614,7 @@ static struct scsi_host_template driver_template = {
 	.eh_host_reset_handler = ibmvscsi_eh_host_reset_handler,
 	.slave_configure = ibmvscsi_slave_configure,
 	.change_queue_depth = ibmvscsi_change_queue_depth,
-	.cmd_per_lun = 16,
+	.cmd_per_lun = IBMVSCSI_CMDS_PER_LUN_DEFAULT,
 	.can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
 	.this_id = -1,
 	.sg_tablesize = SG_ALL,
@@ -1612,6 +1622,26 @@ static struct scsi_host_template driver_template = {
 	.shost_attrs = ibmvscsi_attrs,
 };
 
+/**
+ * ibmvscsi_get_desired_dma - Calculate IO memory desired by the driver
+ *
+ * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
+ *
+ * Return value:
+ *	Number of bytes of IO data the driver will need to perform well.
+ */
+static unsigned long ibmvscsi_get_desired_dma(struct vio_dev *vdev)
+{
+	/* iu_storage data allocated in initialize_event_pool */
+	unsigned long desired_io = max_requests * sizeof(union viosrp_iu);
+
+	/* add io space for sg data */
+	desired_io += (IBMVSCSI_MAX_SECTORS_DEFAULT *
+	                     IBMVSCSI_CMDS_PER_LUN_DEFAULT);
+
+	return desired_io;
+}
+
 /**
  * Called by bus code for each adapter
  */
@@ -1641,7 +1671,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	hostdata->host = host;
 	hostdata->dev = dev;
 	atomic_set(&hostdata->request_limit, -1);
-	hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */
+	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
 	rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests);
 	if (rc != 0 && rc != H_RESOURCE) {
@@ -1735,6 +1765,7 @@ static struct vio_driver ibmvscsi_driver = {
 	.id_table = ibmvscsi_device_table,
 	.probe = ibmvscsi_probe,
 	.remove = ibmvscsi_remove,
+	.get_desired_dma = ibmvscsi_get_desired_dma,
 	.driver = {
 		.name = "ibmvscsi",
 		.owner = THIS_MODULE,
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h
index 46e850e302c..2d4339d5e16 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.h
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.h
@@ -45,6 +45,8 @@ struct Scsi_Host;
 #define MAX_INDIRECT_BUFS 10
 
 #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100
+#define IBMVSCSI_CMDS_PER_LUN_DEFAULT 16
+#define IBMVSCSI_MAX_SECTORS_DEFAULT 256 /* 32 * 8 = default max I/O 32 pages */
 #define IBMVSCSI_MAX_CMDS_PER_LUN 64
 
 /* ------------------------------------------------------------
-- 
GitLab


From 39c1ffecc6aabcc8105602a95ce769f27bcf6048 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 24 Jul 2008 04:35:48 +1000
Subject: [PATCH 554/853] ibmvfc: Add support for collaborative memory
 overcommit

Adds support to the ibmvfc driver for Cooperative Memory Overcommitment
(CMO), also referred to as collaborative memory overcommit.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index eb702b96d57..c4a7c06793c 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3819,6 +3819,20 @@ static int ibmvfc_remove(struct vio_dev *vdev)
 	return 0;
 }
 
+/**
+ * ibmvfc_get_desired_dma - Calculate DMA resources needed by the driver
+ * @vdev:	vio device struct
+ *
+ * Return value:
+ *	Number of bytes the driver will need to DMA map at the same time in
+ *	order to perform well.
+ */
+static unsigned long ibmvfc_get_desired_dma(struct vio_dev *vdev)
+{
+	unsigned long pool_dma = max_requests * sizeof(union ibmvfc_iu);
+	return pool_dma + ((512 * 1024) * driver_template.cmd_per_lun);
+}
+
 static struct vio_device_id ibmvfc_device_table[] __devinitdata = {
 	{"fcp", "IBM,vfc-client"},
 	{ "", "" }
@@ -3829,6 +3843,7 @@ static struct vio_driver ibmvfc_driver = {
 	.id_table = ibmvfc_device_table,
 	.probe = ibmvfc_probe,
 	.remove = ibmvfc_remove,
+	.get_desired_dma = ibmvfc_get_desired_dma,
 	.driver = {
 		.name = IBMVFC_NAME,
 		.owner = THIS_MODULE,
-- 
GitLab


From 8391e42a5c1f3d757faa5e7f46a4a68f9aa6cb12 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@austin.ibm.com>
Date: Thu, 24 Jul 2008 04:36:38 +1000
Subject: [PATCH 555/853] powerpc/pseries: Update arch vector to indicate
 support for CMO

Update the architecture vector to indicate that Cooperative Memory
Overcommitment is supported if CONFIG_PPC_SMLPAR is set.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/prom_init.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1ea8c8d3ce8..c4ab2195b9c 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -643,6 +643,11 @@ static void __init early_cmdline_parse(void)
 #else
 #define OV5_MSI			0x00
 #endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_PPC_SMLPAR
+#define OV5_CMO			0x80	/* Cooperative Memory Overcommitment */
+#else
+#define OV5_CMO			0x00
+#endif
 
 /*
  * The architecture vector has an array of PVR mask/value pairs,
@@ -687,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = {
 	0,				/* don't halt */
 
 	/* option vector 5: PAPR/OF options */
-	3 - 2,				/* length */
+	5 - 2,				/* length */
 	0,				/* don't ignore, don't halt */
 	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
 	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
+	0,
+	OV5_CMO,
 };
 
 /* Old method - ELF header with PT_NOTE sections */
-- 
GitLab


From 16c14b4621c7b6fc4611abf1f86cd78cdb1b2b03 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@austin.ibm.com>
Date: Thu, 24 Jul 2008 05:10:46 +1000
Subject: [PATCH 556/853] powerpc/pseries: Remove kmalloc call in handling
 writes to lparcfg

There are only 4 valid name=value pairs for writes to
/proc/ppc64/lparcfg.  The current code allocates a buffer to copy
this information in from the user.  Since the longest name=value
pair will easily fit into a buffer of 64 characters, simply
put the buffer on the stack instead of allocating it.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/lparcfg.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 64381a204a5..9f856a0c3e3 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -573,29 +573,27 @@ static ssize_t update_mpp(u64 *entitlement, u8 *weight)
 static ssize_t lparcfg_write(struct file *file, const char __user * buf,
 			     size_t count, loff_t * off)
 {
-	char *kbuf;
+	int kbuf_sz = 64;
+	char kbuf[kbuf_sz];
 	char *tmp;
 	u64 new_entitled, *new_entitled_ptr = &new_entitled;
 	u8 new_weight, *new_weight_ptr = &new_weight;
-	ssize_t retval = -ENOMEM;
+	ssize_t retval;
 
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
 			firmware_has_feature(FW_FEATURE_ISERIES))
 		return -EINVAL;
 
-	kbuf = kmalloc(count, GFP_KERNEL);
-	if (!kbuf)
-		goto out;
+	if (count > kbuf_sz)
+		return -EINVAL;
 
-	retval = -EFAULT;
 	if (copy_from_user(kbuf, buf, count))
-		goto out;
+		return -EFAULT;
 
-	retval = -EINVAL;
 	kbuf[count - 1] = '\0';
 	tmp = strchr(kbuf, '=');
 	if (!tmp)
-		goto out;
+		return -EINVAL;
 
 	*tmp++ = '\0';
 
@@ -603,32 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
 		char *endp;
 		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
 		if (endp == tmp)
-			goto out;
+			return -EINVAL;
 
 		retval = update_ppp(new_entitled_ptr, NULL);
 	} else if (!strcmp(kbuf, "capacity_weight")) {
 		char *endp;
 		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
 		if (endp == tmp)
-			goto out;
+			return -EINVAL;
 
 		retval = update_ppp(NULL, new_weight_ptr);
 	} else if (!strcmp(kbuf, "entitled_memory")) {
 		char *endp;
 		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
 		if (endp == tmp)
-			goto out;
+			return -EINVAL;
 
 		retval = update_mpp(new_entitled_ptr, NULL);
 	} else if (!strcmp(kbuf, "entitled_memory_weight")) {
 		char *endp;
 		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
 		if (endp == tmp)
-			goto out;
+			return -EINVAL;
 
 		retval = update_mpp(NULL, new_weight_ptr);
 	} else
-		goto out;
+		return -EINVAL;
 
 	if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
 		retval = count;
@@ -644,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
 		retval = -EIO;
 	}
 
-out:
-	kfree(kbuf);
 	return retval;
 }
 
-- 
GitLab


From 1e3519f8e1baec0b733cd42684fcd3d9681662f1 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 25 Jul 2008 16:21:11 +1000
Subject: [PATCH 557/853] Move update_mmu_cache() declaration from tlbflush.h
 to pgtable.h where it belongs. This fixes some build problems on some configs

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/asm-powerpc/pgtable.h  | 13 +++++++++++++
 include/asm-powerpc/tlbflush.h | 11 -----------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h
index d18ffe7bc7c..dbb8ca172e4 100644
--- a/include/asm-powerpc/pgtable.h
+++ b/include/asm-powerpc/pgtable.h
@@ -38,6 +38,19 @@ extern void paging_init(void);
 		remap_pfn_range(vma, vaddr, pfn, size, prot)
 
 #include <asm-generic/pgtable.h>
+
+
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ * On machines which use an MMU hash table, we use this to put a
+ * corresponding HPTE into the hash table ahead of time, instead of
+ * waiting for the inevitable extra hash-table miss exception.
+ */
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index 5c910814764..361cd5c7a32 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -162,16 +162,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 
 #endif
 
-/*
- * This gets called at the end of handling a page fault, when
- * the kernel has put a new PTE into the page table for the process.
- * We use it to ensure coherency between the i-cache and d-cache
- * for the page which has just been mapped in.
- * On machines which use an MMU hash table, we use this to put a
- * corresponding HPTE into the hash table ahead of time, instead of
- * waiting for the inevitable extra hash-table miss exception.
- */
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
-
 #endif /*__KERNEL__ */
 #endif /* _ASM_POWERPC_TLBFLUSH_H */
-- 
GitLab


From 973b7d83ebeb1e34b8bee69208916e5f0e2353c3 Mon Sep 17 00:00:00 2001
From: Tony Breeds <tony@bakeyournoodle.com>
Date: Fri, 25 Jul 2008 16:21:51 +1000
Subject: [PATCH 558/853] powerpc: Wireup new syscalls

signalfd4, eventfd2, epoll_create1, dup3, pipe2 and inotify_init1

Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/asm-powerpc/syscalls.h | 1 +
 include/asm-powerpc/systbl.h   | 6 ++++++
 include/asm-powerpc/unistd.h   | 8 +++++++-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h
index 2b8a458f990..eb8eb400c66 100644
--- a/include/asm-powerpc/syscalls.h
+++ b/include/asm-powerpc/syscalls.h
@@ -31,6 +31,7 @@ asmlinkage int sys_vfork(unsigned long p1, unsigned long p2,
 		unsigned long p3, unsigned long p4, unsigned long p5,
 		unsigned long p6, struct pt_regs *regs);
 asmlinkage long sys_pipe(int __user *fildes);
+asmlinkage long sys_pipe2(int __user *fildes, int flags);
 asmlinkage long sys_rt_sigaction(int sig,
 		const struct sigaction __user *act,
 		struct sigaction __user *oact, size_t sigsetsize);
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index ae7085c6569..e084272ed1c 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -316,3 +316,9 @@ COMPAT_SYS(fallocate)
 SYSCALL(subpage_prot)
 COMPAT_SYS_SPU(timerfd_settime)
 COMPAT_SYS_SPU(timerfd_gettime)
+COMPAT_SYS_SPU(signalfd4)
+SYSCALL_SPU(eventfd2)
+SYSCALL_SPU(epoll_create1)
+SYSCALL_SPU(dup3)
+SYSCALL_SPU(pipe2)
+SYSCALL(inotify_init1)
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index ce91bb66206..e07d0c76ed7 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -335,10 +335,16 @@
 #define __NR_subpage_prot	310
 #define __NR_timerfd_settime	311
 #define __NR_timerfd_gettime	312
+#define __NR_signalfd4		313
+#define __NR_eventfd2		314
+#define __NR_epoll_create1	315
+#define __NR_dup3		316
+#define __NR_pipe2		317
+#define __NR_inotify_init1	318
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		313
+#define __NR_syscalls		319
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
-- 
GitLab


From cffe1c5d7a5a1e54f7c2c6d0510f651a965bccc3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 25 Jul 2008 01:25:04 -0700
Subject: [PATCH 559/853] pkt_sched: Fix locking in shutdown_scheduler_queue()

Qdisc locks need to be held with BH disabled.

Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4ac7e3a8c25..43abd4d27ea 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -736,9 +736,9 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 		dev_queue->qdisc = qdisc_default;
 		dev_queue->qdisc_sleeping = qdisc_default;
 
-		spin_lock(root_lock);
+		spin_lock_bh(root_lock);
 		qdisc_destroy(qdisc);
-		spin_unlock(root_lock);
+		spin_unlock_bh(root_lock);
 	}
 }
 
-- 
GitLab


From d37e6bf68fc1eb34a4ad21d9ae8890ed37ea80e7 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 24 Jul 2008 18:28:11 +0300
Subject: [PATCH 560/853] UBI: always start the background thread

This fix only affects UBI debugging.

If the background thread is disabled for debugging purposes,
start it anyway, because otherwise we see tons of kernel debugging
complaints like this:

INFO: task ubi_bgt0d:26857 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
ubi_bgt0d     D dd37bf94     0 26857      2
       dd37bfcc 00000086 f8e17cea dd37bf94 00000046 00000000 00000000 f5c62430
       f5c62430 f5c62590 c2a09c80 f6cbd498 dd8e9cbc 00000296 dd37bfb0 00000296
       dd8e9cb8 dd8e9cbc dd37bfcc c0119774 00000000 00000000 c0132e89 f6961560
Call Trace:
 [<f8e17cea>] ? ubi_thread+0x0/0x127 [ubi]
 [<c0119774>] ? complete+0x43/0x4b
 [<c0132e89>] ? kthread+0x0/0x5b
 [<f8e17cea>] ? ubi_thread+0x0/0x127 [ubi]
 [<c0132eae>] kthread+0x25/0x5b
 [<c0132e89>] ? kthread+0x0/0x5b
 [<c0104953>] kernel_thread_helper+0x7/0x14
 =======================

So start it, and go to sleep inside it, instead of creating it and never
starting it.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index eba760b3b8c..c7630a22831 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -870,11 +870,9 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 		ubi->beb_rsvd_pebs);
 	ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec);
 
-	/* Enable the background thread */
-	if (!DBG_DISABLE_BGT) {
+	if (!DBG_DISABLE_BGT)
 		ubi->thread_enabled = 1;
-		wake_up_process(ubi->bgt_thread);
-	}
+	wake_up_process(ubi->bgt_thread);
 
 	ubi_devices[ubi_num] = ubi;
 	return ubi_num;
-- 
GitLab


From 6fccab671f2f0a24b799f29a4ec878f62d34656c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Jul 2008 02:54:40 -0700
Subject: [PATCH 561/853] ipsec: ipcomp - Merge IPComp implementations

This patch merges the IPv4/IPv6 IPComp implementations since most
of the code is identical.  As a result future enhancements will no
longer need to be duplicated.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipcomp.h   |   6 +
 net/ipv4/Kconfig       |   4 +-
 net/ipv4/ipcomp.c      | 315 +------------------------------------
 net/ipv6/Kconfig       |   4 +-
 net/ipv6/ipcomp6.c     | 298 +----------------------------------
 net/xfrm/Kconfig       |   6 +
 net/xfrm/Makefile      |   1 +
 net/xfrm/xfrm_ipcomp.c | 349 +++++++++++++++++++++++++++++++++++++++++
 8 files changed, 377 insertions(+), 606 deletions(-)
 create mode 100644 net/xfrm/xfrm_ipcomp.c

diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index 330b74e813a..2a1092abaa0 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -14,6 +14,12 @@ struct ipcomp_data {
 
 struct ip_comp_hdr;
 struct sk_buff;
+struct xfrm_state;
+
+int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb);
+int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb);
+void ipcomp_destroy(struct xfrm_state *x);
+int ipcomp_init_state(struct xfrm_state *x);
 
 static inline struct ip_comp_hdr *ip_comp_hdr(const struct sk_buff *skb)
 {
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 4670683b468..591ea23639c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -356,10 +356,8 @@ config INET_ESP
 
 config INET_IPCOMP
 	tristate "IP: IPComp transformation"
-	select XFRM
 	select INET_XFRM_TUNNEL
-	select CRYPTO
-	select CRYPTO_DEFLATE
+	select XFRM_IPCOMP
 	---help---
 	  Support for IP Payload Compression Protocol (IPComp) (RFC3173),
 	  typically needed for IPsec.
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index a75807b971b..a42b64d040c 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -14,153 +14,14 @@
  *   - Adaptive compression.
  */
 #include <linux/module.h>
-#include <linux/crypto.h>
 #include <linux/err.h>
-#include <linux/pfkeyv2.h>
-#include <linux/percpu.h>
-#include <linux/smp.h>
-#include <linux/list.h>
-#include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
-#include <linux/mutex.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/icmp.h>
 #include <net/ipcomp.h>
 #include <net/protocol.h>
-
-struct ipcomp_tfms {
-	struct list_head list;
-	struct crypto_comp **tfms;
-	int users;
-};
-
-static DEFINE_MUTEX(ipcomp_resource_mutex);
-static void **ipcomp_scratches;
-static int ipcomp_scratch_users;
-static LIST_HEAD(ipcomp_tfms_list);
-
-static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ipcomp_data *ipcd = x->data;
-	const int plen = skb->len;
-	int dlen = IPCOMP_SCRATCH_SIZE;
-	const u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
-
-	if (err)
-		goto out;
-
-	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
-	if (err)
-		goto out;
-
-	skb->truesize += dlen - plen;
-	__skb_put(skb, dlen - plen);
-	skb_copy_to_linear_data(skb, scratch, dlen);
-out:
-	put_cpu();
-	return err;
-}
-
-static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int nexthdr;
-	int err = -ENOMEM;
-	struct ip_comp_hdr *ipch;
-
-	if (skb_linearize_cow(skb))
-		goto out;
-
-	skb->ip_summed = CHECKSUM_NONE;
-
-	/* Remove ipcomp header and decompress original payload */
-	ipch = (void *)skb->data;
-	nexthdr = ipch->nexthdr;
-
-	skb->transport_header = skb->network_header + sizeof(*ipch);
-	__skb_pull(skb, sizeof(*ipch));
-	err = ipcomp_decompress(x, skb);
-	if (err)
-		goto out;
-
-	err = nexthdr;
-
-out:
-	return err;
-}
-
-static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ipcomp_data *ipcd = x->data;
-	const int plen = skb->len;
-	int dlen = IPCOMP_SCRATCH_SIZE;
-	u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-	int err;
-
-	local_bh_disable();
-	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
-	local_bh_enable();
-	if (err)
-		goto out;
-
-	if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
-		err = -EMSGSIZE;
-		goto out;
-	}
-
-	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
-	put_cpu();
-
-	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
-	return 0;
-
-out:
-	put_cpu();
-	return err;
-}
-
-static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err;
-	struct ip_comp_hdr *ipch;
-	struct ipcomp_data *ipcd = x->data;
-
-	if (skb->len < ipcd->threshold) {
-		/* Don't bother compressing */
-		goto out_ok;
-	}
-
-	if (skb_linearize_cow(skb))
-		goto out_ok;
-
-	err = ipcomp_compress(x, skb);
-
-	if (err) {
-		goto out_ok;
-	}
-
-	/* Install ipcomp header, convert into ipcomp datagram. */
-	ipch = ip_comp_hdr(skb);
-	ipch->nexthdr = *skb_mac_header(skb);
-	ipch->flags = 0;
-	ipch->cpi = htons((u16 )ntohl(x->id.spi));
-	*skb_mac_header(skb) = IPPROTO_COMP;
-out_ok:
-	skb_push(skb, -skb_network_offset(skb));
-	return 0;
-}
+#include <net/sock.h>
 
 static void ipcomp4_err(struct sk_buff *skb, u32 info)
 {
@@ -241,156 +102,12 @@ out:
 	return err;
 }
 
-static void ipcomp_free_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (--ipcomp_scratch_users)
-		return;
-
-	scratches = ipcomp_scratches;
-	if (!scratches)
-		return;
-
-	for_each_possible_cpu(i)
-		vfree(*per_cpu_ptr(scratches, i));
-
-	free_percpu(scratches);
-}
-
-static void **ipcomp_alloc_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (ipcomp_scratch_users++)
-		return ipcomp_scratches;
-
-	scratches = alloc_percpu(void *);
-	if (!scratches)
-		return NULL;
-
-	ipcomp_scratches = scratches;
-
-	for_each_possible_cpu(i) {
-		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
-		if (!scratch)
-			return NULL;
-		*per_cpu_ptr(scratches, i) = scratch;
-	}
-
-	return scratches;
-}
-
-static void ipcomp_free_tfms(struct crypto_comp **tfms)
-{
-	struct ipcomp_tfms *pos;
-	int cpu;
-
-	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
-		if (pos->tfms == tfms)
-			break;
-	}
-
-	BUG_TRAP(pos);
-
-	if (--pos->users)
-		return;
-
-	list_del(&pos->list);
-	kfree(pos);
-
-	if (!tfms)
-		return;
-
-	for_each_possible_cpu(cpu) {
-		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_comp(tfm);
-	}
-	free_percpu(tfms);
-}
-
-static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
-{
-	struct ipcomp_tfms *pos;
-	struct crypto_comp **tfms;
-	int cpu;
-
-	/* This can be any valid CPU ID so we don't need locking. */
-	cpu = raw_smp_processor_id();
-
-	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
-		struct crypto_comp *tfm;
-
-		tfms = pos->tfms;
-		tfm = *per_cpu_ptr(tfms, cpu);
-
-		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
-			pos->users++;
-			return tfms;
-		}
-	}
-
-	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
-	if (!pos)
-		return NULL;
-
-	pos->users = 1;
-	INIT_LIST_HEAD(&pos->list);
-	list_add(&pos->list, &ipcomp_tfms_list);
-
-	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
-	if (!tfms)
-		goto error;
-
-	for_each_possible_cpu(cpu) {
-		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
-							    CRYPTO_ALG_ASYNC);
-		if (IS_ERR(tfm))
-			goto error;
-		*per_cpu_ptr(tfms, cpu) = tfm;
-	}
-
-	return tfms;
-
-error:
-	ipcomp_free_tfms(tfms);
-	return NULL;
-}
-
-static void ipcomp_free_data(struct ipcomp_data *ipcd)
-{
-	if (ipcd->tfms)
-		ipcomp_free_tfms(ipcd->tfms);
-	ipcomp_free_scratches();
-}
-
-static void ipcomp_destroy(struct xfrm_state *x)
-{
-	struct ipcomp_data *ipcd = x->data;
-	if (!ipcd)
-		return;
-	xfrm_state_delete_tunnel(x);
-	mutex_lock(&ipcomp_resource_mutex);
-	ipcomp_free_data(ipcd);
-	mutex_unlock(&ipcomp_resource_mutex);
-	kfree(ipcd);
-}
-
-static int ipcomp_init_state(struct xfrm_state *x)
+static int ipcomp4_init_state(struct xfrm_state *x)
 {
 	int err;
 	struct ipcomp_data *ipcd;
 	struct xfrm_algo_desc *calg_desc;
 
-	err = -EINVAL;
-	if (!x->calg)
-		goto out;
-
-	if (x->encap)
-		goto out;
-
 	x->props.header_len = 0;
 	switch (x->props.mode) {
 	case XFRM_MODE_TRANSPORT:
@@ -402,40 +119,22 @@ static int ipcomp_init_state(struct xfrm_state *x)
 		goto out;
 	}
 
-	err = -ENOMEM;
-	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
-	if (!ipcd)
+	err = ipcomp_init_state(x);
+	if (err)
 		goto out;
 
-	mutex_lock(&ipcomp_resource_mutex);
-	if (!ipcomp_alloc_scratches())
-		goto error;
-
-	ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name);
-	if (!ipcd->tfms)
-		goto error;
-	mutex_unlock(&ipcomp_resource_mutex);
-
 	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
 	}
 
-	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
-	BUG_ON(!calg_desc);
-	ipcd->threshold = calg_desc->uinfo.comp.threshold;
-	x->data = ipcd;
 	err = 0;
 out:
 	return err;
 
 error_tunnel:
-	mutex_lock(&ipcomp_resource_mutex);
-error:
-	ipcomp_free_data(ipcd);
-	mutex_unlock(&ipcomp_resource_mutex);
-	kfree(ipcd);
+	ipcomp_destroy(x);
 	goto out;
 }
 
@@ -443,7 +142,7 @@ static const struct xfrm_type ipcomp_type = {
 	.description	= "IPCOMP4",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_COMP,
-	.init_state	= ipcomp_init_state,
+	.init_state	= ipcomp4_init_state,
 	.destructor	= ipcomp_destroy,
 	.input		= ipcomp_input,
 	.output		= ipcomp_output
@@ -481,7 +180,7 @@ module_init(ipcomp4_init);
 module_exit(ipcomp4_fini);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp/IPv4) - RFC3173");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
 
 MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 42814a2ec9d..ec992159b5f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -96,10 +96,8 @@ config INET6_ESP
 
 config INET6_IPCOMP
 	tristate "IPv6: IPComp transformation"
-	select XFRM
 	select INET6_XFRM_TUNNEL
-	select CRYPTO
-	select CRYPTO_DEFLATE
+	select XFRM_IPCOMP
 	---help---
 	  Support for IP Payload Compression Protocol (IPComp) (RFC3173),
 	  typically needed for IPsec.
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index ee6de425ce6..0cfcea42153 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -50,125 +50,6 @@
 #include <linux/icmpv6.h>
 #include <linux/mutex.h>
 
-struct ipcomp6_tfms {
-	struct list_head list;
-	struct crypto_comp **tfms;
-	int users;
-};
-
-static DEFINE_MUTEX(ipcomp6_resource_mutex);
-static void **ipcomp6_scratches;
-static int ipcomp6_scratch_users;
-static LIST_HEAD(ipcomp6_tfms_list);
-
-static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int nexthdr;
-	int err = -ENOMEM;
-	struct ip_comp_hdr *ipch;
-	int plen, dlen;
-	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
-
-	if (skb_linearize_cow(skb))
-		goto out;
-
-	skb->ip_summed = CHECKSUM_NONE;
-
-	/* Remove ipcomp header and decompress original payload */
-	ipch = (void *)skb->data;
-	nexthdr = ipch->nexthdr;
-
-	skb->transport_header = skb->network_header + sizeof(*ipch);
-	__skb_pull(skb, sizeof(*ipch));
-
-	/* decompression */
-	plen = skb->len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
-	if (err)
-		goto out_put_cpu;
-
-	if (dlen < (plen + sizeof(*ipch))) {
-		err = -EINVAL;
-		goto out_put_cpu;
-	}
-
-	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
-	if (err) {
-		goto out_put_cpu;
-	}
-
-	skb->truesize += dlen - plen;
-	__skb_put(skb, dlen - plen);
-	skb_copy_to_linear_data(skb, scratch, dlen);
-	err = nexthdr;
-
-out_put_cpu:
-	put_cpu();
-out:
-	return err;
-}
-
-static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err;
-	struct ip_comp_hdr *ipch;
-	struct ipcomp_data *ipcd = x->data;
-	int plen, dlen;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
-
-	/* check whether datagram len is larger than threshold */
-	if (skb->len < ipcd->threshold) {
-		goto out_ok;
-	}
-
-	if (skb_linearize_cow(skb))
-		goto out_ok;
-
-	/* compression */
-	plen = skb->len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	local_bh_disable();
-	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
-	local_bh_enable();
-	if (err || (dlen + sizeof(*ipch)) >= plen) {
-		put_cpu();
-		goto out_ok;
-	}
-	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
-	put_cpu();
-	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
-
-	/* insert ipcomp header and replace datagram */
-	ipch = ip_comp_hdr(skb);
-	ipch->nexthdr = *skb_mac_header(skb);
-	ipch->flags = 0;
-	ipch->cpi = htons((u16 )ntohl(x->id.spi));
-	*skb_mac_header(skb) = IPPROTO_COMP;
-
-out_ok:
-	skb_push(skb, -skb_network_offset(skb));
-
-	return 0;
-}
-
 static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 				int type, int code, int offset, __be32 info)
 {
@@ -251,161 +132,12 @@ out:
 	return err;
 }
 
-static void ipcomp6_free_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (--ipcomp6_scratch_users)
-		return;
-
-	scratches = ipcomp6_scratches;
-	if (!scratches)
-		return;
-
-	for_each_possible_cpu(i) {
-		void *scratch = *per_cpu_ptr(scratches, i);
-
-		vfree(scratch);
-	}
-
-	free_percpu(scratches);
-}
-
-static void **ipcomp6_alloc_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (ipcomp6_scratch_users++)
-		return ipcomp6_scratches;
-
-	scratches = alloc_percpu(void *);
-	if (!scratches)
-		return NULL;
-
-	ipcomp6_scratches = scratches;
-
-	for_each_possible_cpu(i) {
-		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
-		if (!scratch)
-			return NULL;
-		*per_cpu_ptr(scratches, i) = scratch;
-	}
-
-	return scratches;
-}
-
-static void ipcomp6_free_tfms(struct crypto_comp **tfms)
-{
-	struct ipcomp6_tfms *pos;
-	int cpu;
-
-	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
-		if (pos->tfms == tfms)
-			break;
-	}
-
-	BUG_TRAP(pos);
-
-	if (--pos->users)
-		return;
-
-	list_del(&pos->list);
-	kfree(pos);
-
-	if (!tfms)
-		return;
-
-	for_each_possible_cpu(cpu) {
-		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_comp(tfm);
-	}
-	free_percpu(tfms);
-}
-
-static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name)
-{
-	struct ipcomp6_tfms *pos;
-	struct crypto_comp **tfms;
-	int cpu;
-
-	/* This can be any valid CPU ID so we don't need locking. */
-	cpu = raw_smp_processor_id();
-
-	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
-		struct crypto_comp *tfm;
-
-		tfms = pos->tfms;
-		tfm = *per_cpu_ptr(tfms, cpu);
-
-		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
-			pos->users++;
-			return tfms;
-		}
-	}
-
-	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
-	if (!pos)
-		return NULL;
-
-	pos->users = 1;
-	INIT_LIST_HEAD(&pos->list);
-	list_add(&pos->list, &ipcomp6_tfms_list);
-
-	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
-	if (!tfms)
-		goto error;
-
-	for_each_possible_cpu(cpu) {
-		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
-							    CRYPTO_ALG_ASYNC);
-		if (IS_ERR(tfm))
-			goto error;
-		*per_cpu_ptr(tfms, cpu) = tfm;
-	}
-
-	return tfms;
-
-error:
-	ipcomp6_free_tfms(tfms);
-	return NULL;
-}
-
-static void ipcomp6_free_data(struct ipcomp_data *ipcd)
-{
-	if (ipcd->tfms)
-		ipcomp6_free_tfms(ipcd->tfms);
-	ipcomp6_free_scratches();
-}
-
-static void ipcomp6_destroy(struct xfrm_state *x)
-{
-	struct ipcomp_data *ipcd = x->data;
-	if (!ipcd)
-		return;
-	xfrm_state_delete_tunnel(x);
-	mutex_lock(&ipcomp6_resource_mutex);
-	ipcomp6_free_data(ipcd);
-	mutex_unlock(&ipcomp6_resource_mutex);
-	kfree(ipcd);
-
-	xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
-}
-
 static int ipcomp6_init_state(struct xfrm_state *x)
 {
 	int err;
 	struct ipcomp_data *ipcd;
 	struct xfrm_algo_desc *calg_desc;
 
-	err = -EINVAL;
-	if (!x->calg)
-		goto out;
-
-	if (x->encap)
-		goto out;
-
 	x->props.header_len = 0;
 	switch (x->props.mode) {
 	case XFRM_MODE_TRANSPORT:
@@ -417,39 +149,21 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 		goto out;
 	}
 
-	err = -ENOMEM;
-	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
-	if (!ipcd)
+	err = ipcomp_init_state(x);
+	if (err)
 		goto out;
 
-	mutex_lock(&ipcomp6_resource_mutex);
-	if (!ipcomp6_alloc_scratches())
-		goto error;
-
-	ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name);
-	if (!ipcd->tfms)
-		goto error;
-	mutex_unlock(&ipcomp6_resource_mutex);
-
 	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp6_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
 	}
 
-	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
-	BUG_ON(!calg_desc);
-	ipcd->threshold = calg_desc->uinfo.comp.threshold;
-	x->data = ipcd;
 	err = 0;
 out:
 	return err;
 error_tunnel:
-	mutex_lock(&ipcomp6_resource_mutex);
-error:
-	ipcomp6_free_data(ipcd);
-	mutex_unlock(&ipcomp6_resource_mutex);
-	kfree(ipcd);
+	ipcomp_destroy(x);
 
 	goto out;
 }
@@ -460,9 +174,9 @@ static const struct xfrm_type ipcomp6_type =
 	.owner		= THIS_MODULE,
 	.proto		= IPPROTO_COMP,
 	.init_state	= ipcomp6_init_state,
-	.destructor	= ipcomp6_destroy,
-	.input		= ipcomp6_input,
-	.output		= ipcomp6_output,
+	.destructor	= ipcomp_destroy,
+	.input		= ipcomp_input,
+	.output		= ipcomp_output,
 	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 9201ef8ad90..6d081674515 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -46,6 +46,12 @@ config XFRM_STATISTICS
 
 	  If unsure, say N.
 
+config XFRM_IPCOMP
+	tristate
+	select XFRM
+	select CRYPTO
+	select CRYPTO_DEFLATE
+
 config NET_KEY
 	tristate "PF_KEY sockets"
 	select XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 332cfb0ff56..0f439a72cca 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
 		      xfrm_input.o xfrm_output.o xfrm_algo.o
 obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
+obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
 
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
new file mode 100644
index 00000000000..b51e804fbba
--- /dev/null
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -0,0 +1,349 @@
+/*
+ * IP Payload Compression Protocol (IPComp) - RFC3173.
+ *
+ * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2003-2008 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Todo:
+ *   - Tunable compression parameters.
+ *   - Compression stats.
+ *   - Adaptive compression.
+ */
+
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rtnetlink.h>
+#include <linux/smp.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+#include <net/ipcomp.h>
+#include <net/xfrm.h>
+
+struct ipcomp_tfms {
+	struct list_head list;
+	struct crypto_comp **tfms;
+	int users;
+};
+
+static DEFINE_MUTEX(ipcomp_resource_mutex);
+static void **ipcomp_scratches;
+static int ipcomp_scratch_users;
+static LIST_HEAD(ipcomp_tfms_list);
+
+static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipcomp_data *ipcd = x->data;
+	const int plen = skb->len;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	const u8 *start = skb->data;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+
+	if (err)
+		goto out;
+
+	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
+	if (err)
+		goto out;
+
+	skb->truesize += dlen - plen;
+	__skb_put(skb, dlen - plen);
+	skb_copy_to_linear_data(skb, scratch, dlen);
+out:
+	put_cpu();
+	return err;
+}
+
+int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int nexthdr;
+	int err = -ENOMEM;
+	struct ip_comp_hdr *ipch;
+
+	if (skb_linearize_cow(skb))
+		goto out;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* Remove ipcomp header and decompress original payload */
+	ipch = (void *)skb->data;
+	nexthdr = ipch->nexthdr;
+
+	skb->transport_header = skb->network_header + sizeof(*ipch);
+	__skb_pull(skb, sizeof(*ipch));
+	err = ipcomp_decompress(x, skb);
+	if (err)
+		goto out;
+
+	err = nexthdr;
+
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(ipcomp_input);
+
+static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipcomp_data *ipcd = x->data;
+	const int plen = skb->len;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	u8 *start = skb->data;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err;
+
+	local_bh_disable();
+	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+	local_bh_enable();
+	if (err)
+		goto out;
+
+	if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
+	put_cpu();
+
+	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
+	return 0;
+
+out:
+	put_cpu();
+	return err;
+}
+
+int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	struct ip_comp_hdr *ipch;
+	struct ipcomp_data *ipcd = x->data;
+
+	if (skb->len < ipcd->threshold) {
+		/* Don't bother compressing */
+		goto out_ok;
+	}
+
+	if (skb_linearize_cow(skb))
+		goto out_ok;
+
+	err = ipcomp_compress(x, skb);
+
+	if (err) {
+		goto out_ok;
+	}
+
+	/* Install ipcomp header, convert into ipcomp datagram. */
+	ipch = ip_comp_hdr(skb);
+	ipch->nexthdr = *skb_mac_header(skb);
+	ipch->flags = 0;
+	ipch->cpi = htons((u16 )ntohl(x->id.spi));
+	*skb_mac_header(skb) = IPPROTO_COMP;
+out_ok:
+	skb_push(skb, -skb_network_offset(skb));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipcomp_output);
+
+static void ipcomp_free_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (--ipcomp_scratch_users)
+		return;
+
+	scratches = ipcomp_scratches;
+	if (!scratches)
+		return;
+
+	for_each_possible_cpu(i)
+		vfree(*per_cpu_ptr(scratches, i));
+
+	free_percpu(scratches);
+}
+
+static void **ipcomp_alloc_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (ipcomp_scratch_users++)
+		return ipcomp_scratches;
+
+	scratches = alloc_percpu(void *);
+	if (!scratches)
+		return NULL;
+
+	ipcomp_scratches = scratches;
+
+	for_each_possible_cpu(i) {
+		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
+		if (!scratch)
+			return NULL;
+		*per_cpu_ptr(scratches, i) = scratch;
+	}
+
+	return scratches;
+}
+
+static void ipcomp_free_tfms(struct crypto_comp **tfms)
+{
+	struct ipcomp_tfms *pos;
+	int cpu;
+
+	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
+		if (pos->tfms == tfms)
+			break;
+	}
+
+	BUG_TRAP(pos);
+
+	if (--pos->users)
+		return;
+
+	list_del(&pos->list);
+	kfree(pos);
+
+	if (!tfms)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+		crypto_free_comp(tfm);
+	}
+	free_percpu(tfms);
+}
+
+static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
+{
+	struct ipcomp_tfms *pos;
+	struct crypto_comp **tfms;
+	int cpu;
+
+	/* This can be any valid CPU ID so we don't need locking. */
+	cpu = raw_smp_processor_id();
+
+	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
+		struct crypto_comp *tfm;
+
+		tfms = pos->tfms;
+		tfm = *per_cpu_ptr(tfms, cpu);
+
+		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
+			pos->users++;
+			return tfms;
+		}
+	}
+
+	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+	if (!pos)
+		return NULL;
+
+	pos->users = 1;
+	INIT_LIST_HEAD(&pos->list);
+	list_add(&pos->list, &ipcomp_tfms_list);
+
+	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
+	if (!tfms)
+		goto error;
+
+	for_each_possible_cpu(cpu) {
+		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+							    CRYPTO_ALG_ASYNC);
+		if (IS_ERR(tfm))
+			goto error;
+		*per_cpu_ptr(tfms, cpu) = tfm;
+	}
+
+	return tfms;
+
+error:
+	ipcomp_free_tfms(tfms);
+	return NULL;
+}
+
+static void ipcomp_free_data(struct ipcomp_data *ipcd)
+{
+	if (ipcd->tfms)
+		ipcomp_free_tfms(ipcd->tfms);
+	ipcomp_free_scratches();
+}
+
+void ipcomp_destroy(struct xfrm_state *x)
+{
+	struct ipcomp_data *ipcd = x->data;
+	if (!ipcd)
+		return;
+	xfrm_state_delete_tunnel(x);
+	mutex_lock(&ipcomp_resource_mutex);
+	ipcomp_free_data(ipcd);
+	mutex_unlock(&ipcomp_resource_mutex);
+	kfree(ipcd);
+}
+EXPORT_SYMBOL_GPL(ipcomp_destroy);
+
+int ipcomp_init_state(struct xfrm_state *x)
+{
+	int err;
+	struct ipcomp_data *ipcd;
+	struct xfrm_algo_desc *calg_desc;
+
+	err = -EINVAL;
+	if (!x->calg)
+		goto out;
+
+	if (x->encap)
+		goto out;
+
+	err = -ENOMEM;
+	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
+	if (!ipcd)
+		goto out;
+
+	mutex_lock(&ipcomp_resource_mutex);
+	if (!ipcomp_alloc_scratches())
+		goto error;
+
+	ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name);
+	if (!ipcd->tfms)
+		goto error;
+	mutex_unlock(&ipcomp_resource_mutex);
+
+	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
+	BUG_ON(!calg_desc);
+	ipcd->threshold = calg_desc->uinfo.comp.threshold;
+	x->data = ipcd;
+	err = 0;
+out:
+	return err;
+
+error:
+	ipcomp_free_data(ipcd);
+	mutex_unlock(&ipcomp_resource_mutex);
+	kfree(ipcd);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(ipcomp_init_state);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-- 
GitLab


From 7d7e5a60c62e88cb8782760bb6c4d3bd1577a6c6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Jul 2008 02:55:33 -0700
Subject: [PATCH 562/853] ipsec: ipcomp - Decompress into frags if necessary

When decompressing extremely large packets, allocating them through
kmalloc is prone to failure.  Therefore it's better to use page
frags instead.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_ipcomp.c | 48 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index b51e804fbba..800f669083f 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -17,6 +17,7 @@
 
 #include <linux/crypto.h>
 #include <linux/err.h>
+#include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -49,6 +50,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
 	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
 	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+	int len;
 
 	if (err)
 		goto out;
@@ -58,13 +60,47 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 	}
 
-	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
-	if (err)
-		goto out;
+	len = dlen - plen;
+	if (len > skb_tailroom(skb))
+		len = skb_tailroom(skb);
+
+	skb->truesize += len;
+	__skb_put(skb, len);
+
+	len += plen;
+	skb_copy_to_linear_data(skb, scratch, len);
+
+	while ((scratch += len, dlen -= len) > 0) {
+		skb_frag_t *frag;
+
+		err = -EMSGSIZE;
+		if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))
+			goto out;
+
+		frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags;
+		frag->page = alloc_page(GFP_ATOMIC);
+
+		err = -ENOMEM;
+		if (!frag->page)
+			goto out;
+
+		len = PAGE_SIZE;
+		if (dlen < len)
+			len = dlen;
+
+		memcpy(page_address(frag->page), scratch, len);
+
+		frag->page_offset = 0;
+		frag->size = len;
+		skb->truesize += len;
+		skb->data_len += len;
+		skb->len += len;
+
+		skb_shinfo(skb)->nr_frags++;
+	}
+
+	err = 0;
 
-	skb->truesize += dlen - plen;
-	__skb_put(skb, dlen - plen);
-	skb_copy_to_linear_data(skb, scratch, dlen);
 out:
 	put_cpu();
 	return err;
-- 
GitLab


From 29b309e52d3d51ef8a15bd15590903cf272beb93 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 25 Jul 2008 09:19:36 -0700
Subject: [PATCH 563/853] Undo duplicate "m68k: drivers/input/serio/hp_sdc.c
 needs <linux/semaphore.h>"

Both commits 0f17e4c796e89d1f69f13b653aba60e6ccfb8ae0 ("Add missing
semaphore.h includes") and 4933d07531711e399d8d578036aa9fc1be2f9b20
("m68k: drivers/input/serio/hp_sdc.c needs <linux/semaphore.h>") added a
"#include <linux/semaphore.h>" line to drivers/input/serio/hp_sdc.c.

We only really need one ;)

Reported-by: Huang Weiyi <weiyi.huang@gmail.com>
Requested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/input/serio/hp_sdc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index aad664d5259..0d395979b2d 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -70,7 +70,6 @@
 #include <linux/semaphore.h>
 #include <linux/slab.h>
 #include <linux/hil.h>
-#include <linux/semaphore.h>
 #include <asm/io.h>
 #include <asm/system.h>
 
-- 
GitLab


From 43de804df8d6002059bf4af4522fa9273a19b8aa Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Fri, 25 Jul 2008 23:30:15 +0800
Subject: [PATCH 564/853] char/xilinx_hwicap/xilinx_hwicap.c: Removed
 duplicated include

Removed duplicated include file <linux/version.h> in
char/xilinx_hwicap/xilinx_hwicap.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/xilinx_hwicap/xilinx_hwicap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
index 51966ccf4ea..8bfee5fb722 100644
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
@@ -87,7 +87,6 @@
 #include <linux/mutex.h>
 #include <linux/smp_lock.h>
 #include <linux/sysctl.h>
-#include <linux/version.h>
 #include <linux/fs.h>
 #include <linux/cdev.h>
 #include <linux/platform_device.h>
-- 
GitLab


From 3d6f4a20cc287a8980c6186624834cf10a70752b Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Thu, 24 Jul 2008 23:38:31 -0700
Subject: [PATCH 565/853] endian: Always evaluate arguments.

Changeset 7fa897b91a3ea0f16c2873b869d7a0eef05acff4 ("ide: trivial sparse
annotations") created an IDE bootup regression on big-endian systems.

In drivers/ide/ide-iops.c, function ide_fixstring() we now have the
loop:

		for (p = end ; p != s;)
			be16_to_cpus((u16 *)(p -= 2));

which will never terminate on big-endian because in such
a configuration be16_to_cpus() evaluates to "do { } while (0)", so the
"p -= 2" side effect in its argument is never executed.

Therefore, always evaluate the arguments to nop endian transformation
operations.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/byteorder/big_endian.h    | 12 ++++++------
 include/linux/byteorder/little_endian.h | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 961ed4b48d8..44f95b92393 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -94,12 +94,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p)
 #define __le32_to_cpus(x) __swab32s((x))
 #define __cpu_to_le16s(x) __swab16s((x))
 #define __le16_to_cpus(x) __swab16s((x))
-#define __cpu_to_be64s(x) do {} while (0)
-#define __be64_to_cpus(x) do {} while (0)
-#define __cpu_to_be32s(x) do {} while (0)
-#define __be32_to_cpus(x) do {} while (0)
-#define __cpu_to_be16s(x) do {} while (0)
-#define __be16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) do { (void)(x); } while (0)
+#define __be64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be32s(x) do { (void)(x); } while (0)
+#define __be32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be16s(x) do { (void)(x); } while (0)
+#define __be16_to_cpus(x) do { (void)(x); } while (0)
 
 #ifdef __KERNEL__
 #include <linux/byteorder/generic.h>
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index 05dc7c35b3b..4cc170a3176 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -88,12 +88,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p)
 {
 	return __swab16p((__u16 *)p);
 }
-#define __cpu_to_le64s(x) do {} while (0)
-#define __le64_to_cpus(x) do {} while (0)
-#define __cpu_to_le32s(x) do {} while (0)
-#define __le32_to_cpus(x) do {} while (0)
-#define __cpu_to_le16s(x) do {} while (0)
-#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_le64s(x) do { (void)(x); } while (0)
+#define __le64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le32s(x) do { (void)(x); } while (0)
+#define __le32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le16s(x) do { (void)(x); } while (0)
+#define __le16_to_cpus(x) do { (void)(x); } while (0)
 #define __cpu_to_be64s(x) __swab64s((x))
 #define __be64_to_cpus(x) __swab64s((x))
 #define __cpu_to_be32s(x) __swab32s((x))
-- 
GitLab


From 3e4d0cab61c88a9ae3e61151a857960397e26403 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 25 Jul 2008 10:10:28 -0700
Subject: [PATCH 566/853] [IA64] Wire up new system calls

Six new system calls: signalfd4, eventfd2, epoll_create1,
dup3, pipe2 and inotify_init1.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/entry.S  | 6 ++++++
 include/asm-ia64/unistd.h | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 56ab156c48a..0dd6c1419d8 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1691,6 +1691,12 @@ sys_call_table:
 	data8 sys_timerfd_create		// 1310
 	data8 sys_timerfd_settime
 	data8 sys_timerfd_gettime
+	data8 sys_signalfd4
+	data8 sys_eventfd2
+	data8 sys_epoll_create1			// 1315
+	data8 sys_dup3
+	data8 sys_pipe2
+	data8 sys_inotify_init1
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index e6031471612..d535833aab5 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -302,11 +302,17 @@
 #define __NR_timerfd_create		1310
 #define __NR_timerfd_settime		1311
 #define __NR_timerfd_gettime		1312
+#define __NR_signalfd4			1313
+#define __NR_eventfd2			1314
+#define __NR_epoll_create1		1315
+#define __NR_dup3			1316
+#define __NR_pipe2			1317
+#define __NR_inotify_init1		1318
 
 #ifdef __KERNEL__
 
 
-#define NR_syscalls			289 /* length of syscall table */
+#define NR_syscalls			295 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
-- 
GitLab


From c82dd5321cf779f1f536ef26b383cbe8c9de7f10 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 25 Jul 2008 01:45:22 -0700
Subject: [PATCH 567/853] mfd: don't use memzero

For it doesn't exist on i386.

Cc: Ian Molton <spyro@f2s.com>
Cc: Dmitry Baryshkov <dbaryshkov@gmail.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/mfd-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index d7d88ce053a..0454be4266c 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -36,7 +36,7 @@ static int mfd_add_device(struct platform_device *parent,
 	if (ret)
 		goto fail_device;
 
-	memzero(res, sizeof(res));
+	memset(res, 0, sizeof(res));
 	for (r = 0; r < cell->num_resources; r++) {
 		res[r].name = cell->resources[r].name;
 		res[r].flags = cell->resources[r].flags;
-- 
GitLab


From 5df439ef06d4173357711a04740aa8bfcf50d621 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:45:23 -0700
Subject: [PATCH 568/853] flag parameters: fix compile error of
 sys_epoll_create1

GEN     .version
  CHK     include/linux/compile.h
  UPD     include/linux/compile.h
  CC      init/version.o
  LD      init/built-in.o
  LD      vmlinux
arch/x86/kernel/built-in.o: In function `sys_call_table':
(.rodata+0x8a4): undefined reference to `sys_epoll_create1'
make: *** [vmlinux] Error 1

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys_ni.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index bd66ac5406f..55eca1594da 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -57,6 +57,7 @@ cond_syscall(compat_sys_set_robust_list);
 cond_syscall(sys_get_robust_list);
 cond_syscall(compat_sys_get_robust_list);
 cond_syscall(sys_epoll_create);
+cond_syscall(sys_epoll_create1);
 cond_syscall(sys_epoll_ctl);
 cond_syscall(sys_epoll_wait);
 cond_syscall(sys_epoll_pwait);
-- 
GitLab


From e0deaff470900a4c3222ca7139f6c9639e26a2f5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 25 Jul 2008 01:45:24 -0700
Subject: [PATCH 569/853] split the typecheck macros out of
 include/linux/kernel.h

Needed to fix up a recursive include snafu in
locking-add-typecheck-on-irqsave-and-friends-for-correct-flags.patch

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h    | 21 +--------------------
 include/linux/typecheck.h | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 20 deletions(-)
 create mode 100644 include/linux/typecheck.h

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f9cd7a513f9..5c4b1251e11 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,7 @@
 #include <linux/compiler.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/typecheck.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -441,26 +442,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
 	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
 	(type *)( (char *)__mptr - offsetof(type,member) );})
 
-/*
- * Check at compile time that something is of a particular type.
- * Always evaluates to 1 so you may use it easily in comparisons.
- */
-#define typecheck(type,x) \
-({	type __dummy; \
-	typeof(x) __dummy2; \
-	(void)(&__dummy == &__dummy2); \
-	1; \
-})
-
-/*
- * Check at compile time that 'function' is a certain type, or is a pointer
- * to that type (needs to use typedef for the function type.)
- */
-#define typecheck_fn(type,function) \
-({	typeof(type) __tmp = function; \
-	(void)__tmp; \
-})
-
 struct sysinfo;
 extern int do_sysinfo(struct sysinfo *info);
 
diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h
new file mode 100644
index 00000000000..eb5b74a575b
--- /dev/null
+++ b/include/linux/typecheck.h
@@ -0,0 +1,24 @@
+#ifndef TYPECHECK_H_INCLUDED
+#define TYPECHECK_H_INCLUDED
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({	type __dummy; \
+	typeof(x) __dummy2; \
+	(void)(&__dummy == &__dummy2); \
+	1; \
+})
+
+/*
+ * Check at compile time that 'function' is a certain type, or is a pointer
+ * to that type (needs to use typedef for the function type.)
+ */
+#define typecheck_fn(type,function) \
+({	typeof(type) __tmp = function; \
+	(void)__tmp; \
+})
+
+#endif		/* TYPECHECK_H_INCLUDED */
-- 
GitLab


From 3f307891ce0e7b0438c432af1aacd656a092ff45 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 25 Jul 2008 01:45:25 -0700
Subject: [PATCH 570/853] locking: add typecheck on irqsave and friends for
 correct flags

There have been several areas in the kernel where an int has been used for
flags in local_irq_save() and friends instead of an unsigned long.  This
can cause some hard-to-debug problems on some architectures.

This patch adds a typecheck inside the irqsave and restore functions to flag
these cases.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/irqflags.h | 54 +++++++++++++++++++++---------
 include/linux/spinlock.h | 72 +++++++++++++++++++++++++++++++---------
 2 files changed, 95 insertions(+), 31 deletions(-)

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 2b1c2e58566..74bde13224c 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -11,6 +11,8 @@
 #ifndef _LINUX_TRACE_IRQFLAGS_H
 #define _LINUX_TRACE_IRQFLAGS_H
 
+#include <linux/typecheck.h>
+
 #ifdef CONFIG_TRACE_IRQFLAGS
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
@@ -58,18 +60,24 @@
 	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
 #define local_irq_disable() \
 	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
-#define local_irq_save(flags) \
-	do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
+#define local_irq_save(flags)				\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		raw_local_irq_save(flags);		\
+		trace_hardirqs_off();			\
+	} while (0)
 
-#define local_irq_restore(flags)				\
-	do {							\
-		if (raw_irqs_disabled_flags(flags)) {		\
-			raw_local_irq_restore(flags);		\
-			trace_hardirqs_off();			\
-		} else {					\
-			trace_hardirqs_on();			\
-			raw_local_irq_restore(flags);		\
-		}						\
+
+#define local_irq_restore(flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		if (raw_irqs_disabled_flags(flags)) {	\
+			raw_local_irq_restore(flags);	\
+			trace_hardirqs_off();		\
+		} else {				\
+			trace_hardirqs_on();		\
+			raw_local_irq_restore(flags);	\
+		}					\
 	} while (0)
 #else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
 /*
@@ -78,8 +86,16 @@
  */
 # define raw_local_irq_disable()	local_irq_disable()
 # define raw_local_irq_enable()		local_irq_enable()
-# define raw_local_irq_save(flags)	local_irq_save(flags)
-# define raw_local_irq_restore(flags)	local_irq_restore(flags)
+# define raw_local_irq_save(flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		local_irq_save(flags);			\
+	} while (0)
+# define raw_local_irq_restore(flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		local_irq_restore(flags);		\
+	} while (0)
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
@@ -89,7 +105,11 @@
 		raw_safe_halt();				\
 	} while (0)
 
-#define local_save_flags(flags)		raw_local_save_flags(flags)
+#define local_save_flags(flags)				\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		raw_local_save_flags(flags);		\
+	} while (0)
 
 #define irqs_disabled()						\
 ({								\
@@ -99,7 +119,11 @@
 	raw_irqs_disabled_flags(_flags);			\
 })
 
-#define irqs_disabled_flags(flags)	raw_irqs_disabled_flags(flags)
+#define irqs_disabled_flags(flags)		\
+({						\
+	typecheck(unsigned long, flags);	\
+	raw_irqs_disabled_flags(flags);		\
+})
 #endif		/* CONFIG_X86 */
 
 #endif
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d311a090fae..61e5610ad16 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -46,6 +46,7 @@
  *  linux/spinlock.h:     builds the final spin_*() APIs.
  */
 
+#include <linux/typecheck.h>
 #include <linux/preempt.h>
 #include <linux/linkage.h>
 #include <linux/compiler.h>
@@ -191,23 +192,53 @@ do {								\
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
-#define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
-#define read_lock_irqsave(lock, flags)	flags = _read_lock_irqsave(lock)
-#define write_lock_irqsave(lock, flags)	flags = _write_lock_irqsave(lock)
+#define spin_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = _spin_lock_irqsave(lock);	\
+	} while (0)
+#define read_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = _read_lock_irqsave(lock);	\
+	} while (0)
+#define write_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = _write_lock_irqsave(lock);	\
+	} while (0)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
-	flags = _spin_lock_irqsave_nested(lock, subclass)
+#define spin_lock_irqsave_nested(lock, flags, subclass)			\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		flags = _spin_lock_irqsave_nested(lock, subclass);	\
+	} while (0)
 #else
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
-	flags = _spin_lock_irqsave(lock)
+#define spin_lock_irqsave_nested(lock, flags, subclass)			\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		flags = _spin_lock_irqsave(lock);			\
+	} while (0)
 #endif
 
 #else
 
-#define spin_lock_irqsave(lock, flags)	_spin_lock_irqsave(lock, flags)
-#define read_lock_irqsave(lock, flags)	_read_lock_irqsave(lock, flags)
-#define write_lock_irqsave(lock, flags)	_write_lock_irqsave(lock, flags)
+#define spin_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_spin_lock_irqsave(lock, flags);	\
+	} while (0)
+#define read_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_read_lock_irqsave(lock, flags);	\
+	} while (0)
+#define write_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_write_lock_irqsave(lock, flags);	\
+	} while (0)
 #define spin_lock_irqsave_nested(lock, flags, subclass)	\
 	spin_lock_irqsave(lock, flags)
 
@@ -260,16 +291,25 @@ do {						\
 } while (0)
 #endif
 
-#define spin_unlock_irqrestore(lock, flags) \
-					_spin_unlock_irqrestore(lock, flags)
+#define spin_unlock_irqrestore(lock, flags)		\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_spin_unlock_irqrestore(lock, flags);	\
+	} while (0)
 #define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
 
-#define read_unlock_irqrestore(lock, flags) \
-					_read_unlock_irqrestore(lock, flags)
+#define read_unlock_irqrestore(lock, flags)		\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_read_unlock_irqrestore(lock, flags);	\
+	} while (0)
 #define read_unlock_bh(lock)		_read_unlock_bh(lock)
 
-#define write_unlock_irqrestore(lock, flags) \
-					_write_unlock_irqrestore(lock, flags)
+#define write_unlock_irqrestore(lock, flags)		\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_write_unlock_irqrestore(lock, flags);	\
+	} while (0)
 #define write_unlock_bh(lock)		_write_unlock_bh(lock)
 
 #define spin_trylock_bh(lock)	__cond_lock(lock, _spin_trylock_bh(lock))
-- 
GitLab


From b7bbf8fa6ba329b3552b75a0716f5fbc6f839499 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 25 Jul 2008 01:45:25 -0700
Subject: [PATCH 571/853] fs: ldm.[ch] use get_unaligned_* helpers

Replace the private BE16/BE32/BE64 macros with direct calls to
get_unaligned_be16/32/64.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/partitions/ldm.c | 70 ++++++++++++++++++++++-----------------------
 fs/partitions/ldm.h |  5 ----
 2 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 0fdda2e8a4c..8652fb99e96 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
 	bool is_vista = false;
 
 	BUG_ON(!data || !ph);
-	if (MAGIC_PRIVHEAD != BE64(data)) {
+	if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
 		ldm_error("Cannot find PRIVHEAD structure. LDM database is"
 			" corrupt. Aborting.");
 		return false;
 	}
-	ph->ver_major = BE16(data + 0x000C);
-	ph->ver_minor = BE16(data + 0x000E);
-	ph->logical_disk_start = BE64(data + 0x011B);
-	ph->logical_disk_size = BE64(data + 0x0123);
-	ph->config_start = BE64(data + 0x012B);
-	ph->config_size = BE64(data + 0x0133);
+	ph->ver_major = get_unaligned_be16(data + 0x000C);
+	ph->ver_minor = get_unaligned_be16(data + 0x000E);
+	ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
+	ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
+	ph->config_start = get_unaligned_be64(data + 0x012B);
+	ph->config_size = get_unaligned_be64(data + 0x0133);
 	/* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
 	if (ph->ver_major == 2 && ph->ver_minor == 12)
 		is_vista = true;
@@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
 {
 	BUG_ON (!data || !toc);
 
-	if (MAGIC_TOCBLOCK != BE64 (data)) {
+	if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
 		ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
 		return false;
 	}
 	strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
 	toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
-	toc->bitmap1_start = BE64 (data + 0x2E);
-	toc->bitmap1_size  = BE64 (data + 0x36);
+	toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
+	toc->bitmap1_size  = get_unaligned_be64(data + 0x36);
 
 	if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
 			sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
 	}
 	strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
 	toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
-	toc->bitmap2_start = BE64 (data + 0x50);
-	toc->bitmap2_size  = BE64 (data + 0x58);
+	toc->bitmap2_start = get_unaligned_be64(data + 0x50);
+	toc->bitmap2_size  = get_unaligned_be64(data + 0x58);
 	if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
 			sizeof (toc->bitmap2_name)) != 0) {
 		ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
 {
 	BUG_ON (!data || !vm);
 
-	if (MAGIC_VMDB != BE32 (data)) {
+	if (MAGIC_VMDB != get_unaligned_be32(data)) {
 		ldm_crit ("Cannot find the VMDB, database may be corrupt.");
 		return false;
 	}
 
-	vm->ver_major = BE16 (data + 0x12);
-	vm->ver_minor = BE16 (data + 0x14);
+	vm->ver_major = get_unaligned_be16(data + 0x12);
+	vm->ver_minor = get_unaligned_be16(data + 0x14);
 	if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
 		ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
 			"Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
 		return false;
 	}
 
-	vm->vblk_size     = BE32 (data + 0x08);
-	vm->vblk_offset   = BE32 (data + 0x0C);
-	vm->last_vblk_seq = BE32 (data + 0x04);
+	vm->vblk_size     = get_unaligned_be32(data + 0x08);
+	vm->vblk_offset   = get_unaligned_be32(data + 0x0C);
+	vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
 
 	ldm_debug ("Parsed VMDB successfully.");
 	return true;
@@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
 		goto out;				/* Already logged */
 
 	/* Are there uncommitted transactions? */
-	if (BE16(data + 0x10) != 0x01) {
+	if (get_unaligned_be16(data + 0x10) != 0x01) {
 		ldm_crit ("Database is not in a consistent state.  Aborting.");
 		goto out;
 	}
@@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 
 	len += VBLK_SIZE_CMP3;
-	if (len != BE32 (buffer + 0x14))
+	if (len != get_unaligned_be32(buffer + 0x14))
 		return false;
 
 	comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 
 	len += VBLK_SIZE_DGR3;
-	if (len != BE32 (buffer + 0x14))
+	if (len != get_unaligned_be32(buffer + 0x14))
 		return false;
 
 	dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 
 	len += VBLK_SIZE_DGR4;
-	if (len != BE32 (buffer + 0x14))
+	if (len != get_unaligned_be32(buffer + 0x14))
 		return false;
 
 	dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 
 	len += VBLK_SIZE_DSK3;
-	if (len != BE32 (buffer + 0x14))
+	if (len != get_unaligned_be32(buffer + 0x14))
 		return false;
 
 	disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 
 	len += VBLK_SIZE_DSK4;
-	if (len != BE32 (buffer + 0x14))
+	if (len != get_unaligned_be32(buffer + 0x14))
 		return false;
 
 	disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 	}
 	len += VBLK_SIZE_PRT3;
-	if (len > BE32(buffer + 0x14)) {
+	if (len > get_unaligned_be32(buffer + 0x14)) {
 		ldm_error("len %d > BE32(buffer + 0x14) %d", len,
-				BE32(buffer + 0x14));
+				get_unaligned_be32(buffer + 0x14));
 		return false;
 	}
 	part = &vb->vblk.part;
-	part->start = BE64(buffer + 0x24 + r_name);
-	part->volume_offset = BE64(buffer + 0x2C + r_name);
+	part->start = get_unaligned_be64(buffer + 0x24 + r_name);
+	part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
 	part->size = ldm_get_vnum(buffer + 0x34 + r_name);
 	part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
 	part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 	}
 	len += VBLK_SIZE_VOL5;
-	if (len > BE32(buffer + 0x14)) {
+	if (len > get_unaligned_be32(buffer + 0x14)) {
 		ldm_error("len %d > BE32(buffer + 0x14) %d", len,
-				BE32(buffer + 0x14));
+				get_unaligned_be32(buffer + 0x14));
 		return false;
 	}
 	volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
 
 	BUG_ON (!data || !frags);
 
-	group = BE32 (data + 0x08);
-	rec   = BE16 (data + 0x0C);
-	num   = BE16 (data + 0x0E);
+	group = get_unaligned_be32(data + 0x08);
+	rec   = get_unaligned_be16(data + 0x0C);
+	num   = get_unaligned_be16(data + 0x0E);
 	if ((num < 1) || (num > 4)) {
 		ldm_error ("A VBLK claims to have %d parts.", num);
 		return false;
@@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
 		}
 
 		for (v = 0; v < perbuf; v++, data+=size) {  /* For each vblk */
-			if (MAGIC_VBLK != BE32 (data)) {
+			if (MAGIC_VBLK != get_unaligned_be32(data)) {
 				ldm_error ("Expected to find a VBLK.");
 				goto out;
 			}
 
-			recs = BE16 (data + 0x0E);	/* Number of records */
+			recs = get_unaligned_be16(data + 0x0E);	/* Number of records */
 			if (recs == 1) {
 				if (!ldm_ldmdb_add (data, size, ldb))
 					goto out;	/* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 80f63b5fdd9..30e08e809c1 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@ struct parsed_partitions;
 #define TOC_BITMAP1		"config"	/* Names of the two defined */
 #define TOC_BITMAP2		"log"		/* bitmaps in the TOCBLOCK. */
 
-/* Most numbers we deal with are big-endian and won't be aligned. */
-#define BE16(x)			((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
-#define BE32(x)			((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
-#define BE64(x)			((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
-
 /* Borrowed from msdos.c */
 #define SYS_IND(p)		(get_unaligned(&(p)->sys_ind))
 
-- 
GitLab


From 8b5ac31e27135a6f2c210c40d03bf8f1b3a86b77 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 25 Jul 2008 01:45:26 -0700
Subject: [PATCH 572/853] include: use get/put_unaligned_* helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/reiserfs_fs.h      |  4 ++--
 include/linux/smb_fs.h           | 19 +++++++------------
 include/net/ieee80211_radiotap.h |  2 +-
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 4aacaeecb56..e9963af16cd 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -526,8 +526,8 @@ struct item_head {
 ** p is the array of __u32, i is the index into the array, v is the value
 ** to store there.
 */
-#define get_block_num(p, i) le32_to_cpu(get_unaligned((p) + (i)))
-#define put_block_num(p, i, v) put_unaligned(cpu_to_le32(v), (p) + (i))
+#define get_block_num(p, i) get_unaligned_le32((p) + (i))
+#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
 
 //
 // in old version uniqueness field shows key type
diff --git a/include/linux/smb_fs.h b/include/linux/smb_fs.h
index 2c5cd55f44f..923cd8a247b 100644
--- a/include/linux/smb_fs.h
+++ b/include/linux/smb_fs.h
@@ -43,18 +43,13 @@ static inline struct smb_inode_info *SMB_I(struct inode *inode)
 }
 
 /* macro names are short for word, double-word, long value (?) */
-#define WVAL(buf,pos) \
-	(le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos)))))
-#define DVAL(buf,pos) \
-	(le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos)))))
-#define LVAL(buf,pos) \
-	(le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos)))))
-#define WSET(buf,pos,val) \
-	put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos)))
-#define DSET(buf,pos,val) \
-	put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos)))
-#define LSET(buf,pos,val) \
-	put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos)))
+#define WVAL(buf, pos) (get_unaligned_le16((u8 *)(buf) + (pos)))
+#define DVAL(buf, pos) (get_unaligned_le32((u8 *)(buf) + (pos)))
+#define LVAL(buf, pos) (get_unaligned_le64((u8 *)(buf) + (pos)))
+
+#define WSET(buf, pos, val) put_unaligned_le16((val), (u8 *)(buf) + (pos))
+#define DSET(buf, pos, val) put_unaligned_le32((val), (u8 *)(buf) + (pos))
+#define LSET(buf, pos, val) put_unaligned_le64((val), (u8 *)(buf) + (pos))
 
 /* where to find the base of the SMB packet proper */
 #define smb_base(buf) ((u8 *)(((u8 *)(buf))+4))
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index dfd8bf66ce2..d364fd594ea 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -262,7 +262,7 @@ static inline int ieee80211_get_radiotap_len(unsigned char *data)
 	struct ieee80211_radiotap_header *hdr =
 		(struct ieee80211_radiotap_header *)data;
 
-	return le16_to_cpu(get_unaligned(&hdr->it_len));
+	return get_unaligned_le16(&hdr->it_len);
 }
 
 #endif				/* IEEE80211_RADIOTAP_H */
-- 
GitLab


From 545e400619b24b6b17b7f1f1e838e9ff6d036949 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 25 Jul 2008 01:45:27 -0700
Subject: [PATCH 573/853] lzo: use get/put_unaligned_* helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/lzo/lzo1x_decompress.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 77f0f9b775a..5dc6b29c157 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -138,8 +138,7 @@ match:
 					t += 31 + *ip++;
 				}
 				m_pos = op - 1;
-				m_pos -= le16_to_cpu(get_unaligned(
-					(const unsigned short *)ip)) >> 2;
+				m_pos -= get_unaligned_le16(ip) >> 2;
 				ip += 2;
 			} else if (t >= 16) {
 				m_pos = op;
@@ -157,8 +156,7 @@ match:
 					}
 					t += 7 + *ip++;
 				}
-				m_pos -= le16_to_cpu(get_unaligned(
-					(const unsigned short *)ip)) >> 2;
+				m_pos -= get_unaligned_le16(ip) >> 2;
 				ip += 2;
 				if (m_pos == op)
 					goto eof_found;
-- 
GitLab


From 585e93ae83b80c874bf4eb50a239027cef5db4af Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Fri, 25 Jul 2008 01:45:27 -0700
Subject: [PATCH 574/853] find dynamic stack allocations in checkstack.pl

Currently, checkstack.pl only looks for fixed subtractions from the stack
pointer.  However, things like this:

void function(int size)
{
        char stackbuster[size << 2];
...

are certainly worth pointing out, I think.

This could perhaps be done more cleanly, and the following patch only
adds "dynamic" REs for x86 and x86_64, but it works:

0x00b0 crypto_cbc_decrypt_inplace [cbc]:                Dynamic (%rax)
0x00ad crypto_pcbc_decrypt_inplace [pcbc]:              Dynamic (%rax)
0x02f6 crypto_pcbc_encrypt_inplace [pcbc]:              Dynamic (%rax)
0x036c _crypto_xcbc_digest_setkey [xcbc]:               Dynamic (%rax)
...

(Inspired by Keith Owens' old stack-check script)

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkstack.pl | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 340ad692051..358f96c75b4 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -26,8 +26,12 @@
 # $& (whole re) matches the complete objdump line with the stack growth
 # $1 (first bracket) matches the size of the stack growth
 #
+# $dre is similar, but for dynamic stack reductions:
+# $& (whole re) matches the complete objdump line with the stack growth
+# $1 (first bracket) matches the dynamic amount of the stack growth
+#
 # use anything else and feel the pain ;)
-my (@stack, $re, $x, $xs);
+my (@stack, $re, $dre, $x, $xs);
 {
 	my $arch = shift;
 	if ($arch eq "") {
@@ -46,9 +50,11 @@ my (@stack, $re, $x, $xs);
 	} elsif ($arch =~ /^i[3456]86$/) {
 		#c0105234:       81 ec ac 05 00 00       sub    $0x5ac,%esp
 		$re = qr/^.*[as][du][db]    \$(0x$x{1,8}),\%esp$/o;
+		$dre = qr/^.*[as][du][db]    (%.*),\%esp$/o;
 	} elsif ($arch eq 'x86_64') {
 		#    2f60:	48 81 ec e8 05 00 00 	sub    $0x5e8,%rsp
 		$re = qr/^.*[as][du][db]    \$(0x$x{1,8}),\%rsp$/o;
+		$dre = qr/^.*[as][du][db]    (\%.*),\%rsp$/o;
 	} elsif ($arch eq 'ia64') {
 		#e0000000044011fc:       01 0f fc 8c     adds r12=-384,r12
 		$re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
@@ -141,6 +147,22 @@ while (my $line = <STDIN>) {
 		next if ($size < 100);
 		push @stack, "$intro$size\n";
 	}
+	elsif (defined $dre && $line =~ m/$dre/) {
+		my $size = "Dynamic ($1)";
+
+		next if $line !~ m/^($xs*)/;
+		my $addr = $1;
+		$addr =~ s/ /0/g;
+		$addr = "0x$addr";
+
+		my $intro = "$addr $func [$file]:";
+		my $padlen = 56 - length($intro);
+		while ($padlen > 0) {
+			$intro .= '	';
+			$padlen -= 8;
+		}
+		push @stack, "$intro$size\n";
+	}
 }
 
 print sort bysize @stack;
-- 
GitLab


From abddaec56ebb7911bbf0578a4636a74bd7376d92 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Fri, 25 Jul 2008 01:45:28 -0700
Subject: [PATCH 575/853] fix checkstack.pl arch detection

uname -m was leaving a newline in $arch, and not passing the tests.

Also, printing the unknown arch on failure is probably helpful.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkstack.pl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 358f96c75b4..3eca62566d6 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -36,6 +36,7 @@ my (@stack, $re, $dre, $x, $xs);
 	my $arch = shift;
 	if ($arch eq "") {
 		$arch = `uname -m`;
+		chomp($arch);
 	}
 
 	$x	= "[0-9a-f]";	# hex character
@@ -91,7 +92,7 @@ my (@stack, $re, $dre, $x, $xs);
 		#   0:   00 e8 38 01     LINK 0x4e0;
 		$re = qr/.*[[:space:]]LINK[[:space:]]*(0x$x{1,8})/o;
 	} else {
-		print("wrong or unknown architecture\n");
+		print("wrong or unknown architecture \"$arch\"\n");
 		exit
 	}
 }
-- 
GitLab


From 82c8253ac27291d6c70114eb445c714359812a10 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:45:29 -0700
Subject: [PATCH 576/853] init/do_mounts.c should #include <linux/initrd.h>

Every file should include the headers containing the externs for its
global code (in this case for rd_doload).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 init/do_mounts.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index a1de1bf3d6b..f769fac4f4c 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -12,6 +12,7 @@
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/initrd.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs_sb.h>
-- 
GitLab


From b39c08cb692cb8898c30e0d8187c7cbe27cc905c Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:29 -0700
Subject: [PATCH 577/853] Remove apparently unused fd1772.h header file.

This header file has been unused for quite some time, and the
corresponding source files appear to have been removed back in commit
99eb8a550dbccc0e1f6c7e866fe421810e0585f6 ("Remove the arm26 port")

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Adrian Bunk <bunk@stusta.de>
Cc: Ian Molton <spyro@f2s.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fd1772.h | 80 ------------------------------------------
 1 file changed, 80 deletions(-)
 delete mode 100644 include/linux/fd1772.h

diff --git a/include/linux/fd1772.h b/include/linux/fd1772.h
deleted file mode 100644
index 871d6e4c677..00000000000
--- a/include/linux/fd1772.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef _LINUX_FD1772REG_H
-#define _LINUX_FD1772REG_H
-
-/*
-** WD1772 stuff - originally from the M68K Linux
- * Modified for Archimedes by Dave Gilbert (gilbertd@cs.man.ac.uk)
- */
-
-/* register codes */
-
-#define FDC1772SELREG_STP   (0x80)   /* command/status register */
-#define FDC1772SELREG_TRA   (0x82)   /* track register */
-#define FDC1772SELREG_SEC   (0x84)   /* sector register */
-#define FDC1772SELREG_DTA   (0x86)   /* data register */
-
-/* register names for FDC1772_READ/WRITE macros */
-
-#define FDC1772REG_CMD         0
-#define FDC1772REG_STATUS      0
-#define FDC1772REG_TRACK       2
-#define FDC1772REG_SECTOR      4
-#define FDC1772REG_DATA                6
-
-/* command opcodes */
-
-#define FDC1772CMD_RESTORE  (0x00)   /*  -                   */
-#define FDC1772CMD_SEEK     (0x10)   /*   |                  */
-#define FDC1772CMD_STEP     (0x20)   /*   |  TYP 1 Commands  */
-#define FDC1772CMD_STIN     (0x40)   /*   |                  */
-#define FDC1772CMD_STOT     (0x60)   /*  -                   */
-#define FDC1772CMD_RDSEC    (0x80)   /*  -   TYP 2 Commands  */
-#define FDC1772CMD_WRSEC    (0xa0)   /*  -          "        */
-#define FDC1772CMD_RDADR    (0xc0)   /*  -                   */
-#define FDC1772CMD_RDTRA    (0xe0)   /*   |  TYP 3 Commands  */
-#define FDC1772CMD_WRTRA    (0xf0)   /*  -                   */
-#define FDC1772CMD_FORCI    (0xd0)   /*  -   TYP 4 Command   */
-
-/* command modifier bits */
-
-#define FDC1772CMDADD_SR6   (0x00)   /* step rate settings */
-#define FDC1772CMDADD_SR12  (0x01)
-#define FDC1772CMDADD_SR2   (0x02)
-#define FDC1772CMDADD_SR3   (0x03)
-#define FDC1772CMDADD_V     (0x04)   /* verify */
-#define FDC1772CMDADD_H     (0x08)   /* wait for spin-up */
-#define FDC1772CMDADD_U     (0x10)   /* update track register */
-#define FDC1772CMDADD_M     (0x10)   /* multiple sector access */
-#define FDC1772CMDADD_E     (0x04)   /* head settling flag */
-#define FDC1772CMDADD_P     (0x02)   /* precompensation */
-#define FDC1772CMDADD_A0    (0x01)   /* DAM flag */
-
-/* status register bits */
-
-#define        FDC1772STAT_MOTORON     (0x80)   /* motor on */
-#define        FDC1772STAT_WPROT       (0x40)   /* write protected (FDC1772CMD_WR*) */
-#define        FDC1772STAT_SPINUP      (0x20)   /* motor speed stable (Type I) */
-#define        FDC1772STAT_DELDAM      (0x20)   /* sector has deleted DAM (Type II+III) */
-#define        FDC1772STAT_RECNF       (0x10)   /* record not found */
-#define        FDC1772STAT_CRC         (0x08)   /* CRC error */
-#define        FDC1772STAT_TR00        (0x04)   /* Track 00 flag (Type I) */
-#define        FDC1772STAT_LOST        (0x04)   /* Lost Data (Type II+III) */
-#define        FDC1772STAT_IDX         (0x02)   /* Index status (Type I) */
-#define        FDC1772STAT_DRQ         (0x02)   /* DRQ status (Type II+III) */
-#define        FDC1772STAT_BUSY        (0x01)   /* FDC1772 is busy */
-
-
-/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1  1 -> Side 2 */
-#define DSKSIDE     (0x01)
-        
-#define DSKDRVNONE  (0x06)
-#define DSKDRV0     (0x02)
-#define DSKDRV1     (0x04)
-
-/* step rates */
-#define        FDC1772STEP_6   0x00
-#define        FDC1772STEP_12  0x01
-#define        FDC1772STEP_2   0x02
-#define        FDC1772STEP_3   0x03
-
-#endif
-- 
GitLab


From cb345d7352aa9e692ef4b83c41d3e6e1cdb2f846 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:30 -0700
Subject: [PATCH 578/853] init/: delete hard-coded setting and testing of
 BUILD_CRAMDISK

There seems to be little point in explicitly setting, then testing the macro
BUILD_CRAMDISK within the context of a single source file.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 init/do_mounts_rd.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 46dfd64ae8f..470a328d145 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -10,8 +10,6 @@
 
 #include "do_mounts.h"
 
-#define BUILD_CRAMDISK
-
 int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
 
 static int __init prompt_ramdisk(char *str)
@@ -162,14 +160,8 @@ int __init rd_load_image(char *from)
 		goto done;
 
 	if (nblocks == 0) {
-#ifdef BUILD_CRAMDISK
 		if (crd_load(in_fd, out_fd) == 0)
 			goto successful_load;
-#else
-		printk(KERN_NOTICE
-		       "RAMDISK: Kernel does not support compressed "
-		       "RAM disk images\n");
-#endif
 		goto done;
 	}
 
@@ -267,8 +259,6 @@ int __init rd_load_disk(int n)
 	return rd_load_image("/dev/root");
 }
 
-#ifdef BUILD_CRAMDISK
-
 /*
  * gzip declarations
  */
@@ -425,5 +415,3 @@ static int __init crd_load(int in_fd, int out_fd)
 	kfree(window);
 	return result;
 }
-
-#endif  /* BUILD_CRAMDISK */
-- 
GitLab


From fd193829744bc77392395cf8f47889235c97f0a3 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:31 -0700
Subject: [PATCH 579/853] lib: allow memparse() to accept a NULL and ignorable
 second parm

Extend memparse() to allow the caller to use a NULL second parameter, which
would represent no interest in returning the address of the end of the parsed
string.

In numerous cases, callers invoke memparse() to parse a possibly-suffixed
string (such as "64K" or "2G" or whatever) and define a character pointer to
accept the end pointer being returned by memparse() even though they have no
interest in it and promptly throw it away.

This (backward-compatible) enhancement allows callers to use NULL in the cases
where they just don't care about getting back that end pointer.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/cmdline.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/lib/cmdline.c b/lib/cmdline.c
index f596c08d213..5ba8a942a47 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -116,7 +116,7 @@ char *get_options(const char *str, int nints, int *ints)
 /**
  *	memparse - parse a string with mem suffixes into a number
  *	@ptr: Where parse begins
- *	@retptr: (output) Pointer to next char after parse completes
+ *	@retptr: (output) Optional pointer to next char after parse completes
  *
  *	Parses a string into a number.  The number stored at @ptr is
  *	potentially suffixed with %K (for kilobytes, or 1024 bytes),
@@ -126,11 +126,13 @@ char *get_options(const char *str, int nints, int *ints)
  *	megabyte, or one gigabyte, respectively.
  */
 
-unsigned long long memparse (char *ptr, char **retptr)
+unsigned long long memparse(char *ptr, char **retptr)
 {
-	unsigned long long ret = simple_strtoull (ptr, retptr, 0);
+	char *endptr;	/* local pointer to end of parsed string */
 
-	switch (**retptr) {
+	unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
+
+	switch (*endptr) {
 	case 'G':
 	case 'g':
 		ret <<= 10;
@@ -140,10 +142,14 @@ unsigned long long memparse (char *ptr, char **retptr)
 	case 'K':
 	case 'k':
 		ret <<= 10;
-		(*retptr)++;
+		endptr++;
 	default:
 		break;
 	}
+
+	if (retptr)
+		*retptr = endptr;
+
 	return ret;
 }
 
-- 
GitLab


From e0ce0da9fefcc723dc006c35a7f91a32750abd40 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:32 -0700
Subject: [PATCH 580/853] lists: remove a redundant conditional definition of
 list_add()

Remove the conditional surrounding the definition of list_add() from list.h
since, if you define CONFIG_DEBUG_LIST, the definition you will subsequently
pick up from lib/list_debug.c will be absolutely identical, at which point you
can remove that redundant definition from list_debug.c as well.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h |  4 ----
 lib/list_debug.c     | 14 --------------
 2 files changed, 18 deletions(-)

diff --git a/include/linux/list.h b/include/linux/list.h
index 139ec41d9c2..453916bc041 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -61,14 +61,10 @@ extern void __list_add(struct list_head *new,
  * Insert a new entry after the specified head.
  * This is good for implementing stacks.
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void list_add(struct list_head *new, struct list_head *head)
 {
 	__list_add(new, head, head->next);
 }
-#else
-extern void list_add(struct list_head *new, struct list_head *head);
-#endif
 
 
 /**
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 4350ba9655b..45c03fd608d 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -39,20 +39,6 @@ void __list_add(struct list_head *new,
 }
 EXPORT_SYMBOL(__list_add);
 
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-void list_add(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head, head->next);
-}
-EXPORT_SYMBOL(list_add);
-
 /**
  * list_del - deletes entry from list.
  * @entry: the element to delete from the list.
-- 
GitLab


From 58340a07c194e0aed7bc58b61ff24330bb2a409f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 25 Jul 2008 01:45:33 -0700
Subject: [PATCH 581/853] introduce HAVE_EFFICIENT_UNALIGNED_ACCESS Kconfig
 symbol

In many cases, especially in networking, it can be beneficial to know at
compile time whether the architecture can do unaligned accesses efficiently.
This patch introduces a new Kconfig symbol

	HAVE_EFFICIENT_UNALIGNED_ACCESS

for that purpose and adds it to the powerpc and x86 architectures.  Also add
some documentation about alignment and networking, and especially one intended
use of this symbol.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Ingo Molnar <mingo@elte.hu> [x86 architecture part]
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/unaligned-memory-access.txt | 32 ++++++++++++++++++++---
 arch/Kconfig                              | 19 ++++++++++++++
 arch/powerpc/Kconfig                      |  1 +
 arch/x86/Kconfig                          |  1 +
 4 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
index b0472ac5226..f866c72291b 100644
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/unaligned-memory-access.txt
@@ -218,9 +218,35 @@ If use of such macros is not convenient, another option is to use memcpy(),
 where the source or destination (or both) are of type u8* or unsigned char*.
 Due to the byte-wise nature of this operation, unaligned accesses are avoided.
 
+
+Alignment vs. Networking
+========================
+
+On architectures that require aligned loads, networking requires that the IP
+header is aligned on a four-byte boundary to optimise the IP stack. For
+regular ethernet hardware, the constant NET_IP_ALIGN is used. On most
+architectures this constant has the value 2 because the normal ethernet
+header is 14 bytes long, so in order to get proper alignment one needs to
+DMA to an address which can be expressed as 4*n + 2. One notable exception
+here is powerpc which defines NET_IP_ALIGN to 0 because DMA to unaligned
+addresses can be very expensive and dwarf the cost of unaligned loads.
+
+For some ethernet hardware that cannot DMA to unaligned addresses like
+4*n+2 or non-ethernet hardware, this can be a problem, and it is then
+required to copy the incoming frame into an aligned buffer. Because this is
+unnecessary on architectures that can do unaligned accesses, the code can be
+made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so:
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	skb = original skb
+#else
+	skb = copy skb
+#endif
+
 --
-Author: Daniel Drake <dsd@gentoo.org>
+Authors: Daniel Drake <dsd@gentoo.org>,
+         Johannes Berg <johannes@sipsolutions.net>
 With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
-Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock,
-Uli Kunitz, Vadim Lobanov
+Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
+Vadim Lobanov
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 6093c0be58b..b0fabfa864f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -27,6 +27,25 @@ config KPROBES
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+config HAVE_EFFICIENT_UNALIGNED_ACCESS
+	def_bool n
+	help
+	  Some architectures are unable to perform unaligned accesses
+	  without the use of get_unaligned/put_unaligned. Others are
+	  unable to perform such accesses efficiently (e.g. trap on
+	  unaligned access and require fixing it up in the exception
+	  handler.)
+
+	  This symbol should be selected by an architecture if it can
+	  perform unaligned accesses efficiently to allow different
+	  code paths to be selected for these cases. Some network
+	  drivers, for example, could opt to not fix up alignment
+	  problems with received packets if doing so would not help
+	  much.
+
+	  See Documentation/unaligned-memory-access.txt for more
+	  information on the topic of unaligned memory accesses.
+
 config KRETPROBES
 	def_bool y
 	depends on KPROBES && HAVE_KRETPROBES
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a487671c282..de6b49cd6be 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -112,6 +112,7 @@ config PPC
 	select HAVE_FTRACE
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_KPROBES
 	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b2ddfcf0172..66f3ab05b18 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,6 +28,7 @@ config X86
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
 config ARCH_DEFCONFIG
 	string
-- 
GitLab


From 2fc9c4e18f94431e7eb77d97edb2a995b46fba55 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 25 Jul 2008 01:45:34 -0700
Subject: [PATCH 582/853] kallsyms: fix potential overflow in binary search

This will probably never trigger... but it won't hurt to be careful.

http://googleresearch.blogspot.com/2006/06/extra-extra-read-all-about-it-nearly.html

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Joshua Bloch <jjb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kallsyms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6fc0040f3e3..38fc10ac754 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -176,7 +176,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
 	high = kallsyms_num_syms;
 
 	while (high - low > 1) {
-		mid = (low + high) / 2;
+		mid = low + (high - low) / 2;
 		if (kallsyms_addresses[mid] <= addr)
 			low = mid;
 		else
-- 
GitLab


From 696adfe84c11c571a1e0863460ff0ec142b4e5a9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 25 Jul 2008 01:45:34 -0700
Subject: [PATCH 583/853] list_for_each_rcu must die: networking

All uses of list_for_each_rcu() can be profitably replaced by the
easier-to-use list_for_each_entry_rcu().  This patch makes this change for
networking, in preparation for removing the list_for_each_rcu() API
entirely.

Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/802/psnap.c     | 4 +---
 net/ipv4/af_inet.c  | 9 +++------
 net/ipv6/af_inet6.c | 9 +++------
 3 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/net/802/psnap.c b/net/802/psnap.c
index ea464393144..b3cfe5a14fc 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -31,11 +31,9 @@ static struct llc_sap *snap_sap;
  */
 static struct datalink_proto *find_snap_client(unsigned char *desc)
 {
-	struct list_head *entry;
 	struct datalink_proto *proto = NULL, *p;
 
-	list_for_each_rcu(entry, &snap_list) {
-		p = list_entry(entry, struct datalink_proto, node);
+	list_for_each_entry_rcu(p, &snap_list, node) {
 		if (!memcmp(p->type, desc, 5)) {
 			proto = p;
 			break;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index dd919d84285..f440a9f5492 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -264,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol)
 static int inet_create(struct net *net, struct socket *sock, int protocol)
 {
 	struct sock *sk;
-	struct list_head *p;
 	struct inet_protosw *answer;
 	struct inet_sock *inet;
 	struct proto *answer_prot;
@@ -281,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol)
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
-	answer = NULL;
 lookup_protocol:
 	err = -ESOCKTNOSUPPORT;
 	rcu_read_lock();
-	list_for_each_rcu(p, &inetsw[sock->type]) {
-		answer = list_entry(p, struct inet_protosw, list);
+	list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
 
+		err = 0;
 		/* Check the non-wild match. */
 		if (protocol == answer->protocol) {
 			if (protocol != IPPROTO_IP)
@@ -302,10 +300,9 @@ lookup_protocol:
 				break;
 		}
 		err = -EPROTONOSUPPORT;
-		answer = NULL;
 	}
 
-	if (unlikely(answer == NULL)) {
+	if (unlikely(err)) {
 		if (try_loading_module < 2) {
 			rcu_read_unlock();
 			/*
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3d828bc4b1c..60461ad7fa6 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -83,7 +83,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
 	struct inet_sock *inet;
 	struct ipv6_pinfo *np;
 	struct sock *sk;
-	struct list_head *p;
 	struct inet_protosw *answer;
 	struct proto *answer_prot;
 	unsigned char answer_flags;
@@ -97,13 +96,12 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
 		build_ehash_secret();
 
 	/* Look for the requested type/protocol pair. */
-	answer = NULL;
 lookup_protocol:
 	err = -ESOCKTNOSUPPORT;
 	rcu_read_lock();
-	list_for_each_rcu(p, &inetsw6[sock->type]) {
-		answer = list_entry(p, struct inet_protosw, list);
+	list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
 
+		err = 0;
 		/* Check the non-wild match. */
 		if (protocol == answer->protocol) {
 			if (protocol != IPPROTO_IP)
@@ -118,10 +116,9 @@ lookup_protocol:
 				break;
 		}
 		err = -EPROTONOSUPPORT;
-		answer = NULL;
 	}
 
-	if (!answer) {
+	if (err) {
 		if (try_loading_module < 2) {
 			rcu_read_unlock();
 			/*
-- 
GitLab


From b03f6489f9f27dc519a4c60ebf39cc7b8a58eae7 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:45:35 -0700
Subject: [PATCH 584/853] build kernel/profile.o only when requested

Build kernel/profile.o only if CONFIG_PROFILING is enabled.

This makes CONFIG_PROFILING=n kernels smaller.

As a bonus, some profile_tick() calls and one branch from schedule() are
now eliminated with CONFIG_PROFILING=n (but I doubt these are
measurable effects).

This patch changes the effects of CONFIG_PROFILING=n, but I don't think
having more than two choices would be the better choice.

This patch also adds the name of the first parameter to the prototypes
of profile_{hits,tick}() since I anyway had to add them for the dummy
functions.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/profile.h | 56 ++++++++++++++++++++++++++++-------------
 kernel/Makefile         |  3 ++-
 kernel/profile.c        |  4 ---
 3 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/include/linux/profile.h b/include/linux/profile.h
index 05c1cc73693..4081fa31081 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -8,8 +8,6 @@
 
 #include <asm/errno.h>
 
-extern int prof_on __read_mostly;
-
 #define CPU_PROFILING	1
 #define SCHED_PROFILING	2
 #define SLEEP_PROFILING	3
@@ -19,14 +17,29 @@ struct proc_dir_entry;
 struct pt_regs;
 struct notifier_block;
 
+#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
+void create_prof_cpu_mask(struct proc_dir_entry *);
+#else
+#define create_prof_cpu_mask(x)			do { (void)(x); } while (0)
+#endif
+
+enum profile_type {
+	PROFILE_TASK_EXIT,
+	PROFILE_MUNMAP
+};
+
+#ifdef CONFIG_PROFILING
+
+extern int prof_on __read_mostly;
+
 /* init basic kernel profiler */
 void __init profile_init(void);
-void profile_tick(int);
+void profile_tick(int type);
 
 /*
  * Add multiple profiler hits to a given address:
  */
-void profile_hits(int, void *ip, unsigned int nr_hits);
+void profile_hits(int type, void *ip, unsigned int nr_hits);
 
 /*
  * Single profiler hit:
@@ -40,19 +53,6 @@ static inline void profile_hit(int type, void *ip)
 		profile_hits(type, ip, 1);
 }
 
-#ifdef CONFIG_PROC_FS
-void create_prof_cpu_mask(struct proc_dir_entry *);
-#else
-#define create_prof_cpu_mask(x)			do { (void)(x); } while (0)
-#endif
-
-enum profile_type {
-	PROFILE_TASK_EXIT,
-	PROFILE_MUNMAP
-};
-
-#ifdef CONFIG_PROFILING
-
 struct task_struct;
 struct mm_struct;
 
@@ -80,6 +80,28 @@ struct pt_regs;
 
 #else
 
+#define prof_on 0
+
+static inline void profile_init(void)
+{
+	return;
+}
+
+static inline void profile_tick(int type)
+{
+	return;
+}
+
+static inline void profile_hits(int type, void *ip, unsigned int nr_hits)
+{
+	return;
+}
+
+static inline void profile_hit(int type, void *ip)
+{
+	return;
+}
+
 static inline int task_handoff_register(struct notifier_block * n)
 {
 	return -ENOSYS;
diff --git a/kernel/Makefile b/kernel/Makefile
index 15ab63ffe64..54f69837d35 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
+obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/profile.c b/kernel/profile.c
index 58926411eb2..cd26bed4cc2 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -112,8 +112,6 @@ void __init profile_init(void)
 
 /* Profile event notifications */
 
-#ifdef CONFIG_PROFILING
-
 static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
 static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
 static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
@@ -203,8 +201,6 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *))
 }
 EXPORT_SYMBOL_GPL(unregister_timer_hook);
 
-#endif /* CONFIG_PROFILING */
-
 
 #ifdef CONFIG_SMP
 /*
-- 
GitLab


From cebbd3fb803603b12408458ba17c29ce1e15a5f2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 25 Jul 2008 01:45:35 -0700
Subject: [PATCH 585/853] build-kernel-profileo-only-when-requested-cleanups

Cc: Adrian Bunk <bunk@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/profile.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/profile.h b/include/linux/profile.h
index 4081fa31081..7e7087239af 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -18,9 +18,11 @@ struct pt_regs;
 struct notifier_block;
 
 #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
-void create_prof_cpu_mask(struct proc_dir_entry *);
+void create_prof_cpu_mask(struct proc_dir_entry *de);
 #else
-#define create_prof_cpu_mask(x)			do { (void)(x); } while (0)
+static inline void create_prof_cpu_mask(struct proc_dir_entry *de)
+{
+}
 #endif
 
 enum profile_type {
-- 
GitLab


From f16695f4ac088cf7593e113574046d2d7e5af5eb Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:45:36 -0700
Subject: [PATCH 586/853] asm-generic/int-ll64.h: always provide __{s,u}64

Several compilers offer "long long" without claiming to support C99.

Considering how frequently __s64/__u64 are used, our userspace headers are
unusable anyway without __s64/__u64 available.

Always offer __s64/__u64 to non-gcc non-C99 compilers - if they provide
"long long" that makes the headers compile, and if they don't they are
screwed anyway.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/int-ll64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h
index 260948905e4..f9bc9ac29b3 100644
--- a/include/asm-generic/int-ll64.h
+++ b/include/asm-generic/int-ll64.h
@@ -26,7 +26,7 @@ typedef unsigned int __u32;
 #ifdef __GNUC__
 __extension__ typedef __signed__ long long __s64;
 __extension__ typedef unsigned long long __u64;
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#else
 typedef __signed__ long long __s64;
 typedef unsigned long long __u64;
 #endif
-- 
GitLab


From f557d0996a6f9c06912528ea85e1dba0fb7d485f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:45:37 -0700
Subject: [PATCH 587/853] remove some more tipar bits

Some bits were missed when the tipar driver was removed.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/00-INDEX              | 2 --
 Documentation/kernel-parameters.txt | 7 -------
 drivers/char/Makefile               | 1 -
 3 files changed, 10 deletions(-)

diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 1977fab3865..6de71308a90 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -361,8 +361,6 @@ telephony/
 	- directory with info on telephony (e.g. voice over IP) support.
 time_interpolators.txt
 	- info on time interpolators.
-tipar.txt
-	- information about Parallel link cable for Texas Instruments handhelds.
 tty.txt
 	- guide to the locking policies of the tty layer.
 uml/
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 497a98dafda..e7bea3e8530 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2159,13 +2159,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			<deci-seconds>: poll all this frequency
 			0: no polling (default)
 
-	tipar.timeout=	[HW,PPT]
-			Set communications timeout in tenths of a second
-			(default 15).
-
-	tipar.delay=	[HW,PPT]
-			Set inter-bit delay in microseconds (default 10).
-
 	tmscsim=	[HW,SCSI]
 			See comment before function dc390_setup() in
 			drivers/scsi/tmscsim.c.
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index eb02c350680..f7a0d1a754f 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -64,7 +64,6 @@ obj-$(CONFIG_BRIQ_PANEL)	+= briq_panel.o
 obj-$(CONFIG_BFIN_OTP)		+= bfin-otp.o
 
 obj-$(CONFIG_PRINTER)		+= lp.o
-obj-$(CONFIG_TIPAR)		+= tipar.o
 
 obj-$(CONFIG_APM_EMULATION)	+= apm-emulation.o
 
-- 
GitLab


From ac331d158e198d2a91a5b0a3ec4ca9991fdb57af Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:45:38 -0700
Subject: [PATCH 588/853] call_usermodehelper(): increase reliability

Presently call_usermodehelper_setup() uses GFP_ATOMIC, but it can return
NULL _very_ easily.

GFP_ATOMIC is needed only when we can't sleep; otherwise GFP_KERNEL is
more robust.

Thus, I add a gfp_mask argument to call_usermodehelper_setup().

So, its callers pass the gfp_t as below:

call_usermodehelper() and call_usermodehelper_keys():
	depends on the 'wait' argument (GFP_ATOMIC for UMH_NO_WAIT,
	otherwise GFP_KERNEL).
call_usermodehelper_pipe():
	always GFP_KERNEL because it always runs in process context.
orderly_poweroff():
	passes GFP_ATOMIC because it may run in interrupt context.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: "Paul Menage" <menage@google.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmod.h | 11 +++++++----
 kernel/kmod.c        |  9 +++++----
 kernel/sys.c         |  2 +-
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 0509c4ce485..a1a91577813 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -19,6 +19,7 @@
  *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/gfp.h>
 #include <linux/stddef.h>
 #include <linux/errno.h>
 #include <linux/compiler.h>
@@ -41,8 +42,8 @@ struct file;
 struct subprocess_info;
 
 /* Allocate a subprocess_info structure */
-struct subprocess_info *call_usermodehelper_setup(char *path,
-						  char **argv, char **envp);
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+						  char **envp, gfp_t gfp_mask);
 
 /* Set various pieces of state into the subprocess_info structure */
 void call_usermodehelper_setkeys(struct subprocess_info *info,
@@ -69,8 +70,9 @@ static inline int
 call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
 {
 	struct subprocess_info *info;
+	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
 
-	info = call_usermodehelper_setup(path, argv, envp);
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
 	if (info == NULL)
 		return -ENOMEM;
 	return call_usermodehelper_exec(info, wait);
@@ -81,8 +83,9 @@ call_usermodehelper_keys(char *path, char **argv, char **envp,
 			 struct key *session_keyring, enum umh_wait wait)
 {
 	struct subprocess_info *info;
+	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
 
-	info = call_usermodehelper_setup(path, argv, envp);
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
 	if (info == NULL)
 		return -ENOMEM;
 
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 2989f67c444..2456d1a0bef 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -352,16 +352,17 @@ static inline void register_pm_notifier_callback(void) {}
  * @path: path to usermode executable
  * @argv: arg vector for process
  * @envp: environment for process
+ * @gfp_mask: gfp mask for memory allocation
  *
  * Returns either %NULL on allocation failure, or a subprocess_info
  * structure.  This should be passed to call_usermodehelper_exec to
  * exec the process and free the structure.
  */
-struct subprocess_info *call_usermodehelper_setup(char *path,
-						  char **argv, char **envp)
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+						  char **envp, gfp_t gfp_mask)
 {
 	struct subprocess_info *sub_info;
-	sub_info = kzalloc(sizeof(struct subprocess_info),  GFP_ATOMIC);
+	sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
 	if (!sub_info)
 		goto out;
 
@@ -494,7 +495,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 	struct subprocess_info *sub_info;
 	int ret;
 
-	sub_info = call_usermodehelper_setup(path, argv, envp);
+	sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
 	if (sub_info == NULL)
 		return -ENOMEM;
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 14e97282eb6..6c218804604 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1795,7 +1795,7 @@ int orderly_poweroff(bool force)
 		goto out;
 	}
 
-	info = call_usermodehelper_setup(argv[0], argv, envp);
+	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
 	if (info == NULL) {
 		argv_free(argv);
 		goto out;
-- 
GitLab


From 62ec30d45ecbb85b5991474c8f04192697687495 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Fri, 25 Jul 2008 01:45:39 -0700
Subject: [PATCH 589/853] misc: add HP WMI laptop extras driver

This driver adds support for reading and configuring certain information
on modern HP laptops with WMI BIOS interfaces.  It supports enabling and
disabling the ambient light sensor, querying attached displays and hard
drive temperature, sending events on docking, querying the state of the
dock, and toggling the state of the wifi, bluetooth and wwan hardware via
rfkill.  It also makes the little "(i)" button work on machines that send
that via WMI rather than via the keyboard controller.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/Kconfig  |  12 +
 drivers/misc/Makefile |   1 +
 drivers/misc/hp-wmi.c | 494 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 507 insertions(+)
 create mode 100644 drivers/misc/hp-wmi.c

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index d5bc288b1b0..1689c051f68 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -212,6 +212,18 @@ config TC1100_WMI
 	  This is a driver for the WMI extensions (wireless and bluetooth power
 	  control) of the HP Compaq TC1100 tablet.
 
+config HP_WMI
+       tristate "HP WMI extras"
+       depends on ACPI_WMI
+       depends on INPUT
+       depends on RFKILL
+       help
+         Say Y here if you want to support WMI-based hotkeys on HP laptops and
+	 to read data from WMI such as docking or ambient light sensor state.
+
+         To compile this driver as a module, choose M here: the module will
+         be called hp-wmi.
+
 config MSI_LAPTOP
         tristate "MSI Laptop Extras"
         depends on X86
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 688fe76135e..f5e273420c0 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ACER_WMI)		+= acer-wmi.o
 obj-$(CONFIG_ATMEL_PWM)		+= atmel_pwm.o
 obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
 obj-$(CONFIG_ATMEL_TCLIB)	+= atmel_tclib.o
+obj-$(CONFIG_HP_WMI)		+= hp-wmi.o
 obj-$(CONFIG_TC1100_WMI)	+= tc1100-wmi.o
 obj-$(CONFIG_LKDTM)		+= lkdtm.o
 obj-$(CONFIG_TIFM_CORE)       	+= tifm_core.o
diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
new file mode 100644
index 00000000000..1dbcbcb323a
--- /dev/null
+++ b/drivers/misc/hp-wmi.c
@@ -0,0 +1,494 @@
+/*
+ * HP WMI hotkeys
+ *
+ * Copyright (C) 2008 Red Hat <mjg@redhat.com>
+ *
+ * Portions based on wistron_btns.c:
+ * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
+ * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
+ * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/input.h>
+#include <acpi/acpi_drivers.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/rfkill.h>
+#include <linux/string.h>
+
+MODULE_AUTHOR("Matthew Garrett <mjg59@srcf.ucam.org>");
+MODULE_DESCRIPTION("HP laptop WMI hotkeys driver");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C");
+MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
+
+#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
+#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
+
+#define HPWMI_DISPLAY_QUERY 0x1
+#define HPWMI_HDDTEMP_QUERY 0x2
+#define HPWMI_ALS_QUERY 0x3
+#define HPWMI_DOCK_QUERY 0x4
+#define HPWMI_WIRELESS_QUERY 0x5
+
+static int __init hp_wmi_bios_setup(struct platform_device *device);
+static int __exit hp_wmi_bios_remove(struct platform_device *device);
+
+struct bios_args {
+	u32 signature;
+	u32 command;
+	u32 commandtype;
+	u32 datasize;
+	u32 data;
+};
+
+struct bios_return {
+	u32 sigpass;
+	u32 return_code;
+	u32 value;
+};
+
+struct key_entry {
+	char type;		/* See KE_* below */
+	u8 code;
+	u16 keycode;
+};
+
+enum { KE_KEY, KE_SW, KE_END };
+
+static struct key_entry hp_wmi_keymap[] = {
+	{KE_SW, 0x01, SW_DOCK},
+	{KE_KEY, 0x02, KEY_BRIGHTNESSUP},
+	{KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
+	{KE_KEY, 0x04, KEY_HELP},
+	{KE_END, 0}
+};
+
+static struct input_dev *hp_wmi_input_dev;
+static struct platform_device *hp_wmi_platform_dev;
+
+static struct rfkill *wifi_rfkill;
+static struct rfkill *bluetooth_rfkill;
+static struct rfkill *wwan_rfkill;
+
+static struct platform_driver hp_wmi_driver = {
+	.driver = {
+		   .name = "hp-wmi",
+		   .owner = THIS_MODULE,
+	},
+	.probe = hp_wmi_bios_setup,
+	.remove = hp_wmi_bios_remove,
+};
+
+static int hp_wmi_perform_query(int query, int write, int value)
+{
+	struct bios_return bios_return;
+	acpi_status status;
+	union acpi_object *obj;
+	struct bios_args args = {
+		.signature = 0x55434553,
+		.command = write ? 0x2 : 0x1,
+		.commandtype = query,
+		.datasize = write ? 0x4 : 0,
+		.data = value,
+	};
+	struct acpi_buffer input = { sizeof(struct bios_args), &args };
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	status = wmi_evaluate_method(HPWMI_BIOS_GUID, 0, 0x3, &input, &output);
+
+	obj = output.pointer;
+
+	if (!obj || obj->type != ACPI_TYPE_BUFFER)
+		return -EINVAL;
+
+	bios_return = *((struct bios_return *)obj->buffer.pointer);
+	if (bios_return.return_code > 0)
+		return bios_return.return_code * -1;
+	else
+		return bios_return.value;
+}
+
+static int hp_wmi_display_state(void)
+{
+	return hp_wmi_perform_query(HPWMI_DISPLAY_QUERY, 0, 0);
+}
+
+static int hp_wmi_hddtemp_state(void)
+{
+	return hp_wmi_perform_query(HPWMI_HDDTEMP_QUERY, 0, 0);
+}
+
+static int hp_wmi_als_state(void)
+{
+	return hp_wmi_perform_query(HPWMI_ALS_QUERY, 0, 0);
+}
+
+static int hp_wmi_dock_state(void)
+{
+	return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0);
+}
+
+static int hp_wmi_wifi_set(void *data, enum rfkill_state state)
+{
+	if (state)
+		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x101);
+	else
+		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x100);
+}
+
+static int hp_wmi_bluetooth_set(void *data, enum rfkill_state state)
+{
+	if (state)
+		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x202);
+	else
+		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x200);
+}
+
+static int hp_wmi_wwan_set(void *data, enum rfkill_state state)
+{
+	if (state)
+		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x404);
+	else
+		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x400);
+}
+
+static int hp_wmi_wifi_state(void)
+{
+	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+	if (wireless & 0x100)
+		return 1;
+	else
+		return 0;
+}
+
+static int hp_wmi_bluetooth_state(void)
+{
+	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+	if (wireless & 0x10000)
+		return 1;
+	else
+		return 0;
+}
+
+static int hp_wmi_wwan_state(void)
+{
+	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+	if (wireless & 0x1000000)
+		return 1;
+	else
+		return 0;
+}
+
+static ssize_t show_display(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	int value = hp_wmi_display_state();
+	if (value < 0)
+		return -EINVAL;
+	return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t show_hddtemp(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	int value = hp_wmi_hddtemp_state();
+	if (value < 0)
+		return -EINVAL;
+	return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t show_als(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	int value = hp_wmi_als_state();
+	if (value < 0)
+		return -EINVAL;
+	return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t show_dock(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	int value = hp_wmi_dock_state();
+	if (value < 0)
+		return -EINVAL;
+	return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t set_als(struct device *dev, struct device_attribute *attr,
+		       const char *buf, size_t count)
+{
+	u32 tmp = simple_strtoul(buf, NULL, 10);
+	hp_wmi_perform_query(HPWMI_ALS_QUERY, 1, tmp);
+	return count;
+}
+
+static DEVICE_ATTR(display, S_IRUGO, show_display, NULL);
+static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL);
+static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als);
+static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL);
+
+static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
+{
+	struct key_entry *key;
+
+	for (key = hp_wmi_keymap; key->type != KE_END; key++)
+		if (code == key->code)
+			return key;
+
+	return NULL;
+}
+
+static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode)
+{
+	struct key_entry *key;
+
+	for (key = hp_wmi_keymap; key->type != KE_END; key++)
+		if (key->type == KE_KEY && keycode == key->keycode)
+			return key;
+
+	return NULL;
+}
+
+static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+{
+	struct key_entry *key = hp_wmi_get_entry_by_scancode(scancode);
+
+	if (key && key->type == KE_KEY) {
+		*keycode = key->keycode;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int hp_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode)
+{
+	struct key_entry *key;
+	int old_keycode;
+
+	if (keycode < 0 || keycode > KEY_MAX)
+		return -EINVAL;
+
+	key = hp_wmi_get_entry_by_scancode(scancode);
+	if (key && key->type == KE_KEY) {
+		old_keycode = key->keycode;
+		key->keycode = keycode;
+		set_bit(keycode, dev->keybit);
+		if (!hp_wmi_get_entry_by_keycode(old_keycode))
+			clear_bit(old_keycode, dev->keybit);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+void hp_wmi_notify(u32 value, void *context)
+{
+	struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
+	static struct key_entry *key;
+	union acpi_object *obj;
+
+	wmi_get_event_data(value, &response);
+
+	obj = (union acpi_object *)response.pointer;
+
+	if (obj && obj->type == ACPI_TYPE_BUFFER && obj->buffer.length == 8) {
+		int eventcode = *((u8 *) obj->buffer.pointer);
+		key = hp_wmi_get_entry_by_scancode(eventcode);
+		if (key) {
+			switch (key->type) {
+			case KE_KEY:
+				input_report_key(hp_wmi_input_dev,
+						 key->keycode, 1);
+				input_sync(hp_wmi_input_dev);
+				input_report_key(hp_wmi_input_dev,
+						 key->keycode, 0);
+				input_sync(hp_wmi_input_dev);
+				break;
+			case KE_SW:
+				input_report_switch(hp_wmi_input_dev,
+						    key->keycode,
+						    hp_wmi_dock_state());
+				input_sync(hp_wmi_input_dev);
+				break;
+			}
+		} else if (eventcode == 0x5) {
+			if (wifi_rfkill)
+				wifi_rfkill->state = hp_wmi_wifi_state();
+			if (bluetooth_rfkill)
+				bluetooth_rfkill->state =
+				    hp_wmi_bluetooth_state();
+			if (wwan_rfkill)
+				wwan_rfkill->state = hp_wmi_wwan_state();
+		} else
+			printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
+			       eventcode);
+	} else
+		printk(KERN_INFO "HP WMI: Unknown response received\n");
+}
+
+static int __init hp_wmi_input_setup(void)
+{
+	struct key_entry *key;
+	int err;
+
+	hp_wmi_input_dev = input_allocate_device();
+
+	hp_wmi_input_dev->name = "HP WMI hotkeys";
+	hp_wmi_input_dev->phys = "wmi/input0";
+	hp_wmi_input_dev->id.bustype = BUS_HOST;
+	hp_wmi_input_dev->getkeycode = hp_wmi_getkeycode;
+	hp_wmi_input_dev->setkeycode = hp_wmi_setkeycode;
+
+	for (key = hp_wmi_keymap; key->type != KE_END; key++) {
+		switch (key->type) {
+		case KE_KEY:
+			set_bit(EV_KEY, hp_wmi_input_dev->evbit);
+			set_bit(key->keycode, hp_wmi_input_dev->keybit);
+			break;
+		case KE_SW:
+			set_bit(EV_SW, hp_wmi_input_dev->evbit);
+			set_bit(key->keycode, hp_wmi_input_dev->swbit);
+			break;
+		}
+	}
+
+	err = input_register_device(hp_wmi_input_dev);
+
+	if (err) {
+		input_free_device(hp_wmi_input_dev);
+		return err;
+	}
+
+	return 0;
+}
+
+static void cleanup_sysfs(struct platform_device *device)
+{
+	device_remove_file(&device->dev, &dev_attr_display);
+	device_remove_file(&device->dev, &dev_attr_hddtemp);
+	device_remove_file(&device->dev, &dev_attr_als);
+	device_remove_file(&device->dev, &dev_attr_dock);
+}
+
+static int __init hp_wmi_bios_setup(struct platform_device *device)
+{
+	int err;
+
+	err = device_create_file(&device->dev, &dev_attr_display);
+	if (err)
+		goto add_sysfs_error;
+	err = device_create_file(&device->dev, &dev_attr_hddtemp);
+	if (err)
+		goto add_sysfs_error;
+	err = device_create_file(&device->dev, &dev_attr_als);
+	if (err)
+		goto add_sysfs_error;
+	err = device_create_file(&device->dev, &dev_attr_dock);
+	if (err)
+		goto add_sysfs_error;
+
+	wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
+	wifi_rfkill->name = "hp-wifi";
+	wifi_rfkill->state = hp_wmi_wifi_state();
+	wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
+	wifi_rfkill->user_claim_unsupported = 1;
+
+	bluetooth_rfkill = rfkill_allocate(&device->dev,
+					   RFKILL_TYPE_BLUETOOTH);
+	bluetooth_rfkill->name = "hp-bluetooth";
+	bluetooth_rfkill->state = hp_wmi_bluetooth_state();
+	bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
+	bluetooth_rfkill->user_claim_unsupported = 1;
+
+	wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX);
+	wwan_rfkill->name = "hp-wwan";
+	wwan_rfkill->state = hp_wmi_wwan_state();
+	wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
+	wwan_rfkill->user_claim_unsupported = 1;
+
+	rfkill_register(wifi_rfkill);
+	rfkill_register(bluetooth_rfkill);
+	rfkill_register(wwan_rfkill);
+
+	return 0;
+add_sysfs_error:
+	cleanup_sysfs(device);
+	return err;
+}
+
+static int __exit hp_wmi_bios_remove(struct platform_device *device)
+{
+	cleanup_sysfs(device);
+
+	rfkill_unregister(wifi_rfkill);
+	rfkill_unregister(bluetooth_rfkill);
+	rfkill_unregister(wwan_rfkill);
+
+	return 0;
+}
+
+static int __init hp_wmi_init(void)
+{
+	int err;
+
+	if (wmi_has_guid(HPWMI_EVENT_GUID)) {
+		err = wmi_install_notify_handler(HPWMI_EVENT_GUID,
+						 hp_wmi_notify, NULL);
+		if (!err)
+			hp_wmi_input_setup();
+	}
+
+	if (wmi_has_guid(HPWMI_BIOS_GUID)) {
+		err = platform_driver_register(&hp_wmi_driver);
+		if (err)
+			return 0;
+		hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1);
+		if (!hp_wmi_platform_dev) {
+			platform_driver_unregister(&hp_wmi_driver);
+			return 0;
+		}
+		platform_device_add(hp_wmi_platform_dev);
+	}
+
+	return 0;
+}
+
+static void __exit hp_wmi_exit(void)
+{
+	if (wmi_has_guid(HPWMI_EVENT_GUID)) {
+		wmi_remove_notify_handler(HPWMI_EVENT_GUID);
+		input_unregister_device(hp_wmi_input_dev);
+	}
+	if (hp_wmi_platform_dev) {
+		platform_device_del(hp_wmi_platform_dev);
+		platform_driver_unregister(&hp_wmi_driver);
+	}
+}
+
+module_init(hp_wmi_init);
+module_exit(hp_wmi_exit);
-- 
GitLab


From b69c49b78457f681ecfb3147bd968434ee6559c1 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 25 Jul 2008 01:45:40 -0700
Subject: [PATCH 590/853] clean up duplicated alloc/free_thread_info

We duplicate the alloc/free_thread_info defines on many platforms (the
majority use __get_free_pages/free_pages).  This patch adds common
definitions and removes the duplicated ones.
__HAVE_ARCH_THREAD_INFO_ALLOCATOR is introduced for platforms that do
something different.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-alpha/thread_info.h     |  4 +---
 include/asm-arm/thread_info.h       | 13 -------------
 include/asm-avr32/thread_info.h     |  4 ----
 include/asm-blackfin/thread_info.h  |  5 +----
 include/asm-cris/thread_info.h      |  2 ++
 include/asm-frv/thread_info.h       |  2 ++
 include/asm-h8300/thread_info.h     |  5 +----
 include/asm-ia64/thread_info.h      |  2 ++
 include/asm-m32r/thread_info.h      |  2 ++
 include/asm-m68k/thread_info.h      |  8 +-------
 include/asm-m68knommu/thread_info.h |  4 ----
 include/asm-mips/thread_info.h      |  2 ++
 include/asm-mn10300/thread_info.h   |  2 ++
 include/asm-parisc/thread_info.h    | 10 +++-------
 include/asm-powerpc/thread_info.h   | 14 +++-----------
 include/asm-s390/thread_info.h      |  5 +----
 include/asm-sh/thread_info.h        |  2 ++
 include/asm-sparc/thread_info_32.h  |  2 ++
 include/asm-sparc/thread_info_64.h  |  2 ++
 include/asm-um/thread_info.h        | 16 +---------------
 include/asm-x86/thread_info.h       |  2 ++
 include/asm-xtensa/thread_info.h    |  5 +----
 kernel/fork.c                       | 17 +++++++++++++++++
 23 files changed, 50 insertions(+), 80 deletions(-)

diff --git a/include/asm-alpha/thread_info.h b/include/asm-alpha/thread_info.h
index fb318519629..15fda434442 100644
--- a/include/asm-alpha/thread_info.h
+++ b/include/asm-alpha/thread_info.h
@@ -50,10 +50,8 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define current_thread_info()  __current_thread_info
 
 /* Thread information allocation.  */
+#define THREAD_SIZE_ORDER 1
 #define THREAD_SIZE (2*PAGE_SIZE)
-#define alloc_thread_info(tsk) \
-  ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-arm/thread_info.h b/include/asm-arm/thread_info.h
index f5a66478631..d4be2d64616 100644
--- a/include/asm-arm/thread_info.h
+++ b/include/asm-arm/thread_info.h
@@ -97,19 +97,6 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
 }
 
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) \
-	((struct thread_info *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, \
-		THREAD_SIZE_ORDER))
-#else
-#define alloc_thread_info(tsk) \
-	((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#endif
-
-#define free_thread_info(info) \
-	free_pages((unsigned long)info, THREAD_SIZE_ORDER);
-
 #define thread_saved_pc(tsk)	\
 	((unsigned long)(pc_pointer(task_thread_info(tsk)->cpu_context.pc)))
 #define thread_saved_fp(tsk)	\
diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h
index df68631b7b2..294b25f9323 100644
--- a/include/asm-avr32/thread_info.h
+++ b/include/asm-avr32/thread_info.h
@@ -61,10 +61,6 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)addr;
 }
 
-/* thread information allocation */
-#define alloc_thread_info(ti) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long)(ti), 1)
 #define get_thread_info(ti) get_task_struct((ti)->task)
 #define put_thread_info(ti) put_task_struct((ti)->task)
 
diff --git a/include/asm-blackfin/thread_info.h b/include/asm-blackfin/thread_info.h
index bc2fe5accf2..642769329d1 100644
--- a/include/asm-blackfin/thread_info.h
+++ b/include/asm-blackfin/thread_info.h
@@ -42,6 +42,7 @@
 /*
  * Size of kernel stack for each process. This must be a power of 2...
  */
+#define THREAD_SIZE_ORDER	1
 #define THREAD_SIZE		8192	/* 2 pages */
 
 #ifndef __ASSEMBLY__
@@ -94,10 +95,6 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)((long)ti & ~((long)THREAD_SIZE-1));
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-				__get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti)	free_pages((unsigned long) (ti), 1)
 #endif				/* __ASSEMBLY__ */
 
 /*
diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h
index 784668ab0fa..7efe1000f99 100644
--- a/include/asm-cris/thread_info.h
+++ b/include/asm-cris/thread_info.h
@@ -11,6 +11,8 @@
 
 #ifdef __KERNEL__
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 #include <asm/processor.h>
diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h
index 348b8f1df17..b7ac6bf2844 100644
--- a/include/asm-frv/thread_info.h
+++ b/include/asm-frv/thread_info.h
@@ -82,6 +82,8 @@ register struct thread_info *__current_thread_info asm("gr15");
 
 #define current_thread_info() ({ __current_thread_info; })
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
diff --git a/include/asm-h8300/thread_info.h b/include/asm-h8300/thread_info.h
index 27bb95e2944..aafd4d322ec 100644
--- a/include/asm-h8300/thread_info.h
+++ b/include/asm-h8300/thread_info.h
@@ -49,6 +49,7 @@ struct thread_info {
 /*
  * Size of kernel stack for each process. This must be a power of 2...
  */
+#define THREAD_SIZE_ORDER	1
 #define THREAD_SIZE		8192	/* 2 pages */
 
 
@@ -65,10 +66,6 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-				__get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti)	free_pages((unsigned long) (ti), 1)
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 2422ac61658..7c60fcdd2ef 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -54,6 +54,8 @@ struct thread_info {
 	},					\
 }
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifndef ASM_OFFSETS_C
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
diff --git a/include/asm-m32r/thread_info.h b/include/asm-m32r/thread_info.h
index 1effcd0f5e6..8589d462df2 100644
--- a/include/asm-m32r/thread_info.h
+++ b/include/asm-m32r/thread_info.h
@@ -94,6 +94,8 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index d635a375248..abc002798a2 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -25,13 +25,7 @@ struct thread_info {
 }
 
 /* THREAD_SIZE should be 8k, so handle differently for 4k and 8k machines */
-#if PAGE_SHIFT == 13 /* 8k machines */
-#define alloc_thread_info(tsk)   ((struct thread_info *)__get_free_pages(GFP_KERNEL,0))
-#define free_thread_info(ti)  free_pages((unsigned long)(ti),0)
-#else /* otherwise assume 4k pages */
-#define alloc_thread_info(tsk)   ((struct thread_info *)__get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti)  free_pages((unsigned long)(ti),1)
-#endif /* PAGE_SHIFT == 13 */
+#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT)
 
 #define init_thread_info	(init_task.thread.info)
 #define init_stack		(init_thread_union.stack)
diff --git a/include/asm-m68knommu/thread_info.h b/include/asm-m68knommu/thread_info.h
index 95996d978be..0c9bc095f3f 100644
--- a/include/asm-m68knommu/thread_info.h
+++ b/include/asm-m68knommu/thread_info.h
@@ -71,10 +71,6 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-				__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti)	free_pages((unsigned long) (ti), THREAD_SIZE_ORDER)
 #endif /* __ASSEMBLY__ */
 
 #define	PREEMPT_ACTIVE	0x4000000
diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h
index b2772df1a1b..bb3060699df 100644
--- a/include/asm-mips/thread_info.h
+++ b/include/asm-mips/thread_info.h
@@ -82,6 +82,8 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_MASK (THREAD_SIZE - 1UL)
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
 ({								\
diff --git a/include/asm-mn10300/thread_info.h b/include/asm-mn10300/thread_info.h
index e397e719278..78a3881f3c1 100644
--- a/include/asm-mn10300/thread_info.h
+++ b/include/asm-mn10300/thread_info.h
@@ -112,6 +112,8 @@ static inline unsigned long current_stack_pointer(void)
 	return sp;
 }
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h
index 2d9c7500867..9f812741c35 100644
--- a/include/asm-parisc/thread_info.h
+++ b/include/asm-parisc/thread_info.h
@@ -34,15 +34,11 @@ struct thread_info {
 
 /* thread information allocation */
 
-#define THREAD_ORDER            2
+#define THREAD_SIZE_ORDER            2
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
-#define THREAD_SIZE             (PAGE_SIZE << THREAD_ORDER)
-#define THREAD_SHIFT            (PAGE_SHIFT + THREAD_ORDER)
-
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-			__get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#define free_thread_info(ti)    free_pages((unsigned long) (ti), THREAD_ORDER)
+#define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
 
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *)mfctl(30))
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index b705c2a7651..a9db562df69 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -66,20 +66,12 @@ struct thread_info {
 
 #if THREAD_SHIFT >= PAGE_SHIFT
 
-#define THREAD_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
-
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk)	\
-	((struct thread_info *)__get_free_pages(GFP_KERNEL | \
-		__GFP_ZERO, THREAD_ORDER))
-#else
-#define alloc_thread_info(tsk)	\
-	((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#endif
-#define free_thread_info(ti)	free_pages((unsigned long)ti, THREAD_ORDER)
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
 
 #else /* THREAD_SHIFT < PAGE_SHIFT */
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
 extern void free_thread_info(struct thread_info *ti);
 
diff --git a/include/asm-s390/thread_info.h b/include/asm-s390/thread_info.h
index 99bbed99a3b..91a8f93ad35 100644
--- a/include/asm-s390/thread_info.h
+++ b/include/asm-s390/thread_info.h
@@ -78,10 +78,7 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)((*(unsigned long *) __LC_KERNEL_STACK)-THREAD_SIZE);
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-	__get_free_pages(GFP_KERNEL,THREAD_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti),THREAD_ORDER)
+#define THREAD_SIZE_ORDER THREAD_ORDER
 
 #endif
 
diff --git a/include/asm-sh/thread_info.h b/include/asm-sh/thread_info.h
index c50e5d35fe8..5131e390752 100644
--- a/include/asm-sh/thread_info.h
+++ b/include/asm-sh/thread_info.h
@@ -92,6 +92,8 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(ti)	kzalloc(THREAD_SIZE, GFP_KERNEL)
diff --git a/include/asm-sparc/thread_info_32.h b/include/asm-sparc/thread_info_32.h
index 91b9f5888c8..2cf9db04405 100644
--- a/include/asm-sparc/thread_info_32.h
+++ b/include/asm-sparc/thread_info_32.h
@@ -86,6 +86,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define THREAD_INFO_ORDER  1
 #endif
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info, void)
 #define alloc_thread_info(tsk) BTFIXUP_CALL(alloc_thread_info)()
 
diff --git a/include/asm-sparc/thread_info_64.h b/include/asm-sparc/thread_info_64.h
index c6d2e6c7f84..960969d5ad0 100644
--- a/include/asm-sparc/thread_info_64.h
+++ b/include/asm-sparc/thread_info_64.h
@@ -155,6 +155,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define __THREAD_INFO_ORDER	0
 #endif /* PAGE_SHIFT == 13 */
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
 ({								\
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
index 356b83e2c22..e07e72846c7 100644
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -53,21 +53,7 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
-#ifdef CONFIG_DEBUG_STACK_USAGE
-
-#define alloc_thread_info(tsk) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \
-						 CONFIG_KERNEL_STACK_ORDER))
-#else
-
-/* thread information allocation */
-#define alloc_thread_info(tsk) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL, \
-						 CONFIG_KERNEL_STACK_ORDER))
-#endif
-
-#define free_thread_info(ti) \
-	free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER)
+#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER
 
 #endif
 
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 3f2de105098..da0a675adf9 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -152,6 +152,8 @@ struct thread_info {
 #define THREAD_FLAGS GFP_KERNEL
 #endif
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #define alloc_thread_info(tsk)						\
 	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
 
diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h
index a2c640682ed..7e4131dd546 100644
--- a/include/asm-xtensa/thread_info.h
+++ b/include/asm-xtensa/thread_info.h
@@ -111,10 +111,6 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
@@ -160,6 +156,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TS_USEDFPU		0x0001	/* FPU was used by this task this quantum (SMP) */
 
 #define THREAD_SIZE 8192	//(2*PAGE_SIZE)
+#define THREAD_SIZE_ORDER 1
 
 #endif	/* __KERNEL__ */
 #endif	/* _XTENSA_THREAD_INFO */
diff --git a/kernel/fork.c b/kernel/fork.c
index 552c8d8e77a..5a5d6fef341 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -93,6 +93,23 @@ int nr_processes(void)
 static struct kmem_cache *task_struct_cachep;
 #endif
 
+#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
+#else
+	gfp_t mask = GFP_KERNEL;
+#endif
+	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+}
+
+static inline void free_thread_info(struct thread_info *ti)
+{
+	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+}
+#endif
+
 /* SLAB cache for signal_struct structures (tsk->signal) */
 static struct kmem_cache *signal_cachep;
 
-- 
GitLab


From 2b4bc46052ea8cd7c370b67ca0b9c26586f1439a Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 25 Jul 2008 01:45:42 -0700
Subject: [PATCH 591/853] pdflush: use time_after() instead of open-coding it

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/pdflush.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/pdflush.c b/mm/pdflush.c
index 9d834aa4b97..0cbe0c60c6b 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -130,7 +130,7 @@ static int __pdflush(struct pdflush_work *my_work)
 		 * Thread creation: For how long have there been zero
 		 * available threads?
 		 */
-		if (jiffies - last_empty_jifs > 1 * HZ) {
+		if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
 			/* unlocked list_empty() test is OK here */
 			if (list_empty(&pdflush_list)) {
 				/* unlocked test is OK here */
@@ -151,7 +151,7 @@ static int __pdflush(struct pdflush_work *my_work)
 		if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
 			continue;
 		pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
-		if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
+		if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
 			/* Limit exit rate */
 			pdf->when_i_went_to_sleep = jiffies;
 			break;					/* exeunt */
-- 
GitLab


From ba92a43dbaee339cf5915ef766d3d3ffbaaf103c Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Fri, 25 Jul 2008 01:45:43 -0700
Subject: [PATCH 592/853] exec: remove some includes

fs/exec.c used to need mman.h, pagemap.h, swap.h and rmap.h when it did
mm-ish stuff in install_arg_page(), but there is no need for them after 2.6.22.

[akpm@linux-foundation.org: unbreak arm]
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 190ed1f9277..e41aef0fb35 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,19 +25,18 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
-#include <linux/mman.h>
+#include <linux/mm.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/smp_lock.h>
+#include <linux/swap.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 #include <linux/key.h>
 #include <linux/personality.h>
 #include <linux/binfmts.h>
-#include <linux/swap.h>
 #include <linux/utsname.h>
 #include <linux/pid_namespace.h>
 #include <linux/module.h>
@@ -47,7 +46,6 @@
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
-#include <linux/rmap.h>
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
-- 
GitLab


From 2d6ffcca623a9a16df6cdfbe8250b7a5904a5f5e Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Fri, 25 Jul 2008 01:45:44 -0700
Subject: [PATCH 593/853] inflate: refactor inflate malloc code

Inflate requires some dynamic memory allocation very early in the boot
process and this is provided with a set of four functions:
malloc/free/gzip_mark/gzip_release.

The old inflate code used a mark/release strategy rather than implementing
free.  This new version instead keeps a count of the number of outstanding
allocations and, when it hits zero, resets the malloc arena.

This allows removing all the mark and release implementations and unifying
all the malloc/free implementations.

The architecture-dependent code must define two addresses:
 - free_mem_ptr, the address of the beginning of the area in which
   allocations should be made
 - free_mem_end_ptr, the address of the end of the area in which
   allocations should be made.  If set to 0, no out-of-memory check is
   made and the arena simply grows as much as needed.

The architecture-dependent code can also provide an arch_decomp_wdog()
function.  This function will be called several times during the
decompression process, which allows the watchdog to be notified that the
system is still running.  If an architecture provides such a function, it
must define ARCH_HAS_DECOMP_WDOG so that the generic inflate code calls
arch_decomp_wdog().

Work initially done by Matt Mackall, updated to a recent version of the
kernel and improved by me.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Mikael Starvik <mikael.starvik@axis.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/boot/misc.c                    | 39 +--------------
 arch/arm/boot/compressed/misc.c           | 59 +++--------------------
 arch/cris/arch-v10/boot/compressed/misc.c | 36 +-------------
 arch/cris/arch-v32/boot/compressed/misc.c | 39 ++-------------
 arch/h8300/boot/compressed/misc.c         | 38 ---------------
 arch/m32r/boot/compressed/misc.c          | 37 --------------
 arch/mn10300/boot/compressed/misc.c       | 37 --------------
 arch/sh/boot/compressed/misc_32.c         | 38 ---------------
 arch/sh/boot/compressed/misc_64.c         | 40 ---------------
 arch/x86/boot/compressed/misc.c           | 39 ---------------
 init/do_mounts_rd.c                       | 25 +---------
 init/initramfs.c                          | 22 +--------
 lib/inflate.c                             | 52 +++++++++++++++++---
 13 files changed, 62 insertions(+), 439 deletions(-)

diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c
index c00646b25f6..3047a1b3a51 100644
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -78,8 +78,6 @@ static unsigned outcnt;		/* bytes in output buffer */
 static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 static char *input_data;
 static int  input_data_size;
@@ -88,51 +86,18 @@ static uch *output_data;
 static ulg output_ptr;
 static ulg bytes_out;
 
-static void *malloc(int size);
-static void free(void *where);
 static void error(char *m);
 static void gzip_mark(void **);
 static void gzip_release(void **);
 
 extern int end;
 static ulg free_mem_ptr;
-static ulg free_mem_ptr_end;
+static ulg free_mem_end_ptr;
 
 #define HEAP_SIZE 0x3000
 
 #include "../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size <0) error("Malloc error");
-	if (free_mem_ptr <= 0) error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_ptr_end)
-		error("Out of memory");
-	return p;
-}
-
-static void free(void *where)
-{ /* gzip_mark & gzip_release do the free */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 /* ===========================================================================
  * Fill the input buffer. This is called only when the buffer is empty
  * and at least one byte is really needed.
@@ -193,7 +158,7 @@ decompress_kernel(void *output_start,
 
 	/* FIXME FIXME FIXME */
 	free_mem_ptr		= (ulg)output_start + ksize;
-	free_mem_ptr_end	= (ulg)output_start + ksize + 0x200000;
+	free_mem_end_ptr	= (ulg)output_start + ksize + 0x200000;
 	/* FIXME FIXME FIXME */
 
 	/* put in temp area to reduce initial footprint */
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 9b444022cb9..7145cc7c04f 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -217,8 +217,6 @@ static unsigned outcnt;		/* bytes in output buffer */
 static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 extern char input_data[];
 extern char input_data_end[];
@@ -227,64 +225,21 @@ static uch *output_data;
 static ulg output_ptr;
 static ulg bytes_out;
 
-static void *malloc(int size);
-static void free(void *where);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 static void putstr(const char *);
 
 extern int end;
 static ulg free_mem_ptr;
-static ulg free_mem_ptr_end;
+static ulg free_mem_end_ptr;
 
-#define HEAP_SIZE 0x3000
-
-#include "../../../../lib/inflate.c"
-
-#ifndef STANDALONE_DEBUG
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size <0) error("Malloc error");
-	if (free_mem_ptr <= 0) error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_ptr_end)
-		error("Out of memory");
-	return p;
-}
-
-static void free(void *where)
-{ /* gzip_mark & gzip_release do the free */
-}
-
-static void gzip_mark(void **ptr)
-{
-	arch_decomp_wdog();
-	*ptr = (void *) free_mem_ptr;
-}
+#ifdef STANDALONE_DEBUG
+#define NO_INFLATE_MALLOC
+#endif
 
-static void gzip_release(void **ptr)
-{
-	arch_decomp_wdog();
-	free_mem_ptr = (long) *ptr;
-}
-#else
-static void gzip_mark(void **ptr)
-{
-}
+#define ARCH_HAS_DECOMP_WDOG
 
-static void gzip_release(void **ptr)
-{
-}
-#endif
+#include "../../../../lib/inflate.c"
 
 /* ===========================================================================
  * Fill the input buffer. This is called only when the buffer is empty
@@ -348,7 +303,7 @@ decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p,
 {
 	output_data		= (uch *)output_start;	/* Points to kernel start */
 	free_mem_ptr		= free_mem_ptr_p;
-	free_mem_ptr_end	= free_mem_ptr_end_p;
+	free_mem_end_ptr	= free_mem_ptr_end_p;
 	__machine_arch_type	= arch_id;
 
 	arch_decomp_setup();
diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c
index 18e13bce140..d933c89889d 100644
--- a/arch/cris/arch-v10/boot/compressed/misc.c
+++ b/arch/cris/arch-v10/boot/compressed/misc.c
@@ -102,50 +102,16 @@ extern char *input_data;  /* lives in head.S */
 static long bytes_out = 0;
 static uch *output_data;
 static unsigned long output_ptr = 0;
-
-static void *malloc(int size);
-static void free(void *where);
-static void gzip_mark(void **);
-static void gzip_release(void **);
-
 static void puts(const char *);
 
 /* the "heap" is put directly after the BSS ends, at end */
 
 extern int _end;
 static long free_mem_ptr = (long)&_end;
+static long free_mem_end_ptr;
 
 #include "../../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size < 0)
-		error("Malloc error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 /* decompressor info and error messages to serial console */
 
 static void
diff --git a/arch/cris/arch-v32/boot/compressed/misc.c b/arch/cris/arch-v32/boot/compressed/misc.c
index 55b2695c5d7..3595e16e82b 100644
--- a/arch/cris/arch-v32/boot/compressed/misc.c
+++ b/arch/cris/arch-v32/boot/compressed/misc.c
@@ -89,20 +89,14 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
 
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 extern char *input_data;  /* lives in head.S */
 
-static long bytes_out = 0;
+static long bytes_out;
 static uch *output_data;
-static unsigned long output_ptr = 0;
+static unsigned long output_ptr;
 
-static void *malloc(int size);
-static void free(void *where);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 static void puts(const char *);
 
@@ -110,37 +104,10 @@ static void puts(const char *);
 
 extern int _end;
 static long free_mem_ptr = (long)&_end;
+static long free_mem_end_ptr;
 
 #include "../../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size <0) error("Malloc error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 /* decompressor info and error messages to serial console */
 
 static inline void
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
index 845074588af..51ab6cbd030 100644
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -67,8 +67,6 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
 static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 extern char input_data[];
 extern int input_len;
@@ -77,11 +75,7 @@ static long bytes_out = 0;
 static uch *output_data;
 static unsigned long output_ptr = 0;
 
-static void *malloc(int size);
-static void free(void *where);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 int puts(const char *);
 
@@ -98,38 +92,6 @@ static unsigned long free_mem_end_ptr;
 #define TDR *((volatile unsigned char *)0xffff8b)
 #define SSR *((volatile unsigned char *)0xffff8c)
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size <0) error("Malloc error");
-	if (free_mem_ptr == 0) error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("Out of memory");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 int puts(const char *s)
 {
 	return 0;
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 600d40e3349..d394292498c 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -70,8 +70,6 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
 static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 static unsigned char *input_data;
 static int input_len;
@@ -82,9 +80,6 @@ static unsigned long output_ptr = 0;
 
 #include "m32r_sio.c"
 
-static void *malloc(int size);
-static void free(void *where);
-
 static unsigned long free_mem_ptr;
 static unsigned long free_mem_end_ptr;
 
@@ -92,38 +87,6 @@ static unsigned long free_mem_end_ptr;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size <0) error("Malloc error");
-	if (free_mem_ptr == 0) error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("Out of memory");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 void* memset(void* s, int c, size_t n)
 {
 	int i;
diff --git a/arch/mn10300/boot/compressed/misc.c b/arch/mn10300/boot/compressed/misc.c
index ded207efc97..f673383518e 100644
--- a/arch/mn10300/boot/compressed/misc.c
+++ b/arch/mn10300/boot/compressed/misc.c
@@ -153,26 +153,9 @@ static uch *output_data;
 static unsigned long output_ptr;
 
 
-static void *malloc(int size);
-
-static inline void free(void *where)
-{	/* Don't care */
-}
-
 static unsigned long free_mem_ptr = (unsigned long) &end;
 static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000;
 
-static inline void gzip_mark(void **ptr)
-{
-	kputs(".");
-	*ptr = (void *) free_mem_ptr;
-}
-
-static inline void gzip_release(void **ptr)
-{
-	free_mem_ptr = (unsigned long) *ptr;
-}
-
 #define INPLACE_MOVE_ROUTINE	0x1000
 #define LOW_BUFFER_START	0x2000
 #define LOW_BUFFER_END		0x90000
@@ -186,26 +169,6 @@ static int lines, cols;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size < 0)
-		error("Malloc error\n");
-	if (!free_mem_ptr)
-		error("Memory error\n");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *) free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("\nOut of memory\n");
-
-	return p;
-}
-
 static inline void scroll(void)
 {
 	int i;
diff --git a/arch/sh/boot/compressed/misc_32.c b/arch/sh/boot/compressed/misc_32.c
index adcea31e663..f386997e4d9 100644
--- a/arch/sh/boot/compressed/misc_32.c
+++ b/arch/sh/boot/compressed/misc_32.c
@@ -74,8 +74,6 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
 static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 extern char input_data[];
 extern int input_len;
@@ -84,11 +82,7 @@ static long bytes_out = 0;
 static uch *output_data;
 static unsigned long output_ptr = 0;
 
-static void *malloc(int size);
-static void free(void *where);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 int puts(const char *);
 
@@ -101,38 +95,6 @@ static unsigned long free_mem_end_ptr;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size <0) error("Malloc error");
-	if (free_mem_ptr == 0) error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("Out of memory");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 #ifdef CONFIG_SH_STANDARD_BIOS
 size_t strlen(const char *s)
 {
diff --git a/arch/sh/boot/compressed/misc_64.c b/arch/sh/boot/compressed/misc_64.c
index a006ef89b9d..2941657e18a 100644
--- a/arch/sh/boot/compressed/misc_64.c
+++ b/arch/sh/boot/compressed/misc_64.c
@@ -72,8 +72,6 @@ static unsigned outcnt = 0;	/* bytes in output buffer */
 static int fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 extern char input_data[];
 extern int input_len;
@@ -82,11 +80,7 @@ static long bytes_out = 0;
 static uch *output_data;
 static unsigned long output_ptr = 0;
 
-static void *malloc(int size);
-static void free(void *where);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 static void puts(const char *);
 
@@ -99,40 +93,6 @@ static unsigned long free_mem_end_ptr;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size < 0)
-		error("Malloc error\n");
-	if (free_mem_ptr == 0)
-		error("Memory error\n");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *) free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("\nOut of memory\n");
-
-	return p;
-}
-
-static void free(void *where)
-{				/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 void puts(const char *s)
 {
 }
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index bc5553b496f..9fea7370647 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -182,8 +182,6 @@ static unsigned		outcnt;
 static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 /*
  * This is set up by the setup-routine at boot-time
@@ -196,9 +194,6 @@ extern int input_len;
 
 static long bytes_out;
 
-static void *malloc(int size);
-static void free(void *where);
-
 static void *memset(void *s, int c, unsigned n);
 static void *memcpy(void *dest, const void *src, unsigned n);
 
@@ -220,40 +215,6 @@ static int lines, cols;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size < 0)
-		error("Malloc error");
-	if (free_mem_ptr <= 0)
-		error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("Out of memory");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (memptr) *ptr;
-}
-
 static void scroll(void)
 {
 	int i;
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 470a328d145..fedef93b586 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -303,32 +303,11 @@ static int crd_infd, crd_outfd;
 
 static int  __init fill_inbuf(void);
 static void __init flush_window(void);
-static void __init *malloc(size_t size);
-static void __init free(void *where);
 static void __init error(char *m);
-static void __init gzip_mark(void **);
-static void __init gzip_release(void **);
 
-#include "../lib/inflate.c"
-
-static void __init *malloc(size_t size)
-{
-	return kmalloc(size, GFP_KERNEL);
-}
-
-static void __init free(void *where)
-{
-	kfree(where);
-}
-
-static void __init gzip_mark(void **ptr)
-{
-}
-
-static void __init gzip_release(void **ptr)
-{
-}
+#define NO_INFLATE_MALLOC
 
+#include "../lib/inflate.c"
 
 /* ===========================================================================
  * Fill the input buffer. This is called only when the buffer is empty
diff --git a/init/initramfs.c b/init/initramfs.c
index 8eeeccb328c..644fc01ad5f 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -14,16 +14,6 @@ static void __init error(char *x)
 		message = x;
 }
 
-static void __init *malloc(size_t size)
-{
-	return kmalloc(size, GFP_KERNEL);
-}
-
-static void __init free(void *where)
-{
-	kfree(where);
-}
-
 /* link hash */
 
 #define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
@@ -407,18 +397,10 @@ static long bytes_out;
 
 static void __init flush_window(void);
 static void __init error(char *m);
-static void __init gzip_mark(void **);
-static void __init gzip_release(void **);
 
-#include "../lib/inflate.c"
+#define NO_INFLATE_MALLOC
 
-static void __init gzip_mark(void **ptr)
-{
-}
-
-static void __init gzip_release(void **ptr)
-{
-}
+#include "../lib/inflate.c"
 
 /* ===========================================================================
  * Write the output window window[0..outcnt-1] and update crc and bytes_out.
diff --git a/lib/inflate.c b/lib/inflate.c
index 9762294be06..1a8e8a97812 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -230,6 +230,45 @@ STATIC const ush mask_bits[] = {
 #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
 #define DUMPBITS(n) {b>>=(n);k-=(n);}
 
+#ifndef NO_INFLATE_MALLOC
+/* A trivial malloc implementation, adapted from
+ *  malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ */
+
+static unsigned long malloc_ptr;
+static int malloc_count;
+
+static void *malloc(int size)
+{
+       void *p;
+
+       if (size < 0)
+		error("Malloc error");
+       if (!malloc_ptr)
+		malloc_ptr = free_mem_ptr;
+
+       malloc_ptr = (malloc_ptr + 3) & ~3;     /* Align */
+
+       p = (void *)malloc_ptr;
+       malloc_ptr += size;
+
+       if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
+		error("Out of memory");
+
+       malloc_count++;
+       return p;
+}
+
+static void free(void *where)
+{
+       malloc_count--;
+       if (!malloc_count)
+		malloc_ptr = free_mem_ptr;
+}
+#else
+#define malloc(a) kmalloc(a, GFP_KERNEL)
+#define free(a) kfree(a)
+#endif
 
 /*
    Huffman code decoding is performed using a multi-level table lookup.
@@ -1045,7 +1084,6 @@ STATIC int INIT inflate(void)
   int e;                /* last block flag */
   int r;                /* result code */
   unsigned h;           /* maximum struct huft's malloc'ed */
-  void *ptr;
 
   /* initialize window, bit buffer */
   wp = 0;
@@ -1057,12 +1095,12 @@ STATIC int INIT inflate(void)
   h = 0;
   do {
     hufts = 0;
-    gzip_mark(&ptr);
-    if ((r = inflate_block(&e)) != 0) {
-      gzip_release(&ptr);	    
-      return r;
-    }
-    gzip_release(&ptr);
+#ifdef ARCH_HAS_DECOMP_WDOG
+    arch_decomp_wdog();
+#endif
+    r = inflate_block(&e);
+    if (r)
+	    return r;
     if (hufts > h)
       h = hufts;
   } while (!e);
-- 
GitLab


From 2f5a5cf93fae7b8354b45b8443dcc3448a8fc276 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Fri, 25 Jul 2008 01:45:46 -0700
Subject: [PATCH 594/853] drivers/power: fix platform driver hotplug/coldplug

Since 43cc71eed1250755986da4c0f9898f9a635cb3bf (platform: prefix MODALIAS
with "platform:"), the platform modalias is prefixed with "platform:".
Add MODULE_ALIAS() to the hotpluggable "power" drivers, to re-enable
auto loading.

[dbrownell@users.sourceforge.net: one was missing]
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Greg KH <greg@kroah.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/power/ds2760_battery.c | 2 ++
 drivers/power/pda_power.c      | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 71be36f1870..308ddb201b6 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -433,6 +433,8 @@ static int ds2760_battery_resume(struct platform_device *pdev)
 
 #endif /* CONFIG_PM */
 
+MODULE_ALIAS("platform:ds2760-battery");
+
 static struct platform_driver ds2760_battery_driver = {
 	.driver = {
 		.name = "ds2760-battery",
diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c
index 82810b7bff9..0471ec743ab 100644
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -362,6 +362,8 @@ static int pda_power_resume(struct platform_device *pdev)
 #define pda_power_resume NULL
 #endif /* CONFIG_PM */
 
+MODULE_ALIAS("platform:pda-power");
+
 static struct platform_driver pda_power_pdrv = {
 	.driver = {
 		.name = "pda-power",
-- 
GitLab


From 4f46d6e7e5ffbce0ee1d1a80767fdf45e56cc863 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Fri, 25 Jul 2008 01:45:47 -0700
Subject: [PATCH 595/853] mfd: fix platform driver hotplug/coldplug

Since 43cc71eed1250755986da4c0f9898f9a635cb3bf (platform: prefix MODALIAS
with "platform:"), the platform modalias is prefixed with "platform:".
Add MODULE_ALIAS() to the MFD platform drivers, to re-enable auto loading.

[dbrownell@users.sourceforge.net: one was missing]
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Greg KH <greg@kroah.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/htc-pasic3.c | 2 ++
 drivers/mfd/mcp-sa11x0.c | 2 ++
 drivers/mfd/sm501.c      | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c
index 633cbba072f..91b294dcc13 100644
--- a/drivers/mfd/htc-pasic3.c
+++ b/drivers/mfd/htc-pasic3.c
@@ -238,6 +238,8 @@ static int pasic3_remove(struct platform_device *pdev)
 	return 0;
 }
 
+MODULE_ALIAS("platform:pasic3");
+
 static struct platform_driver pasic3_driver = {
 	.driver		= {
 		.name	= "pasic3",
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 1eab7cffcea..b5272b5ce3f 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -242,6 +242,8 @@ static int mcp_sa11x0_resume(struct platform_device *dev)
 /*
  * The driver for the SA11x0 MCP port.
  */
+MODULE_ALIAS("platform:sa11x0-mcp");
+
 static struct platform_driver mcp_sa11x0_driver = {
 	.probe		= mcp_sa11x0_probe,
 	.remove		= mcp_sa11x0_remove,
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 2fe64734d8a..e2530df4d85 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -1378,6 +1378,8 @@ static struct pci_driver sm501_pci_drv = {
 	.remove		= sm501_pci_remove,
 };
 
+MODULE_ALIAS("platform:sm501");
+
 static struct platform_driver sm501_plat_drv = {
 	.driver		= {
 		.name	= "sm501",
-- 
GitLab


From db358b40e0674fd4079204d8e3e1c8ab3829a1b9 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Fri, 25 Jul 2008 01:45:48 -0700
Subject: [PATCH 596/853] parport: fix platform driver hotplug/coldplug

Since 43cc71eed1250755986da4c0f9898f9a635cb3bf (platform: prefix MODALIAS
with "platform:"), the platform modalias is prefixed with "platform:".
Add MODULE_ALIAS() to the hotpluggable parport platform drivers, to
re-enable auto loading.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Greg KH <greg@kroah.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Acked-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/parport/parport_ax88796.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/parport/parport_ax88796.c b/drivers/parport/parport_ax88796.c
index 4ec220b2eae..6938d2e9f18 100644
--- a/drivers/parport/parport_ax88796.c
+++ b/drivers/parport/parport_ax88796.c
@@ -406,6 +406,8 @@ static int parport_ax88796_resume(struct platform_device *dev)
 #define parport_ax88796_resume  NULL
 #endif
 
+MODULE_ALIAS("platform:ax88796-pp");
+
 static struct platform_driver axdrv = {
 	.driver		= {
 		.name	= "ax88796-pp",
-- 
GitLab


From 4500d067eeb3d00679335d9cf5c6536e79cd3ef4 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:49 -0700
Subject: [PATCH 597/853] init.h: remove obsolete content

Remove apparently obsolete content from init.h referring to gcc 2.9x
and to "no_module_init".

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index 21d658cdfa2..42ae95411a9 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -275,13 +275,7 @@ void __init parse_early_param(void);
 
 #define security_initcall(fn)		module_init(fn)
 
-/* These macros create a dummy inline: gcc 2.9x does not count alias
- as usage, hence the `unused function' warning when __init functions
- are declared static. We use the dummy __*_module_inline functions
- both to kill the warning and check the type of the init/cleanup
- function. */
-
-/* Each module must use one module_init(), or one no_module_init */
+/* Each module must use one module_init(). */
 #define module_init(initfn)					\
 	static inline initcall_t __inittest(void)		\
 	{ return initfn; }					\
-- 
GitLab


From 277e2c695907a70b316a31769cd891dc4d43b7f3 Mon Sep 17 00:00:00 2001
From: Daniel Guilak <guilak@linux.vnet.ibm.com>
Date: Fri, 25 Jul 2008 01:45:49 -0700
Subject: [PATCH 598/853] init/version.c: silence sparse warning by declaring
 the version string

Signed-off-by: Daniel Guilak <daniel@danielguilak.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 init/version.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/init/version.c b/init/version.c
index 9d17d70ee02..041fd822ce2 100644
--- a/init/version.c
+++ b/init/version.c
@@ -16,6 +16,7 @@
 #define version(a) Version_ ## a
 #define version_string(a) version(a)
 
+extern int version_string(LINUX_VERSION_CODE);
 int version_string(LINUX_VERSION_CODE);
 
 struct uts_namespace init_uts_ns = {
-- 
GitLab


From 197dcffc8ba0ea943fee86e28e99cd9575799772 Mon Sep 17 00:00:00 2001
From: Daniel Guilak <guilak@linux.vnet.ibm.com>
Date: Fri, 25 Jul 2008 01:45:50 -0700
Subject: [PATCH 599/853] init/version.c: define version_string only if
 CONFIG_KALLSYMS is not defined

int Version_* is only used with ksymoops, which is only needed (according
to README and Documentation/Changes) if CONFIG_KALLSYMS is NOT defined.
Therefore this patch defines version_string only if CONFIG_KALLSYMS is not
defined.

Signed-off-by: Daniel Guilak <daniel@danielguilak.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 init/version.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/init/version.c b/init/version.c
index 041fd822ce2..52a8b98642b 100644
--- a/init/version.c
+++ b/init/version.c
@@ -13,11 +13,13 @@
 #include <linux/utsrelease.h>
 #include <linux/version.h>
 
+#ifndef CONFIG_KALLSYMS
 #define version(a) Version_ ## a
 #define version_string(a) version(a)
 
 extern int version_string(LINUX_VERSION_CODE);
 int version_string(LINUX_VERSION_CODE);
+#endif
 
 struct uts_namespace init_uts_ns = {
 	.kref = {
-- 
GitLab


From a7f371e54fac49ff62bb640d4a7276fca01527e8 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Fri, 25 Jul 2008 01:45:51 -0700
Subject: [PATCH 600/853] documentation: update CodingStyle tips for Emacs
 users

Describe a setup that integrates better with Emacs' cc-mode and also fixes
up the alignment of continuation lines to really only use tabs.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/CodingStyle | 42 +++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 6caa1461557..1875e502f87 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -474,25 +474,29 @@ make a good program).
 So, you can either get rid of GNU emacs, or change it to use saner
 values.  To do the latter, you can stick the following in your .emacs file:
 
-(defun linux-c-mode ()
-  "C mode with adjusted defaults for use with the Linux kernel."
-  (interactive)
-  (c-mode)
-  (c-set-style "K&R")
-  (setq tab-width 8)
-  (setq indent-tabs-mode t)
-  (setq c-basic-offset 8))
-
-This will define the M-x linux-c-mode command.  When hacking on a
-module, if you put the string -*- linux-c -*- somewhere on the first
-two lines, this mode will be automatically invoked. Also, you may want
-to add
-
-(setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode)
-			auto-mode-alist))
-
-to your .emacs file if you want to have linux-c-mode switched on
-automagically when you edit source files under /usr/src/linux.
+(defun c-lineup-arglist-tabs-only (ignored)
+  "Line up argument lists by tabs, not spaces"
+  (let* ((anchor (c-langelem-pos c-syntactic-element))
+	 (column (c-langelem-2nd-pos c-syntactic-element))
+	 (offset (- (1+ column) anchor))
+	 (steps (floor offset c-basic-offset)))
+    (* (max steps 1)
+       c-basic-offset)))
+
+(add-hook 'c-mode-hook
+          (lambda ()
+            (let ((filename (buffer-file-name)))
+              ;; Enable kernel mode for the appropriate files
+              (when (and filename
+                         (string-match "~/src/linux-trees" filename))
+                (setq indent-tabs-mode t)
+                (c-set-style "linux")
+                (c-set-offset 'arglist-cont-nonempty
+                              '(c-lineup-gcc-asm-reg
+                                c-lineup-arglist-tabs-only))))))
+
+This will make emacs go better with the kernel coding style for C
+files below ~/src/linux-trees.
 
 But even if you fail in getting emacs to do sane formatting, not
 everything is lost: use "indent".
-- 
GitLab


From f38954c93c4a548f55d73ac5c1cf5e7f4023bb6c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 25 Jul 2008 01:45:52 -0700
Subject: [PATCH 601/853] drivers/misc/hpilo.c needs CONFIG_PCI

m68k allmodconfig:

drivers/misc/hpilo.c: In function 'ilo_ccb_close':
drivers/misc/hpilo.c:225: error: implicit declaration of function 'pci_free_consistent'
drivers/misc/hpilo.c: In function 'ilo_ccb_open':
drivers/misc/hpilo.c:244: error: implicit declaration of function 'pci_alloc_consistent'
drivers/misc/hpilo.c:245: warning: assignment makes pointer from integer without a cast

Cc: David Altobelli <david.altobelli@hp.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 1689c051f68..7e37ba5afe3 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -436,6 +436,7 @@ config SGI_XP
 
 config HP_ILO
 	tristate "Channel interface driver for HP iLO/iLO2 processor"
+	depends on PCI
 	default n
 	help
 	  The channel interface driver allows applications to communicate
-- 
GitLab


From b6c63937001889af6fe431aaba97e59d04e028e7 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 25 Jul 2008 01:45:52 -0700
Subject: [PATCH 602/853] Rename WARN() to WARNING() to clear the namespace

We want to use WARN() as a variant of WARN_ON(); however, a few drivers
are using WARN() internally.  This patch renames these to WARNING() to
avoid the namespace clash.  A few drivers defined the macro but never used
it; in those cases I just deleted the definition.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Greg KH <greg@kroah.com>
Cc: Karsten Keil <kkeil@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/isdn/hisax/st5481.h       |  4 ++--
 drivers/isdn/hisax/st5481_b.c     |  4 ++--
 drivers/isdn/hisax/st5481_d.c     |  6 +++---
 drivers/isdn/hisax/st5481_usb.c   | 18 +++++++++---------
 drivers/usb/gadget/at91_udc.h     |  2 +-
 drivers/usb/gadget/cdc2.c         |  2 +-
 drivers/usb/gadget/ether.c        |  2 +-
 drivers/usb/gadget/file_storage.c | 14 +++++++-------
 drivers/usb/gadget/fsl_usb2_udc.c |  2 +-
 drivers/usb/gadget/fsl_usb2_udc.h |  2 +-
 drivers/usb/gadget/gmidi.c        |  2 --
 drivers/usb/gadget/goku_udc.c     |  2 +-
 drivers/usb/gadget/goku_udc.h     |  2 +-
 drivers/usb/gadget/inode.c        |  2 --
 drivers/usb/gadget/net2280.c      |  2 +-
 drivers/usb/gadget/net2280.h      |  2 +-
 drivers/usb/gadget/omap_udc.c     |  6 +++---
 drivers/usb/gadget/omap_udc.h     |  2 +-
 drivers/usb/gadget/printer.c      |  2 +-
 drivers/usb/gadget/pxa25x_udc.c   |  6 +++---
 drivers/usb/gadget/pxa25x_udc.h   |  2 +-
 drivers/usb/gadget/u_ether.c      |  3 ---
 drivers/usb/host/isp116x-hcd.c    |  2 +-
 drivers/usb/host/isp116x.h        |  2 +-
 drivers/usb/host/sl811-hcd.c      |  2 +-
 drivers/usb/host/sl811.h          |  2 +-
 drivers/usb/misc/usbtest.c        |  4 ++--
 include/linux/usb/composite.h     |  2 +-
 28 files changed, 48 insertions(+), 55 deletions(-)

diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h
index 2044e7173ab..cff7a635433 100644
--- a/drivers/isdn/hisax/st5481.h
+++ b/drivers/isdn/hisax/st5481.h
@@ -220,7 +220,7 @@ enum {
 #define ERR(format, arg...) \
 printk(KERN_ERR "%s:%s: " format "\n" , __FILE__,  __func__ , ## arg)
 
-#define WARN(format, arg...) \
+#define WARNING(format, arg...) \
 printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__,  __func__ , ## arg)
 
 #define INFO(format, arg...) \
@@ -412,7 +412,7 @@ struct st5481_adapter {
 ({ \
 	int status; \
 	if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \
-		WARN("usb_submit_urb failed,status=%d", status); \
+		WARNING("usb_submit_urb failed,status=%d", status); \
 	} \
         status; \
 })
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c
index fa64115cd7c..0074b600a0e 100644
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -180,7 +180,7 @@ static void usb_b_out_complete(struct urb *urb)
 				DBG(4,"urb killed status %d", urb->status);
 				return; // Give up
 			default: 
-				WARN("urb status %d",urb->status);
+				WARNING("urb status %d",urb->status);
 				if (b_out->busy == 0) {
 					st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL);
 				}
@@ -372,6 +372,6 @@ void st5481_b_l2l1(struct hisax_if *ifc, int pr, void *arg)
 		B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL);
 		break;
 	default:
-		WARN("pr %#x\n", pr);
+		WARNING("pr %#x\n", pr);
 	}
 }
diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c
index b8c4855cc88..077991c1cd0 100644
--- a/drivers/isdn/hisax/st5481_d.c
+++ b/drivers/isdn/hisax/st5481_d.c
@@ -389,7 +389,7 @@ static void usb_d_out_complete(struct urb *urb)
 				DBG(1,"urb killed status %d", urb->status);
 				break;
 			default: 
-				WARN("urb status %d",urb->status);
+				WARNING("urb status %d",urb->status);
 				if (d_out->busy == 0) {
 					st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter);
 				}
@@ -420,7 +420,7 @@ static void dout_start_xmit(struct FsmInst *fsm, int event, void *arg)
 	isdnhdlc_out_init(&d_out->hdlc_state, 1, 0);
 
 	if (test_and_set_bit(buf_nr, &d_out->busy)) {
-		WARN("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
+		WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
 		return;
 	}
 	urb = d_out->urb[buf_nr];
@@ -601,7 +601,7 @@ void st5481_d_l2l1(struct hisax_if *hisax_d_if, int pr, void *arg)
 		FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL);
 		break;
 	default:
-		WARN("pr %#x\n", pr);
+		WARNING("pr %#x\n", pr);
 		break;
 	}
 }
diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c
index 427a8b0520f..ec3c0e50766 100644
--- a/drivers/isdn/hisax/st5481_usb.c
+++ b/drivers/isdn/hisax/st5481_usb.c
@@ -66,7 +66,7 @@ static void usb_ctrl_msg(struct st5481_adapter *adapter,
 	struct ctrl_msg *ctrl_msg;
 	
 	if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) {
-		WARN("control msg FIFO full");
+		WARNING("control msg FIFO full");
 		return;
 	}
 	ctrl_msg = &ctrl->msg_fifo.data[w_index]; 
@@ -139,7 +139,7 @@ static void usb_ctrl_complete(struct urb *urb)
 				DBG(1,"urb killed status %d", urb->status);
 				return; // Give up
 			default: 
-				WARN("urb status %d",urb->status);
+				WARNING("urb status %d",urb->status);
 				break;
 		}
 	}
@@ -198,7 +198,7 @@ static void usb_int_complete(struct urb *urb)
 			DBG(2, "urb shutting down with status: %d", urb->status);
 			return;
 		default:
-			WARN("nonzero urb status received: %d", urb->status);
+			WARNING("nonzero urb status received: %d", urb->status);
 			goto exit;
 	}
 
@@ -235,7 +235,7 @@ static void usb_int_complete(struct urb *urb)
 exit:
 	status = usb_submit_urb (urb, GFP_ATOMIC);
 	if (status)
-		WARN("usb_submit_urb failed with result %d", status);
+		WARNING("usb_submit_urb failed with result %d", status);
 }
 
 /* ======================================================================
@@ -257,7 +257,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
 	DBG(2,"");
 	
 	if ((status = usb_reset_configuration (dev)) < 0) {
-		WARN("reset_configuration failed,status=%d",status);
+		WARNING("reset_configuration failed,status=%d",status);
 		return status;
 	}
 
@@ -269,7 +269,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
 
 	// Check if the config is sane
 	if ( altsetting->desc.bNumEndpoints != 7 ) {
-		WARN("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
+		WARNING("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
 		return -EINVAL;
 	}
 
@@ -279,7 +279,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
 
 	// Use alternative setting 3 on interface 0 to have 2B+D
 	if ((status = usb_set_interface (dev, 0, 3)) < 0) {
-		WARN("usb_set_interface failed,status=%d",status);
+		WARNING("usb_set_interface failed,status=%d",status);
 		return status;
 	}
 
@@ -497,7 +497,7 @@ static void usb_in_complete(struct urb *urb)
 				DBG(1,"urb killed status %d", urb->status);
 				return; // Give up
 			default: 
-				WARN("urb status %d",urb->status);
+				WARNING("urb status %d",urb->status);
 				break;
 		}
 	}
@@ -523,7 +523,7 @@ static void usb_in_complete(struct urb *urb)
 			DBG(4,"count=%d",status);
 			DBG_PACKET(0x400, in->rcvbuf, status);
 			if (!(skb = dev_alloc_skb(status))) {
-				WARN("receive out of memory\n");
+				WARNING("receive out of memory\n");
 				break;
 			}
 			memcpy(skb_put(skb, status), in->rcvbuf, status);
diff --git a/drivers/usb/gadget/at91_udc.h b/drivers/usb/gadget/at91_udc.h
index a973f2a50fb..c65d6229589 100644
--- a/drivers/usb/gadget/at91_udc.h
+++ b/drivers/usb/gadget/at91_udc.h
@@ -171,7 +171,7 @@ struct at91_request {
 #endif
 
 #define ERR(stuff...)		pr_err("udc: " stuff)
-#define WARN(stuff...)		pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)		pr_info("udc: " stuff)
 #define DBG(stuff...)		pr_debug("udc: " stuff)
 
diff --git a/drivers/usb/gadget/cdc2.c b/drivers/usb/gadget/cdc2.c
index d490d028950..a39a4b940c3 100644
--- a/drivers/usb/gadget/cdc2.c
+++ b/drivers/usb/gadget/cdc2.c
@@ -170,7 +170,7 @@ static int __init cdc_bind(struct usb_composite_dev *cdev)
 		 * but if the controller isn't recognized at all then
 		 * that assumption is a bit more likely to be wrong.
 		 */
-		WARN(cdev, "controller '%s' not recognized; trying %s\n",
+		WARNING(cdev, "controller '%s' not recognized; trying %s\n",
 				gadget->name,
 				cdc_config_driver.label);
 		device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index d7aaaa29b1e..bcac2e68660 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -293,7 +293,7 @@ static int __init eth_bind(struct usb_composite_dev *cdev)
 		 * but if the controller isn't recognized at all then
 		 * that assumption is a bit more likely to be wrong.
 		 */
-		WARN(cdev, "controller '%s' not recognized; trying %s\n",
+		WARNING(cdev, "controller '%s' not recognized; trying %s\n",
 				gadget->name,
 				eth_config_driver.label);
 		device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 15c24edbb61..ea2c31d1808 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -308,7 +308,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 	dev_vdbg(&(d)->gadget->dev , fmt , ## args)
 #define ERROR(d, fmt, args...) \
 	dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
 	dev_warn(&(d)->gadget->dev , fmt , ## args)
 #define INFO(d, fmt, args...) \
 	dev_info(&(d)->gadget->dev , fmt , ## args)
@@ -1091,7 +1091,7 @@ static int ep0_queue(struct fsg_dev *fsg)
 	if (rc != 0 && rc != -ESHUTDOWN) {
 
 		/* We can't do much more than wait for a reset */
-		WARN(fsg, "error in submission: %s --> %d\n",
+		WARNING(fsg, "error in submission: %s --> %d\n",
 				fsg->ep0->name, rc);
 	}
 	return rc;
@@ -1227,7 +1227,7 @@ static void received_cbi_adsc(struct fsg_dev *fsg, struct fsg_buffhd *bh)
 
 	/* Save the command for later */
 	if (fsg->cbbuf_cmnd_size)
-		WARN(fsg, "CB[I] overwriting previous command\n");
+		WARNING(fsg, "CB[I] overwriting previous command\n");
 	fsg->cbbuf_cmnd_size = req->actual;
 	memcpy(fsg->cbbuf_cmnd, req->buf, fsg->cbbuf_cmnd_size);
 
@@ -1506,7 +1506,7 @@ static void start_transfer(struct fsg_dev *fsg, struct usb_ep *ep,
 		 * submissions if DMA is enabled. */
 		if (rc != -ESHUTDOWN && !(rc == -EOPNOTSUPP &&
 						req->length == 0))
-			WARN(fsg, "error in submission: %s --> %d\n",
+			WARNING(fsg, "error in submission: %s --> %d\n",
 					ep->name, rc);
 	}
 }
@@ -2294,7 +2294,7 @@ static int halt_bulk_in_endpoint(struct fsg_dev *fsg)
 		VDBG(fsg, "delayed bulk-in endpoint halt\n");
 	while (rc != 0) {
 		if (rc != -EAGAIN) {
-			WARN(fsg, "usb_ep_set_halt -> %d\n", rc);
+			WARNING(fsg, "usb_ep_set_halt -> %d\n", rc);
 			rc = 0;
 			break;
 		}
@@ -2317,7 +2317,7 @@ static int wedge_bulk_in_endpoint(struct fsg_dev *fsg)
 		VDBG(fsg, "delayed bulk-in endpoint wedge\n");
 	while (rc != 0) {
 		if (rc != -EAGAIN) {
-			WARN(fsg, "usb_ep_set_wedge -> %d\n", rc);
+			WARNING(fsg, "usb_ep_set_wedge -> %d\n", rc);
 			rc = 0;
 			break;
 		}
@@ -3755,7 +3755,7 @@ static int __init check_parameters(struct fsg_dev *fsg)
 		if (gcnum >= 0)
 			mod_data.release = 0x0300 + gcnum;
 		else {
-			WARN(fsg, "controller '%s' not recognized\n",
+			WARNING(fsg, "controller '%s' not recognized\n",
 				fsg->gadget->name);
 			mod_data.release = 0x0399;
 		}
diff --git a/drivers/usb/gadget/fsl_usb2_udc.c b/drivers/usb/gadget/fsl_usb2_udc.c
index 1695382f30f..1cfccf102a2 100644
--- a/drivers/usb/gadget/fsl_usb2_udc.c
+++ b/drivers/usb/gadget/fsl_usb2_udc.c
@@ -1538,7 +1538,7 @@ static void dtd_complete_irq(struct fsl_udc *udc)
 
 		/* If the ep is configured */
 		if (curr_ep->name == NULL) {
-			WARN("Invalid EP?");
+			WARNING("Invalid EP?");
 			continue;
 		}
 
diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h
index 98b1483ef6a..6131752a38b 100644
--- a/drivers/usb/gadget/fsl_usb2_udc.h
+++ b/drivers/usb/gadget/fsl_usb2_udc.h
@@ -552,7 +552,7 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length)
 #endif
 
 #define ERR(stuff...)		pr_err("udc: " stuff)
-#define WARN(stuff...)		pr_warning("udc: " stuff)
+#define WARNING(stuff...)		pr_warning("udc: " stuff)
 #define INFO(stuff...)		pr_info("udc: " stuff)
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index 7f4d4828e3a..ea8651e3da1 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -138,8 +138,6 @@ static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req);
 	dev_vdbg(&(d)->gadget->dev , fmt , ## args)
 #define ERROR(d, fmt, args...) \
 	dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
-	dev_warn(&(d)->gadget->dev , fmt , ## args)
 #define INFO(d, fmt, args...) \
 	dev_info(&(d)->gadget->dev , fmt , ## args)
 
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index 48f1c63b701..60aa04847b1 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -1768,7 +1768,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * usb_gadget_driver_{register,unregister}() must change.
 	 */
 	if (the_controller) {
-		WARN(dev, "ignoring %s\n", pci_name(pdev));
+		WARNING(dev, "ignoring %s\n", pci_name(pdev));
 		return -EBUSY;
 	}
 	if (!pdev->irq) {
diff --git a/drivers/usb/gadget/goku_udc.h b/drivers/usb/gadget/goku_udc.h
index bc4eb1e0b50..566cb231905 100644
--- a/drivers/usb/gadget/goku_udc.h
+++ b/drivers/usb/gadget/goku_udc.h
@@ -285,7 +285,7 @@ struct goku_udc {
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
 	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 04692d59fc1..f4585d3e90d 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -262,8 +262,6 @@ static const char *CHIP;
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
-	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
 
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index b67ab677af7..5cfb5ebf388 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -1007,7 +1007,7 @@ static void scan_dma_completions (struct net2280_ep *ep)
 			 * 0122, and 0124; not all cases trigger the warning.
 			 */
 			if ((tmp & (1 << NAK_OUT_PACKETS)) == 0) {
-				WARN (ep->dev, "%s lost packet sync!\n",
+				WARNING (ep->dev, "%s lost packet sync!\n",
 						ep->ep.name);
 				req->req.status = -EOVERFLOW;
 			} else if ((tmp = readl (&ep->regs->ep_avail)) != 0) {
diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h
index 1f2af398a9a..81a71dbdc2c 100644
--- a/drivers/usb/gadget/net2280.h
+++ b/drivers/usb/gadget/net2280.h
@@ -272,7 +272,7 @@ static inline void net2280_led_shutdown (struct net2280 *dev)
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
 	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 4b79a8509e8..395bd184448 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -1120,7 +1120,7 @@ static int omap_ep_set_halt(struct usb_ep *_ep, int value)
 			status = -EINVAL;
 		else if (value) {
 			if (ep->udc->ep0_set_config) {
-				WARN("error changing config?\n");
+				WARNING("error changing config?\n");
 				omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
 			}
 			omap_writew(UDC_STALL_CMD, UDC_SYSCON2);
@@ -1764,7 +1764,7 @@ do_stall:
 					u.r.bRequestType, u.r.bRequest, status);
 			if (udc->ep0_set_config) {
 				if (udc->ep0_reset_config)
-					WARN("error resetting config?\n");
+					WARNING("error resetting config?\n");
 				else
 					omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
 			}
@@ -3076,7 +3076,7 @@ static int omap_udc_suspend(struct platform_device *dev, pm_message_t message)
 	 * which would prevent entry to deep sleep...
 	 */
 	if ((devstat & UDC_ATT) != 0 && (devstat & UDC_SUS) == 0) {
-		WARN("session active; suspend requires disconnect\n");
+		WARNING("session active; suspend requires disconnect\n");
 		omap_pullup(&udc->gadget, 0);
 	}
 
diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h
index 8522bbb1227..29edc51b6b2 100644
--- a/drivers/usb/gadget/omap_udc.h
+++ b/drivers/usb/gadget/omap_udc.h
@@ -188,7 +188,7 @@ struct omap_udc {
 #endif
 
 #define ERR(stuff...)		pr_err("udc: " stuff)
-#define WARN(stuff...)		pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)		pr_info("udc: " stuff)
 #define DBG(stuff...)		pr_debug("udc: " stuff)
 
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index 49cd9e145a9..e0090085b78 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -179,7 +179,7 @@ module_param(qlen, uint, S_IRUGO|S_IWUSR);
 
 #define ERROR(dev, fmt, args...) \
 	xprintk(dev, KERN_ERR, fmt, ## args)
-#define WARN(dev, fmt, args...) \
+#define WARNING(dev, fmt, args...) \
 	xprintk(dev, KERN_WARNING, fmt, ## args)
 #define INFO(dev, fmt, args...) \
 	xprintk(dev, KERN_INFO, fmt, ## args)
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index 8fb0066609b..7e6725d8997 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -342,7 +342,7 @@ pxa25x_ep_free_request (struct usb_ep *_ep, struct usb_request *_req)
 	struct pxa25x_request	*req;
 
 	req = container_of (_req, struct pxa25x_request, req);
-	WARN_ON (!list_empty (&req->queue));
+	WARN_ON(!list_empty (&req->queue));
 	kfree(req);
 }
 
@@ -1556,7 +1556,7 @@ config_change:
 					 * tell us about config change events,
 					 * so later ones may fail...
 					 */
-					WARN("config change %02x fail %d?\n",
+					WARNING("config change %02x fail %d?\n",
 						u.r.bRequest, i);
 					return;
 					/* TODO experiment:  if has_cfr,
@@ -2330,7 +2330,7 @@ static int pxa25x_udc_suspend(struct platform_device *dev, pm_message_t state)
 	unsigned long flags;
 
 	if (!udc->mach->gpio_pullup && !udc->mach->udc_command)
-		WARN("USB host won't detect disconnect!\n");
+		WARNING("USB host won't detect disconnect!\n");
 	udc->suspended = 1;
 
 	local_irq_save(flags);
diff --git a/drivers/usb/gadget/pxa25x_udc.h b/drivers/usb/gadget/pxa25x_udc.h
index 4d11ece7c95..c8a13215e02 100644
--- a/drivers/usb/gadget/pxa25x_udc.h
+++ b/drivers/usb/gadget/pxa25x_udc.h
@@ -259,7 +259,7 @@ dump_state(struct pxa25x_udc *dev)
 #define DBG(lvl, stuff...) do{if ((lvl) <= UDC_DEBUG) DMSG(stuff);}while(0)
 
 #define ERR(stuff...)		pr_err("udc: " stuff)
-#define WARN(stuff...)		pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)		pr_info("udc: " stuff)
 
 
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 5458f43a866..3791e627190 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -116,7 +116,6 @@ static inline int qlen(struct usb_gadget *gadget)
 #undef DBG
 #undef VDBG
 #undef ERROR
-#undef WARN
 #undef INFO
 
 #define xprintk(d, level, fmt, args...) \
@@ -140,8 +139,6 @@ static inline int qlen(struct usb_gadget *gadget)
 
 #define ERROR(dev, fmt, args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev, fmt, args...) \
-	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev, fmt, args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
 
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index 31178e10cbb..ce1ca0ba051 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -882,7 +882,7 @@ static void isp116x_endpoint_disable(struct usb_hcd *hcd,
 	for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++)
 		msleep(3);
 	if (!list_empty(&hep->urb_list))
-		WARN("ep %p not empty?\n", ep);
+		WARNING("ep %p not empty?\n", ep);
 
 	kfree(ep);
 	hep->hcpriv = NULL;
diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h
index 595b90a9984..aa211bafcff 100644
--- a/drivers/usb/host/isp116x.h
+++ b/drivers/usb/host/isp116x.h
@@ -338,7 +338,7 @@ struct isp116x_ep {
 #endif
 
 #define ERR(stuff...)		printk(KERN_ERR "116x: " stuff)
-#define WARN(stuff...)		printk(KERN_WARNING "116x: " stuff)
+#define WARNING(stuff...)	printk(KERN_WARNING "116x: " stuff)
 #define INFO(stuff...)		printk(KERN_INFO "116x: " stuff)
 
 /* ------------------------------------------------- */
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 340d72da554..8a74bbb57d0 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -1026,7 +1026,7 @@ sl811h_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep)
 	if (!list_empty(&hep->urb_list))
 		msleep(3);
 	if (!list_empty(&hep->urb_list))
-		WARN("ep %p not empty?\n", ep);
+		WARNING("ep %p not empty?\n", ep);
 
 	kfree(ep);
 	hep->hcpriv = NULL;
diff --git a/drivers/usb/host/sl811.h b/drivers/usb/host/sl811.h
index 7690d98e42a..b6b8c1f233d 100644
--- a/drivers/usb/host/sl811.h
+++ b/drivers/usb/host/sl811.h
@@ -261,6 +261,6 @@ sl811_read_buf(struct sl811 *sl811, int addr, void *buf, size_t count)
 #endif
 
 #define ERR(stuff...)		printk(KERN_ERR "sl811: " stuff)
-#define WARN(stuff...)		printk(KERN_WARNING "sl811: " stuff)
+#define WARNING(stuff...)	printk(KERN_WARNING "sl811: " stuff)
 #define INFO(stuff...)		printk(KERN_INFO "sl811: " stuff)
 
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index 054dedd2812..b358c4e1cf2 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -81,7 +81,7 @@ static struct usb_device *testdev_to_usbdev (struct usbtest_dev *test)
 
 #define ERROR(tdev, fmt, args...) \
 	dev_err(&(tdev)->intf->dev , fmt , ## args)
-#define WARN(tdev, fmt, args...) \
+#define WARNING(tdev, fmt, args...) \
 	dev_warn(&(tdev)->intf->dev , fmt , ## args)
 
 /*-------------------------------------------------------------------------*/
@@ -1946,7 +1946,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id)
 
 			status = get_endpoints (dev, intf);
 			if (status < 0) {
-				WARN(dev, "couldn't get endpoints, %d\n",
+				WARNING(dev, "couldn't get endpoints, %d\n",
 						status);
 				return status;
 			}
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 747c3a49cdc..c932390c6da 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -330,7 +330,7 @@ extern int usb_string_id(struct usb_composite_dev *c);
 	dev_vdbg(&(d)->gadget->dev , fmt , ## args)
 #define ERROR(d, fmt, args...) \
 	dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
 	dev_warn(&(d)->gadget->dev , fmt , ## args)
 #define INFO(d, fmt, args...) \
 	dev_info(&(d)->gadget->dev , fmt , ## args)
-- 
GitLab


From a8f18b909c0a3f22630846207035c8b84bb252b8 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 25 Jul 2008 01:45:53 -0700
Subject: [PATCH 603/853] Add a WARN() macro; this is WARN_ON() + printk
 arguments

Add a WARN() macro that acts like WARN_ON(), with the added feature that it
takes a printk like argument that is printed as part of the warning message.

[akpm@linux-foundation.org: fix printk arguments]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Greg KH <greg@kroah.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 22 ++++++++++++++++++++++
 kernel/panic.c            | 22 ++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 2632328d864..a346e744e77 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -34,9 +34,14 @@ struct bug_entry {
 #ifndef __WARN
 #ifndef __ASSEMBLY__
 extern void warn_on_slowpath(const char *file, const int line);
+extern void warn_slowpath(const char *file, const int line,
+		const char *fmt, ...) __attribute__((format(printf, 3, 4)));
 #define WANT_WARN_ON_SLOWPATH
 #endif
 #define __WARN() warn_on_slowpath(__FILE__, __LINE__)
+#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
+#else
+#define __WARN_printf(arg...) __WARN()
 #endif
 
 #ifndef WARN_ON
@@ -48,6 +53,15 @@ extern void warn_on_slowpath(const char *file, const int line);
 })
 #endif
 
+#ifndef WARN
+#define WARN(condition, format...) ({						\
+	int __ret_warn_on = !!(condition);				\
+	if (unlikely(__ret_warn_on))					\
+		__WARN_printf(format);					\
+	unlikely(__ret_warn_on);					\
+})
+#endif
+
 #else /* !CONFIG_BUG */
 #ifndef HAVE_ARCH_BUG
 #define BUG()
@@ -63,6 +77,14 @@ extern void warn_on_slowpath(const char *file, const int line);
 	unlikely(__ret_warn_on);					\
 })
 #endif
+
+#ifndef WARN
+#define WARN(condition, format...) ({					\
+	int __ret_warn_on = !!(condition);				\
+	unlikely(__ret_warn_on);					\
+})
+#endif
+
 #endif
 
 #define WARN_ON_ONCE(condition)	({				\
diff --git a/kernel/panic.c b/kernel/panic.c
index 425567f45b9..12c5a0a6c89 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -318,6 +318,28 @@ void warn_on_slowpath(const char *file, int line)
 	add_taint(TAINT_WARN);
 }
 EXPORT_SYMBOL(warn_on_slowpath);
+
+
+void warn_slowpath(const char *file, int line, const char *fmt, ...)
+{
+	va_list args;
+	char function[KSYM_SYMBOL_LEN];
+	unsigned long caller = (unsigned long)__builtin_return_address(0);
+	sprint_symbol(function, caller);
+
+	printk(KERN_WARNING "------------[ cut here ]------------\n");
+	printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
+		line, function);
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+
+	print_modules();
+	dump_stack();
+	print_oops_end_marker();
+	add_taint(TAINT_WARN);
+}
+EXPORT_SYMBOL(warn_slowpath);
 #endif
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-- 
GitLab


From 7a2c477069fbd32f91598f05334003979b987a39 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 25 Jul 2008 01:45:54 -0700
Subject: [PATCH 604/853] kernel/irq/manage.c: replace a printk + WARN_ON() to
 a WARN()

Replace a printk+WARN_ON() with a WARN(); this increases the chance of the
string making it into the bug report (i.e. it goes inside the
"---[ cut here ]---" section).

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/irq/manage.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5bc6e5ecc49..f8914b92b66 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -260,9 +260,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
 		}
 	} else {
 		if (desc->wake_depth == 0) {
-			printk(KERN_WARNING "Unbalanced IRQ %d "
-					"wake disable\n", irq);
-			WARN_ON(1);
+			WARN(1, "Unbalanced IRQ %d wake disable\n", irq);
 		} else if (--desc->wake_depth == 0) {
 			ret = set_irq_wake_real(irq, on);
 			if (ret)
-- 
GitLab


From d955c78ac4699ac9c3fe07be62982cda13d13267 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 25 Jul 2008 01:45:55 -0700
Subject: [PATCH 605/853] Example use of WARN()

Now that WARN() exists, we can fold some of the printk's into it.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/kobject.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/kobject.c b/lib/kobject.c
index 744401571ed..bd732ffebc8 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -164,9 +164,8 @@ static int kobject_add_internal(struct kobject *kobj)
 		return -ENOENT;
 
 	if (!kobj->name || !kobj->name[0]) {
-		pr_debug("kobject: (%p): attempted to be registered with empty "
+		WARN(1, "kobject: (%p): attempted to be registered with empty "
 			 "name!\n", kobj);
-		WARN_ON(1);
 		return -EINVAL;
 	}
 
@@ -583,12 +582,10 @@ static void kobject_release(struct kref *kref)
 void kobject_put(struct kobject *kobj)
 {
 	if (kobj) {
-		if (!kobj->state_initialized) {
-			printk(KERN_WARNING "kobject: '%s' (%p): is not "
+		if (!kobj->state_initialized)
+			WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
 			       "initialized, yet kobject_put() is being "
 			       "called.\n", kobject_name(kobj), kobj);
-			WARN_ON(1);
-		}
 		kref_put(&kobj->kref, kobject_release);
 	}
 }
-- 
GitLab


From 924d9addb9b1474fc81a78a5c6706755efea7aaa Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 25 Jul 2008 01:45:55 -0700
Subject: [PATCH 606/853] list debugging: use WARN() instead of BUG()

Arjan noted that the list_head debugging is BUG'ing when it detects
corruption.  By causing the box to panic immediately, we're possibly
losing some bug reports.  Changing this to a WARN() should mean we at
least start seeing reports collected at kerneloops.org.

Signed-off-by: Dave Jones <davej@redhat.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/list_debug.c | 36 ++++++++++++++----------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/lib/list_debug.c b/lib/list_debug.c
index 45c03fd608d..1a39f4e3ae1 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,18 +20,14 @@ void __list_add(struct list_head *new,
 			      struct list_head *prev,
 			      struct list_head *next)
 {
-	if (unlikely(next->prev != prev)) {
-		printk(KERN_ERR "list_add corruption. next->prev should be "
-			"prev (%p), but was %p. (next=%p).\n",
-			prev, next->prev, next);
-		BUG();
-	}
-	if (unlikely(prev->next != next)) {
-		printk(KERN_ERR "list_add corruption. prev->next should be "
-			"next (%p), but was %p. (prev=%p).\n",
-			next, prev->next, prev);
-		BUG();
-	}
+	WARN(next->prev != prev,
+		"list_add corruption. next->prev should be "
+		"prev (%p), but was %p. (next=%p).\n",
+		prev, next->prev, next);
+	WARN(prev->next != next,
+		"list_add corruption. prev->next should be "
+		"next (%p), but was %p. (prev=%p).\n",
+		next, prev->next, prev);
 	next->prev = new;
 	new->next = next;
 	new->prev = prev;
@@ -47,16 +43,12 @@ EXPORT_SYMBOL(__list_add);
  */
 void list_del(struct list_head *entry)
 {
-	if (unlikely(entry->prev->next != entry)) {
-		printk(KERN_ERR "list_del corruption. prev->next should be %p, "
-				"but was %p\n", entry, entry->prev->next);
-		BUG();
-	}
-	if (unlikely(entry->next->prev != entry)) {
-		printk(KERN_ERR "list_del corruption. next->prev should be %p, "
-				"but was %p\n", entry, entry->next->prev);
-		BUG();
-	}
+	WARN(entry->prev->next != entry,
+		"list_del corruption. prev->next should be %p, "
+		"but was %p\n", entry, entry->prev->next);
+	WARN(entry->next->prev != entry,
+		"list_del corruption. next->prev should be %p, "
+		"but was %p\n", entry, entry->next->prev);
 	__list_del(entry->prev, entry->next);
 	entry->next = LIST_POISON1;
 	entry->prev = LIST_POISON2;
-- 
GitLab


From 2711b793eb62a5873a0ba583a69252040aef176e Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 25 Jul 2008 01:45:56 -0700
Subject: [PATCH 607/853] kallsyms: unify 32- and 64-bit code

Use the %p format string which already accounts for the padding you need
with a pointer type on a particular architecture.

Also replace the macro with a static inline function to match the rest of
the file.

Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kallsyms.h | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 00c1801099f..57aefa160a9 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -6,6 +6,7 @@
 #define _LINUX_KALLSYMS_H
 
 #include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/stddef.h>
 
 #define KSYM_NAME_LEN 128
@@ -105,18 +106,10 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr)
 	print_symbol(fmt, (unsigned long)addr);
 }
 
-#ifndef CONFIG_64BIT
-#define print_ip_sym(ip)		\
-do {					\
-	printk("[<%08lx>]", ip);	\
-	print_symbol(" %s\n", ip);	\
-} while(0)
-#else
-#define print_ip_sym(ip)		\
-do {					\
-	printk("[<%016lx>]", ip);	\
-	print_symbol(" %s\n", ip);	\
-} while(0)
-#endif
+static inline void print_ip_sym(unsigned long ip)
+{
+	printk("[<%p>]", (void *) ip);
+	print_symbol(" %s\n", ip);
+}
 
 #endif /*_LINUX_KALLSYMS_H*/
-- 
GitLab


From 717115e1a5856b57af0f71e1df7149108294fc10 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Fri, 25 Jul 2008 01:45:58 -0700
Subject: [PATCH 608/853] printk ratelimiting rewrite

All ratelimit users use the same jiffies and burst params, so some messages
(callbacks) will be lost.

For example:
a calls printk_ratelimit(5 * HZ, 1)
b calls printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b
will be suppressed.

- rewrite __ratelimit, and use a ratelimit_state as parameter.  Thanks for
  hints from andrew.

- Add WARN_ON_RATELIMIT, update rcupreempt.h

- remove __printk_ratelimit

- use __ratelimit in net_ratelimit

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h  |  3 +++
 include/linux/kernel.h     |  8 ++----
 include/linux/net.h        |  3 +--
 include/linux/ratelimit.h  | 27 +++++++++++++++++++
 include/linux/rcupreempt.h |  9 +++++--
 kernel/printk.c            | 17 +++---------
 kernel/sysctl.c            |  4 +--
 lib/ratelimit.c            | 55 +++++++++++++++++++++-----------------
 net/core/sysctl_net_core.c |  4 +--
 net/core/utils.c           |  5 ++--
 10 files changed, 79 insertions(+), 56 deletions(-)
 create mode 100644 include/linux/ratelimit.h

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index a346e744e77..a3f738cffdb 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -97,6 +97,9 @@ extern void warn_slowpath(const char *file, const int line,
 	unlikely(__ret_warn_once);				\
 })
 
+#define WARN_ON_RATELIMIT(condition, state)			\
+		WARN_ON((condition) && __ratelimit(state))
+
 #ifdef CONFIG_SMP
 # define WARN_ON_SMP(x)			WARN_ON(x)
 #else
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5c4b1251e11..fdbbf72ca2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -15,6 +15,7 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 #include <linux/typecheck.h>
+#include <linux/ratelimit.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -189,11 +190,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2))) __cold;
 
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
+extern struct ratelimit_state printk_ratelimit_state;
 extern int printk_ratelimit(void);
-extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst);
-extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
 #else
@@ -204,8 +202,6 @@ static inline int printk(const char *s, ...)
 	__attribute__ ((format (printf, 1, 2)));
 static inline int __cold printk(const char *s, ...) { return 0; }
 static inline int printk_ratelimit(void) { return 0; }
-static inline int __printk_ratelimit(int ratelimit_jiffies, \
-				     int ratelimit_burst) { return 0; }
 static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
 					  unsigned int interval_msec)	\
 		{ return false; }
diff --git a/include/linux/net.h b/include/linux/net.h
index 2f999fbb188..4a9a30f2d68 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -351,8 +351,7 @@ static const struct proto_ops name##_ops = {			\
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
-extern int net_msg_cost;
-extern int net_msg_burst;
+extern struct ratelimit_state net_ratelimit_state;
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
new file mode 100644
index 00000000000..18a5b9ba9d4
--- /dev/null
+++ b/include/linux/ratelimit.h
@@ -0,0 +1,27 @@
+#ifndef _LINUX_RATELIMIT_H
+#define _LINUX_RATELIMIT_H
+#include <linux/param.h>
+
+#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST 10
+
+struct ratelimit_state {
+	int interval;
+	int burst;
+	int printed;
+	int missed;
+	unsigned long begin;
+};
+
+#define DEFINE_RATELIMIT_STATE(name, interval, burst)		\
+		struct ratelimit_state name = {interval, burst,}
+
+extern int __ratelimit(struct ratelimit_state *rs);
+
+static inline int ratelimit(void)
+{
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+					DEFAULT_RATELIMIT_BURST);
+	return __ratelimit(&rs);
+}
+#endif
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index f04b64eca63..0967f03b070 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -115,16 +115,21 @@ DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
 
 static inline void rcu_enter_nohz(void)
 {
+	static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
 	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
 	__get_cpu_var(rcu_dyntick_sched).dynticks++;
-	WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1);
+	WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
 }
 
 static inline void rcu_exit_nohz(void)
 {
+	static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
 	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 	__get_cpu_var(rcu_dyntick_sched).dynticks++;
-	WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1));
+	WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
+				&rs);
 }
 
 #else /* CONFIG_NO_HZ */
diff --git a/kernel/printk.c b/kernel/printk.c
index 3f7a2a94583..a7f7559c5f6 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1308,6 +1308,8 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 }
 
 #if defined CONFIG_PRINTK
+
+DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
 /*
  * printk rate limiting, lifted from the networking subsystem.
  *
@@ -1315,22 +1317,9 @@ void tty_write_message(struct tty_struct *tty, char *msg)
  * every printk_ratelimit_jiffies to make a denial-of-service
  * attack impossible.
  */
-int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
-{
-	return __ratelimit(ratelimit_jiffies, ratelimit_burst);
-}
-EXPORT_SYMBOL(__printk_ratelimit);
-
-/* minimum time in jiffies between messages */
-int printk_ratelimit_jiffies = 5 * HZ;
-
-/* number of messages we send before ratelimiting */
-int printk_ratelimit_burst = 10;
-
 int printk_ratelimit(void)
 {
-	return __printk_ratelimit(printk_ratelimit_jiffies,
-				printk_ratelimit_burst);
+	return __ratelimit(&printk_ratelimit_state);
 }
 EXPORT_SYMBOL(printk_ratelimit);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a8299d1fe5..35a50db9b6c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_PRINTK_RATELIMIT,
 		.procname	= "printk_ratelimit",
-		.data		= &printk_ratelimit_jiffies,
+		.data		= &printk_ratelimit_state.interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
 		.procname	= "printk_ratelimit_burst",
-		.data		= &printk_ratelimit_burst,
+		.data		= &printk_ratelimit_state.burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 485e3040dcd..35136671b21 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -3,6 +3,9 @@
  *
  * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
  *
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ *
  * This file is released under the GPLv2.
  *
  */
@@ -11,41 +14,43 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
+static DEFINE_SPINLOCK(ratelimit_lock);
+static unsigned long flags;
+
 /*
  * __ratelimit - rate limiting
- * @ratelimit_jiffies: minimum time in jiffies between two callbacks
- * @ratelimit_burst: number of callbacks we do before ratelimiting
+ * @rs: ratelimit_state data
  *
- * This enforces a rate limit: not more than @ratelimit_burst callbacks
- * in every ratelimit_jiffies
+ * This enforces a rate limit: not more than @rs->burst callbacks
+ * in every @rs->interval
  */
-int __ratelimit(int ratelimit_jiffies, int ratelimit_burst)
+int __ratelimit(struct ratelimit_state *rs)
 {
-	static DEFINE_SPINLOCK(ratelimit_lock);
-	static unsigned toks = 10 * 5 * HZ;
-	static unsigned long last_msg;
-	static int missed;
-	unsigned long flags;
-	unsigned long now = jiffies;
+	if (!rs->interval)
+		return 1;
 
 	spin_lock_irqsave(&ratelimit_lock, flags);
-	toks += now - last_msg;
-	last_msg = now;
-	if (toks > (ratelimit_burst * ratelimit_jiffies))
-		toks = ratelimit_burst * ratelimit_jiffies;
-	if (toks >= ratelimit_jiffies) {
-		int lost = missed;
+	if (!rs->begin)
+		rs->begin = jiffies;
 
-		missed = 0;
-		toks -= ratelimit_jiffies;
-		spin_unlock_irqrestore(&ratelimit_lock, flags);
-		if (lost)
-			printk(KERN_WARNING "%s: %d messages suppressed\n",
-				__func__, lost);
-		return 1;
+	if (time_is_before_jiffies(rs->begin + rs->interval)) {
+		if (rs->missed)
+			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
+				__func__, rs->missed);
+		rs->begin = 0;
+		rs->printed = 0;
+		rs->missed = 0;
 	}
-	missed++;
+	if (rs->burst && rs->burst > rs->printed)
+		goto print;
+
+	rs->missed++;
 	spin_unlock_irqrestore(&ratelimit_lock, flags);
 	return 0;
+
+print:
+	rs->printed++;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
 }
 EXPORT_SYMBOL(__ratelimit);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a570e2af22c..f686467ff12 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = {
 	{
 		.ctl_name	= NET_CORE_MSG_COST,
 		.procname	= "message_cost",
-		.data		= &net_msg_cost,
+		.data		= &net_ratelimit_state.interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = {
 	{
 		.ctl_name	= NET_CORE_MSG_BURST,
 		.procname	= "message_burst",
-		.data		= &net_msg_burst,
+		.data		= &net_ratelimit_state.burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/net/core/utils.c b/net/core/utils.c
index 8031eb59054..72e0ebe964a 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,17 +31,16 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-int net_msg_cost __read_mostly = 5*HZ;
-int net_msg_burst __read_mostly = 10;
 int net_msg_warn __read_mostly = 1;
 EXPORT_SYMBOL(net_msg_warn);
 
+DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
 /*
  * All net warning printk()s should be guarded by this function.
  */
 int net_ratelimit(void)
 {
-	return __printk_ratelimit(net_msg_cost, net_msg_burst);
+	return __ratelimit(&net_ratelimit_state);
 }
 EXPORT_SYMBOL(net_ratelimit);
 
-- 
GitLab


From 472dba7d117844c746be97db6be26c2810d79b62 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:45:58 -0700
Subject: [PATCH 609/853] sm501: add power control callback

Add callback to get or set the power control if the device has the sleep
connected to some form of GPIO.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Cc: Arnaud Patard <apatard@mandriva.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c   | 31 +++++++++++++++++++++++++++++++
 include/linux/sm501.h |  7 +++++++
 2 files changed, 38 insertions(+)

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index e2530df4d85..9296b2673b5 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -1138,8 +1138,31 @@ static int sm501_plat_probe(struct platform_device *dev)
 }
 
 #ifdef CONFIG_PM
+
 /* power management support */
 
+static void sm501_set_power(struct sm501_devdata *sm, int on)
+{
+	struct sm501_platdata *pd = sm->platdata;
+
+	if (pd == NULL)
+		return;
+
+	if (pd->get_power) {
+		if (pd->get_power(sm->dev) == on) {
+			dev_dbg(sm->dev, "is already %d\n", on);
+			return;
+		}
+	}
+
+	if (pd->set_power) {
+		dev_dbg(sm->dev, "setting power to %d\n", on);
+
+		pd->set_power(sm->dev, on);
+		sm501_mdelay(sm, 10);
+	}
+}
+
 static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct sm501_devdata *sm = platform_get_drvdata(pdev);
@@ -1148,6 +1171,12 @@ static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
 	sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL);
 
 	sm501_dump_regs(sm);
+
+	if (sm->platdata) {
+		if (sm->platdata->flags & SM501_FLAG_SUSPEND_OFF)
+			sm501_set_power(sm, 0);
+	}
+
 	return 0;
 }
 
@@ -1155,6 +1184,8 @@ static int sm501_plat_resume(struct platform_device *pdev)
 {
 	struct sm501_devdata *sm = platform_get_drvdata(pdev);
 
+	sm501_set_power(sm, 1);
+
 	sm501_dump_regs(sm);
 	sm501_dump_gate(sm);
 	sm501_dump_clk(sm);
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index b530fa6a1d3..145405bf9ef 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -157,6 +157,8 @@ struct sm501_init_gpio {
 	struct sm501_reg_init	gpio_ddr_high;
 };
 
+#define SM501_FLAG_SUSPEND_OFF		(1<<4)
+
 /* sm501_platdata
  *
  * This is passed with the platform device to allow the board
@@ -170,6 +172,11 @@ struct sm501_platdata {
 	struct sm501_init_gpio		*init_gpiop;
 	struct sm501_platdata_fb	*fb;
 
+	int				 flags;
+
+	int	(*get_power)(struct device *dev);
+	int	(*set_power)(struct device *dev, unsigned int on);
+
 	struct sm501_platdata_gpio_i2c	*gpio_i2c;
 	unsigned int			 gpio_i2c_nr;
 };
-- 
GitLab


From f61be273d3699d174bc1438e6804f9f9e52bb932 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:45:59 -0700
Subject: [PATCH 610/853] sm501: add gpiolib support

Add support for exporting the GPIOs on the SM501 via gpiolib.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Cc: Arnaud Patard <apatard@mandriva.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/Kconfig   |   8 ++
 drivers/mfd/sm501.c   | 299 ++++++++++++++++++++++++++++++++++--------
 include/linux/sm501.h |  20 +--
 3 files changed, 257 insertions(+), 70 deletions(-)

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 9f93c29fed3..bac9e973ece 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -19,6 +19,14 @@ config MFD_SM501
 	  interface. The device may be connected by PCI or local bus with
 	  varying functions enabled.
 
+config MFD_SM501_GPIO
+	bool "Export GPIO via GPIO layer"
+	depends on MFD_SM501 && HAVE_GPIO_LIB
+	 ---help---
+	 This option uses the gpio library layer to export the 64 GPIO
+	 lines on the SM501. The platform data is used to supply the
+	 base number for the first GPIO line to register.
+
 config MFD_ASIC3
 	bool "Support for Compaq ASIC3"
 	depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 9296b2673b5..be871390812 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -19,6 +19,7 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/pci.h>
+#include <linux/gpio.h>
 
 #include <linux/sm501.h>
 #include <linux/sm501-regs.h>
@@ -31,10 +32,29 @@ struct sm501_device {
 	struct platform_device		pdev;
 };
 
+struct sm501_gpio;
+
+struct sm501_gpio_chip {
+	struct gpio_chip	gpio;
+	struct sm501_gpio	*ourgpio;	/* to get back to parent. */
+	void __iomem		*regbase;
+};
+
+struct sm501_gpio {
+	struct sm501_gpio_chip	low;
+	struct sm501_gpio_chip	high;
+	spinlock_t		lock;
+
+	unsigned int		 registered : 1;
+	void __iomem		*regs;
+	struct resource		*regs_res;
+};
+
 struct sm501_devdata {
 	spinlock_t			 reg_lock;
 	struct mutex			 clock_lock;
 	struct list_head		 devices;
+	struct sm501_gpio		 gpio;
 
 	struct device			*dev;
 	struct resource			*io_res;
@@ -42,6 +62,7 @@ struct sm501_devdata {
 	struct resource			*regs_claim;
 	struct sm501_platdata		*platdata;
 
+
 	unsigned int			 in_suspend;
 	unsigned long			 pm_misc;
 
@@ -52,6 +73,7 @@ struct sm501_devdata {
 	unsigned int			 rev;
 };
 
+
 #define MHZ (1000 * 1000)
 
 #ifdef DEBUG
@@ -276,58 +298,6 @@ unsigned long sm501_modify_reg(struct device *dev,
 
 EXPORT_SYMBOL_GPL(sm501_modify_reg);
 
-unsigned long sm501_gpio_get(struct device *dev,
-			     unsigned long gpio)
-{
-	struct sm501_devdata *sm = dev_get_drvdata(dev);
-	unsigned long result;
-	unsigned long reg;
-
-	reg = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
-	result = readl(sm->regs + reg);
-
-	result >>= (gpio & 31);
-	return result & 1UL;
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_get);
-
-void sm501_gpio_set(struct device *dev,
-		    unsigned long gpio,
-		    unsigned int to,
-		    unsigned int dir)
-{
-	struct sm501_devdata *sm = dev_get_drvdata(dev);
-
-	unsigned long bit = 1 << (gpio & 31);
-	unsigned long base;
-	unsigned long save;
-	unsigned long val;
-
-	base = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
-	base += SM501_GPIO;
-
-	spin_lock_irqsave(&sm->reg_lock, save);
-
-	val = readl(sm->regs + base) & ~bit;
-	if (to)
-		val |= bit;
-	writel(val, sm->regs + base);
-
-	val = readl(sm->regs + SM501_GPIO_DDR_LOW) & ~bit;
-	if (dir)
-		val |= bit;
-
-	writel(val, sm->regs + SM501_GPIO_DDR_LOW);
-	sm501_sync_regs(sm);
-
-	spin_unlock_irqrestore(&sm->reg_lock, save);
-
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_set);
-
-
 /* sm501_unit_power
  *
  * alters the power active gate to set specific units on or off
@@ -906,6 +876,226 @@ static int sm501_register_display(struct sm501_devdata *sm,
 	return sm501_register_device(sm, pdev);
 }
 
+#ifdef CONFIG_MFD_SM501_GPIO
+
+static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc)
+{
+	return container_of(gc, struct sm501_gpio_chip, gpio);
+}
+
+static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio)
+{
+	return container_of(gpio, struct sm501_devdata, gpio);
+}
+
+static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset)
+
+{
+	struct sm501_gpio_chip *smgpio = to_sm501_gpio(chip);
+	unsigned long result;
+
+	result = readl(smgpio->regbase + SM501_GPIO_DATA_LOW);
+	result >>= offset;
+
+	return result & 1UL;
+}
+
+static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	unsigned long bit = 1 << offset;
+	void __iomem *regs = smchip->regbase;
+	unsigned long save;
+	unsigned long val;
+
+	dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+		__func__, chip, offset);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+
+	val = readl(regs + SM501_GPIO_DATA_LOW) & ~bit;
+	if (value)
+		val |= bit;
+	writel(val, regs);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+}
+
+static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	void __iomem *regs = smchip->regbase;
+	unsigned long bit = 1 << offset;
+	unsigned long save;
+	unsigned long ddr;
+
+	dev_info(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+		 __func__, chip, offset);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+
+	ddr = readl(regs + SM501_GPIO_DDR_LOW);
+	writel(ddr & ~bit, regs + SM501_GPIO_DDR_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+
+	return 0;
+}
+
+static int sm501_gpio_output(struct gpio_chip *chip,
+			     unsigned offset, int value)
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	unsigned long bit = 1 << offset;
+	void __iomem *regs = smchip->regbase;
+	unsigned long save;
+	unsigned long val;
+	unsigned long ddr;
+
+	dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d,%d)\n",
+		__func__, chip, offset, value);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+
+	val = readl(regs + SM501_GPIO_DATA_LOW);
+	if (value)
+		val |= bit;
+	else
+		val &= ~bit;
+	writel(val, regs);
+
+	ddr = readl(regs + SM501_GPIO_DDR_LOW);
+	writel(ddr | bit, regs + SM501_GPIO_DDR_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	writel(val, regs + SM501_GPIO_DATA_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+
+	return 0;
+}
+
+static struct gpio_chip gpio_chip_template = {
+	.ngpio			= 32,
+	.direction_input	= sm501_gpio_input,
+	.direction_output	= sm501_gpio_output,
+	.set			= sm501_gpio_set,
+	.get			= sm501_gpio_get,
+};
+
+static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
+					      struct sm501_gpio *gpio,
+					      struct sm501_gpio_chip *chip)
+{
+	struct sm501_platdata *pdata = sm->platdata;
+	struct gpio_chip *gchip = &chip->gpio;
+	unsigned base = pdata->gpio_base;
+
+	memcpy(chip, &gpio_chip_template, sizeof(struct gpio_chip));
+
+	if (chip == &gpio->high) {
+		base += 32;
+		chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH;
+		gchip->label  = "SM501-HIGH";
+	} else {
+		chip->regbase = gpio->regs + SM501_GPIO_DATA_LOW;
+		gchip->label  = "SM501-LOW";
+	}
+
+	gchip->base   = base;
+	chip->ourgpio = gpio;
+
+	return gpiochip_add(gchip);
+}
+
+static int sm501_register_gpio(struct sm501_devdata *sm)
+{
+	struct sm501_gpio *gpio = &sm->gpio;
+	resource_size_t iobase = sm->io_res->start + SM501_GPIO;
+	int ret;
+	int tmp;
+
+	dev_dbg(sm->dev, "registering gpio block %08llx\n",
+		(unsigned long long)iobase);
+
+	spin_lock_init(&gpio->lock);
+
+	gpio->regs_res = request_mem_region(iobase, 0x20, "sm501-gpio");
+	if (gpio->regs_res == NULL) {
+		dev_err(sm->dev, "gpio: failed to request region\n");
+		return -ENXIO;
+	}
+
+	gpio->regs = ioremap(iobase, 0x20);
+	if (gpio->regs == NULL) {
+		dev_err(sm->dev, "gpio: failed to remap registers\n");
+		ret = -ENXIO;
+		goto err_mapped;
+	}
+
+	/* Register both our chips. */
+
+	ret = sm501_gpio_register_chip(sm, gpio, &gpio->low);
+	if (ret) {
+		dev_err(sm->dev, "failed to add low chip\n");
+		goto err_mapped;
+	}
+
+	ret = sm501_gpio_register_chip(sm, gpio, &gpio->high);
+	if (ret) {
+		dev_err(sm->dev, "failed to add high chip\n");
+		goto err_low_chip;
+	}
+
+	gpio->registered = 1;
+
+	return 0;
+
+ err_low_chip:
+	tmp = gpiochip_remove(&gpio->low.gpio);
+	if (tmp) {
+		dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+		return ret;
+	}
+
+ err_mapped:
+	release_resource(gpio->regs_res);
+	kfree(gpio->regs_res);
+
+	return ret;
+}
+
+static void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+	int ret;
+
+	ret = gpiochip_remove(&sm->gpio.low.gpio);
+	if (ret)
+		dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+
+	ret = gpiochip_remove(&sm->gpio.high.gpio);
+	if (ret)
+		dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
+}
+
+#else
+static int sm501_register_gpio(struct sm501_devdata *sm)
+{
+	return 0;
+}
+
+static void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+}
+#endif
+
 /* sm501_dbg_regs
  *
  * Debug attribute to attach to parent device to show core registers
@@ -1059,6 +1249,8 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 			sm501_register_usbhost(sm, &mem_avail);
 		if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1))
 			sm501_register_uart(sm, idata->devices);
+		if (idata->devices & SM501_USE_GPIO)
+			sm501_register_gpio(sm);
 	}
 
 	ret = sm501_check_clocks(sm);
@@ -1366,6 +1558,9 @@ static void sm501_dev_remove(struct sm501_devdata *sm)
 		sm501_remove_sub(sm, smdev);
 
 	device_remove_file(sm->dev, &dev_attr_dbg_regs);
+
+	if (sm->gpio.registered)
+		sm501_gpio_remove(sm);
 }
 
 static void sm501_pci_remove(struct pci_dev *dev)
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 145405bf9ef..6ea39007c8a 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -46,24 +46,6 @@ extern unsigned long sm501_modify_reg(struct device *dev,
 				      unsigned long set,
 				      unsigned long clear);
 
-/* sm501_gpio_set
- *
- * set the state of the given GPIO line
-*/
-
-extern void sm501_gpio_set(struct device *dev,
-			   unsigned long gpio,
-			   unsigned int to,
-			   unsigned int dir);
-
-/* sm501_gpio_get
- *
- * get the state of the given GPIO line
-*/
-
-extern unsigned long sm501_gpio_get(struct device *dev,
-				    unsigned long gpio);
-
 
 /* Platform data definitions */
 
@@ -131,6 +113,7 @@ struct sm501_reg_init {
 #define SM501_USE_FBACCEL	(1<<6)
 #define SM501_USE_AC97		(1<<7)
 #define SM501_USE_I2S		(1<<8)
+#define SM501_USE_GPIO		(1<<9)
 
 #define SM501_USE_ALL		(0xffffffff)
 
@@ -173,6 +156,7 @@ struct sm501_platdata {
 	struct sm501_platdata_fb	*fb;
 
 	int				 flags;
+	unsigned			 gpio_base;
 
 	int	(*get_power)(struct device *dev);
 	int	(*set_power)(struct device *dev, unsigned int on);
-- 
GitLab


From 60e540d617b40eb3d37f1dd99c97af588ff9b70b Mon Sep 17 00:00:00 2001
From: Arnaud Patard <apatard@mandriva.com>
Date: Fri, 25 Jul 2008 01:46:00 -0700
Subject: [PATCH 611/853] sm501: gpio dynamic registration for PCI devices

The SM501 PCI card requires dynamic gpio allocation, as the number of
cards is not known at compile time.  Fix up the platform data and
registration to deal with this.

Acked-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Arnaud Patard <apatard@mandriva.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c   | 6 ++++--
 include/linux/sm501.h | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index be871390812..c3e5a48f614 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -996,12 +996,13 @@ static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
 {
 	struct sm501_platdata *pdata = sm->platdata;
 	struct gpio_chip *gchip = &chip->gpio;
-	unsigned base = pdata->gpio_base;
+	int base = pdata->gpio_base;
 
 	memcpy(chip, &gpio_chip_template, sizeof(struct gpio_chip));
 
 	if (chip == &gpio->high) {
-		base += 32;
+		if (base > 0)
+			base += 32;
 		chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH;
 		gchip->label  = "SM501-HIGH";
 	} else {
@@ -1452,6 +1453,7 @@ static struct sm501_platdata_fb sm501_fb_pdata = {
 static struct sm501_platdata sm501_pci_platdata = {
 	.init		= &sm501_pci_initdata,
 	.fb		= &sm501_fb_pdata,
+	.gpio_base	= -1,
 };
 
 static int sm501_pci_probe(struct pci_dev *dev,
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 6ea39007c8a..a8d02f36ad3 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -156,7 +156,7 @@ struct sm501_platdata {
 	struct sm501_platdata_fb	*fb;
 
 	int				 flags;
-	unsigned			 gpio_base;
+	int				 gpio_base;
 
 	int	(*get_power)(struct device *dev);
 	int	(*set_power)(struct device *dev, unsigned int on);
-- 
GitLab


From 42cd2366fb9b58cdfc1855be32b31a78e40b2079 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:46:01 -0700
Subject: [PATCH 612/853] sm501: gpio I2C support

Add support for adding the GPIO based I2C resources.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Cc: Arnaud Patard <apatard@mandriva.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c   | 76 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/sm501.h | 10 +++++-
 2 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index c3e5a48f614..107215b2880 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -20,6 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/gpio.h>
+#include <linux/i2c-gpio.h>
 
 #include <linux/sm501.h>
 #include <linux/sm501-regs.h>
@@ -1086,6 +1087,11 @@ static void sm501_gpio_remove(struct sm501_devdata *sm)
 		dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
 }
 
+static int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+	struct sm501_gpio *gpio = &sm->gpio;
+	return pin + (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
+}
 #else
 static int sm501_register_gpio(struct sm501_devdata *sm)
 {
@@ -1095,8 +1101,66 @@ static int sm501_register_gpio(struct sm501_devdata *sm)
 static void sm501_gpio_remove(struct sm501_devdata *sm)
 {
 }
+
+static int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+	return -1;
+}
 #endif
 
+static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm,
+					    struct sm501_platdata_gpio_i2c *iic)
+{
+	struct i2c_gpio_platform_data *icd;
+	struct platform_device *pdev;
+
+	pdev = sm501_create_subdev(sm, "i2c-gpio", 0,
+				   sizeof(struct i2c_gpio_platform_data));
+	if (!pdev)
+		return -ENOMEM;
+
+	icd = pdev->dev.platform_data;
+
+	/* We keep the pin_sda and pin_scl fields relative in case the
+	 * same platform data is passed to >1 SM501.
+	 */
+
+	icd->sda_pin = sm501_gpio_pin2nr(sm, iic->pin_sda);
+	icd->scl_pin = sm501_gpio_pin2nr(sm, iic->pin_scl);
+	icd->timeout = iic->timeout;
+	icd->udelay = iic->udelay;
+
+	/* note, we can't use either of the pin numbers, as the i2c-gpio
+	 * driver uses the platform.id field to generate the bus number
+	 * to register with the i2c core; The i2c core doesn't have enough
+	 * entries to deal with anything we currently use.
+	*/
+
+	pdev->id = iic->bus_num;
+
+	dev_info(sm->dev, "registering i2c-%d: sda=%d (%d), scl=%d (%d)\n",
+		 iic->bus_num,
+		 icd->sda_pin, iic->pin_sda, icd->scl_pin, iic->pin_scl);
+
+	return sm501_register_device(sm, pdev);
+}
+
+static int sm501_register_gpio_i2c(struct sm501_devdata *sm,
+				   struct sm501_platdata *pdata)
+{
+	struct sm501_platdata_gpio_i2c *iic = pdata->gpio_i2c;
+	int index;
+	int ret;
+
+	for (index = 0; index < pdata->gpio_i2c_nr; index++, iic++) {
+		ret = sm501_register_gpio_i2c_instance(sm, iic);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* sm501_dbg_regs
  *
  * Debug attribute to attach to parent device to show core registers
@@ -1204,6 +1268,7 @@ static unsigned int sm501_mem_local[] = {
 static int sm501_init_dev(struct sm501_devdata *sm)
 {
 	struct sm501_initdata *idata;
+	struct sm501_platdata *pdata;
 	resource_size_t mem_avail;
 	unsigned long dramctrl;
 	unsigned long devid;
@@ -1242,7 +1307,9 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 
 	/* check to see if we have some device initialisation */
 
-	idata = sm->platdata ? sm->platdata->init : NULL;
+	pdata = sm->platdata;
+	idata = pdata ? pdata->init : NULL;
+
 	if (idata) {
 		sm501_init_regs(sm, idata);
 
@@ -1254,6 +1321,13 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 			sm501_register_gpio(sm);
 	}
 
+	if (pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) {
+		if (!sm->gpio.registered)
+			dev_err(sm->dev, "no gpio registered for i2c gpio.\n");
+		else
+			sm501_register_gpio_i2c(sm, pdata);
+	}
+
 	ret = sm501_check_clocks(sm);
 	if (ret) {
 		dev_err(sm->dev, "M1X and M clocks sourced from different "
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index a8d02f36ad3..214f93209b8 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -86,11 +86,19 @@ struct sm501_platdata_fb {
 	struct sm501_platdata_fbsub	*fb_pnl;
 };
 
-/* gpio i2c */
+/* gpio i2c
+ *
+ * Note, we have to pass in the bus number, as the number used will be
+ * passed to the i2c-gpio driver's platform_device.id, subsequently used
+ * to register the i2c bus.
+*/
 
 struct sm501_platdata_gpio_i2c {
+	unsigned int		bus_num;
 	unsigned int		pin_sda;
 	unsigned int		pin_scl;
+	int			udelay;
+	int			timeout;
 };
 
 /* sm501_initdata
-- 
GitLab


From 28130bea3bcfefe3437b0a5dcab786f1f0296953 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:46:02 -0700
Subject: [PATCH 613/853] sm501: fixes for akpms comments on gpiolib addition

Address Andrew Morton's review comments on the patch that added the
gpiolib support.  These include adding some missing frees on error or
release, and replacing a memcpy with a type-safe assignment.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 107215b2880..2dfb41aabca 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -999,7 +999,7 @@ static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
 	struct gpio_chip *gchip = &chip->gpio;
 	int base = pdata->gpio_base;
 
-	memcpy(chip, &gpio_chip_template, sizeof(struct gpio_chip));
+	chip->gpio = gpio_chip_template;
 
 	if (chip == &gpio->high) {
 		if (base > 0)
@@ -1039,7 +1039,7 @@ static int sm501_register_gpio(struct sm501_devdata *sm)
 	if (gpio->regs == NULL) {
 		dev_err(sm->dev, "gpio: failed to remap registers\n");
 		ret = -ENXIO;
-		goto err_mapped;
+		goto err_claimed;
 	}
 
 	/* Register both our chips. */
@@ -1068,6 +1068,9 @@ static int sm501_register_gpio(struct sm501_devdata *sm)
 	}
 
  err_mapped:
+	iounmap(gpio->regs);
+
+ err_claimed:
 	release_resource(gpio->regs_res);
 	kfree(gpio->regs_res);
 
@@ -1076,33 +1079,38 @@ static int sm501_register_gpio(struct sm501_devdata *sm)
 
 static void sm501_gpio_remove(struct sm501_devdata *sm)
 {
+	struct sm501_gpio *gpio = &sm->gpio;
 	int ret;
 
-	ret = gpiochip_remove(&sm->gpio.low.gpio);
+	ret = gpiochip_remove(&gpio->low.gpio);
 	if (ret)
 		dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
 
-	ret = gpiochip_remove(&sm->gpio.high.gpio);
+	ret = gpiochip_remove(&gpio->high.gpio);
 	if (ret)
 		dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
+
+	iounmap(gpio->regs);
+	release_resource(gpio->regs_res);
+	kfree(gpio->regs_res);
 }
 
-static int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
 {
 	struct sm501_gpio *gpio = &sm->gpio;
 	return pin + (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
 }
 #else
-static int sm501_register_gpio(struct sm501_devdata *sm)
+static inline int sm501_register_gpio(struct sm501_devdata *sm)
 {
 	return 0;
 }
 
-static void sm501_gpio_remove(struct sm501_devdata *sm)
+static inline void sm501_gpio_remove(struct sm501_devdata *sm)
 {
 }
 
-static int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
 {
 	return -1;
 }
-- 
GitLab


From f2999209d779573e17468b680f5f267d8cb2a9c7 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:46:02 -0700
Subject: [PATCH 614/853] mfd: sm501 build fixes when CONFIG_MFD_SM501_GPIO
 unset

Fix the build problems if CONFIG_MFD_SM501_GPIO is not set, which is
generally when there is no gpiolib support available as currently happens
on x86 when building PCI SM501.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Tested-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 2dfb41aabca..79d7aea5510 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -19,7 +19,6 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/pci.h>
-#include <linux/gpio.h>
 #include <linux/i2c-gpio.h>
 
 #include <linux/sm501.h>
@@ -35,6 +34,9 @@ struct sm501_device {
 
 struct sm501_gpio;
 
+#ifdef CONFIG_MFD_SM501_GPIO
+#include <linux/gpio.h>
+
 struct sm501_gpio_chip {
 	struct gpio_chip	gpio;
 	struct sm501_gpio	*ourgpio;	/* to get back to parent. */
@@ -50,6 +52,11 @@ struct sm501_gpio {
 	void __iomem		*regs;
 	struct resource		*regs_res;
 };
+#else
+struct sm501_gpio {
+	/* no gpio support, empty definition for sm501_devdata. */
+};
+#endif
 
 struct sm501_devdata {
 	spinlock_t			 reg_lock;
@@ -1082,6 +1089,9 @@ static void sm501_gpio_remove(struct sm501_devdata *sm)
 	struct sm501_gpio *gpio = &sm->gpio;
 	int ret;
 
+	if (!sm->gpio.registered)
+		return;
+
 	ret = gpiochip_remove(&gpio->low.gpio);
 	if (ret)
 		dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
@@ -1100,6 +1110,11 @@ static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
 	struct sm501_gpio *gpio = &sm->gpio;
 	return pin + (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
 }
+
+static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
+{
+	return sm->gpio.registered;
+}
 #else
 static inline int sm501_register_gpio(struct sm501_devdata *sm)
 {
@@ -1114,6 +1129,11 @@ static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
 {
 	return -1;
 }
+
+static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
+{
+	return 0;
+}
 #endif
 
 static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm,
@@ -1330,8 +1350,8 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 	}
 
 	if (pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) {
-		if (!sm->gpio.registered)
-			dev_err(sm->dev, "no gpio registered for i2c gpio.\n");
+		if (!sm501_gpio_isregistered(sm))
+			dev_err(sm->dev, "no gpio available for i2c gpio.\n");
 		else
 			sm501_register_gpio_i2c(sm, pdata);
 	}
@@ -1643,8 +1663,7 @@ static void sm501_dev_remove(struct sm501_devdata *sm)
 
 	device_remove_file(sm->dev, &dev_attr_dbg_regs);
 
-	if (sm->gpio.registered)
-		sm501_gpio_remove(sm);
+	sm501_gpio_remove(sm);
 }
 
 static void sm501_pci_remove(struct pci_dev *dev)
-- 
GitLab


From 53a9600c634e3bfd6230e0597aca159bf4d4d010 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:46:03 -0700
Subject: [PATCH 615/853] mfd: sm501 fix gpio number calculation for upper bank

The sm501_gpio_pin2nr() routine returns the wrong values for gpios in the
upper bank.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 79d7aea5510..7aebad4c06f 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -1108,7 +1108,9 @@ static void sm501_gpio_remove(struct sm501_devdata *sm)
 static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
 {
 	struct sm501_gpio *gpio = &sm->gpio;
-	return pin + (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
+	int base = (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
+
+	return (pin % 32) + base;
 }
 
 static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
-- 
GitLab


From ef53d9c5e4da147ecaa43c44c5e5945eb83970a2 Mon Sep 17 00:00:00 2001
From: Srinivasa D S <srinivasa@in.ibm.com>
Date: Fri, 25 Jul 2008 01:46:04 -0700
Subject: [PATCH 616/853] kprobes: improve kretprobe scalability with hashed
 locking

Currently the list of kretprobe instances is stored in the kretprobe object
(as used_instances, free_instances) and in the kretprobe hash table.  We have
one global kretprobe lock to serialise access to these lists.  This allows
only one kretprobe handler to execute at a time, which hurts system
performance, particularly on SMP systems and when return probes are set on
a lot of functions (such as all system calls).

The solution proposed here uses fine-grained locks that perform better on
SMP systems than the present kretprobe implementation.

Solution:

 1) Instead of having one global lock to protect the kretprobe instances
    present in the kretprobe object and the kretprobe hash table, we will
    have two locks: one lock protecting the kretprobe hash table and
    another lock for the kretprobe object.

 2) We hold lock present in kretprobe object while we modify kretprobe
    instance in kretprobe object and we hold per-hash-list lock while
    modifying kretprobe instances present in that hash list.  To prevent
    deadlock, we never grab a per-hash-list lock while holding a kretprobe
    lock.

 3) We can remove used_instances from struct kretprobe, as we can
    track used instances of kretprobe instances using kretprobe hash
    table.

Time duration for kernel compilation ("make -j 8") on an 8-way ppc64 system
with return probes set on all system calls looks like this.

cacheline              non-cacheline             Un-patched kernel
aligned patch 	       aligned patch
===============================================================================
real    9m46.784s       9m54.412s                  10m2.450s
user    40m5.715s       40m7.142s                  40m4.273s
sys     2m57.754s       2m58.583s                  3m17.430s
===========================================================

Time duration for kernel compilation ("make -j 8") on the same system, when
the kernel is not probed.
=========================
real    9m26.389s
user    40m8.775s
sys     2m7.283s
=========================

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/kprobes.c     |   6 +-
 arch/ia64/kernel/kprobes.c    |   6 +-
 arch/powerpc/kernel/kprobes.c |   6 +-
 arch/s390/kernel/kprobes.c    |   6 +-
 arch/sparc64/kernel/kprobes.c |  11 ++-
 arch/x86/kernel/kprobes.c     |   6 +-
 include/linux/kprobes.h       |   7 +-
 kernel/kprobes.c              | 127 ++++++++++++++++++++++++----------
 8 files changed, 108 insertions(+), 67 deletions(-)

diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5ee39e10c8d..d28513f14d0 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	}
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
@@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	return (void *)orig_ret_address;
 }
 
-/* Called with kretprobe_lock held. */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 233434f4f88..f07688da947 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 		((struct fnptr *)kretprobe_trampoline)->ip;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	return 1;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 4ba2af12545..de79915452c 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 	kcb->kprobe_saved_msr = regs->msr;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
@@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	regs->nip = orig_ret_address;
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 288ad490a6d..4f82e5b5f87 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 					struct pt_regs *regs)
 {
@@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index f43b5d75535..201a6e547e4 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-/* Called with kretprobe_lock held.  The value stored in the return
- * address register is actually 2 instructions before where the
- * callee will return to.  Sequences usually look something like this
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
  *
  *		call	some_function	<--- return register points here
  *		 nop			<--- call delay slot
@@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->tnpc = orig_ret_address + 4;
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f85f0..6c27679ec6a 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 		regs->ip = (unsigned long)p->ainsn.insn;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 	/* fixup registers */
 #ifdef CONFIG_X86_64
 	regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 04a3556bdea..0be7795655f 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -157,11 +157,10 @@ struct kretprobe {
 	int nmissed;
 	size_t data_size;
 	struct hlist_head free_instances;
-	struct hlist_head used_instances;
+	spinlock_t lock;
 };
 
 struct kretprobe_instance {
-	struct hlist_node uflist; /* either on free list or used list */
 	struct hlist_node hlist;
 	struct kretprobe *rp;
 	kprobe_opcode_t *ret_addr;
@@ -201,7 +200,6 @@ static inline int init_test_probes(void)
 }
 #endif /* CONFIG_KPROBES_SANITY_TEST */
 
-extern spinlock_t kretprobe_lock;
 extern struct mutex kprobe_mutex;
 extern int arch_prepare_kprobe(struct kprobe *p);
 extern void arch_arm_kprobe(struct kprobe *p);
@@ -214,6 +212,9 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p);
 
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
+void kretprobe_hash_lock(struct task_struct *tsk,
+			 struct hlist_head **head, unsigned long *flags);
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags);
 struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
 
 /* kprobe_running() will just return the current_kprobe on this CPU */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1485ca8d0e0..cb0b3bde361 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -62,6 +62,7 @@
 	addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
 #endif
 
+static int kprobes_initialized;
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 
@@ -69,8 +70,15 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 static bool kprobe_enabled;
 
 DEFINE_MUTEX(kprobe_mutex);		/* Protects kprobe_table */
-DEFINE_SPINLOCK(kretprobe_lock);	/* Protects kretprobe_inst_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static struct {
+	spinlock_t lock ____cacheline_aligned;
+} kretprobe_table_locks[KPROBE_TABLE_SIZE];
+
+static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
+{
+	return &(kretprobe_table_locks[hash].lock);
+}
 
 /*
  * Normally, functions that we'd want to prohibit kprobes in, are marked
@@ -368,26 +376,53 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
 	return;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
 				struct hlist_head *head)
 {
+	struct kretprobe *rp = ri->rp;
+
 	/* remove rp inst off the rprobe_inst_table */
 	hlist_del(&ri->hlist);
-	if (ri->rp) {
-		/* remove rp inst off the used list */
-		hlist_del(&ri->uflist);
-		/* put rp inst back onto the free list */
-		INIT_HLIST_NODE(&ri->uflist);
-		hlist_add_head(&ri->uflist, &ri->rp->free_instances);
+	INIT_HLIST_NODE(&ri->hlist);
+	if (likely(rp)) {
+		spin_lock(&rp->lock);
+		hlist_add_head(&ri->hlist, &rp->free_instances);
+		spin_unlock(&rp->lock);
 	} else
 		/* Unregistering */
 		hlist_add_head(&ri->hlist, head);
 }
 
-struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
+void kretprobe_hash_lock(struct task_struct *tsk,
+			 struct hlist_head **head, unsigned long *flags)
+{
+	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+	spinlock_t *hlist_lock;
+
+	*head = &kretprobe_inst_table[hash];
+	hlist_lock = kretprobe_table_lock_ptr(hash);
+	spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
 {
-	return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
+	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+	spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
+{
+	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+	spinlock_t *hlist_lock;
+
+	hlist_lock = kretprobe_table_lock_ptr(hash);
+	spin_unlock_irqrestore(hlist_lock, *flags);
+}
+
+void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
+{
+	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+	spin_unlock_irqrestore(hlist_lock, *flags);
 }
 
 /*
@@ -401,17 +436,21 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 	struct kretprobe_instance *ri;
 	struct hlist_head *head, empty_rp;
 	struct hlist_node *node, *tmp;
-	unsigned long flags = 0;
+	unsigned long hash, flags = 0;
 
-	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(tk);
+	if (unlikely(!kprobes_initialized))
+		/* Early boot.  kretprobe_table_locks not yet initialized. */
+		return;
+
+	hash = hash_ptr(tk, KPROBE_HASH_BITS);
+	head = &kretprobe_inst_table[hash];
+	kretprobe_table_lock(hash, &flags);
 	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 		if (ri->task == tk)
 			recycle_rp_inst(ri, &empty_rp);
 	}
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
-
+	kretprobe_table_unlock(hash, &flags);
+	INIT_HLIST_HEAD(&empty_rp);
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
@@ -423,24 +462,29 @@ static inline void free_rp_inst(struct kretprobe *rp)
 	struct kretprobe_instance *ri;
 	struct hlist_node *pos, *next;
 
-	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) {
-		hlist_del(&ri->uflist);
+	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
+		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
 }
 
 static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
 {
-	unsigned long flags;
+	unsigned long flags, hash;
 	struct kretprobe_instance *ri;
 	struct hlist_node *pos, *next;
+	struct hlist_head *head;
+
 	/* No race here */
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
-		ri->rp = NULL;
-		hlist_del(&ri->uflist);
+	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
+		kretprobe_table_lock(hash, &flags);
+		head = &kretprobe_inst_table[hash];
+		hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
+			if (ri->rp == rp)
+				ri->rp = NULL;
+		}
+		kretprobe_table_unlock(hash, &flags);
 	}
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
 	free_rp_inst(rp);
 }
 
@@ -831,32 +875,37 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 					   struct pt_regs *regs)
 {
 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
-	unsigned long flags = 0;
+	unsigned long hash, flags = 0;
+	struct kretprobe_instance *ri;
 
 	/*TODO: consider to only swap the RA after the last pre_handler fired */
-	spin_lock_irqsave(&kretprobe_lock, flags);
+	hash = hash_ptr(current, KPROBE_HASH_BITS);
+	spin_lock_irqsave(&rp->lock, flags);
 	if (!hlist_empty(&rp->free_instances)) {
-		struct kretprobe_instance *ri;
-
 		ri = hlist_entry(rp->free_instances.first,
-				 struct kretprobe_instance, uflist);
+				struct kretprobe_instance, hlist);
+		hlist_del(&ri->hlist);
+		spin_unlock_irqrestore(&rp->lock, flags);
+
 		ri->rp = rp;
 		ri->task = current;
 
 		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
-			spin_unlock_irqrestore(&kretprobe_lock, flags);
+			spin_unlock_irqrestore(&rp->lock, flags);
 			return 0;
 		}
 
 		arch_prepare_kretprobe(ri, regs);
 
 		/* XXX(hch): why is there no hlist_move_head? */
-		hlist_del(&ri->uflist);
-		hlist_add_head(&ri->uflist, &ri->rp->used_instances);
-		hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task));
-	} else
+		INIT_HLIST_NODE(&ri->hlist);
+		kretprobe_table_lock(hash, &flags);
+		hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
+		kretprobe_table_unlock(hash, &flags);
+	} else {
 		rp->nmissed++;
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+		spin_unlock_irqrestore(&rp->lock, flags);
+	}
 	return 0;
 }
 
@@ -892,7 +941,7 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
 		rp->maxactive = NR_CPUS;
 #endif
 	}
-	INIT_HLIST_HEAD(&rp->used_instances);
+	spin_lock_init(&rp->lock);
 	INIT_HLIST_HEAD(&rp->free_instances);
 	for (i = 0; i < rp->maxactive; i++) {
 		inst = kmalloc(sizeof(struct kretprobe_instance) +
@@ -901,8 +950,8 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
 			free_rp_inst(rp);
 			return -ENOMEM;
 		}
-		INIT_HLIST_NODE(&inst->uflist);
-		hlist_add_head(&inst->uflist, &rp->free_instances);
+		INIT_HLIST_NODE(&inst->hlist);
+		hlist_add_head(&inst->hlist, &rp->free_instances);
 	}
 
 	rp->nmissed = 0;
@@ -1009,6 +1058,7 @@ static int __init init_kprobes(void)
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		INIT_HLIST_HEAD(&kprobe_table[i]);
 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
+		spin_lock_init(&(kretprobe_table_locks[i].lock));
 	}
 
 	/*
@@ -1050,6 +1100,7 @@ static int __init init_kprobes(void)
 	err = arch_init_kprobes();
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
+	kprobes_initialized = (err == 0);
 
 	if (!err)
 		init_test_probes();
-- 
GitLab


From 8b6dd986823a8d92ed9f54baa5cef8604d9d9d44 Mon Sep 17 00:00:00 2001
From: Abhishek Sagar <sagar.abhishek@gmail.com>
Date: Fri, 25 Jul 2008 01:46:05 -0700
Subject: [PATCH 617/853] kprobes: remove redundant config check

I noticed that there's a CONFIG_KPROBES check inside kernel/kprobes.c,
which is redundant.

Signed-off-by: Abhishek Sagar <sagar.abhishek@gmail.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kprobes.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index cb0b3bde361..75bc2cd9ebc 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1337,13 +1337,8 @@ EXPORT_SYMBOL_GPL(register_jprobe);
 EXPORT_SYMBOL_GPL(unregister_jprobe);
 EXPORT_SYMBOL_GPL(register_jprobes);
 EXPORT_SYMBOL_GPL(unregister_jprobes);
-#ifdef CONFIG_KPROBES
 EXPORT_SYMBOL_GPL(jprobe_return);
-#endif
-
-#ifdef CONFIG_KPROBES
 EXPORT_SYMBOL_GPL(register_kretprobe);
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
 EXPORT_SYMBOL_GPL(register_kretprobes);
 EXPORT_SYMBOL_GPL(unregister_kretprobes);
-#endif
-- 
GitLab


From d8f388d8dc8d4f36539dd37c1fff62cc404ea0fc Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Fri, 25 Jul 2008 01:46:07 -0700
Subject: [PATCH 618/853] gpio: sysfs interface

This adds a simple sysfs interface for GPIOs.

    /sys/class/gpio
    	/export ... asks the kernel to export a GPIO to userspace
    	/unexport ... to return a GPIO to the kernel
        /gpioN ... for each exported GPIO #N
	    /value ... always readable, writes fail for input GPIOs
	    /direction ... r/w as: in, out (default low); write high, low
	/gpiochipN ... for each gpiochip; #N is its first GPIO
	    /base ... (r/o) same as N
	    /label ... (r/o) descriptive, not necessarily unique
	    /ngpio ... (r/o) number of GPIOs; numbered N .. N+(ngpio - 1)

GPIOs claimed by kernel code may be exported by its owner using a new
gpio_export() call, which should be most useful for driver debugging.
Such exports may optionally be done without a "direction" attribute.

Userspace may ask to take over a GPIO by writing to a sysfs control file,
helping to cope with incomplete board support or other "one-off"
requirements that don't merit full kernel support:

  echo 23 > /sys/class/gpio/export
	... will gpio_request(23, "sysfs") and gpio_export(23);
	use /sys/class/gpio/gpio23/direction to (re)configure it,
	when that GPIO can be used as both input and output.
  echo 23 > /sys/class/gpio/unexport
	... will gpio_free(23), when it was exported as above

The extra D-space footprint is a few hundred bytes, except for the sysfs
resources associated with each exported GPIO.  The additional I-space
footprint is about two thirds of the current size of gpiolib (!).  Since
no /dev node creation is involved, no "udev" support is needed.

Related changes:

  * This adds a device pointer to "struct gpio_chip".  When GPIO
    providers initialize that, sysfs gpio class devices become children of
    that device instead of being "virtual" devices.

  * The (few) gpio_chip providers which have such a device node have
    been updated.

  * Some gpio_chip drivers also needed to update their module "owner"
    field ...  for which missing kerneldoc was added.

  * Some gpio_chips don't support input GPIOs.  Those GPIOs are now
    flagged appropriately when the chip is registered.

Based on previous patches, and discussion both on and off LKML.

A Documentation/ABI/testing/sysfs-gpio update is ready to submit once this
merges to mainline.

[akpm@linux-foundation.org: a few maintenance build fixes]
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Guennadi Liakhovetski <g.liakhovetski@pengutronix.de>
Cc: Greg KH <greg@kroah.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/gpio.txt       | 123 +++++++-
 arch/arm/plat-omap/gpio.c    |   3 +
 arch/avr32/mach-at32ap/pio.c |   2 +
 drivers/gpio/Kconfig         |  15 +
 drivers/gpio/gpiolib.c       | 536 ++++++++++++++++++++++++++++++++++-
 drivers/gpio/mcp23s08.c      |   1 +
 drivers/gpio/pca953x.c       |   1 +
 drivers/gpio/pcf857x.c       |   1 +
 drivers/i2c/chips/tps65010.c |   2 +
 drivers/mfd/htc-egpio.c      |   2 +
 include/asm-generic/gpio.h   |  33 ++-
 include/linux/gpio.h         |  13 +
 12 files changed, 712 insertions(+), 20 deletions(-)

diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index c35ca9e40d4..8b69811a964 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -347,15 +347,12 @@ necessarily be nonportable.
 Dynamic definition of GPIOs is not currently standard; for example, as
 a side effect of configuring an add-on board with some GPIO expanders.
 
-These calls are purely for kernel space, but a userspace API could be built
-on top of them.
-
 
 GPIO implementor's framework (OPTIONAL)
 =======================================
 As noted earlier, there is an optional implementation framework making it
 easier for platforms to support different kinds of GPIO controller using
-the same programming interface.
+the same programming interface.  This framework is called "gpiolib".
 
 As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
 will be found there.  That will list all the controllers registered through
@@ -439,4 +436,120 @@ becomes available.  That may mean the device should not be registered until
 calls for that GPIO can work.  One way to address such dependencies is for
 such gpio_chip controllers to provide setup() and teardown() callbacks to
 board specific code; those board specific callbacks would register devices
-once all the necessary resources are available.
+once all the necessary resources are available, and remove them later when
+the GPIO controller device becomes unavailable.
+
+
+Sysfs Interface for Userspace (OPTIONAL)
+========================================
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs.  This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary.  Plus, it could be
+present on production systems without debugging support.
+
+Given appropriate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory.  System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection.  In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about.  And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+Note that standard kernel drivers exist for common "LEDs and Buttons"
+GPIO tasks:  "leds-gpio" and "gpio_keys", respectively.  Use those
+instead of talking directly to the GPIOs; they integrate with kernel
+frameworks better than your userspace code could.
+
+
+Paths in Sysfs
+--------------
+There are three kinds of entry in /sys/class/gpio:
+
+   -	Control interfaces used to get userspace control over GPIOs;
+
+   -	GPIOs themselves; and
+
+   -	GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+    /sys/class/gpio/
+
+    	"export" ... Userspace may ask the kernel to export control of
+		a GPIO to userspace by writing its number to this file.
+
+		Example:  "echo 19 > export" will create a "gpio19" node
+		for GPIO #19, if that's not requested by kernel code.
+
+    	"unexport" ... Reverses the effect of exporting to userspace.
+
+		Example:  "echo 19 > unexport" will remove a "gpio19"
+		node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+    /sys/class/gpio/gpioN/
+
+	"direction" ... reads as either "in" or "out".  This value may
+		normally be written.  Writing as "out" defaults to
+		initializing the value as low.  To ensure glitch free
+		operation, values "low" and "high" may be written to
+		configure the GPIO as an output with that initial value.
+
+		Note that this attribute *will not exist* if the kernel
+		doesn't support changing the direction of a GPIO, or
+		it was exported by kernel code that didn't explicitly
+		allow userspace to reconfigure this GPIO's direction.
+
+	"value" ... reads as either 0 (low) or 1 (high).  If the GPIO
+		is configured as an output, this value may be written;
+		any nonzero value is treated as high.
+
+GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+    /sys/class/gpio/gpiochipN/
+
+    	"base" ... same as N, the first GPIO managed by this chip
+
+    	"label" ... provided for diagnostics (not always unique)
+
+    	"ngpio" ... how many GPIOs this manages (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes.  However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack.  In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request():
+
+	/* export the GPIO to userspace */
+	int gpio_export(unsigned gpio, bool direction_may_change);
+
+	/* reverse gpio_export() */
+	void gpio_unexport(unsigned gpio);
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpio_export().  The driver can control whether the
+signal direction may change.  This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index 1903a3491ee..d8e9c2c3f0f 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -1488,6 +1488,9 @@ static int __init _omap_gpio_init(void)
 		bank->chip.set = gpio_set;
 		if (bank_is_mpuio(bank)) {
 			bank->chip.label = "mpuio";
+#ifdef CONFIG_ARCH_OMAP1
+			bank->chip.dev = &omap_mpuio_device.dev;
+#endif
 			bank->chip.base = OMAP_MPUIO(0);
 		} else {
 			bank->chip.label = "gpio";
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index 60da03ba711..296294f8ed8 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -360,6 +360,8 @@ static int __init pio_probe(struct platform_device *pdev)
 	pio->chip.label = pio->name;
 	pio->chip.base = pdev->id * 32;
 	pio->chip.ngpio = 32;
+	pio->chip.dev = &pdev->dev;
+	pio->chip.owner = THIS_MODULE;
 
 	pio->chip.direction_input = direction_input;
 	pio->chip.get = gpio_get;
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index fced1909cbb..6ec0e35b98e 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -23,6 +23,21 @@ config DEBUG_GPIO
 	  slower.  The diagnostics help catch the type of setup errors
 	  that are most common when setting up new platforms or boards.
 
+config GPIO_SYSFS
+	bool "/sys/class/gpio/... (sysfs interface)"
+	depends on SYSFS && EXPERIMENTAL
+	help
+	  Say Y here to add a sysfs interface for GPIOs.
+
+	  This is mostly useful to work around omissions in a system's
+	  kernel support.  Those are common in custom and semicustom
+	  hardware assembled using standard kernels with a minimum of
+	  custom patches.  In those cases, userspace code may import
+	  a given GPIO from the kernel, if no kernel driver requested it.
+
+	  Kernel drivers may also request that a particular GPIO be
+	  exported to userspace; this can be useful when debugging.
+
 # put expanders in the right section, in alphabetical order
 
 comment "I2C GPIO expanders:"
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index beaf6b3a37d..8d2940517c9 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2,8 +2,11 @@
 #include <linux/module.h>
 #include <linux/irq.h>
 #include <linux/spinlock.h>
-
-#include <asm/gpio.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/gpio.h>
 
 
 /* Optional implementation infrastructure for GPIO interfaces.
@@ -44,6 +47,8 @@ struct gpio_desc {
 #define FLAG_REQUESTED	0
 #define FLAG_IS_OUT	1
 #define FLAG_RESERVED	2
+#define FLAG_EXPORT	3	/* protected by sysfs_lock */
+#define FLAG_SYSFS	4	/* exported via /sys/class/gpio/control */
 
 #ifdef CONFIG_DEBUG_FS
 	const char		*label;
@@ -151,6 +156,482 @@ err:
 	return ret;
 }
 
+#ifdef CONFIG_GPIO_SYSFS
+
+/* lock protects against gpio_unexport() being called while
+ * sysfs files are active.
+ */
+static DEFINE_MUTEX(sysfs_lock);
+
+/*
+ * /sys/class/gpio/gpioN... only for GPIOs that are exported
+ *   /direction
+ *      * MAY BE OMITTED if kernel won't allow direction changes
+ *      * is read/write as "in" or "out"
+ *      * may also be written as "high" or "low", initializing
+ *        output value as specified ("out" implies "low")
+ *   /value
+ *      * always readable, subject to hardware behavior
+ *      * may be writable, as zero/nonzero
+ *
+ * REVISIT there will likely be an attribute for configuring async
+ * notifications, e.g. to specify polling interval or IRQ trigger type
+ * that would for example trigger a poll() on the "value".
+ */
+
+static ssize_t gpio_direction_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else
+		status = sprintf(buf, "%s\n",
+			test_bit(FLAG_IS_OUT, &desc->flags)
+				? "out" : "in");
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+static ssize_t gpio_direction_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	unsigned		gpio = desc - gpio_desc;
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else if (sysfs_streq(buf, "high"))
+		status = gpio_direction_output(gpio, 1);
+	else if (sysfs_streq(buf, "out") || sysfs_streq(buf, "low"))
+		status = gpio_direction_output(gpio, 0);
+	else if (sysfs_streq(buf, "in"))
+		status = gpio_direction_input(gpio);
+	else
+		status = -EINVAL;
+
+	mutex_unlock(&sysfs_lock);
+	return status ? : size;
+}
+
+static const DEVICE_ATTR(direction, 0644,
+		gpio_direction_show, gpio_direction_store);
+
+static ssize_t gpio_value_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	unsigned		gpio = desc - gpio_desc;
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else
+		status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio));
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+static ssize_t gpio_value_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	unsigned		gpio = desc - gpio_desc;
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else if (!test_bit(FLAG_IS_OUT, &desc->flags))
+		status = -EPERM;
+	else {
+		long		value;
+
+		status = strict_strtol(buf, 0, &value);
+		if (status == 0) {
+			gpio_set_value_cansleep(gpio, value != 0);
+			status = size;
+		}
+	}
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+static /*const*/ DEVICE_ATTR(value, 0644,
+		gpio_value_show, gpio_value_store);
+
+static const struct attribute *gpio_attrs[] = {
+	&dev_attr_direction.attr,
+	&dev_attr_value.attr,
+	NULL,
+};
+
+static const struct attribute_group gpio_attr_group = {
+	.attrs = (struct attribute **) gpio_attrs,
+};
+
+/*
+ * /sys/class/gpio/gpiochipN/
+ *   /base ... matching gpio_chip.base (N)
+ *   /label ... matching gpio_chip.label
+ *   /ngpio ... matching gpio_chip.ngpio
+ */
+
+static ssize_t chip_base_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip	*chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", chip->base);
+}
+static DEVICE_ATTR(base, 0444, chip_base_show, NULL);
+
+static ssize_t chip_label_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip	*chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", chip->label ? : "");
+}
+static DEVICE_ATTR(label, 0444, chip_label_show, NULL);
+
+static ssize_t chip_ngpio_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip	*chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", chip->ngpio);
+}
+static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL);
+
+static const struct attribute *gpiochip_attrs[] = {
+	&dev_attr_base.attr,
+	&dev_attr_label.attr,
+	&dev_attr_ngpio.attr,
+	NULL,
+};
+
+static const struct attribute_group gpiochip_attr_group = {
+	.attrs = (struct attribute **) gpiochip_attrs,
+};
+
+/*
+ * /sys/class/gpio/export ... write-only
+ *	integer N ... number of GPIO to export (full access)
+ * /sys/class/gpio/unexport ... write-only
+ *	integer N ... number of GPIO to unexport
+ */
+static ssize_t export_store(struct class *class, const char *buf, size_t len)
+{
+	long	gpio;
+	int	status;
+
+	status = strict_strtol(buf, 0, &gpio);
+	if (status < 0)
+		goto done;
+
+	/* No extra locking here; FLAG_SYSFS just signifies that the
+	 * request and export were done on behalf of userspace, so
+	 * they may be undone on its behalf too.
+	 */
+
+	status = gpio_request(gpio, "sysfs");
+	if (status < 0)
+		goto done;
+
+	status = gpio_export(gpio, true);
+	if (status < 0)
+		gpio_free(gpio);
+	else
+		set_bit(FLAG_SYSFS, &gpio_desc[gpio].flags);
+
+done:
+	if (status)
+		pr_debug("%s: status %d\n", __func__, status);
+	return status ? : len;
+}
+
+static ssize_t unexport_store(struct class *class, const char *buf, size_t len)
+{
+	long	gpio;
+	int	status;
+
+	status = strict_strtol(buf, 0, &gpio);
+	if (status < 0)
+		goto done;
+
+	status = -EINVAL;
+
+	/* reject bogus commands (gpio_unexport ignores them) */
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	/* No extra locking here; FLAG_SYSFS just signifies that the
+	 * request and export were done on behalf of userspace, so
+	 * they may be undone on its behalf too.
+	 */
+	if (test_and_clear_bit(FLAG_SYSFS, &gpio_desc[gpio].flags)) {
+		status = 0;
+		gpio_free(gpio);
+	}
+done:
+	if (status)
+		pr_debug("%s: status %d\n", __func__, status);
+	return status ? : len;
+}
+
+static struct class_attribute gpio_class_attrs[] = {
+	__ATTR(export, 0200, NULL, export_store),
+	__ATTR(unexport, 0200, NULL, unexport_store),
+	__ATTR_NULL,
+};
+
+static struct class gpio_class = {
+	.name =		"gpio",
+	.owner =	THIS_MODULE,
+
+	.class_attrs =	gpio_class_attrs,
+};
+
+
+/**
+ * gpio_export - export a GPIO through sysfs
+ * @gpio: gpio to make available, already requested
+ * @direction_may_change: true if userspace may change gpio direction
+ * Context: arch_initcall or later
+ *
+ * When drivers want to make a GPIO accessible to userspace after they
+ * have requested it -- perhaps while debugging, or as part of their
+ * public interface -- they may use this routine.  If the GPIO can
+ * change direction (some can't) and the caller allows it, userspace
+ * will see a "direction" sysfs attribute which may be used to change
+ * the gpio's direction.  A "value" attribute will always be provided.
+ *
+ * Returns zero on success, else a negative errno (-ENOENT if too early).
+ */
+int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	unsigned long		flags;
+	struct gpio_desc	*desc;
+	int			status = -EINVAL;
+
+	/* can't export until sysfs is available ... */
+	if (!gpio_class.p) {
+		pr_debug("%s: called too early!\n", __func__);
+		return -ENOENT;
+	}
+
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	mutex_lock(&sysfs_lock);
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	desc = &gpio_desc[gpio];
+	if (test_bit(FLAG_REQUESTED, &desc->flags)
+			&& !test_bit(FLAG_EXPORT, &desc->flags)) {
+		status = 0;
+		if (!desc->chip->direction_input
+				|| !desc->chip->direction_output)
+			direction_may_change = false;	/* chip can't switch */
+	}
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	if (status == 0) {
+		struct device	*dev;
+
+		dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
+					desc, "gpio%d", gpio);
+		if (!IS_ERR(dev)) {	/* failure is an ERR_PTR, never NULL */
+			if (direction_may_change)
+				status = sysfs_create_group(&dev->kobj,
+						&gpio_attr_group);
+			else
+				status = device_create_file(dev,
+						&dev_attr_value);
+			if (status != 0)
+				device_unregister(dev);
+		} else
+			status = PTR_ERR(dev);
+		if (status == 0)
+			set_bit(FLAG_EXPORT, &desc->flags);
+	}
+
+	mutex_unlock(&sysfs_lock);
+
+done:
+	if (status)
+		pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpio_export);
+
+static int match_export(struct device *dev, void *data)
+{
+	return dev_get_drvdata(dev) == data;	/* nonzero on a match */
+}
+
+/**
+ * gpio_unexport - reverse effect of gpio_export()
+ * @gpio: gpio to make unavailable
+ *
+ * This is implicit on gpio_free().  Errors are only reported via pr_debug().
+ */
+void gpio_unexport(unsigned gpio)
+{
+	struct gpio_desc	*desc;
+	int			status = -EINVAL;
+
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	mutex_lock(&sysfs_lock);
+
+	desc = &gpio_desc[gpio];
+	if (test_bit(FLAG_EXPORT, &desc->flags)) {
+		struct device	*dev = NULL;
+
+		dev = class_find_device(&gpio_class, NULL, desc, match_export);
+		if (dev) {
+			clear_bit(FLAG_EXPORT, &desc->flags);
+			put_device(dev);	/* ref from the lookup */
+			device_unregister(dev);
+			status = 0;
+		} else
+			status = -ENODEV;	/* flag set, no device */
+	}
+
+	mutex_unlock(&sysfs_lock);
+done:
+	if (status)
+		pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+}
+EXPORT_SYMBOL_GPL(gpio_unexport);
+
+static int gpiochip_export(struct gpio_chip *chip)
+{
+	int		status;
+	struct device	*dev;
+
+	/* Many systems register gpio chips for SOC support very early,
+	 * before driver model support is available.  In those cases we
+	 * export this later, in gpiolib_sysfs_init() ... here we just
+	 * verify that _some_ field of gpio_class got initialized.
+	 */
+	if (!gpio_class.p)
+		return 0;
+
+	/* use chip->base for the ID; it's already known to be unique */
+	mutex_lock(&sysfs_lock);
+	dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip,
+				"gpiochip%d", chip->base);
+	if (!IS_ERR(dev)) {	/* failure is an ERR_PTR, never NULL */
+		status = sysfs_create_group(&dev->kobj,
+				&gpiochip_attr_group);
+	} else
+		status = PTR_ERR(dev);
+	chip->exported = (status == 0);
+	mutex_unlock(&sysfs_lock);
+
+	if (status) {
+		unsigned long	flags;
+		unsigned	gpio;
+
+		spin_lock_irqsave(&gpio_lock, flags);
+		gpio = chip->base;	/* forget the chip; stay in bounds */
+		while (gpio_is_valid(gpio) && gpio_desc[gpio].chip == chip)
+			gpio_desc[gpio++].chip = NULL;
+		spin_unlock_irqrestore(&gpio_lock, flags);
+
+		pr_debug("%s: chip %s status %d\n", __func__,
+				chip->label, status);
+	}
+
+	return status;
+}
+
+static void gpiochip_unexport(struct gpio_chip *chip)
+{
+	int			status;
+	struct device		*dev;
+
+	mutex_lock(&sysfs_lock);
+	dev = class_find_device(&gpio_class, NULL, chip, match_export);
+	if (dev) {
+		put_device(dev);	/* ref from the lookup */
+		device_unregister(dev);
+		chip->exported = 0;
+		status = 0;
+	} else
+		status = -ENODEV;	/* no device for this chip */
+	mutex_unlock(&sysfs_lock);
+
+	if (status)
+		pr_debug("%s: chip %s status %d\n", __func__,
+				chip->label, status);
+}
+
+static int __init gpiolib_sysfs_init(void)
+{
+	int		status;
+	unsigned long	flags;
+	unsigned	gpio;
+
+	status = class_register(&gpio_class);
+	if (status < 0)
+		return status;
+
+	/* Scan and register the gpio_chips which registered very
+	 * early (e.g. before the class_register above was called).
+	 *
+	 * We run before arch_initcall() so chip->dev nodes can have
+	 * registered, and so arch_initcall() can always gpio_export().
+	 */
+	spin_lock_irqsave(&gpio_lock, flags);
+	for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) {
+		struct gpio_chip	*chip;
+
+		chip = gpio_desc[gpio].chip;
+		if (!chip || chip->exported)
+			continue;
+		/* gpiochip_export() takes a mutex: drop the spinlock */
+		spin_unlock_irqrestore(&gpio_lock, flags);
+		status = gpiochip_export(chip);
+		spin_lock_irqsave(&gpio_lock, flags);
+	}
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	/* NOTE(review): only the last export's status is reported */
+	return status;
+}
+postcore_initcall(gpiolib_sysfs_init);
+
+#else	/* !CONFIG_GPIO_SYSFS: chip export and unexport are no-ops */
+static inline int gpiochip_export(struct gpio_chip *chip)
+{
+	return 0;
+}
+
+static inline void gpiochip_unexport(struct gpio_chip *chip)
+{
+}
+
+#endif /* CONFIG_GPIO_SYSFS */
+
 /**
  * gpiochip_add() - register a gpio_chip
  * @chip: the chip to register, with chip->base initialized
@@ -160,6 +641,11 @@ err:
  * because the chip->base is invalid or already associated with a
  * different chip.  Otherwise it returns zero as a success code.
  *
+ * When gpiochip_add() is called very early during boot, so that GPIOs
+ * can be freely used, the chip->dev device must be registered before
+ * the gpio framework's postcore_initcall().  Otherwise sysfs initialization
+ * for GPIOs will fail rudely.
+ *
  * If chip->base is negative, this requests dynamic assignment of
  * a range of valid GPIOs.
  */
@@ -182,7 +668,7 @@ int gpiochip_add(struct gpio_chip *chip)
 		base = gpiochip_find_base(chip->ngpio);
 		if (base < 0) {
 			status = base;
-			goto fail_unlock;
+			goto unlock;
 		}
 		chip->base = base;
 	}
@@ -197,12 +683,23 @@ int gpiochip_add(struct gpio_chip *chip)
 	if (status == 0) {
 		for (id = base; id < base + chip->ngpio; id++) {
 			gpio_desc[id].chip = chip;
-			gpio_desc[id].flags = 0;
+
+			/* REVISIT:  most hardware initializes GPIOs as
+			 * inputs (often with pullups enabled) so power
+			 * usage is minimized.  Linux code should set the
+			 * gpio direction first thing; but until it does,
+			 * we may expose the wrong direction in sysfs.
+			 */
+			gpio_desc[id].flags = !chip->direction_input
+				? (1 << FLAG_IS_OUT)
+				: 0;
 		}
 	}
 
-fail_unlock:
+unlock:
 	spin_unlock_irqrestore(&gpio_lock, flags);
+	if (status == 0)
+		status = gpiochip_export(chip);
 fail:
 	/* failures here can mean systems won't boot... */
 	if (status)
@@ -239,6 +736,10 @@ int gpiochip_remove(struct gpio_chip *chip)
 	}
 
 	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	if (status == 0)
+		gpiochip_unexport(chip);
+
 	return status;
 }
 EXPORT_SYMBOL_GPL(gpiochip_remove);
@@ -296,6 +797,8 @@ void gpio_free(unsigned gpio)
 		return;
 	}
 
+	gpio_unexport(gpio);
+
 	spin_lock_irqsave(&gpio_lock, flags);
 
 	desc = &gpio_desc[gpio];
@@ -534,10 +1037,6 @@ EXPORT_SYMBOL_GPL(gpio_set_value_cansleep);
 
 #ifdef CONFIG_DEBUG_FS
 
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-
 static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 {
 	unsigned		i;
@@ -614,17 +1113,28 @@ static int gpiolib_show(struct seq_file *s, void *unused)
 	/* REVISIT this isn't locked against gpio_chip removal ... */
 
 	for (gpio = 0; gpio_is_valid(gpio); gpio++) {
+		struct device *dev;
+
 		if (chip == gpio_desc[gpio].chip)
 			continue;
 		chip = gpio_desc[gpio].chip;
 		if (!chip)
 			continue;
 
-		seq_printf(s, "%sGPIOs %d-%d, %s%s:\n",
+		seq_printf(s, "%sGPIOs %d-%d",
 				started ? "\n" : "",
-				chip->base, chip->base + chip->ngpio - 1,
-				chip->label ? : "generic",
-				chip->can_sleep ? ", can sleep" : "");
+				chip->base, chip->base + chip->ngpio - 1);
+		dev = chip->dev;
+		if (dev)
+			seq_printf(s, ", %s/%s",
+				dev->bus ? dev->bus->name : "no-bus",
+				dev->bus_id);
+		if (chip->label)
+			seq_printf(s, ", %s", chip->label);
+		if (chip->can_sleep)
+			seq_printf(s, ", can sleep");
+		seq_printf(s, ":\n");
+
 		started = 1;
 		if (chip->dbg_show)
 			chip->dbg_show(s, chip);
diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
index 7f92fdd5f0e..7efd7d3a81f 100644
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -239,6 +239,7 @@ static int mcp23s08_probe(struct spi_device *spi)
 	mcp->chip.base = pdata->base;
 	mcp->chip.ngpio = 8;
 	mcp->chip.can_sleep = 1;
+	mcp->chip.dev = &spi->dev;
 	mcp->chip.owner = THIS_MODULE;
 
 	spi_set_drvdata(spi, mcp);
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index a380730b61a..cc8468692ae 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -188,6 +188,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios)
 	gc->base = chip->gpio_start;
 	gc->ngpio = gpios;
 	gc->label = chip->client->name;
+	gc->dev = &chip->client->dev;
 	gc->owner = THIS_MODULE;
 }
 
diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c
index d25d356c4f2..fc9c6ae739e 100644
--- a/drivers/gpio/pcf857x.c
+++ b/drivers/gpio/pcf857x.c
@@ -200,6 +200,7 @@ static int pcf857x_probe(struct i2c_client *client,
 
 	gpio->chip.base = pdata->gpio_base;
 	gpio->chip.can_sleep = 1;
+	gpio->chip.dev = &client->dev;
 	gpio->chip.owner = THIS_MODULE;
 
 	/* NOTE:  the OnSemi jlc1562b is also largely compatible with
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index 85949685191..cf02e8fceb4 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -636,6 +636,8 @@ static int tps65010_probe(struct i2c_client *client,
 		tps->outmask = board->outmask;
 
 		tps->chip.label = client->name;
+		tps->chip.dev = &client->dev;
+		tps->chip.owner = THIS_MODULE;
 
 		tps->chip.set = tps65010_gpio_set;
 		tps->chip.direction_output = tps65010_output;
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 8872cc07751..6be43172dc6 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -318,6 +318,8 @@ static int __init egpio_probe(struct platform_device *pdev)
 		ei->chip[i].dev = &(pdev->dev);
 		chip = &(ei->chip[i].chip);
 		chip->label           = "htc-egpio";
+		chip->dev             = &pdev->dev;
+		chip->owner           = THIS_MODULE;
 		chip->get             = egpio_get;
 		chip->set             = egpio_set;
 		chip->direction_input = egpio_direction_input;
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 6be061d09da..1beff5166e5 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -32,6 +32,8 @@ struct module;
 /**
  * struct gpio_chip - abstract a GPIO controller
  * @label: for diagnostics
+ * @dev: optional device providing the GPIOs
+ * @owner: helps prevent removal of modules exporting active GPIOs
  * @direction_input: configures signal "offset" as input, or returns error
  * @get: returns value for signal "offset"; for output signals this
  *	returns either the value actually sensed, or zero
@@ -59,6 +61,7 @@ struct module;
  */
 struct gpio_chip {
 	char			*label;
+	struct device		*dev;
 	struct module		*owner;
 
 	int			(*direction_input)(struct gpio_chip *chip,
@@ -74,6 +77,7 @@ struct gpio_chip {
 	int			base;
 	u16			ngpio;
 	unsigned		can_sleep:1;
+	unsigned		exported:1;
 };
 
 extern const char *gpiochip_is_requested(struct gpio_chip *chip,
@@ -108,7 +112,18 @@ extern void __gpio_set_value(unsigned gpio, int value);
 extern int __gpio_cansleep(unsigned gpio);
 
 
-#else
+#ifdef CONFIG_GPIO_SYSFS
+
+/*
+ * A sysfs interface can be exported by individual drivers if they want,
+ * but more typically is configured entirely from userspace.
+ */
+extern int gpio_export(unsigned gpio, bool direction_may_change);
+extern void gpio_unexport(unsigned gpio);
+
+#endif	/* CONFIG_GPIO_SYSFS */
+
+#else	/* !CONFIG_HAVE_GPIO_LIB */
 
 static inline int gpio_is_valid(int number)
 {
@@ -137,6 +152,20 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
 	gpio_set_value(gpio, value);
 }
 
-#endif
+#endif /* !CONFIG_HAVE_GPIO_LIB */
+
+#ifndef CONFIG_GPIO_SYSFS
+
+/* sysfs support is only available with gpiolib, where it's optional */
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return -ENOSYS;		/* sysfs export is not built in */
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+#endif	/* !CONFIG_GPIO_SYSFS */
 
 #endif /* _ASM_GENERIC_GPIO_H */
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 98be6c5762b..730a20b8357 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -79,6 +79,19 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
 	WARN_ON(1);
 }
 
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	/* GPIO can never have been requested or set as {in,out}put */
+	WARN_ON(1);		/* so every call here is flagged */
+	return -EINVAL;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+	/* GPIO can never have been exported */
+	WARN_ON(1);		/* so every call here is flagged */
+}
+
 static inline int gpio_to_irq(unsigned gpio)
 {
 	/* GPIO can never have been requested or set as input */
-- 
GitLab


From 8f1cc3b10e6ee0c5c7c8ed27f8771c4f252b4862 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Fri, 25 Jul 2008 01:46:09 -0700
Subject: [PATCH 619/853] gpio: mcp23s08 handles multiple chips per chipselect

Teach the mcp23s08 driver about a curious feature of these chips: up to
four of them can share the same chipselect, with the SPI signals wired in
parallel, by matching two bits in the first protocol byte against two
address lines on the chip.

This is handled by three software changes:

  * Platform data now holds an array of per-chip structs, not
    just one chip's address and pullup configuration.

  * Probe() and remove() now use another level of structure,
    wrapping an instance of the original structure for each
    mcp23s08 chip sharing that chipselect.

  * The HAEN bit is set, so that the hardware address bits can no
    longer be ignored (boot firmware may not have enabled them).

The "one struct per chip" preserves the guts of the current code,
but platform_data will need minor changes.

    OLD:
	/* incorrect "slave" ID may not have mattered */
	.slave = 3,
	.pullups = BIT(3) | BIT(1) | BIT(0),

    NEW:
	/* slave address _must_ match chip's wiring */
	.chip[3] = {
		.is_present = true,
		.pullups = BIT(3) | BIT(1) | BIT(0),
	},

There's no change in how things _behave_ for spi_device nodes with a
single mcp23s08 chip.  New multi-chip configurations assign GPIOs in
sequence, without holes.  The spi_device just resembles a bigger
controller, but internally it has multiple gpio_chip instances.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/mcp23s08.c      | 133 +++++++++++++++++++++++++++--------
 include/linux/spi/mcp23s08.h |  25 ++++---
 2 files changed, 118 insertions(+), 40 deletions(-)

diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
index 7efd7d3a81f..8a1b405fefd 100644
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -40,15 +40,26 @@ struct mcp23s08 {
 	struct spi_device	*spi;
 	u8			addr;
 
+	u8			cache[11];
 	/* lock protects the cached values */
 	struct mutex		lock;
-	u8			cache[11];
 
 	struct gpio_chip	chip;
 
 	struct work_struct	work;
 };
 
+/* A given spi_device can represent up to four mcp23s08 chips
+ * sharing the same chipselect but using different addresses
+ * (e.g. chips #0 and #3 might be populated, but not #1 or #2).
+ * Driver data holds all the per-chip data.
+ */
+struct mcp23s08_driver_data {
+	unsigned		ngpio;	/* GPIOs across all chips */
+	struct mcp23s08		*mcp[4];	/* by address, or NULL */
+	struct mcp23s08		chip[];	/* one per present chip */
+};
+
 static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg)
 {
 	u8	tx[2], rx[1];
@@ -208,25 +219,18 @@ done:
 
 /*----------------------------------------------------------------------*/
 
-static int mcp23s08_probe(struct spi_device *spi)
+static int mcp23s08_probe_one(struct spi_device *spi, unsigned addr,
+		unsigned base, unsigned pullups)
 {
-	struct mcp23s08			*mcp;
-	struct mcp23s08_platform_data	*pdata;
+	struct mcp23s08_driver_data	*data = spi_get_drvdata(spi);
+	struct mcp23s08			*mcp = data->mcp[addr];
 	int				status;
 	int				do_update = 0;
 
-	pdata = spi->dev.platform_data;
-	if (!pdata || pdata->slave > 3 || !pdata->base)
-		return -ENODEV;
-
-	mcp = kzalloc(sizeof *mcp, GFP_KERNEL);
-	if (!mcp)
-		return -ENOMEM;
-
 	mutex_init(&mcp->lock);
 
 	mcp->spi = spi;
-	mcp->addr = 0x40 | (pdata->slave << 1);
+	mcp->addr = 0x40 | (addr << 1);
 
 	mcp->chip.label = "mcp23s08",
 
@@ -236,27 +240,28 @@ static int mcp23s08_probe(struct spi_device *spi)
 	mcp->chip.set = mcp23s08_set;
 	mcp->chip.dbg_show = mcp23s08_dbg_show;
 
-	mcp->chip.base = pdata->base;
+	mcp->chip.base = base;
 	mcp->chip.ngpio = 8;
 	mcp->chip.can_sleep = 1;
 	mcp->chip.dev = &spi->dev;
 	mcp->chip.owner = THIS_MODULE;
 
-	spi_set_drvdata(spi, mcp);
-
-	/* verify MCP_IOCON.SEQOP = 0, so sequential reads work */
+	/* verify MCP_IOCON.SEQOP = 0, so sequential reads work,
+	 * and MCP_IOCON.HAEN = 1, so we work with all chips.
+	 */
 	status = mcp23s08_read(mcp, MCP_IOCON);
 	if (status < 0)
 		goto fail;
-	if (status & IOCON_SEQOP) {
+	if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN)) {
 		status &= ~IOCON_SEQOP;
+		status |= IOCON_HAEN;
 		status = mcp23s08_write(mcp, MCP_IOCON, (u8) status);
 		if (status < 0)
 			goto fail;
 	}
 
 	/* configure ~100K pullups */
-	status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups);
+	status = mcp23s08_write(mcp, MCP_GPPU, pullups);
 	if (status < 0)
 		goto fail;
 
@@ -283,11 +288,58 @@ static int mcp23s08_probe(struct spi_device *spi)
 		tx[1] = MCP_IPOL;
 		memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2);
 		status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0);
-
-		/* FIXME check status... */
+		if (status < 0)
+			goto fail;
 	}
 
 	status = gpiochip_add(&mcp->chip);
+fail:
+	if (status < 0)
+		dev_dbg(&spi->dev, "can't setup chip %d, --> %d\n",
+				addr, status);
+	return status;
+}
+
+static int mcp23s08_probe(struct spi_device *spi)
+{
+	struct mcp23s08_platform_data	*pdata;
+	unsigned			addr;
+	unsigned			chips = 0;
+	struct mcp23s08_driver_data	*data;
+	int				status;
+	unsigned			base;
+
+	pdata = spi->dev.platform_data;
+	if (!pdata || !gpio_is_valid(pdata->base))
+		return -ENODEV;
+
+	for (addr = 0; addr < 4; addr++) {
+		if (!pdata->chip[addr].is_present)
+			continue;
+		chips++;
+	}
+	if (!chips)
+		return -ENODEV;
+
+	data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08),
+			GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	spi_set_drvdata(spi, data);
+
+	base = pdata->base;
+	for (addr = 0; addr < 4; addr++) {
+		if (!pdata->chip[addr].is_present)
+			continue;
+		chips--;
+		data->mcp[addr] = &data->chip[chips];
+		status = mcp23s08_probe_one(spi, addr, base,
+				pdata->chip[addr].pullups);
+		if (status < 0)
+			goto fail;
+		base += 8;
+	}
+	data->ngpio = base - pdata->base;
 
 	/* NOTE:  these chips have a relatively sane IRQ framework, with
 	 * per-signal masking and level/edge triggering.  It's not yet
@@ -295,8 +347,9 @@ static int mcp23s08_probe(struct spi_device *spi)
 	 */
 
 	if (pdata->setup) {
-		status = pdata->setup(spi, mcp->chip.base,
-				mcp->chip.ngpio, pdata->context);
+		status = pdata->setup(spi,
+				pdata->base, data->ngpio,
+				pdata->context);
 		if (status < 0)
 			dev_dbg(&spi->dev, "setup --> %d\n", status);
 	}
@@ -304,19 +357,29 @@ static int mcp23s08_probe(struct spi_device *spi)
 	return 0;
 
 fail:
-	kfree(mcp);
+	for (addr = 0; addr < 4; addr++) {
+		int tmp;
+
+		if (!data->mcp[addr])
+			continue;
+		tmp = gpiochip_remove(&data->mcp[addr]->chip);
+		if (tmp < 0)
+			dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+	}
+	kfree(data);
 	return status;
 }
 
 static int mcp23s08_remove(struct spi_device *spi)
 {
-	struct mcp23s08			*mcp = spi_get_drvdata(spi);
+	struct mcp23s08_driver_data	*data = spi_get_drvdata(spi);
 	struct mcp23s08_platform_data	*pdata = spi->dev.platform_data;
+	unsigned			addr;
 	int				status = 0;
 
 	if (pdata->teardown) {
 		status = pdata->teardown(spi,
-				mcp->chip.base, mcp->chip.ngpio,
+				pdata->base, data->ngpio,
 				pdata->context);
 		if (status < 0) {
 			dev_err(&spi->dev, "%s --> %d\n", "teardown", status);
@@ -324,11 +387,20 @@ static int mcp23s08_remove(struct spi_device *spi)
 		}
 	}
 
-	status = gpiochip_remove(&mcp->chip);
+	for (addr = 0; addr < 4; addr++) {
+		int tmp;
+
+		if (!data->mcp[addr])
+			continue;
+
+		tmp = gpiochip_remove(&data->mcp[addr]->chip);
+		if (tmp < 0) {
+			dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+			status = tmp;
+		}
+	}
 	if (status == 0)
-		kfree(mcp);
-	else
-		dev_err(&spi->dev, "%s --> %d\n", "remove", status);
+		kfree(data);
 	return status;
 }
 
@@ -356,4 +428,3 @@ static void __exit mcp23s08_exit(void)
 module_exit(mcp23s08_exit);
 
 MODULE_LICENSE("GPL");
-
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 835ddf47d45..22ef107d770 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -1,18 +1,25 @@
 
-/* FIXME driver should be able to handle all four slaves that
- * can be hooked up to each chipselect, as well as IRQs...
- */
+/* FIXME driver should be able to handle IRQs...  */
+
+struct mcp23s08_chip_info {
+	bool	is_present;		/* true iff populated */
+	u8	pullups;		/* BIT(x) means enable pullup x */
+};
 
 struct mcp23s08_platform_data {
-	/* four slaves can share one SPI chipselect */
-	u8		slave;
+	/* Four slaves (numbered 0..3) can share one SPI chipselect, and
+	 * will provide 8..32 GPIOs using 1..4 gpio_chip instances.
+	 */
+	struct mcp23s08_chip_info	chip[4];
 
-	/* number assigned to the first GPIO */
+	/* "base" is the number of the first GPIO.  Dynamic assignment is
+	 * not currently supported, and even if there are gaps in chip
+	 * addressing the GPIO numbers are sequential .. so for example
+	 * if only slaves 0 and 3 are present, their GPIOs range from
+	 * base to base+15.
+	 */
 	unsigned	base;
 
-	/* pins with pullups */
-	u8		pullups;
-
 	void		*context;	/* param to setup/teardown */
 
 	int		(*setup)(struct spi_device *spi,
-- 
GitLab


From ff1d5c2f0268f4e32103536e2e65480b5b7b6530 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Fri, 25 Jul 2008 01:46:10 -0700
Subject: [PATCH 620/853] gpio: add bt8xxgpio driver

This adds the bt8xxgpio driver.  The purpose of the bt8xxgpio driver is to
export all of the 24 GPIO pins available on Brooktree 8xx chips to the
kernel GPIO infrastructure.

This makes it possible to use a physically modified BT8xx card as
cheap digital GPIO card.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Michael Buesch <mb@bu3sch.de>
Cc: David Brownell <david-b@pacbell.net>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/bt8xxgpio.txt |  67 +++++++
 MAINTAINERS                 |   6 +
 drivers/gpio/Kconfig        |  18 ++
 drivers/gpio/Makefile       |   1 +
 drivers/gpio/bt8xxgpio.c    | 348 ++++++++++++++++++++++++++++++++++++
 5 files changed, 440 insertions(+)
 create mode 100644 Documentation/bt8xxgpio.txt
 create mode 100644 drivers/gpio/bt8xxgpio.c

diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt
new file mode 100644
index 00000000000..d8297e4ebd2
--- /dev/null
+++ b/Documentation/bt8xxgpio.txt
@@ -0,0 +1,67 @@
+===============================================================
+==  BT8XXGPIO driver                                         ==
+==                                                           ==
+==  A driver for a selfmade cheap BT8xx based PCI GPIO-card  ==
+==                                                           ==
+==  For advanced documentation, see                          ==
+==  http://www.bu3sch.de/btgpio.php                          ==
+===============================================================
+
+
+A generic digital 24-port PCI GPIO card can be built out of an ordinary
+Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
+Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
+find them used for low prices on the net.
+
+The bt8xx chip does have 24 digital GPIO ports.
+These ports are accessible via 24 pins on the SMD chip package.
+
+
+==============================================
+==  How to physically access the GPIO pins  ==
+==============================================
+
+There are several ways to access these pins. One might unsolder the whole chip
+and put it on a custom PCI board, or one might only unsolder each individual
+GPIO pin and solder that to some tiny wire. As the chip package really is tiny
+there are some advanced soldering skills needed in any case.
+
+The physical pinouts are drawn in the following ASCII art.
+The GPIO pins are marked with G00-G23
+
+                                           G G G G G G G G G G G G     G G G G G G
+                                           0 0 0 0 0 0 0 0 0 0 1 1     1 1 1 1 1 1
+                                           0 1 2 3 4 5 6 7 8 9 0 1     2 3 4 5 6 7
+           | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+           ---------------------------------------------------------------------------
+         --|                               ^                                     ^   |--
+         --|                               pin 86                           pin 67   |--
+         --|                                                                         |--
+         --|                                                               pin 61 >  |-- G18
+         --|                                                                         |-- G19
+         --|                                                                         |-- G20
+         --|                                                                         |-- G21
+         --|                                                                         |-- G22
+         --|                                                               pin 56 >  |-- G23
+         --|                                                                         |--
+         --|                           Brooktree 878/879                             |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|   O                                                                     |--
+         --|                                                                         |--
+           ---------------------------------------------------------------------------
+           | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+           ^
+           This is pin 1
+
diff --git a/MAINTAINERS b/MAINTAINERS
index be05ef9b7b4..4cbf6016a9b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1043,6 +1043,12 @@ M:	fujita.tomonori@lab.ntt.co.jp
 L:	linux-scsi@vger.kernel.org
 S:	Supported
 
+BT8XXGPIO DRIVER
+P:	Michael Buesch
+M:	mb@bu3sch.de
+W:	http://bu3sch.de/btgpio.php
+S:	Maintained
+
 BTTV VIDEO4LINUX DRIVER
 P:	Mauro Carvalho Chehab
 M:	mchehab@infradead.org
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 6ec0e35b98e..de202dbe530 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -83,6 +83,24 @@ config GPIO_PCF857X
 	  This driver provides an in-kernel interface to those GPIOs using
 	  platform-neutral GPIO calls.
 
+comment "PCI GPIO expanders:"
+
+config GPIO_BT8XX
+	tristate "BT8XX GPIO abuser"
+	depends on PCI && VIDEO_BT848=n
+	help
+	  The BT8xx frame grabber chip has 24 GPIO pins that can be abused
+	  as a cheap PCI GPIO card.
+
+	  This chip can be found on Miro, Hauppauge and STB TV-cards.
+
+	  The card needs to be physically altered for using it as a
+	  GPIO card. For more information on how to build a GPIO card
+	  from a BT8xx TV card, see the documentation file at
+	  Documentation/bt8xxgpio.txt
+
+	  If unsure, say N.
+
 comment "SPI GPIO expanders:"
 
 config GPIO_MAX7301
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 16e796dc541..eeb2f2b2028 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
 obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
+obj-$(CONFIG_GPIO_BT8XX)	+= bt8xxgpio.o
diff --git a/drivers/gpio/bt8xxgpio.c b/drivers/gpio/bt8xxgpio.c
new file mode 100644
index 00000000000..7a1168249dd
--- /dev/null
+++ b/drivers/gpio/bt8xxgpio.c
@@ -0,0 +1,348 @@
+/*
+
+    bt8xx GPIO abuser
+
+    Copyright (C) 2008 Michael Buesch <mb@bu3sch.de>
+
+    Please do _only_ contact the people listed _above_ with issues related to this driver.
+    All the other people listed below are not related to this driver. Their names
+    are only here, because this driver is derived from the bt848 driver.
+
+
+    Derived from the bt848 driver:
+
+    Copyright (C) 1996,97,98 Ralph  Metzler
+			   & Marcus Metzler
+    (c) 1999-2002 Gerd Knorr
+
+    some v4l2 code lines are taken from Justin's bttv2 driver which is
+    (c) 2000 Justin Schoeman
+
+    V4L1 removal from:
+    (c) 2005-2006 Nickolay V. Shmyrev
+
+    Fixes to be fully V4L2 compliant by
+    (c) 2006 Mauro Carvalho Chehab
+
+    Cropping and overscan support
+    Copyright (C) 2005, 2006 Michael H. Schimek
+    Sponsored by OPQ Systems AB
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include <asm/gpio.h>
+
+/* Steal the hardware definitions from the bttv driver. */
+#include "../media/video/bt8xx/bt848.h"
+
+
+#define BT8XXGPIO_NR_GPIOS		24 /* We have 24 GPIO pins */
+
+
+struct bt8xxgpio {
+	spinlock_t lock;
+
+	void __iomem *mmio;
+	struct pci_dev *pdev;
+	struct gpio_chip gpio;
+
+#ifdef CONFIG_PM
+	u32 saved_outen;
+	u32 saved_data;
+#endif
+};
+
+#define bgwrite(dat, adr)	writel((dat), bg->mmio+(adr))
+#define bgread(adr)		readl(bg->mmio+(adr))
+
+
+static int modparam_gpiobase = -1/* dynamic */;
+module_param_named(gpiobase, modparam_gpiobase, int, 0444);
+MODULE_PARM_DESC(gpiobase, "The GPIO number base. -1 means dynamic, which is the default.");
+
+
+static int bt8xxgpio_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
+{
+	struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+	unsigned long flags;
+	u32 outen, data;
+
+	spin_lock_irqsave(&bg->lock, flags);
+
+	data = bgread(BT848_GPIO_DATA);
+	data &= ~(1 << nr);
+	bgwrite(data, BT848_GPIO_DATA);
+
+	outen = bgread(BT848_GPIO_OUT_EN);
+	outen &= ~(1 << nr);
+	bgwrite(outen, BT848_GPIO_OUT_EN);
+
+	spin_unlock_irqrestore(&bg->lock, flags);
+
+	return 0;
+}
+
+static int bt8xxgpio_gpio_get(struct gpio_chip *gpio, unsigned nr)
+{
+	struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&bg->lock, flags);
+	val = bgread(BT848_GPIO_DATA);
+	spin_unlock_irqrestore(&bg->lock, flags);
+
+	return !!(val & (1 << nr));
+}
+
+static int bt8xxgpio_gpio_direction_output(struct gpio_chip *gpio,
+					unsigned nr, int val)
+{
+	struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+	unsigned long flags;
+	u32 outen, data;
+
+	spin_lock_irqsave(&bg->lock, flags);
+
+	outen = bgread(BT848_GPIO_OUT_EN);
+	outen |= (1 << nr);
+	bgwrite(outen, BT848_GPIO_OUT_EN);
+
+	data = bgread(BT848_GPIO_DATA);
+	if (val)
+		data |= (1 << nr);
+	else
+		data &= ~(1 << nr);
+	bgwrite(data, BT848_GPIO_DATA);
+
+	spin_unlock_irqrestore(&bg->lock, flags);
+
+	return 0;
+}
+
+static void bt8xxgpio_gpio_set(struct gpio_chip *gpio,
+			    unsigned nr, int val)
+{
+	struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+	unsigned long flags;
+	u32 data;
+
+	spin_lock_irqsave(&bg->lock, flags);
+
+	data = bgread(BT848_GPIO_DATA);
+	if (val)
+		data |= (1 << nr);
+	else
+		data &= ~(1 << nr);
+	bgwrite(data, BT848_GPIO_DATA);
+
+	spin_unlock_irqrestore(&bg->lock, flags);
+}
+
+static void bt8xxgpio_gpio_setup(struct bt8xxgpio *bg)
+{
+	struct gpio_chip *c = &bg->gpio;
+
+	c->label = bg->pdev->dev.bus_id;
+	c->owner = THIS_MODULE;
+	c->direction_input = bt8xxgpio_gpio_direction_input;
+	c->get = bt8xxgpio_gpio_get;
+	c->direction_output = bt8xxgpio_gpio_direction_output;
+	c->set = bt8xxgpio_gpio_set;
+	c->dbg_show = NULL;
+	c->base = modparam_gpiobase;
+	c->ngpio = BT8XXGPIO_NR_GPIOS;
+	c->can_sleep = 0;
+}
+
+static int bt8xxgpio_probe(struct pci_dev *dev,
+			const struct pci_device_id *pci_id)
+{
+	struct bt8xxgpio *bg;
+	int err;
+
+	bg = kzalloc(sizeof(*bg), GFP_KERNEL);
+	if (!bg)
+		return -ENOMEM;
+
+	bg->pdev = dev;
+	spin_lock_init(&bg->lock);
+
+	err = pci_enable_device(dev);
+	if (err) {
+		printk(KERN_ERR "bt8xxgpio: Can't enable device.\n");
+		goto err_freebg;
+	}
+	if (!request_mem_region(pci_resource_start(dev, 0),
+				pci_resource_len(dev, 0),
+				"bt8xxgpio")) {
+		printk(KERN_WARNING "bt8xxgpio: Can't request iomem (0x%llx).\n",
+		       (unsigned long long)pci_resource_start(dev, 0));
+		err = -EBUSY;
+		goto err_disable;
+	}
+	pci_set_master(dev);
+	pci_set_drvdata(dev, bg);
+
+	bg->mmio = ioremap(pci_resource_start(dev, 0), 0x1000);
+	if (!bg->mmio) {
+		printk(KERN_ERR "bt8xxgpio: ioremap() failed\n");
+		err = -EIO;
+		goto err_release_mem;
+	}
+
+	/* Disable interrupts */
+	bgwrite(0, BT848_INT_MASK);
+
+	/* gpio init */
+	bgwrite(0, BT848_GPIO_DMA_CTL);
+	bgwrite(0, BT848_GPIO_REG_INP);
+	bgwrite(0, BT848_GPIO_OUT_EN);
+
+	bt8xxgpio_gpio_setup(bg);
+	err = gpiochip_add(&bg->gpio);
+	if (err) {
+		printk(KERN_ERR "bt8xxgpio: Failed to register GPIOs\n");
+		goto err_release_mem;
+	}
+
+	printk(KERN_INFO "bt8xxgpio: Abusing BT8xx card for GPIOs %d to %d\n",
+	       bg->gpio.base, bg->gpio.base + BT8XXGPIO_NR_GPIOS - 1);
+
+	return 0;
+
+err_release_mem:
+	release_mem_region(pci_resource_start(dev, 0),
+			   pci_resource_len(dev, 0));
+	pci_set_drvdata(dev, NULL);
+err_disable:
+	pci_disable_device(dev);
+err_freebg:
+	kfree(bg);
+
+	return err;
+}
+
+static void bt8xxgpio_remove(struct pci_dev *pdev)
+{
+	struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+
+	gpiochip_remove(&bg->gpio);
+
+	bgwrite(0, BT848_INT_MASK);
+	bgwrite(~0x0, BT848_INT_STAT);
+	bgwrite(0x0, BT848_GPIO_OUT_EN);
+
+	iounmap(bg->mmio);
+	release_mem_region(pci_resource_start(pdev, 0),
+			   pci_resource_len(pdev, 0));
+	pci_disable_device(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+	kfree(bg);
+}
+
+#ifdef CONFIG_PM
+static int bt8xxgpio_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&bg->lock, flags);
+
+	bg->saved_outen = bgread(BT848_GPIO_OUT_EN);
+	bg->saved_data = bgread(BT848_GPIO_DATA);
+
+	bgwrite(0, BT848_INT_MASK);
+	bgwrite(~0x0, BT848_INT_STAT);
+	bgwrite(0x0, BT848_GPIO_OUT_EN);
+
+	spin_unlock_irqrestore(&bg->lock, flags);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+	return 0;
+}
+
+static int bt8xxgpio_resume(struct pci_dev *pdev)
+{
+	struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+	unsigned long flags;
+	int err;
+
+	pci_set_power_state(pdev, 0);
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+	pci_restore_state(pdev);
+
+	spin_lock_irqsave(&bg->lock, flags);
+
+	bgwrite(0, BT848_INT_MASK);
+	bgwrite(0, BT848_GPIO_DMA_CTL);
+	bgwrite(0, BT848_GPIO_REG_INP);
+	bgwrite(bg->saved_outen, BT848_GPIO_OUT_EN);
+	bgwrite(bg->saved_data & bg->saved_outen,
+		BT848_GPIO_DATA);
+
+	spin_unlock_irqrestore(&bg->lock, flags);
+
+	return 0;
+}
+#else
+#define bt8xxgpio_suspend NULL
+#define bt8xxgpio_resume NULL
+#endif /* CONFIG_PM */
+
+static struct pci_device_id bt8xxgpio_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT848) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT849) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT878) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT879) },
+	{ 0, },
+};
+MODULE_DEVICE_TABLE(pci, bt8xxgpio_pci_tbl);
+
+static struct pci_driver bt8xxgpio_pci_driver = {
+	.name		= "bt8xxgpio",
+	.id_table	= bt8xxgpio_pci_tbl,
+	.probe		= bt8xxgpio_probe,
+	.remove		= bt8xxgpio_remove,
+	.suspend	= bt8xxgpio_suspend,
+	.resume		= bt8xxgpio_resume,
+};
+
+static int bt8xxgpio_init(void)
+{
+	return pci_register_driver(&bt8xxgpio_pci_driver);
+}
+module_init(bt8xxgpio_init)
+
+static void bt8xxgpio_exit(void)
+{
+	pci_unregister_driver(&bt8xxgpio_pci_driver);
+}
+module_exit(bt8xxgpio_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Buesch");
+MODULE_DESCRIPTION("Abuse a BT8xx framegrabber card as generic GPIO card");
-- 
GitLab


From 7444a72effa632fcd8edc566f880d96fe213c73b Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Fri, 25 Jul 2008 01:46:11 -0700
Subject: [PATCH 621/853] gpiolib: allow user-selection

This patch adds functionality to the gpio-lib subsystem to make it
possible to enable the gpio-lib code even if the architecture code didn't
request to get it built in.

The architecture code does still need to implement the gpiolib accessor
functions in its asm/gpio.h file.  This patch adds the implementations for
x86 and PPC.

With these changes it is possible to run generic GPIO expansion cards on
every architecture that implements the trivial wrapper functions.  Support
for more architectures can easily be added.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: David Brownell <david-b@pacbell.net>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Samuel Ortiz <sameo@openedhand.com>
Cc: Kumar Gala <galak@gate.crashing.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/gpio.txt               | 12 +++++-
 arch/arm/Kconfig                     |  8 ++--
 arch/avr32/Kconfig                   |  2 +-
 arch/mips/Kconfig                    |  2 +-
 arch/powerpc/Kconfig                 |  1 +
 arch/powerpc/platforms/52xx/Kconfig  |  2 +-
 arch/powerpc/sysdev/qe_lib/Kconfig   |  2 +-
 arch/x86/Kconfig                     |  1 +
 drivers/Makefile                     |  2 +-
 drivers/gpio/Kconfig                 | 33 ++++++++++++++--
 drivers/gpio/Makefile                |  2 +-
 drivers/i2c/chips/Kconfig            |  2 +-
 drivers/mfd/Kconfig                  |  4 +-
 drivers/of/Kconfig                   |  2 +-
 include/asm-generic/gpio.h           |  2 +-
 include/asm-mips/mach-generic/gpio.h |  2 +-
 include/asm-powerpc/gpio.h           |  4 +-
 include/asm-x86/gpio.h               | 56 ++++++++++++++++++++++++++++
 18 files changed, 116 insertions(+), 23 deletions(-)

diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 8b69811a964..18022e249c5 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -389,11 +389,21 @@ either NULL or the label associated with that GPIO when it was requested.
 
 Platform Support
 ----------------
-To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB"
+To support this framework, a platform's Kconfig will "select" either
+ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB
 and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
 three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
 They may also want to provide a custom value for ARCH_NR_GPIOS.
 
+ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled
+into the kernel on that architecture.
+
+ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user
+can enable it and build it into the kernel optionally.
+
+If neither of these options are selected, the platform does not support
+GPIOs through GPIO-lib and the code cannot be enabled by the user.
+
 Trivial implementations of those functions can directly use framework
 code, which always dispatches through the gpio_chip:
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6fb4f03369f..dabb015aa40 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -268,7 +268,7 @@ config ARCH_EP93XX
 	select GENERIC_GPIO
 	select HAVE_CLK
 	select HAVE_CLK
-	select HAVE_GPIO_LIB
+	select ARCH_REQUIRE_GPIOLIB
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
 
@@ -447,7 +447,7 @@ config ARCH_PXA
 	select ARCH_MTD_XIP
 	select GENERIC_GPIO
 	select HAVE_CLK
-	select HAVE_GPIO_LIB
+	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select TICK_ONESHOT
@@ -479,7 +479,7 @@ config ARCH_SA1100
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
 	select TICK_ONESHOT
-	select HAVE_GPIO_LIB
+	select ARCH_REQUIRE_GPIOLIB
 	help
 	  Support for StrongARM 11x0 based boards.
 
@@ -522,7 +522,7 @@ config ARCH_OMAP
 	bool "TI OMAP"
 	select GENERIC_GPIO
 	select HAVE_CLK
-	select HAVE_GPIO_LIB
+	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	help
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index df4adefedb4..7c239a91627 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -88,7 +88,7 @@ config PLATFORM_AT32AP
 	select SUBARCH_AVR32B
 	select MMU
 	select PERFORMANCE_COUNTERS
-	select HAVE_GPIO_LIB
+	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_ALLOCATOR
 
 #
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b9c754f4070..b4c4eaa5dd2 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -713,7 +713,7 @@ config CSRC_SB1250
 
 config GPIO_TXX9
 	select GENERIC_GPIO
-	select HAVE_GPIO_LIB
+	select ARCH_REQUIRE_GPIOLIB
 	bool
 
 config CFE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index de6b49cd6be..fe88418167c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -110,6 +110,7 @@ config PPC
 	default y
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE
+	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index d664b1bce38..ccbd4958412 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -48,6 +48,6 @@ config PPC_MPC5200_BUGFIX
 config PPC_MPC5200_GPIO
 	bool "MPC5200 GPIO support"
 	depends on PPC_MPC52xx
-	select HAVE_GPIO_LIB
+	select ARCH_REQUIRE_GPIOLIB
 	help
 	  Enable gpiolib support for mpc5200 based boards
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig
index 4bb18f57901..1ce546462be 100644
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ b/arch/powerpc/sysdev/qe_lib/Kconfig
@@ -29,7 +29,7 @@ config QE_GPIO
 	bool "QE GPIO support"
 	depends on QUICC_ENGINE
 	select GENERIC_GPIO
-	select HAVE_GPIO_LIB
+	select ARCH_REQUIRE_GPIOLIB
 	help
 	  Say Y here if you're going to use hardware that connects to the
 	  QE GPIOs.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 66f3ab05b18..e3cba0b4560 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,7 @@ config X86
 	select HAVE_OPROFILE
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
+	select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X
 	select HAVE_KRETPROBES
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE
diff --git a/drivers/Makefile b/drivers/Makefile
index 808e0ae66aa..54ec5e718c0 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -5,7 +5,7 @@
 # Rewritten to use lists instead of if-statements.
 #
 
-obj-$(CONFIG_HAVE_GPIO_LIB)	+= gpio/
+obj-y				+= gpio/
 obj-$(CONFIG_PCI)		+= pci/
 obj-$(CONFIG_PARISC)		+= parisc/
 obj-$(CONFIG_RAPIDIO)		+= rapidio/
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index de202dbe530..5a355f82916 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -2,15 +2,40 @@
 # GPIO infrastructure and expanders
 #
 
-config HAVE_GPIO_LIB
+config ARCH_WANT_OPTIONAL_GPIOLIB
 	bool
+	help
+	  Select this config option from the architecture Kconfig, if
+	  it is possible to use gpiolib on the architecture, but let the
+	  user decide whether to actually build it or not.
+	  Select this instead of ARCH_REQUIRE_GPIOLIB, if your architecture does
+	  not depend on GPIOs being available, but rather let the user
+	  decide whether he needs it or not.
+
+config ARCH_REQUIRE_GPIOLIB
+	bool
+	select GPIOLIB
 	help
 	  Platforms select gpiolib if they use this infrastructure
 	  for all their GPIOs, usually starting with ones integrated
 	  into SOC processors.
+	  Selecting this from the architecture code will cause the gpiolib
+	  code to always get built in.
+
+
+
+menuconfig GPIOLIB
+	bool "GPIO Support"
+	depends on ARCH_WANT_OPTIONAL_GPIOLIB || ARCH_REQUIRE_GPIOLIB
+	select GENERIC_GPIO
+	help
+	  This enables GPIO support through the generic GPIO library.
+	  You only need to enable this, if you also want to enable
+	  one or more of the GPIO expansion card drivers below.
+
+	  If unsure, say N.
 
-menu "GPIO Support"
-	depends on HAVE_GPIO_LIB
+if GPIOLIB
 
 config DEBUG_GPIO
 	bool "Debug GPIO calls"
@@ -116,4 +141,4 @@ config GPIO_MCP23S08
 	  SPI driver for Microchip MCP23S08 I/O expander.  This provides
 	  a GPIO interface supporting inputs and outputs.
 
-endmenu
+endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index eeb2f2b2028..8c45948d1fe 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -2,7 +2,7 @@
 
 ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 
-obj-$(CONFIG_HAVE_GPIO_LIB)	+= gpiolib.o
+obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index 50e0a465374..a95cb9465d6 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -126,7 +126,7 @@ config ISP1301_OMAP
 
 config TPS65010
 	tristate "TPS6501x Power Management chips"
-	depends on HAVE_GPIO_LIB
+	depends on GPIOLIB
 	default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
 	help
 	  If you say yes here you get support for the TPS6501x series of
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index bac9e973ece..1f57a99fd96 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -36,7 +36,7 @@ config MFD_ASIC3
 
 config HTC_EGPIO
 	bool "HTC EGPIO support"
-	depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
+	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
 	help
 	    This driver supports the CPLD egpio chip present on
 	    several HTC phones.  It provides basic support for input
@@ -52,7 +52,7 @@ config HTC_PASIC3
 
 config MFD_TC6393XB
 	bool "Support Toshiba TC6393XB"
-	depends on HAVE_GPIO_LIB
+	depends on GPIOLIB
 	select MFD_CORE
 	help
 	  Support for Toshiba Mobile IO Controller TC6393XB
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 3a7a11a75fb..1d7ec312934 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -4,7 +4,7 @@ config OF_DEVICE
 
 config OF_GPIO
 	def_bool y
-	depends on OF && PPC_OF && HAVE_GPIO_LIB
+	depends on OF && PPC_OF && GPIOLIB
 	help
 	  OpenFirmware GPIO accessors
 
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 1beff5166e5..a3034d20ebd 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -3,7 +3,7 @@
 
 #include <linux/types.h>
 
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
 
 #include <linux/compiler.h>
 
diff --git a/include/asm-mips/mach-generic/gpio.h b/include/asm-mips/mach-generic/gpio.h
index e6b376bd9d0..b4e70208da6 100644
--- a/include/asm-mips/mach-generic/gpio.h
+++ b/include/asm-mips/mach-generic/gpio.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_MACH_GENERIC_GPIO_H
 #define __ASM_MACH_GENERIC_GPIO_H
 
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
 #define gpio_get_value	__gpio_get_value
 #define gpio_set_value	__gpio_set_value
 #define gpio_cansleep	__gpio_cansleep
diff --git a/include/asm-powerpc/gpio.h b/include/asm-powerpc/gpio.h
index 77ad3a890f3..ea04632399d 100644
--- a/include/asm-powerpc/gpio.h
+++ b/include/asm-powerpc/gpio.h
@@ -17,7 +17,7 @@
 #include <linux/errno.h>
 #include <asm-generic/gpio.h>
 
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
 
 /*
  * We don't (yet) implement inlined/rapid versions for on-chip gpios.
@@ -51,6 +51,6 @@ static inline int irq_to_gpio(unsigned int irq)
 	return -EINVAL;
 }
 
-#endif /* CONFIG_HAVE_GPIO_LIB */
+#endif /* CONFIG_GPIOLIB */
 
 #endif /* __ASM_POWERPC_GPIO_H */
diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
index ff87fca0caf..116e9147fe6 100644
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h
@@ -1,6 +1,62 @@
+/*
+ * Generic GPIO API implementation for x86.
+ *
+ * Derived from the generic GPIO API for powerpc:
+ *
+ * Copyright (c) 2007-2008  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
 #ifndef _ASM_I386_GPIO_H
 #define _ASM_I386_GPIO_H
 
+#ifdef CONFIG_X86_RDC321X
 #include <gpio.h>
+#else /* CONFIG_X86_RDC321X */
+
+#include <asm-generic/gpio.h>
+
+#ifdef CONFIG_GPIOLIB
+
+/*
+ * Just call gpiolib.
+ */
+static inline int gpio_get_value(unsigned int gpio)
+{
+	return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+	__gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+	return __gpio_cansleep(gpio);
+}
+
+/*
+ * Not implemented, yet.
+ */
+static inline int gpio_to_irq(unsigned int gpio)
+{
+	return -ENOSYS;
+}
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_GPIOLIB */
+
+#endif /* CONFIG_X86_RDC321X */
 
 #endif /* _ASM_I386_GPIO_H */
-- 
GitLab


From bbcd6d543de335bf81e96477f46a60a8bf51039c Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Fri, 25 Jul 2008 01:46:14 -0700
Subject: [PATCH 622/853] gpio: max732x driver

This adds a driver supporting a family of I2C port expanders from Maxim,
which includes the MAX7319 and MAX7320-7327 chips.

[dbrownell@users.sourceforge.net: minor fixes]
Signed-off-by: Jack Ren <jack.ren@marvell.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig        |  19 ++
 drivers/gpio/Makefile       |   1 +
 drivers/gpio/max732x.c      | 385 ++++++++++++++++++++++++++++++++++++
 include/linux/i2c/max732x.h |  19 ++
 4 files changed, 424 insertions(+)
 create mode 100644 drivers/gpio/max732x.c
 create mode 100644 include/linux/i2c/max732x.h

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 5a355f82916..dbd42d6c93a 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -67,6 +67,25 @@ config GPIO_SYSFS
 
 comment "I2C GPIO expanders:"
 
+config GPIO_MAX732X
+	tristate "MAX7319, MAX7320-7327 I2C Port Expanders"
+	depends on I2C
+	help
+	  Say yes here to support the MAX7319, MAX7320-7327 series of I2C
+	  Port Expanders. Each IO port on these chips has a fixed role of
+	  Input (designated by 'I'), Push-Pull Output ('O'), or Open-Drain
+	  Input and Output (designated by 'P'). The combinations are listed
+	  below:
+
+	  8 bits:	max7319 (8I), max7320 (8O), max7321 (8P),
+		  	max7322 (4I4O), max7323 (4P4O)
+
+	  16 bits:	max7324 (8I8O), max7325 (8P8O),
+		  	max7326 (4I12O), max7327 (4P12O)
+
+	  Board setup code must specify the model to use, and the start
+	  number for these GPIOs.
+
 config GPIO_PCA953X
 	tristate "PCA953x, PCA955x, and MAX7310 I/O ports"
 	depends on I2C
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 8c45948d1fe..01b4bbde195 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
+obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
 obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c
new file mode 100644
index 00000000000..b51c8135ca2
--- /dev/null
+++ b/drivers/gpio/max732x.c
@@ -0,0 +1,385 @@
+/*
+ *  max732x.c - I2C Port Expander with 8/16 I/O
+ *
+ *  Copyright (C) 2007 Marvell International Ltd.
+ *  Copyright (C) 2008 Jack Ren <jack.ren@marvell.com>
+ *  Copyright (C) 2008 Eric Miao <eric.miao@marvell.com>
+ *
+ *  Derived from drivers/gpio/pca953x.c
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/max732x.h>
+
+
+/*
+ * Each port of MAX732x (including MAX7319) falls into one of the
+ * following three types:
+ *
+ *   - Push Pull Output
+ *   - Input
+ *   - Open Drain I/O
+ *
+ * designated by 'O', 'I' and 'P' individually according to MAXIM's
+ * datasheets.
+ *
+ * There are two groups of I/O ports, each group usually includes
+ * up to 8 I/O ports, and is accessed by a specific I2C address:
+ *
+ *   - Group A : by I2C address 0b'110xxxx
+ *   - Group B : by I2C address 0b'101xxxx
+ *
+ * where 'xxxx' is decided by the connections of pin AD2/AD0.  The
+ * address used also affects the initial state of output signals.
+ *
+ * Within each group of ports, there are five known combinations of
+ * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for
+ * the detailed organization of these ports.
+ *
+ * GPIO numbers start from 'gpio_base + 0' to 'gpio_base + 8/16',
+ * and GPIOs from GROUP_A are numbered before those from GROUP_B
+ * (if there are two groups).
+ *
+ * NOTE: MAX7328/MAX7329 are drop-in replacements for PCF8574/a, so
+ * they are not supported by this driver.
+ */
+
+#define PORT_NONE	0x0	/* '/' No Port */
+#define PORT_OUTPUT	0x1	/* 'O' Push-Pull, Output Only */
+#define PORT_INPUT	0x2	/* 'I' Input Only */
+#define PORT_OPENDRAIN	0x3	/* 'P' Open-Drain, I/O */
+
+#define IO_4I4O		0x5AA5	/* O7 O6 I5 I4 I3 I2 O1 O0 */
+#define IO_4P4O		0x5FF5	/* O7 O6 P5 P4 P3 P2 O1 O0 */
+#define IO_8I		0xAAAA	/* I7 I6 I5 I4 I3 I2 I1 I0 */
+#define IO_8P		0xFFFF	/* P7 P6 P5 P4 P3 P2 P1 P0 */
+#define IO_8O		0x5555	/* O7 O6 O5 O4 O3 O2 O1 O0 */
+
+#define GROUP_A(x)	((x) & 0xffff)	/* I2C Addr: 0b'110xxxx */
+#define GROUP_B(x)	((x) << 16)	/* I2C Addr: 0b'101xxxx */
+
+static const struct i2c_device_id max732x_id[] = {
+	{ "max7319", GROUP_A(IO_8I) },
+	{ "max7320", GROUP_B(IO_8O) },
+	{ "max7321", GROUP_A(IO_8P) },
+	{ "max7322", GROUP_A(IO_4I4O) },
+	{ "max7323", GROUP_A(IO_4P4O) },
+	{ "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) },
+	{ "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) },
+	{ "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) },
+	{ "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, max732x_id);
+
+struct max732x_chip {
+	struct gpio_chip gpio_chip;
+
+	struct i2c_client *client;	/* "main" client */
+	struct i2c_client *client_dummy;
+	struct i2c_client *client_group_a;
+	struct i2c_client *client_group_b;
+
+	unsigned int	mask_group_a;
+	unsigned int	dir_input;
+	unsigned int	dir_output;
+
+	struct mutex	lock;
+	uint8_t		reg_out[2];
+};
+
+static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val)
+{
+	struct i2c_client *client;
+	int ret;
+
+	client = group_a ? chip->client_group_a : chip->client_group_b;
+	ret = i2c_smbus_write_byte(client, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed writing\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val)
+{
+	struct i2c_client *client;
+	int ret;
+
+	client = group_a ? chip->client_group_a : chip->client_group_b;
+	ret = i2c_smbus_read_byte(client);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed reading\n");
+		return ret;
+	}
+
+	*val = (uint8_t)ret;
+	return 0;
+}
+
+static inline int is_group_a(struct max732x_chip *chip, unsigned off)
+{
+	return (1u << off) & chip->mask_group_a;
+}
+
+static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off)
+{
+	struct max732x_chip *chip;
+	uint8_t reg_val;
+	int ret;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	ret = max732x_read(chip, is_group_a(chip, off), &reg_val);
+	if (ret < 0)
+		return 0;
+
+	return reg_val & (1u << (off & 0x7));
+}
+
+static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+{
+	struct max732x_chip *chip;
+	uint8_t reg_out, mask = 1u << (off & 0x7);
+	int ret;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	mutex_lock(&chip->lock);
+
+	reg_out = (off > 7) ? chip->reg_out[1] : chip->reg_out[0];
+	reg_out = (val) ? reg_out | mask : reg_out & ~mask;
+
+	ret = max732x_write(chip, is_group_a(chip, off), reg_out);
+	if (ret < 0)
+		goto out;
+
+	/* update the shadow register then */
+	if (off > 7)
+		chip->reg_out[1] = reg_out;
+	else
+		chip->reg_out[0] = reg_out;
+out:
+	mutex_unlock(&chip->lock);
+}
+
+static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
+{
+	struct max732x_chip *chip;
+	unsigned int mask = 1u << off;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	if ((mask & chip->dir_input) == 0) {
+		dev_dbg(&chip->client->dev, "%s port %d is output only\n",
+			chip->client->name, off);
+		return -EACCES;
+	}
+
+	return 0;
+}
+
+static int max732x_gpio_direction_output(struct gpio_chip *gc,
+		unsigned off, int val)
+{
+	struct max732x_chip *chip;
+	unsigned int mask = 1u << off;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	if ((mask & chip->dir_output) == 0) {
+		dev_dbg(&chip->client->dev, "%s port %d is input only\n",
+			chip->client->name, off);
+		return -EACCES;
+	}
+
+	max732x_gpio_set_value(gc, off, val);
+	return 0;
+}
+
+static int __devinit max732x_setup_gpio(struct max732x_chip *chip,
+					const struct i2c_device_id *id,
+					unsigned gpio_start)
+{
+	struct gpio_chip *gc = &chip->gpio_chip;
+	uint32_t id_data = id->driver_data;
+	int i, port = 0;
+
+	for (i = 0; i < 16; i++, id_data >>= 2) {
+		unsigned int mask = 1 << port;
+
+		switch (id_data & 0x3) {
+		case PORT_OUTPUT:
+			chip->dir_output |= mask;
+			break;
+		case PORT_INPUT:
+			chip->dir_input |= mask;
+			break;
+		case PORT_OPENDRAIN:
+			chip->dir_output |= mask;
+			chip->dir_input |= mask;
+			break;
+		default:
+			continue;
+		}
+
+		if (i < 8)
+			chip->mask_group_a |= mask;
+		port++;
+	}
+
+	if (chip->dir_input)
+		gc->direction_input = max732x_gpio_direction_input;
+	if (chip->dir_output) {
+		gc->direction_output = max732x_gpio_direction_output;
+		gc->set = max732x_gpio_set_value;
+	}
+	gc->get = max732x_gpio_get_value;
+	gc->can_sleep = 1;
+
+	gc->base = gpio_start;
+	gc->ngpio = port;
+	gc->label = chip->client->name;
+	gc->owner = THIS_MODULE;
+
+	return port;
+}
+
+static int __devinit max732x_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct max732x_platform_data *pdata;
+	struct max732x_chip *chip;
+	struct i2c_client *c;
+	uint16_t addr_a, addr_b;
+	int ret, nr_port;
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL)
+		return -ENODEV;
+
+	chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+	chip->client = client;
+
+	nr_port = max732x_setup_gpio(chip, id, pdata->gpio_base);
+
+	addr_a = (client->addr & 0x0f) | 0x60;
+	addr_b = (client->addr & 0x0f) | 0x50;
+
+	switch (client->addr & 0x70) {
+	case 0x60:
+		chip->client_group_a = client;
+		if (nr_port > 7) {
+			c = i2c_new_dummy(client->adapter, addr_b);
+			chip->client_group_b = chip->client_dummy = c;
+		}
+		break;
+	case 0x50:
+		chip->client_group_b = client;
+		if (nr_port > 7) {
+			c = i2c_new_dummy(client->adapter, addr_a);
+			chip->client_group_a = chip->client_dummy = c;
+		}
+		break;
+	default:
+		dev_err(&client->dev, "invalid I2C address specified %02x\n",
+				client->addr);
+		ret = -EINVAL;
+		goto out_failed;
+	}
+
+	mutex_init(&chip->lock);
+
+	max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]);
+	if (nr_port > 7)
+		max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]);
+
+	ret = gpiochip_add(&chip->gpio_chip);
+	if (ret)
+		goto out_failed;
+
+	if (pdata->setup) {
+		ret = pdata->setup(client, chip->gpio_chip.base,
+				chip->gpio_chip.ngpio, pdata->context);
+		if (ret < 0)
+			dev_warn(&client->dev, "setup failed, %d\n", ret);
+	}
+
+	i2c_set_clientdata(client, chip);
+	return 0;
+
+out_failed:
+	kfree(chip);
+	return ret;
+}
+
+static int __devexit max732x_remove(struct i2c_client *client)
+{
+	struct max732x_platform_data *pdata = client->dev.platform_data;
+	struct max732x_chip *chip = i2c_get_clientdata(client);
+	int ret;
+
+	if (pdata->teardown) {
+		ret = pdata->teardown(client, chip->gpio_chip.base,
+				chip->gpio_chip.ngpio, pdata->context);
+		if (ret < 0) {
+			dev_err(&client->dev, "%s failed, %d\n",
+					"teardown", ret);
+			return ret;
+		}
+	}
+
+	ret = gpiochip_remove(&chip->gpio_chip);
+	if (ret) {
+		dev_err(&client->dev, "%s failed, %d\n",
+				"gpiochip_remove()", ret);
+		return ret;
+	}
+
+	/* unregister any dummy i2c_client */
+	if (chip->client_dummy)
+		i2c_unregister_device(chip->client_dummy);
+
+	kfree(chip);
+	return 0;
+}
+
+static struct i2c_driver max732x_driver = {
+	.driver = {
+		.name	= "max732x",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= max732x_probe,
+	.remove		= __devexit_p(max732x_remove),
+	.id_table	= max732x_id,
+};
+
+static int __init max732x_init(void)
+{
+	return i2c_add_driver(&max732x_driver);
+}
+module_init(max732x_init);
+
+static void __exit max732x_exit(void)
+{
+	i2c_del_driver(&max732x_driver);
+}
+module_exit(max732x_exit);
+
+MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
+MODULE_DESCRIPTION("GPIO expander driver for MAX732X");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h
new file mode 100644
index 00000000000..e10336631c6
--- /dev/null
+++ b/include/linux/i2c/max732x.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_I2C_MAX732X_H
+#define __LINUX_I2C_MAX732X_H
+
+/* platform data for the MAX732x 8/16-bit I/O expander driver */
+
+struct max732x_platform_data {
+	/* number of the first GPIO */
+	unsigned	gpio_base;
+
+	void		*context;	/* param to setup/teardown */
+
+	int		(*setup)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+	int		(*teardown)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+};
+#endif /* __LINUX_I2C_MAX732X_H */
-- 
GitLab


From fb523f32275344282f20ef3352cbf03e599241e6 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:14 -0700
Subject: [PATCH 623/853] minix: remove !NO_TRUNCATE code

This patch removes the !NO_TRUNCATE code, which could only be enabled by
manually editing the source anyway.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/minix/inode.c |  3 ---
 fs/minix/minix.h |  6 ------
 fs/minix/namei.c | 24 ------------------------
 3 files changed, 33 deletions(-)

diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 84f6242ba6f..523d7371341 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	if (!s->s_root)
 		goto out_iput;
 
-	if (!NO_TRUNCATE)
-		s->s_root->d_op = &minix_dentry_operations;
-
 	if (!(s->s_flags & MS_RDONLY)) {
 		if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
 			ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 326edfe9610..e6a0b193bea 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
 #include <linux/pagemap.h>
 #include <linux/minix_fs.h>
 
-/*
- * change the define below to 0 if you want names > info->s_namelen chars to be
- * truncated. Else they will be disallowed (ENAMETOOLONG).
- */
-#define NO_TRUNCATE 1
 #define INODE_VERSION(inode)	minix_sb(inode->i_sb)->s_version
 #define MINIX_V1		0x0001		/* original minix fs */
 #define MINIX_V2		0x0002		/* minix V2 fs */
@@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations;
 extern const struct inode_operations minix_dir_inode_operations;
 extern const struct file_operations minix_file_operations;
 extern const struct file_operations minix_dir_operations;
-extern struct dentry_operations minix_dentry_operations;
 
 static inline struct minix_sb_info *minix_sb(struct super_block *sb)
 {
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 102241bc9c7..32b131cd612 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
 	return err;
 }
 
-static int minix_hash(struct dentry *dentry, struct qstr *qstr)
-{
-	unsigned long hash;
-	int i;
-	const unsigned char *name;
-
-	i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
-	if (i >= qstr->len)
-		return 0;
-	/* Truncate the name in place, avoids having to define a compare
-	   function. */
-	qstr->len = i;
-	name = qstr->name;
-	hash = init_name_hash();
-	while (i--)
-		hash = partial_name_hash(*name++, hash);
-	qstr->hash = end_name_hash(hash);
-	return 0;
-}
-
-struct dentry_operations minix_dentry_operations = {
-	.d_hash		= minix_hash,
-};
-
 static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode * inode = NULL;
-- 
GitLab


From f905f06fca5d3949eca12f5a43e251a404b3470a Mon Sep 17 00:00:00 2001
From: Shen Feng <shen@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:15 -0700
Subject: [PATCH 624/853] ext2: remove double definitions of xattr macros

remove the definitions of macros:
XATTR_TRUSTED_PREFIX
XATTR_USER_PREFIX
since they are defined in linux/xattr.h

Signed-off-by: Shen Feng <shen@cn.fujitsu.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext2/xattr_security.c | 2 +-
 fs/ext2/xattr_trusted.c  | 4 +---
 fs/ext2/xattr_user.c     | 4 +---
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index eaa23d2d521..70c0dbdcdcb 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@ static size_t
 ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 			 const char *name, size_t name_len)
 {
-	const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+	const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
 	if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 83ee149f353..e8219f8eae9 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
 #include <linux/ext2_fs.h>
 #include "xattr.h"
 
-#define XATTR_TRUSTED_PREFIX "trusted."
-
 static size_t
 ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 			const char *name, size_t name_len)
 {
-	const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+	const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
 	if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index f383e7c3a7b..92495d28c62 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
 #include "ext2.h"
 #include "xattr.h"
 
-#define XATTR_USER_PREFIX "user."
-
 static size_t
 ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 		     const char *name, size_t name_len)
 {
-	const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
 	if (!test_opt(inode->i_sb, XATTR_USER))
-- 
GitLab


From 50c33a84db4aa5082e3af8d873b22344ae2ebea8 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Fri, 25 Jul 2008 01:46:16 -0700
Subject: [PATCH 625/853] ext2: fix typo in Hurd part of
 include/linux/ext2_fs.h

Fix typo in Hurd part of include/linux/ext2_fs.h

The ';' here is redundant and can even pose a problem.  This is actually not
used by the Linux kernel, but it is exposed in GNU/Hurd.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext2_fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 84cec2aa9f1..2efe7b863cf 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -284,8 +284,8 @@ struct ext2_inode {
 
 #ifdef	__hurd__
 #define i_translator	osd1.hurd1.h_i_translator
-#define i_frag		osd2.hurd2.h_i_frag;
-#define i_fsize		osd2.hurd2.h_i_fsize;
+#define i_frag		osd2.hurd2.h_i_frag
+#define i_fsize		osd2.hurd2.h_i_fsize
 #define i_uid_high	osd2.hurd2.h_i_uid_high
 #define i_gid_high	osd2.hurd2.h_i_gid_high
 #define i_author	osd2.hurd2.h_i_author
-- 
GitLab


From 9cfe7b9010aa66da5f3b2bc33d9e30a4d53bd274 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:16 -0700
Subject: [PATCH 626/853] ext3: fix synchronization of quota files in
 journal=data mode

In journal=data mode, it is not enough to do write_inode_now as done in
vfs_quota_on() to write all data to their final location (which is needed for
quota_read to work correctly).  Calling journal_flush() does its job.

Reported-by: Nick <gentuu@gmail.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/super.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2845425077e..50796e90d07 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2759,23 +2759,42 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 
 	if (!test_opt(sb, QUOTA))
 		return -EINVAL;
-	/* Not journalling quota or remount? */
-	if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
-	    !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
+	/* When remounting, no checks are needed and in fact, path is NULL */
+	if (remount)
 		return vfs_quota_on(sb, type, format_id, path, remount);
+
 	err = path_lookup(path, LOOKUP_FOLLOW, &nd);
 	if (err)
 		return err;
+
 	/* Quotafile not on the same filesystem? */
 	if (nd.path.mnt->mnt_sb != sb) {
 		path_put(&nd.path);
 		return -EXDEV;
 	}
-	/* Quotafile not in fs root? */
-	if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
-		printk(KERN_WARNING
-			"EXT3-fs: Quota file not on filesystem root. "
-			"Journalled quota will not work.\n");
+	/* Journaling quota? */
+	if (EXT3_SB(sb)->s_qf_names[type]) {
+		/* Quotafile not of fs root? */
+		if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+			printk(KERN_WARNING
+				"EXT3-fs: Quota file not on filesystem root. "
+				"Journaled quota will not work.\n");
+	}
+
+	/*
+	 * When we journal data on quota file, we have to flush journal to see
+	 * all updates to the file when we bypass pagecache...
+	 */
+	if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
+		/*
+		 * We don't need to lock updates but journal_flush() could
+		 * otherwise be livelocked...
+		 */
+		journal_lock_updates(EXT3_SB(sb)->s_journal);
+		journal_flush(EXT3_SB(sb)->s_journal);
+		journal_unlock_updates(EXT3_SB(sb)->s_journal);
+	}
+
 	path_put(&nd.path);
 	return vfs_quota_on(sb, type, format_id, path, remount);
 }
-- 
GitLab


From 99aeaf639f61ab6be1967e5f92e2e28dafad8383 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:17 -0700
Subject: [PATCH 627/853] ext3: fix typos in messages and comments (journalled
 -> journaled)

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/super.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 50796e90d07..0a1bf82845c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1020,7 +1020,7 @@ static int parse_options (char *options, struct super_block *sb,
 set_qf_name:
 			if (sb_any_quota_enabled(sb)) {
 				printk(KERN_ERR
-					"EXT3-fs: Cannot change journalled "
+					"EXT3-fs: Cannot change journaled "
 					"quota options when quota turned on.\n");
 				return 0;
 			}
@@ -1058,7 +1058,7 @@ set_qf_name:
 clear_qf_name:
 			if (sb_any_quota_enabled(sb)) {
 				printk(KERN_ERR "EXT3-fs: Cannot change "
-					"journalled quota options when "
+					"journaled quota options when "
 					"quota turned on.\n");
 				return 0;
 			}
@@ -1169,14 +1169,14 @@ clear_qf_name:
 		}
 
 		if (!sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT3-fs: journalled quota format "
+			printk(KERN_ERR "EXT3-fs: journaled quota format "
 					"not specified.\n");
 			return 0;
 		}
 	} else {
 		if (sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT3-fs: journalled quota format "
-					"specified with no journalling "
+			printk(KERN_ERR "EXT3-fs: journaled quota format "
+					"specified with no journaling "
 					"enabled.\n");
 			return 0;
 		}
@@ -1370,7 +1370,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 			int ret = ext3_quota_on_mount(sb, i);
 			if (ret < 0)
 				printk(KERN_ERR
-					"EXT3-fs: Cannot turn on journalled "
+					"EXT3-fs: Cannot turn on journaled "
 					"quota: error %d\n", ret);
 		}
 	}
@@ -2712,7 +2712,7 @@ static int ext3_release_dquot(struct dquot *dquot)
 
 static int ext3_mark_dquot_dirty(struct dquot *dquot)
 {
-	/* Are we journalling quotas? */
+	/* Are we journaling quotas? */
 	if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
 	    EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
 		dquot_mark_dquot_dirty(dquot);
-- 
GitLab


From d06bf1d252fe16f5f0d13e04da7a9913420aa1cf Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:18 -0700
Subject: [PATCH 628/853] ext3: correct mount option parsing to detect when
 quota options can be changed

We should not allow the user to change quota mount options when quota is just
suspended.  It would make mount options and internal quota state inconsistent.
Also we should not allow the user to change the quota format when quota is
turned on.  On the other hand we can just silently ignore an option that is set
to the value it already has (mount does this on remount).

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/super.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0a1bf82845c..615788c6843 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb,
 	int data_opt = 0;
 	int option;
 #ifdef CONFIG_QUOTA
-	int qtype;
+	int qtype, qfmt;
 	char *qname;
 #endif
 
@@ -1018,7 +1018,9 @@ static int parse_options (char *options, struct super_block *sb,
 		case Opt_grpjquota:
 			qtype = GRPQUOTA;
 set_qf_name:
-			if (sb_any_quota_enabled(sb)) {
+			if ((sb_any_quota_enabled(sb) ||
+			     sb_any_quota_suspended(sb)) &&
+			    !sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR
 					"EXT3-fs: Cannot change journaled "
 					"quota options when quota turned on.\n");
@@ -1056,7 +1058,9 @@ set_qf_name:
 		case Opt_offgrpjquota:
 			qtype = GRPQUOTA;
 clear_qf_name:
-			if (sb_any_quota_enabled(sb)) {
+			if ((sb_any_quota_enabled(sb) ||
+			     sb_any_quota_suspended(sb)) &&
+			    sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR "EXT3-fs: Cannot change "
 					"journaled quota options when "
 					"quota turned on.\n");
@@ -1069,10 +1073,20 @@ clear_qf_name:
 			sbi->s_qf_names[qtype] = NULL;
 			break;
 		case Opt_jqfmt_vfsold:
-			sbi->s_jquota_fmt = QFMT_VFS_OLD;
-			break;
+			qfmt = QFMT_VFS_OLD;
+			goto set_qf_format;
 		case Opt_jqfmt_vfsv0:
-			sbi->s_jquota_fmt = QFMT_VFS_V0;
+			qfmt = QFMT_VFS_V0;
+set_qf_format:
+			if ((sb_any_quota_enabled(sb) ||
+			     sb_any_quota_suspended(sb)) &&
+			    sbi->s_jquota_fmt != qfmt) {
+				printk(KERN_ERR "EXT3-fs: Cannot change "
+					"journaled quota options when "
+					"quota turned on.\n");
+				return 0;
+			}
+			sbi->s_jquota_fmt = qfmt;
 			break;
 		case Opt_quota:
 		case Opt_usrquota:
@@ -1084,7 +1098,8 @@ clear_qf_name:
 			set_opt(sbi->s_mount_opt, GRPQUOTA);
 			break;
 		case Opt_noquota:
-			if (sb_any_quota_enabled(sb)) {
+			if (sb_any_quota_enabled(sb) ||
+			    sb_any_quota_suspended(sb)) {
 				printk(KERN_ERR "EXT3-fs: Cannot change quota "
 					"options when quota turned on.\n");
 				return 0;
-- 
GitLab


From 3850f7a521dc17659ef6758a219f083418788490 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 25 Jul 2008 01:46:19 -0700
Subject: [PATCH 629/853] jbd: replace potentially false assertion with if
 block

If an error occurs during jbd cache initialisation it is possible for the
journal_head_cache to be NULL when journal_destroy_journal_head_cache is
called.  Replace the J_ASSERT with an if block to handle the situation
correctly.

Note that even with this fix things will break badly if jbd is statically
compiled in and cache initialisation fails.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/journal.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c..15ea16ad866 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1636,9 +1636,10 @@ static int journal_init_journal_head_cache(void)
 
 static void journal_destroy_journal_head_cache(void)
 {
-	J_ASSERT(journal_head_cache != NULL);
-	kmem_cache_destroy(journal_head_cache);
-	journal_head_cache = NULL;
+	if (journal_head_cache) {
+		kmem_cache_destroy(journal_head_cache);
+		journal_head_cache = NULL;
+	}
 }
 
 /*
-- 
GitLab


From f4d79ca2fa211cffc07306eeed7013448e77d7ec Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 25 Jul 2008 01:46:20 -0700
Subject: [PATCH 630/853] jbd: eliminate duplicated code in revocation table
 init/destroy functions

The revocation table initialisation/destruction code is repeated for each
of the two revocation tables stored in the journal.  Refactoring the
duplicated code into functions is tidier, simplifies the logic in
initialisation in particular, and slightly reduces the code size.

There should not be any functional change.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/revoke.c | 127 +++++++++++++++++++-----------------------------
 1 file changed, 51 insertions(+), 76 deletions(-)

diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4..8ff5a7b89b9 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -195,109 +195,84 @@ void journal_destroy_revoke_caches(void)
 	revoke_table_cache = NULL;
 }
 
-/* Initialise the revoke table for a given journal to a given size. */
-
-int journal_init_revoke(journal_t *journal, int hash_size)
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
 {
-	int shift, tmp;
+	int shift = 0;
+	int tmp = hash_size;
+	struct jbd_revoke_table_s *table;
 
-	J_ASSERT (journal->j_revoke_table[0] == NULL);
+	table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+	if (!table)
+		goto out;
 
-	shift = 0;
-	tmp = hash_size;
 	while((tmp >>= 1UL) != 0UL)
 		shift++;
 
-	journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-	if (!journal->j_revoke_table[0])
-		return -ENOMEM;
-	journal->j_revoke = journal->j_revoke_table[0];
-
-	/* Check that the hash_size is a power of two */
-	J_ASSERT(is_power_of_2(hash_size));
-
-	journal->j_revoke->hash_size = hash_size;
-
-	journal->j_revoke->hash_shift = shift;
-
-	journal->j_revoke->hash_table =
+	table->hash_size = hash_size;
+	table->hash_shift = shift;
+	table->hash_table =
 		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-	if (!journal->j_revoke->hash_table) {
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		journal->j_revoke = NULL;
-		return -ENOMEM;
+	if (!table->hash_table) {
+		kmem_cache_free(revoke_table_cache, table);
+		table = NULL;
+		goto out;
 	}
 
 	for (tmp = 0; tmp < hash_size; tmp++)
-		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+		INIT_LIST_HEAD(&table->hash_table[tmp]);
 
-	journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-	if (!journal->j_revoke_table[1]) {
-		kfree(journal->j_revoke_table[0]->hash_table);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		return -ENOMEM;
+out:
+	return table;
+}
+
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+	int i;
+	struct list_head *hash_list;
+
+	for (i = 0; i < table->hash_size; i++) {
+		hash_list = &table->hash_table[i];
+		J_ASSERT(list_empty(hash_list));
 	}
 
-	journal->j_revoke = journal->j_revoke_table[1];
+	kfree(table->hash_table);
+	kmem_cache_free(revoke_table_cache, table);
+}
 
-	/* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+	J_ASSERT(journal->j_revoke_table[0] == NULL);
 	J_ASSERT(is_power_of_2(hash_size));
 
-	journal->j_revoke->hash_size = hash_size;
+	journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+	if (!journal->j_revoke_table[0])
+		goto fail0;
 
-	journal->j_revoke->hash_shift = shift;
+	journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+	if (!journal->j_revoke_table[1])
+		goto fail1;
 
-	journal->j_revoke->hash_table =
-		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-	if (!journal->j_revoke->hash_table) {
-		kfree(journal->j_revoke_table[0]->hash_table);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
-		journal->j_revoke = NULL;
-		return -ENOMEM;
-	}
-
-	for (tmp = 0; tmp < hash_size; tmp++)
-		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+	journal->j_revoke = journal->j_revoke_table[1];
 
 	spin_lock_init(&journal->j_revoke_lock);
 
 	return 0;
-}
 
-/* Destoy a journal's revoke table.  The table must already be empty! */
+fail1:
+	journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+	return -ENOMEM;
+}
 
+/* Destroy a journal's revoke table.  The table must already be empty! */
 void journal_destroy_revoke(journal_t *journal)
 {
-	struct jbd_revoke_table_s *table;
-	struct list_head *hash_list;
-	int i;
-
-	table = journal->j_revoke_table[0];
-	if (!table)
-		return;
-
-	for (i=0; i<table->hash_size; i++) {
-		hash_list = &table->hash_table[i];
-		J_ASSERT (list_empty(hash_list));
-	}
-
-	kfree(table->hash_table);
-	kmem_cache_free(revoke_table_cache, table);
-	journal->j_revoke = NULL;
-
-	table = journal->j_revoke_table[1];
-	if (!table)
-		return;
-
-	for (i=0; i<table->hash_size; i++) {
-		hash_list = &table->hash_table[i];
-		J_ASSERT (list_empty(hash_list));
-	}
-
-	kfree(table->hash_table);
-	kmem_cache_free(revoke_table_cache, table);
 	journal->j_revoke = NULL;
+	if (journal->j_revoke_table[0])
+		journal_destroy_revoke_table(journal->j_revoke_table[0]);
+	if (journal->j_revoke_table[1])
+		journal_destroy_revoke_table(journal->j_revoke_table[1]);
 }
 
 
-- 
GitLab


From 1984bb763c2e50d0ebfb0cf56d1b319bd7afe63a Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 25 Jul 2008 01:46:21 -0700
Subject: [PATCH 631/853] jbd: tidy up revoke cache initialisation and
 destruction

Make revocation cache destruction safe to call if initialisation fails
partially or entirely.  This allows it to be used to clean up in the case
of initialisation failure, simplifying that code slightly.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/revoke.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 8ff5a7b89b9..c7bd649bbbd 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,33 +166,43 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
 	return NULL;
 }
 
+void journal_destroy_revoke_caches(void)
+{
+	if (revoke_record_cache) {
+		kmem_cache_destroy(revoke_record_cache);
+		revoke_record_cache = NULL;
+	}
+	if (revoke_table_cache) {
+		kmem_cache_destroy(revoke_table_cache);
+		revoke_table_cache = NULL;
+	}
+}
+
 int __init journal_init_revoke_caches(void)
 {
+	J_ASSERT(!revoke_record_cache);
+	J_ASSERT(!revoke_table_cache);
+
 	revoke_record_cache = kmem_cache_create("revoke_record",
 					   sizeof(struct jbd_revoke_record_s),
 					   0,
 					   SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
 					   NULL);
 	if (!revoke_record_cache)
-		return -ENOMEM;
+		goto record_cache_failure;
 
 	revoke_table_cache = kmem_cache_create("revoke_table",
 					   sizeof(struct jbd_revoke_table_s),
 					   0, SLAB_TEMPORARY, NULL);
-	if (!revoke_table_cache) {
-		kmem_cache_destroy(revoke_record_cache);
-		revoke_record_cache = NULL;
-		return -ENOMEM;
-	}
+	if (!revoke_table_cache)
+		goto table_cache_failure;
+
 	return 0;
-}
 
-void journal_destroy_revoke_caches(void)
-{
-	kmem_cache_destroy(revoke_record_cache);
-	revoke_record_cache = NULL;
-	kmem_cache_destroy(revoke_table_cache);
-	revoke_table_cache = NULL;
+table_cache_failure:
+	journal_destroy_revoke_caches();
+record_cache_failure:
+	return -ENOMEM;
 }
 
 static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
-- 
GitLab


From 9ebfbe9f926553eabc21b4400918d1216b27ed0c Mon Sep 17 00:00:00 2001
From: Shen Feng <shen@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:21 -0700
Subject: [PATCH 632/853] ext3: improve some code in rb tree part of dir.c

- remove unnecessary code in free_rb_tree_fname
- rename create_dir_info to ext3_htree_create_dir_info
  since it and ext3_htree_free_dir_info are a pair
- replace kmalloc with kzalloc in ext3_htree_create_dir_info

Signed-off-by: Shen Feng <shen@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/dir.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd7242..2eea96ec78e 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root)
 
 	while (n) {
 		/* Do the node's children first */
-		if ((n)->rb_left) {
+		if (n->rb_left) {
 			n = n->rb_left;
 			continue;
 		}
@@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root)
 			parent->rb_right = NULL;
 		n = parent;
 	}
-	root->rb_node = NULL;
 }
 
 
-static struct dir_private_info *create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
 {
 	struct dir_private_info *p;
 
-	p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
 	if (!p)
 		return NULL;
-	p->root.rb_node = NULL;
-	p->curr_node = NULL;
-	p->extra_fname = NULL;
-	p->last_pos = 0;
 	p->curr_hash = pos2maj_hash(pos);
 	p->curr_minor_hash = pos2min_hash(pos);
-	p->next_hash = 0;
 	return p;
 }
 
@@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp,
 	int	ret;
 
 	if (!info) {
-		info = create_dir_info(filp->f_pos);
+		info = ext3_htree_create_dir_info(filp->f_pos);
 		if (!info)
 			return -ENOMEM;
 		filp->private_data = info;
-- 
GitLab


From 3f31fddfa26b7594b44ff2b34f9a04ba409e0f91 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Fri, 25 Jul 2008 01:46:22 -0700
Subject: [PATCH 633/853] jbd: fix race between free buffer and commit
 transaction

journal_try_to_free_buffers() could race with jbd commit transaction when
the latter is holding the buffer reference while waiting for the data
buffer to flush to disk.  If the caller of journal_try_to_free_buffers()
tries hard to release the buffers, it will treat the failure as an error
and return it to its own caller.  We have seen direct IO fail due to
this race.  Some of the callers of releasepage() also expect the buffer
to be dropped when they pass the GFP_KERNEL mask to
releasepage()->journal_try_to_free_buffers().

With this patch, if the caller passes __GFP_WAIT and __GFP_FS to indicate
that this call may wait, then when try_to_free_buffers() fails we wait
for journal_commit_transaction() to finish committing the current
committing transaction and then try to free those buffers again.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/transaction.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
 mm/filemap.c         |  3 +--
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23..8dee3200750 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@ out:
 	return;
 }
 
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold the a count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * tryinf to free that buffer.
+ *
+ * Called with journal->j_state_lock held.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+	transaction_t *transaction = NULL;
+	tid_t tid;
+
+	spin_lock(&journal->j_state_lock);
+	transaction = journal->j_committing_transaction;
+
+	if (!transaction) {
+		spin_unlock(&journal->j_state_lock);
+		return;
+	}
+
+	tid = transaction->t_tid;
+	spin_unlock(&journal->j_state_lock);
+	log_wait_commit(journal, tid);
+}
 
 /**
  * int journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
  * @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard should we try to release
+ * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
+ * release the buffers.
  *
  *
  * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
  * journal_try_to_free_buffer() is changing its state.  But that
  * cannot happen because we never reallocate freed data as metadata
  * while the data is part of a transaction.  Yes?
+ *
+ * Return 0 on failure, 1 on success
  */
 int journal_try_to_free_buffers(journal_t *journal,
-				struct page *page, gfp_t unused_gfp_mask)
+				struct page *page, gfp_t gfp_mask)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
 		if (buffer_jbd(bh))
 			goto busy;
 	} while ((bh = bh->b_this_page) != head);
+
 	ret = try_to_free_buffers(page);
+
+	/*
+	 * There are a number of places where journal_try_to_free_buffers()
+	 * could race with journal_commit_transaction(), the later still
+	 * holds the reference to the buffers to free while processing them.
+	 * try_to_free_buffers() failed to free those buffers. Some of the
+	 * caller of releasepage() request page buffers to be dropped, otherwise
+	 * treat the fail-to-free as errors (such as generic_file_direct_IO())
+	 *
+	 * So, if the caller of try_to_release_page() wants the synchronous
+	 * behaviour(i.e make sure buffers are dropped upon return),
+	 * let's wait for the current transaction to finish flush of
+	 * dirty data buffers, then try to free those buffers again,
+	 * with the journal locked.
+	 */
+	if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+		journal_wait_for_transaction_sync_data(journal);
+		ret = try_to_free_buffers(page);
+	}
+
 busy:
 	return ret;
 }
diff --git a/mm/filemap.c b/mm/filemap.c
index 7675b91f4f6..5d4c880d7cd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2563,9 +2563,8 @@ EXPORT_SYMBOL(generic_file_aio_write);
  * Otherwise return zero.
  *
  * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
  *
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
  */
 int try_to_release_page(struct page *page, gfp_t gfp_mask)
 {
-- 
GitLab


From ef1afd39519b74fbe1f63c9ab5a14490effec0e3 Mon Sep 17 00:00:00 2001
From: Shen Feng <shen@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:23 -0700
Subject: [PATCH 634/853] ext3: remove double definitions of xattr macros

remove the definitions of macros:
XATTR_TRUSTED_PREFIX
XATTR_USER_PREFIX
since they are defined in linux/xattr.h

Signed-off-by: Shen Feng <shen@cn.fujitsu.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/xattr_security.c | 2 +-
 fs/ext3/xattr_trusted.c  | 4 +---
 fs/ext3/xattr_user.c     | 4 +---
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 821efaf2b94..37b81097bdf 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@ static size_t
 ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 			 const char *name, size_t name_len)
 {
-	const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
 
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 0327497a55c..c7c41a410c4 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
 #include <linux/ext3_fs.h>
 #include "xattr.h"
 
-#define XATTR_TRUSTED_PREFIX "trusted."
-
 static size_t
 ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 			const char *name, size_t name_len)
 {
-	const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
 	if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 1abd8f92c44..430fe63b31b 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
 #include <linux/ext3_fs.h>
 #include "xattr.h"
 
-#define XATTR_USER_PREFIX "user."
-
 static size_t
 ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 		     const char *name, size_t name_len)
 {
-	const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
 	if (!test_opt(inode->i_sb, XATTR_USER))
-- 
GitLab


From ae76dd9a6b5bbe5315fb7028e03f68f75b8538f3 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 25 Jul 2008 01:46:23 -0700
Subject: [PATCH 635/853] ext3: handle corrupted orphan list at mount

If the orphan node list includes valid, untruncatable nodes with nlink > 0
the ext3_orphan_cleanup loop which attempts to delete them will not do so,
causing it to loop forever. Fix by checking for such nodes in the
ext3_orphan_get function.

This patch fixes the second case (image hdb.20000009.softlockup.gz)
reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: printk warning fix]
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/ialloc.c        |  9 +++++++++
 fs/ext3/inode.c         | 20 ++++++++++++++------
 include/linux/ext3_fs.h |  1 +
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 77126821b2e..47b678d73e7 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	if (IS_ERR(inode))
 		goto iget_failed;
 
+	/*
+	 * If the orphan has i_nlink > 0 then it should be able to be
+	 * truncated, otherwise it won't be removed from the orphan list
+	 * during processing and an infinite loop will result.
+	 */
+	if (inode->i_nlink && !ext3_can_truncate(inode))
+		goto bad_orphan;
+
 	if (NEXT_ORPHAN(inode) > max_ino)
 		goto bad_orphan;
 	brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
 		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
 		       NEXT_ORPHAN(inode));
 		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+		printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
 		/* Avoid freeing blocks if we got a bad deleted inode */
 		if (inode->i_nlink == 0)
 			inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf3ce4..74b432fa166 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2253,6 +2253,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 	}
 }
 
+int ext3_can_truncate(struct inode *inode)
+{
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return 0;
+	if (S_ISREG(inode->i_mode))
+		return 1;
+	if (S_ISDIR(inode->i_mode))
+		return 1;
+	if (S_ISLNK(inode->i_mode))
+		return !ext3_inode_is_fast_symlink(inode);
+	return 0;
+}
+
 /*
  * ext3_truncate()
  *
@@ -2297,12 +2310,7 @@ void ext3_truncate(struct inode *inode)
 	unsigned blocksize = inode->i_sb->s_blocksize;
 	struct page *page;
 
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	    S_ISLNK(inode->i_mode)))
-		return;
-	if (ext3_inode_is_fast_symlink(inode))
-		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+	if (!ext3_can_truncate(inode))
 		return;
 
 	/*
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 36c54039637..80171ee89a2 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *);
 extern void ext3_dirty_inode(struct inode *);
 extern int ext3_change_inode_journal_flag(struct inode *, int);
 extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
+extern int ext3_can_truncate(struct inode *inode);
 extern void ext3_truncate (struct inode *);
 extern void ext3_set_inode_flags(struct inode *);
 extern void ext3_get_inode_flags(struct ext3_inode_info *);
-- 
GitLab


From 95450f5a7e53d5752ce1a0d0b8282e10fe745ae0 Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Fri, 25 Jul 2008 01:46:24 -0700
Subject: [PATCH 636/853] ext3: don't read inode block if the buffer has a
 write error

A transient I/O error can corrupt inode data.  Here is the scenario:

(1) update inode_A at the block_B
(2) pdflush writes out new inode_A to the filesystem, but it results
    in write I/O error, at this point, BH_Uptodate flag of the buffer
    for block_B is cleared and BH_Write_EIO is set
(3) create new inode_C which located at block_B, and
    __ext3_get_inode_loc() tries to read on-disk block_B because the
    buffer is not uptodate
(4) if it can read on-disk block_B successfully, inode_A is
    overwritten by old data

This patch makes __ext3_get_inode_loc() not read the inode block if the
buffer has the BH_Write_EIO flag.  In this case, the buffer should have the
latest information, so set the uptodate flag on the buffer (this
avoids WARN_ON_ONCE() in mark_buffer_dirty()).

According to this change, we would need to test BH_Write_EIO flag for the
error checking.  Currently nobody checks write I/O errors on metadata
buffers, but it will be done in other patches I'm working on.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: sugita <yumiko.sugita.yf@hitachi.com>
Cc: Satoshi OSHIMA <satoshi.oshima.fk@hitachi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Jan Kara <jack@ucw.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/inode.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 74b432fa166..36f74f17a11 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2521,6 +2521,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
 	}
 	if (!buffer_uptodate(bh)) {
 		lock_buffer(bh);
+
+		/*
+		 * If the buffer has the write error flag, we have failed
+		 * to write out another inode in the same block.  In this
+		 * case, we must not read the block, because a successful
+		 * read would overwrite the buffer with old inode data.
+		 */
+		if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+			set_buffer_uptodate(bh);
+
 		if (buffer_uptodate(bh)) {
 			/* someone brought it uptodate while we waited */
 			unlock_buffer(bh);
-- 
GitLab


From 3ccc3167b0e5d46ab3bf03e22fbdb7616ce038cd Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 25 Jul 2008 01:46:26 -0700
Subject: [PATCH 637/853] ext3: handle deleting corrupted indirect blocks

While freeing indirect blocks we attach a journal head to the parent
buffer head, free the blocks, then journal the parent.  If the indirect
block list is corrupted and points to the parent the journal head will be
detached when the block is cleared, causing an OOPS.

Check for that explicitly and handle it gracefully.

This patch fixes the third case (image hdb.20000057.nullderef.gz)
reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.

Immediately above the change, in the ext3_free_data function, we call
ext3_clear_blocks to clear the indirect blocks in this parent block.  If
one of those blocks happens to actually be the parent block it will clear
b_private / BH_JBD.

I did the check at the end rather than earlier as it seemed more elegant.
I don't think there should be much practical difference, although it is
possible the FS may not be quite so badly corrupted if we did it the other
way (and didn't clear the block at all).  To be honest, I'm not convinced
there aren't other similar failure modes lurking in this code, although I
couldn't find any with a quick review.

[akpm@linux-foundation.org: fix printk warning]
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/inode.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 36f74f17a11..3bf07d70b91 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
 
 	if (this_bh) {
 		BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
-		ext3_journal_dirty_metadata(handle, this_bh);
+
+		/*
+		 * The buffer head should have an attached journal head at this
+		 * point. However, if the data is corrupted and an indirect
+		 * block pointed to itself, it would have been detached when
+		 * the block was cleared. Check for this instead of OOPSing.
+		 */
+		if (bh2jh(this_bh))
+			ext3_journal_dirty_metadata(handle, this_bh);
+		else
+			ext3_error(inode->i_sb, "ext3_free_data",
+				   "circular indirect block detected, "
+				   "inode=%lu, block=%llu",
+				   inode->i_ino,
+				   (unsigned long long)this_bh->b_blocknr);
 	}
 }
 
-- 
GitLab


From a10320e8f7c4dcfa050aac566092f29b40458d5a Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:26 -0700
Subject: [PATCH 638/853] jbd: unexport journal_update_superblock

Remove the unused EXPORT_SYMBOL(journal_update_superblock).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/journal.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 15ea16ad866..aa7143a8349 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
 EXPORT_SYMBOL(journal_create);
 EXPORT_SYMBOL(journal_load);
 EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
 EXPORT_SYMBOL(journal_abort);
 EXPORT_SYMBOL(journal_errno);
 EXPORT_SYMBOL(journal_ack_err);
-- 
GitLab


From fc80c44277b3c92d808b73e9d40e120229aa4b6a Mon Sep 17 00:00:00 2001
From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:29 -0700
Subject: [PATCH 639/853] jbd: positively dispose the unmapped data buffers in
 journal_commit_transaction()

After ext3-ordered files are truncated, there is a possibility that the
pages which cannot be estimated still remain.  Remaining pages can be
released when the system has very little memory.  So, it is not memory
leakage.  But the resource management software etc.  may not work
correctly.

It is possible that journal_unmap_buffer() cannot release the buffers, and
the pages to which they belong because they are attached to a committing
transaction and journal_unmap_buffer() cannot release them.  To release
such the buffers and the pages later, journal_unmap_buffer() leaves it to
journal_commit_transaction().  (journal_unmap_buffer() puts the mark
'BH_Freed' to the buffers so that journal_commit_transaction() can
identify whether they can be released or not.)

In the journalled mode and the writeback mode, jbd deals only with metadata
buffers.  But in the ordered mode, jbd deals with metadata buffers and also
data buffers.

Actually, journal_commit_transaction() releases only the metadata buffers
of which release is demanded by journal_unmap_buffer(), and also releases
the pages to which they belong if possible.

As a result, the data buffers of which release is demanded by
journal_unmap_buffer() remain after a transaction commits.  And also the
pages to which they belong remain.

Such the remained pages don't have mapping any longer.  Due to this fact,
there is a possibility that the pages which cannot be estimated remain.

The metadata buffers marked 'BH_Freed' and the pages to which
they belong can be released at 'JBD: commit phase 7'.

Therefore, by applying the same code into 'JBD: commit phase 2' (where the
data buffers are done with), journal_commit_transaction() can also release
the data buffers marked 'BH_Freed' and the pages to which they belong.

As a result, all the buffers marked 'BH_Freed' can be released, and also
all the pages to which these buffers belong can be released at
journal_commit_transaction().  So, the page which cannot be estimated is
lost.

<<Excerpt of code at 'JBD: commit phase 7'>>
 >         spin_lock(&journal->j_list_lock);
 >         while (commit_transaction->t_forget) {
 >                 transaction_t *cp_transaction;
 >                 struct buffer_head *bh;
 >
 >                 jh = commit_transaction->t_forget;
 >...
 >                 if (buffer_freed(bh)) {
 >                 ^^^^^^^^^^^^^^^^^^^^^^^^
 >                         clear_buffer_freed(bh);
 >                        ^^^^^^^^^^^^^^^^^^^^^^^^
 >                         clear_buffer_jbddirty(bh);
 >                 }
 >
 >                 if (buffer_jbddirty(bh)) {
 >                         JBUFFER_TRACE(jh, "add to new checkpointing trans");
 >                         __journal_insert_checkpoint(jh, commit_transaction);
 >                         JBUFFER_TRACE(jh, "refile for checkpoint writeback");
 >                         __journal_refile_buffer(jh);
 >                         jbd_unlock_bh_state(bh);
 >                 } else {
 >                         J_ASSERT_BH(bh, !buffer_dirty(bh));
 > ...
 >                         JBUFFER_TRACE(jh, "refile or unfile freed buffer");
 >                         __journal_refile_buffer(jh);
 >                         if (!jh->b_transaction) {
 >                                 jbd_unlock_bh_state(bh);
 >                                  /* needs a brelse */
 >                                 journal_remove_journal_head(bh);
 >                                 release_buffer_page(bh);
 >                                 ^^^^^^^^^^^^^^^^^^^^^^^^
 >                         } else
 >                 }
****************************************************************
* Apply the code of "^^^^^^" lines into 'JBD: commit phase 2' *
****************************************************************

At journal_commit_transaction() code, there is one extra message in the
series of jbd debug messages.  ("JBD: commit phase 2") This patch fixes
it, too.

Signed-off-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Acked-by: Jan Kara <jack@suse.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/commit.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498c..f943b9b3f20 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 
 /*
  * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
  * After the transaction commits, these pages are left on the LRU, with no
  * ->mapping, and with attached buffers.  These pages are trivially reclaimable
  * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
  * So here, we have a buffer which has just come off the forget list.  Look to
  * see if we can strip all buffers from the backing page.
  *
- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock.  The caller provided us with a ref
+ * against the buffer, and we drop that here.
  */
 static void release_buffer_page(struct buffer_head *bh)
 {
@@ -77,6 +77,19 @@ nope:
 	__brelse(bh);
 }
 
+/*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+	if (buffer_freed(bh)) {
+		clear_buffer_freed(bh);
+		release_buffer_page(bh);
+	} else
+		put_bh(bh);
+}
+
 /*
  * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
  * held.  For ranking reasons we must trylock.  If we lose, schedule away and
@@ -231,7 +244,7 @@ write_out_data:
 			if (locked)
 				unlock_buffer(bh);
 			BUFFER_TRACE(bh, "already cleaned up");
-			put_bh(bh);
+			release_data_buffer(bh);
 			continue;
 		}
 		if (locked && test_clear_buffer_dirty(bh)) {
@@ -258,10 +271,10 @@ write_out_data:
 			if (locked)
 				unlock_buffer(bh);
 			journal_remove_journal_head(bh);
-			/* Once for our safety reference, once for
+			/* One for our safety reference, other for
 			 * journal_remove_journal_head() */
 			put_bh(bh);
-			put_bh(bh);
+			release_data_buffer(bh);
 		}
 
 		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -443,7 +456,7 @@ void journal_commit_transaction(journal_t *journal)
 		} else {
 			jbd_unlock_bh_state(bh);
 		}
-		put_bh(bh);
+		release_data_buffer(bh);
 		cond_resched_lock(&journal->j_list_lock);
 	}
 	spin_unlock(&journal->j_list_lock);
@@ -453,8 +466,6 @@ void journal_commit_transaction(journal_t *journal)
 
 	journal_write_revoke_records(journal, commit_transaction);
 
-	jbd_debug(3, "JBD: commit phase 2\n");
-
 	/*
 	 * If we found any dirty or locked buffers, then we should have
 	 * looped back up to the write_out_data label.  If there weren't
-- 
GitLab


From 8ef2720397bb813d4985405a5ae7b8ad6474188b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:29 -0700
Subject: [PATCH 640/853] ext3: kill 2 useless magic numbers

dx_root_limit() will never return 20, and I can't figure out what 20
stands for.  This function has never changed since htree directory
indexing was merged.

Similar for dx_node_limit() and the magic 22.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/namei.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80154f..d282ea87008 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
 {
 	unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
 		EXT3_DIR_REC_LEN(2) - infosize;
-	return 0? 20: entry_space / sizeof(struct dx_entry);
+	return entry_space / sizeof(struct dx_entry);
 }
 
 static inline unsigned dx_node_limit (struct inode *dir)
 {
 	unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
-	return 0? 22: entry_space / sizeof(struct dx_entry);
+	return entry_space / sizeof(struct dx_entry);
 }
 
 /*
-- 
GitLab


From cbe5f466f6995e10a10c7ae66d6dc8608f08a6b8 Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Fri, 25 Jul 2008 01:46:30 -0700
Subject: [PATCH 641/853] jbd: don't abort if flushing file data failed

In ordered mode, the current jbd aborts the journal if a file data buffer
has an error.  But this behavior is unintended, and we found that it has
been adopted accidentally.

This patch undoes it and just calls printk() instead of aborting the
journal.  Additionally, set AS_EIO into the address_space object of the
failed buffer which is submitted by journal_do_submit_data() so that
fsync() can get -EIO.

Missing error checks are also added to report errors on file data
buffers to the user.  The following buffers are targeted:

  (a) the buffer which has already been written out by pdflush
  (b) the buffer which has been unlocked before scanned in the
      t_locked_list loop

[akpm@linux-foundation.org: improve grammar in a printk]
Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Acked-by: Jan Kara <jack@suse.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/commit.c | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index f943b9b3f20..2eccbfaa1d4 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -185,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
 /*
  *  Submit all the data buffers to disk
  */
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
 				transaction_t *commit_transaction)
 {
 	struct journal_head *jh;
@@ -193,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
 	int locked;
 	int bufs = 0;
 	struct buffer_head **wbuf = journal->j_wbuf;
+	int err = 0;
 
 	/*
 	 * Whenever we unlock the journal and sleep, things can get added
@@ -266,6 +267,8 @@ write_out_data:
 			put_bh(bh);
 		} else {
 			BUFFER_TRACE(bh, "writeout complete: unfile");
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
 			__journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
 			if (locked)
@@ -284,6 +287,8 @@ write_out_data:
 	}
 	spin_unlock(&journal->j_list_lock);
 	journal_do_submit_data(wbuf, bufs);
+
+	return err;
 }
 
 /*
@@ -423,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
-	err = 0;
-	journal_submit_data_buffers(journal, commit_transaction);
+	err = journal_submit_data_buffers(journal, commit_transaction);
 
 	/*
 	 * Wait for all previously submitted IO to complete.
@@ -439,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
 		if (buffer_locked(bh)) {
 			spin_unlock(&journal->j_list_lock);
 			wait_on_buffer(bh);
-			if (unlikely(!buffer_uptodate(bh)))
-				err = -EIO;
 			spin_lock(&journal->j_list_lock);
 		}
+		if (unlikely(!buffer_uptodate(bh))) {
+			if (TestSetPageLocked(bh->b_page)) {
+				spin_unlock(&journal->j_list_lock);
+				lock_page(bh->b_page);
+				spin_lock(&journal->j_list_lock);
+			}
+			if (bh->b_page->mapping)
+				set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+			unlock_page(bh->b_page);
+			SetPageError(bh->b_page);
+			err = -EIO;
+		}
 		if (!inverted_lock(journal, bh)) {
 			put_bh(bh);
 			spin_lock(&journal->j_list_lock);
@@ -461,8 +476,14 @@ void journal_commit_transaction(journal_t *journal)
 	}
 	spin_unlock(&journal->j_list_lock);
 
-	if (err)
-		journal_abort(journal, err);
+	if (err) {
+		char b[BDEVNAME_SIZE];
+
+		printk(KERN_WARNING
+			"JBD: Detected IO errors while flushing file data "
+			"on %s\n", bdevname(journal->j_fs_dev, b));
+		err = 0;
+	}
 
 	journal_write_revoke_records(journal, commit_transaction);
 
-- 
GitLab


From 275c0a8f1253a7542ad9726956c918d8a1f694c4 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 25 Jul 2008 01:46:31 -0700
Subject: [PATCH 642/853] ext3: validate directory entry data before use

ext3_dx_find_entry uses ext3_next_entry without verifying that the entry
is valid.  If its rec_len == 0 this causes an infinite loop.  Refactor the
loop to check the validity of entries before checking whether they match
and moving onto the next one.

There are other uses of ext3_next_entry in this file which also look
problematic.  They should be reviewed and fixed if/when we have a
test-case that triggers them.

This patch fixes the first case (image hdb.25.softlockup.gz) reported in
http://bugzilla.kernel.org/show_bug.cgi?id=10882.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/namei.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index d282ea87008..de13e919cd8 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
 		de = (struct ext3_dir_entry_2 *) bh->b_data;
 		top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
 				       EXT3_DIR_REC_LEN(0));
-		for (; de < top; de = ext3_next_entry(de))
-		if (ext3_match (namelen, name, de)) {
-			if (!ext3_check_dir_entry("ext3_find_entry",
-						  dir, de, bh,
-				  (block<<EXT3_BLOCK_SIZE_BITS(sb))
-					  +((char *)de - bh->b_data))) {
-				brelse (bh);
+		for (; de < top; de = ext3_next_entry(de)) {
+			int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
+				  + ((char *) de - bh->b_data);
+
+			if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
+				brelse(bh);
 				*err = ERR_BAD_DX_DIR;
 				goto errout;
 			}
-			*res_dir = de;
-			dx_release (frames);
-			return bh;
+
+			if (ext3_match(namelen, name, de)) {
+				*res_dir = de;
+				dx_release(frames);
+				return bh;
+			}
 		}
 		brelse (bh);
 		/* Check to see if we should continue to search */
-- 
GitLab


From c0a1633b6201ef79e31b7da464d44fdf5953054d Mon Sep 17 00:00:00 2001
From: Adam Greenblatt <adam.greenblatt@gmail.com>
Date: Fri, 25 Jul 2008 01:46:32 -0700
Subject: [PATCH 643/853] isofs: fix minor filesystem corruption

Some iso9660 images contain files with rockridge data that is either
incorrect or incompletely parsed.  Prior to commit
f2966632a134e865db3c819346a1dc7d96e05309 ("[PATCH] rock: handle directory
overflows") (included with kernel 2.6.13) the kernel ignored the rockridge
data for these files, while still allowing the files to be accessed under
their non-rockridge names.  That commit inadvertently changed things so
that files with invalid rockridge data could not be accessed at all.  (I
ran across the problem when comparing some old CDs with hard disk copies I
had made long ago under kernel 2.4: a few of the files on the hard disk
copies were no longer visible on the CDs.)

This change reverts to the pre-2.6.13 behavior.

Signed-off-by: Adam Greenblatt <adam.greenblatt@gmail.com>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <stable@kernel.org>		[2.6.25.x, 2.6.26.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/isofs/rock.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 6bd48f0a704..c2fb2dd0131 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@ repeat:
 
 	while (rs.len > 2) { /* There may be one byte for padding somewhere */
 		rr = (struct rock_ridge *)rs.chr;
+		/*
+		 * Ignore rock ridge info if rr->len is out of range, but
+		 * don't return -EIO because that would make the file
+		 * invisible.
+		 */
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
 		sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@ repeat:
 			goto eio;
 		rs.chr += rr->len;
 		rs.len -= rr->len;
+		/*
+		 * As above, just ignore the rock ridge info if rr->len
+		 * is bogus.
+		 */
 		if (rs.len < 0)
-			goto eio;	/* corrupted isofs */
+			goto out;	/* Something got screwed up here */
 
 		switch (sig) {
 		case SIG('R', 'R'):
@@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 repeat:
 	while (rs.len > 2) { /* There may be one byte for padding somewhere */
 		rr = (struct rock_ridge *)rs.chr;
+		/*
+		 * Ignore rock ridge info if rr->len is out of range, but
+		 * don't return -EIO because that would make the file
+		 * invisible.
+		 */
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
 		sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@ repeat:
 			goto eio;
 		rs.chr += rr->len;
 		rs.len -= rr->len;
+		/*
+		 * As above, just ignore the rock ridge info if rr->len
+		 * is bogus.
+		 */
 		if (rs.len < 0)
-			goto eio;	/* corrupted isofs */
+			goto out;	/* Something got screwed up here */
 
 		switch (sig) {
 #ifndef CONFIG_ZISOFS		/* No flag for SF or ZF */
-- 
GitLab


From de0ca06a99c33df8333955642843331ab6b6e7ff Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:34 -0700
Subject: [PATCH 644/853] coda: remove CODA_FS_OLD_API

While fixing CONFIG_ leakages to the userspace kernel headers I ran into
CODA_FS_OLD_API.

After five years, are there still people using the old API left?
Especially considering that you have to choose at compile time which API
to support in the kernel (and distributions tend to offer the new API for
some time).

Jan: "The old API can definitely go.  Around the time the new
      interface went in there were some non-Coda userspace file system
      implementations that took a while longer to convert to the new API,
      but by now they all switched to the new interface or in some cases
      to a FUSE-based solution."

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Jan Harkes <jaharkes@cs.cmu.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig           | 14 --------------
 fs/coda/coda_linux.c |  6 ++----
 fs/coda/psdev.c      |  4 ----
 fs/coda/upcall.c     | 15 +--------------
 include/linux/coda.h | 43 -------------------------------------------
 5 files changed, 3 insertions(+), 79 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a2ff9..ed563b9e352 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2093,20 +2093,6 @@ config CODA_FS
 	  To compile the coda client support as a module, choose M here: the
 	  module will be called coda.
 
-config CODA_FS_OLD_API
-	bool "Use 96-bit Coda file identifiers"
-	depends on CODA_FS
-	help
-	  A new kernel-userspace API had to be introduced for Coda v6.0
-	  to support larger 128-bit file identifiers as needed by the
-	  new realms implementation.
-
-	  However this new API is not backward compatible with older
-	  clients. If you really need to run the old Coda userspace
-	  cache manager then say Y.
-
-	  For most cases you probably want to say N.
-
 config AFS_FS
 	tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c854890f9..bf4a3fd3c8e 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
 char * coda_f2s(struct CodaFid *f)
 {
 	static char s[60];
-#ifdef CONFIG_CODA_FS_OLD_API
- 	sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
-#else
+
  	sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
-#endif
+
 	return s;
 }
 
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 40c36f7352a..0d9b80ec689 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -378,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
 MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
 MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
 MODULE_LICENSE("GPL");
-#ifdef CONFIG_CODA_FS_OLD_API
-MODULE_VERSION("5.3.21");
-#else
 MODULE_VERSION("6.6");
-#endif
 
 static int __init init_coda(void)
 {
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531094d..ce432bca95d 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
         inp->ih.opcode = opcode;
 	inp->ih.pid = current->pid;
 	inp->ih.pgid = task_pgrp_nr(current);
-#ifdef CONFIG_CODA_FS_OLD_API
-	memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
-	inp->ih.cred.cr_fsuid = current->fsuid;
-#else
 	inp->ih.uid = current->fsuid;
-#endif
+
 	return (void*)inp;
 }
 
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
 	union inputArgs *inp;
 	union outputArgs *outp;
 	int insize, outsize, error;
-#ifdef CONFIG_CODA_FS_OLD_API
-	struct coda_cred cred = { 0, };
-	cred.cr_fsuid = uid;
-#endif
 	
 	insize = SIZE(release);
 	UPARG(CODA_CLOSE);
 	
-#ifdef CONFIG_CODA_FS_OLD_API
-	memcpy(&(inp->ih.cred), &cred, sizeof(cred));
-#else
 	inp->ih.uid = uid;
-#endif
-	
         inp->coda_close.VFid = *fid;
         inp->coda_close.flags = flags;
 
diff --git a/include/linux/coda.h b/include/linux/coda.h
index b5cf0780c51..96c87693800 100644
--- a/include/linux/coda.h
+++ b/include/linux/coda.h
@@ -199,28 +199,6 @@ typedef u_int32_t vuid_t;
 typedef u_int32_t vgid_t;
 #endif /*_VUID_T_ */
 
-#ifdef CONFIG_CODA_FS_OLD_API
-struct CodaFid {
-	u_int32_t opaque[3];
-};
-
-static __inline__ ino_t  coda_f2i(struct CodaFid *fid)
-{
-	if ( ! fid ) 
-		return 0; 
-	if (fid->opaque[1] == 0xfffffffe || fid->opaque[1] == 0xffffffff)
-		return ((fid->opaque[0] << 20) | (fid->opaque[2] & 0xfffff));
-	else
-		return (fid->opaque[2] + (fid->opaque[1]<<10) + (fid->opaque[0]<<20));
-}
-
-struct coda_cred {
-    vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
-    vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
-};
-
-#else /* not defined(CONFIG_CODA_FS_OLD_API) */
-
 struct CodaFid {
 	u_int32_t opaque[4];
 };
@@ -228,8 +206,6 @@ struct CodaFid {
 #define coda_f2i(fid)\
 	(fid ? (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0)
 
-#endif
-
 #ifndef _VENUS_VATTR_T_
 #define _VENUS_VATTR_T_
 /*
@@ -313,15 +289,7 @@ struct coda_statfs {
 
 #define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t)
 
-#if 0
-#define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */
-#define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */
-#endif
-#ifdef CONFIG_CODA_FS_OLD_API
-#define CODA_KERNEL_VERSION 2 /* venus_lookup got an extra parameter */
-#else
 #define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */
-#endif
 
 /*
  *        Venus <-> Coda  RPC arguments
@@ -329,16 +297,9 @@ struct coda_statfs {
 struct coda_in_hdr {
     u_int32_t opcode;
     u_int32_t unique;	    /* Keep multiple outstanding msgs distinct */
-#ifdef CONFIG_CODA_FS_OLD_API
-    u_int16_t pid;	    /* Common to all */
-    u_int16_t pgid;	    /* Common to all */
-    u_int16_t sid;          /* Common to all */
-    struct coda_cred cred;  /* Common to all */
-#else
     pid_t pid;
     pid_t pgid;
     vuid_t uid;
-#endif
 };
 
 /* Really important that opcode and unique are 1st two fields! */
@@ -613,11 +574,7 @@ struct coda_vget_out {
 /* CODA_PURGEUSER is a venus->kernel call */
 struct coda_purgeuser_out {
     struct coda_out_hdr oh;
-#ifdef CONFIG_CODA_FS_OLD_API
-    struct coda_cred cred;
-#else
     vuid_t uid;
-#endif
 };
 
 /* coda_zapfile: */
-- 
GitLab


From 3084b72de73a6f8af0f16989ffb348068bd066d4 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias@kaehlcke.net>
Date: Fri, 25 Jul 2008 01:46:34 -0700
Subject: [PATCH 645/853] hfs: convert bitmap_lock in a mutex

Apple Macintosh file system: The semaphore bitmap_lock is used as a mutex.
Convert it to the mutex API

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfs/bitmap.c | 8 ++++----
 fs/hfs/hfs_fs.h | 3 ++-
 fs/hfs/super.c  | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c
index 24e75798ddf..c6e97366e8a 100644
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
 	if (!*num_bits)
 		return 0;
 
-	down(&HFS_SB(sb)->bitmap_lock);
+	mutex_lock(&HFS_SB(sb)->bitmap_lock);
 	bitmap = HFS_SB(sb)->bitmap;
 
 	pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
 	HFS_SB(sb)->free_ablocks -= *num_bits;
 	hfs_bitmap_dirty(sb);
 out:
-	up(&HFS_SB(sb)->bitmap_lock);
+	mutex_unlock(&HFS_SB(sb)->bitmap_lock);
 	return pos;
 }
 
@@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
 	if ((start + count) > HFS_SB(sb)->fs_ablocks)
 		return -2;
 
-	down(&HFS_SB(sb)->bitmap_lock);
+	mutex_lock(&HFS_SB(sb)->bitmap_lock);
 	/* bitmap is always on a 32-bit boundary */
 	curr = HFS_SB(sb)->bitmap + (start / 32);
 	len = count;
@@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
 	}
 out:
 	HFS_SB(sb)->free_ablocks += len;
-	up(&HFS_SB(sb)->bitmap_lock);
+	mutex_unlock(&HFS_SB(sb)->bitmap_lock);
 	hfs_bitmap_dirty(sb);
 
 	return 0;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 147374b6f67..ad652881911 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
 
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/mutex.h>
 #include <linux/buffer_head.h>
 #include <linux/fs.h>
 
@@ -139,7 +140,7 @@ struct hfs_sb_info {
 
 	struct nls_table *nls_io, *nls_disk;
 
-	struct semaphore bitmap_lock;
+	struct mutex bitmap_lock;
 
 	unsigned long flags;
 
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8cf67974adf..ac2ec5ef66e 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_op = &hfs_super_operations;
 	sb->s_flags |= MS_NODIRATIME;
-	init_MUTEX(&sbi->bitmap_lock);
+	mutex_init(&sbi->bitmap_lock);
 
 	res = hfs_mdb_get(sb);
 	if (res) {
-- 
GitLab


From 39f8d472f280dee6503a364d1d911b9e20ce3ec9 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias@kaehlcke.net>
Date: Fri, 25 Jul 2008 01:46:35 -0700
Subject: [PATCH 646/853] hfs: convert extents_lock in a mutex

Apple Macintosh file system: The semaphore extents_lock is used as a mutex.
Convert it to the mutex API.

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfs/btree.c  |  2 +-
 fs/hfs/extent.c | 14 +++++++-------
 fs/hfs/hfs_fs.h |  2 +-
 fs/hfs/inode.c  |  4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index f6621a78520..9b9d6395bad 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
 	{
 	struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
 	HFS_I(tree->inode)->flags = 0;
-	init_MUTEX(&HFS_I(tree->inode)->extents_lock);
+	mutex_init(&HFS_I(tree->inode)->extents_lock);
 	switch (id) {
 	case HFS_EXT_CNID:
 		hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index c176f67ba0a..2c16316d291 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block,
 		goto done;
 	}
 
-	down(&HFS_I(inode)->extents_lock);
+	mutex_lock(&HFS_I(inode)->extents_lock);
 	res = hfs_ext_read_extent(inode, ablock);
 	if (!res)
 		dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
 					    ablock - HFS_I(inode)->cached_start);
 	else {
-		up(&HFS_I(inode)->extents_lock);
+		mutex_unlock(&HFS_I(inode)->extents_lock);
 		return -EIO;
 	}
-	up(&HFS_I(inode)->extents_lock);
+	mutex_unlock(&HFS_I(inode)->extents_lock);
 
 done:
 	map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode)
 	u32 start, len, goal;
 	int res;
 
-	down(&HFS_I(inode)->extents_lock);
+	mutex_lock(&HFS_I(inode)->extents_lock);
 	if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
 		goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
 	else {
@@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode)
 			goto insert_extent;
 	}
 out:
-	up(&HFS_I(inode)->extents_lock);
+	mutex_unlock(&HFS_I(inode)->extents_lock);
 	if (!res) {
 		HFS_I(inode)->alloc_blocks += len;
 		mark_inode_dirty(inode);
@@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode)
 	if (blk_cnt == alloc_cnt)
 		goto out;
 
-	down(&HFS_I(inode)->extents_lock);
+	mutex_lock(&HFS_I(inode)->extents_lock);
 	hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
 	while (1) {
 		if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode)
 		hfs_brec_remove(&fd);
 	}
 	hfs_find_exit(&fd);
-	up(&HFS_I(inode)->extents_lock);
+	mutex_unlock(&HFS_I(inode)->extents_lock);
 
 	HFS_I(inode)->alloc_blocks = blk_cnt;
 out:
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index ad652881911..9955232fdf8 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -54,7 +54,7 @@ struct hfs_inode_info {
 	struct list_head open_dir_list;
 	struct inode *rsrc_inode;
 
-	struct semaphore extents_lock;
+	struct mutex extents_lock;
 
 	u16 alloc_blocks, clump_blocks;
 	sector_t fs_blocks;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 97f8446c4ff..dc4ec640e87 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
 	if (!inode)
 		return NULL;
 
-	init_MUTEX(&HFS_I(inode)->extents_lock);
+	mutex_init(&HFS_I(inode)->extents_lock);
 	INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
 	hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
 	inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
 
 	HFS_I(inode)->flags = 0;
 	HFS_I(inode)->rsrc_inode = NULL;
-	init_MUTEX(&HFS_I(inode)->extents_lock);
+	mutex_init(&HFS_I(inode)->extents_lock);
 	INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
 
 	/* Initialize the inode */
-- 
GitLab


From 895c23f8c39c0c8d7b536bb2566d4aa968d78be2 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias@kaehlcke.net>
Date: Fri, 25 Jul 2008 01:46:36 -0700
Subject: [PATCH 647/853] hfsplus: convert the extents_lock in a mutex

Apple Extended HFS file system: The semaphore extents_lock is used as a
mutex.  Convert it to the mutex API.

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/extents.c    | 14 +++++++-------
 fs/hfsplus/hfsplus_fs.h |  3 ++-
 fs/hfsplus/inode.c      |  4 ++--
 fs/hfsplus/super.c      |  2 +-
 4 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 12e899cd788..fec8f61227f 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
 		goto done;
 	}
 
-	down(&HFSPLUS_I(inode).extents_lock);
+	mutex_lock(&HFSPLUS_I(inode).extents_lock);
 	res = hfsplus_ext_read_extent(inode, ablock);
 	if (!res) {
 		dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
 					     HFSPLUS_I(inode).cached_start);
 	} else {
-		up(&HFSPLUS_I(inode).extents_lock);
+		mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 		return -EIO;
 	}
-	up(&HFSPLUS_I(inode).extents_lock);
+	mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 
 done:
 	dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode)
 		return -ENOSPC;
 	}
 
-	down(&HFSPLUS_I(inode).extents_lock);
+	mutex_lock(&HFSPLUS_I(inode).extents_lock);
 	if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
 		goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
 	else {
@@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode)
 			goto insert_extent;
 	}
 out:
-	up(&HFSPLUS_I(inode).extents_lock);
+	mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 	if (!res) {
 		HFSPLUS_I(inode).alloc_blocks += len;
 		mark_inode_dirty(inode);
@@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode)
 	if (blk_cnt == alloc_cnt)
 		goto out;
 
-	down(&HFSPLUS_I(inode).extents_lock);
+	mutex_lock(&HFSPLUS_I(inode).extents_lock);
 	hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
 	while (1) {
 		if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode)
 		hfs_brec_remove(&fd);
 	}
 	hfs_find_exit(&fd);
-	up(&HFSPLUS_I(inode).extents_lock);
+	mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 
 	HFSPLUS_I(inode).alloc_blocks = blk_cnt;
 out:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e59537b43d..f027a905225 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
 #define _LINUX_HFSPLUS_FS_H
 
 #include <linux/fs.h>
+#include <linux/mutex.h>
 #include <linux/buffer_head.h>
 #include "hfsplus_raw.h"
 
@@ -154,7 +155,7 @@ struct hfsplus_sb_info {
 
 
 struct hfsplus_inode_info {
-	struct semaphore extents_lock;
+	struct mutex extents_lock;
 	u32 clump_blocks, alloc_blocks;
 	sector_t fs_blocks;
 	/* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 67e1c8b467c..cc3b5e24339 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
 
 	inode->i_ino = dir->i_ino;
 	INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-	init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+	mutex_init(&HFSPLUS_I(inode).extents_lock);
 	HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
 
 	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -316,7 +316,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
 	inode->i_nlink = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
 	INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-	init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+	mutex_init(&HFSPLUS_I(inode).extents_lock);
 	atomic_set(&HFSPLUS_I(inode).opencnt, 0);
 	HFSPLUS_I(inode).flags = 0;
 	memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ce97a54518d..3859118531c 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 
 	INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-	init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+	mutex_init(&HFSPLUS_I(inode).extents_lock);
 	HFSPLUS_I(inode).flags = 0;
 	HFSPLUS_I(inode).rsrc_inode = NULL;
 	atomic_set(&HFSPLUS_I(inode).opencnt, 0);
-- 
GitLab


From 5d4f7fddf8882b214e4aabb3bdb37f90a72b2537 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:36 -0700
Subject: [PATCH 648/853] reiserfs: fix synchronization of quota files in
 journal=data mode

In journal=data mode, it is not enough to do write_inode_now() as done in
vfs_quota_on() to write all data to their final location (which is needed for
quota_read to work correctly).  Calling journal_end_sync() before calling
vfs_quota_on() does its job because transactions are committed to the journal
and data marked as dirty in memory so write_inode_now() writes them to their
final locations.

Cc: <reiserfs-devel@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/super.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d40f2bd197..0cbf4cd1114 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2026,6 +2026,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 	int err;
 	struct nameidata nd;
 	struct inode *inode;
+	struct reiserfs_transaction_handle th;
 
 	if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
 		return -EINVAL;
@@ -2053,17 +2054,28 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		}
 		mark_inode_dirty(inode);
 	}
-	/* Not journalling quota? No more tests needed... */
-	if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
-	    !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
-		path_put(&nd.path);
-		return vfs_quota_on(sb, type, format_id, path, 0);
-	}
-	/* Quotafile not of fs root? */
-	if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
-		reiserfs_warning(sb,
+	/* Journaling quota? */
+	if (REISERFS_SB(sb)->s_qf_names[type]) {
+		/* Quotafile not of fs root? */
+		if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+			reiserfs_warning(sb,
 				 "reiserfs: Quota file not on filesystem root. "
 				 "Journalled quota will not work.");
+	}
+
+	/*
+	 * When we journal data on quota file, we have to flush journal to see
+	 * all updates to the file when we bypass pagecache...
+	 */
+	if (reiserfs_file_data_log(inode)) {
+		/* Just start temporary transaction and finish it */
+		err = journal_begin(&th, sb, 1);
+		if (err)
+			return err;
+		err = journal_end_sync(&th, sb, 1);
+		if (err)
+			return err;
+	}
 	path_put(&nd.path);
 	return vfs_quota_on(sb, type, format_id, path, 0);
 }
-- 
GitLab


From 4506567b24d3ea707e46e8aad64caef539382f4b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:37 -0700
Subject: [PATCH 649/853] reiserfs: fix typos in messages and comments
 (journalled -> journaled)

Cc: <reiserfs-devel@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/super.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0cbf4cd1114..f723604c5d9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -182,7 +182,7 @@ static int finish_unfinished(struct super_block *s)
 			int ret = reiserfs_quota_on_mount(s, i);
 			if (ret < 0)
 				reiserfs_warning(s,
-						 "reiserfs: cannot turn on journalled quota: error %d",
+						 "reiserfs: cannot turn on journaled quota: error %d",
 						 ret);
 		}
 	}
@@ -994,7 +994,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 
 			if (sb_any_quota_enabled(s)) {
 				reiserfs_warning(s,
-						 "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
+						 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
 				return 0;
 			}
 			if (*arg) {	/* Some filename specified? */
@@ -1039,7 +1039,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 #else
 		if (c == 'u' || c == 'g' || c == 'f') {
 			reiserfs_warning(s,
-					 "reiserfs_parse_options: journalled quota options not supported.");
+					 "reiserfs_parse_options: journaled quota options not supported.");
 			return 0;
 		}
 #endif
@@ -1050,7 +1050,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 	    && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
 		|| REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
 		reiserfs_warning(s,
-				 "reiserfs_parse_options: journalled quota format not specified.");
+				 "reiserfs_parse_options: journaled quota format not specified.");
 		return 0;
 	}
 	/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1980,7 +1980,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
 
 static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
 {
-	/* Are we journalling quotas? */
+	/* Are we journaling quotas? */
 	if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
 	    REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
 		dquot_mark_dquot_dirty(dquot);
-- 
GitLab


From 00b441970a0ab48185244300ac7d4e4eb76df692 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:38 -0700
Subject: [PATCH 650/853] reiserfs: correct mount option parsing to detect when
 quota options can be changed

We should not allow user to change quota mount options when quota is just
suspended.  It would make mount options and internal quota state inconsistent.

Also we should not allow user to change quota format when quota is turned on.
On the other hand we can just silently ignore when some option is set to the
value it already has (some mount versions do this on remount).  Finally, we
should not discard current quota options if parsing of mount options fails.

Cc: <reiserfs-devel@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/super.c | 83 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 60 insertions(+), 23 deletions(-)

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f723604c5d9..a10a6d2a887 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -876,7 +876,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 				     mount options were selected. */
 				  unsigned long *blocks,	/* strtol-ed from NNN of resize=NNN */
 				  char **jdev_name,
-				  unsigned int *commit_max_age)
+				  unsigned int *commit_max_age,
+				  char **qf_names,
+				  unsigned int *qfmt)
 {
 	int c;
 	char *arg = NULL;
@@ -992,7 +994,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		if (c == 'u' || c == 'g') {
 			int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
 
-			if (sb_any_quota_enabled(s)) {
+			if ((sb_any_quota_enabled(s) ||
+			     sb_any_quota_suspended(s)) &&
+			    (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
 				reiserfs_warning(s,
 						 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
 				return 0;
@@ -1011,30 +1015,39 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 							 "reiserfs_parse_options: quotafile must be on filesystem root.");
 					return 0;
 				}
-				REISERFS_SB(s)->s_qf_names[qtype] =
+				qf_names[qtype] =
 				    kmalloc(strlen(arg) + 1, GFP_KERNEL);
-				if (!REISERFS_SB(s)->s_qf_names[qtype]) {
+				if (!qf_names[qtype]) {
 					reiserfs_warning(s,
 							 "reiserfs_parse_options: not enough memory for storing quotafile name.");
 					return 0;
 				}
-				strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+				strcpy(qf_names[qtype], arg);
 				*mount_options |= 1 << REISERFS_QUOTA;
 			} else {
-				kfree(REISERFS_SB(s)->s_qf_names[qtype]);
-				REISERFS_SB(s)->s_qf_names[qtype] = NULL;
+				if (qf_names[qtype] !=
+				    REISERFS_SB(s)->s_qf_names[qtype])
+					kfree(qf_names[qtype]);
+				qf_names[qtype] = NULL;
 			}
 		}
 		if (c == 'f') {
 			if (!strcmp(arg, "vfsold"))
-				REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
+				*qfmt = QFMT_VFS_OLD;
 			else if (!strcmp(arg, "vfsv0"))
-				REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
+				*qfmt = QFMT_VFS_V0;
 			else {
 				reiserfs_warning(s,
 						 "reiserfs_parse_options: unknown quota format specified.");
 				return 0;
 			}
+			if ((sb_any_quota_enabled(s) ||
+			     sb_any_quota_suspended(s)) &&
+			    *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
+				reiserfs_warning(s,
+						 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
+				return 0;
+			}
 		}
 #else
 		if (c == 'u' || c == 'g' || c == 'f') {
@@ -1046,9 +1059,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 	}
 
 #ifdef CONFIG_QUOTA
-	if (!REISERFS_SB(s)->s_jquota_fmt
-	    && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
-		|| REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
+	if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
+	    && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
 		reiserfs_warning(s,
 				 "reiserfs_parse_options: journaled quota format not specified.");
 		return 0;
@@ -1130,6 +1142,21 @@ static void handle_attrs(struct super_block *s)
 	}
 }
 
+#ifdef CONFIG_QUOTA
+static void handle_quota_files(struct super_block *s, char **qf_names,
+			       unsigned int *qfmt)
+{
+	int i;
+
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+			kfree(REISERFS_SB(s)->s_qf_names[i]);
+		REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
+	}
+	REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+}
+#endif
+
 static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 {
 	struct reiserfs_super_block *rs;
@@ -1141,23 +1168,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	char *new_opts = kstrdup(arg, GFP_KERNEL);
 	int err;
+	char *qf_names[MAXQUOTAS];
+	unsigned int qfmt = 0;
 #ifdef CONFIG_QUOTA
 	int i;
+
+	memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
 #endif
 
 	rs = SB_DISK_SUPER_BLOCK(s);
 
 	if (!reiserfs_parse_options
-	    (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
+	    (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
+	    qf_names, &qfmt)) {
 #ifdef CONFIG_QUOTA
-		for (i = 0; i < MAXQUOTAS; i++) {
-			kfree(REISERFS_SB(s)->s_qf_names[i]);
-			REISERFS_SB(s)->s_qf_names[i] = NULL;
-		}
+		for (i = 0; i < MAXQUOTAS; i++)
+			if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+				kfree(qf_names[i]);
 #endif
 		err = -EINVAL;
 		goto out_err;
 	}
+#ifdef CONFIG_QUOTA
+	handle_quota_files(s, qf_names, &qfmt);
+#endif
 
 	handle_attrs(s);
 
@@ -1570,6 +1604,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	char *jdev_name;
 	struct reiserfs_sb_info *sbi;
 	int errval = -EINVAL;
+	char *qf_names[MAXQUOTAS] = {};
+	unsigned int qfmt = 0;
 
 	save_mount_options(s, data);
 
@@ -1597,9 +1633,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	jdev_name = NULL;
 	if (reiserfs_parse_options
 	    (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
-	     &commit_max_age) == 0) {
+	     &commit_max_age, qf_names, &qfmt) == 0) {
 		goto error;
 	}
+#ifdef CONFIG_QUOTA
+	handle_quota_files(s, qf_names, &qfmt);
+#endif
 
 	if (blocks) {
 		SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1858,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 
 	return (0);
 
-      error:
+error:
 	if (jinit_done) {	/* kill the commit thread, free journal ram */
 		journal_release_error(NULL, s);
 	}
@@ -1830,10 +1869,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 #ifdef CONFIG_QUOTA
 	{
 		int j;
-		for (j = 0; j < MAXQUOTAS; j++) {
-			kfree(sbi->s_qf_names[j]);
-			sbi->s_qf_names[j] = NULL;
-		}
+		for (j = 0; j < MAXQUOTAS; j++)
+			kfree(qf_names[j]);
 	}
 #endif
 	kfree(sbi);
-- 
GitLab


From f68215c4640a38d66429014e524a627bf572d26a Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 25 Jul 2008 01:46:38 -0700
Subject: [PATCH 651/853] reiserfs: convert j_lock to mutex

j_lock is a semaphore but is used as if it were a mutex.  This patch converts
it to a mutex.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Edward Shishkin <edward.shishkin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/journal.c          | 6 +++---
 include/linux/reiserfs_fs_sb.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2fa474..0f7b1e807e6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -558,13 +558,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
 static inline void lock_journal(struct super_block *p_s_sb)
 {
 	PROC_INFO_INC(p_s_sb, journal.lock_journal);
-	down(&SB_JOURNAL(p_s_sb)->j_lock);
+	mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
 }
 
 /* unlock the current transaction */
 static inline void unlock_journal(struct super_block *p_s_sb)
 {
-	up(&SB_JOURNAL(p_s_sb)->j_lock);
+	mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
 }
 
 static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -2837,7 +2837,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
 	journal->j_last = NULL;
 	journal->j_first = NULL;
 	init_waitqueue_head(&(journal->j_join_wait));
-	sema_init(&journal->j_lock, 1);
+	mutex_init(&journal->j_mutex);
 	sema_init(&journal->j_flush_sem, 1);
 
 	journal->j_trans_id = 10;
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 336ee43ed7d..49b639b88ba 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -193,7 +193,7 @@ struct reiserfs_journal {
 	struct buffer_head *j_header_bh;
 
 	time_t j_trans_start_time;	/* time this transaction started */
-	struct semaphore j_lock;
+	struct mutex j_mutex;
 	struct semaphore j_flush_sem;
 	wait_queue_head_t j_join_wait;	/* wait for current transaction to finish before starting new one */
 	atomic_t j_jlock;	/* lock for j_join_wait */
-- 
GitLab


From afe70259076fff0446001eaa1a287f615241a357 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 25 Jul 2008 01:46:39 -0700
Subject: [PATCH 652/853] reiserfs: convert j_flush_sem to mutex

j_flush_sem is a semaphore but is used as if it were a mutex.  This patch
converts it to a mutex.

[akpm@linux-foundation.org: fix mutex_trylock retval treatment]
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Edward Shishkin <edward.shishkin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/journal.c          | 14 +++++++-------
 include/linux/reiserfs_fs_sb.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 0f7b1e807e6..3cb4a562030 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1411,8 +1411,8 @@ static int flush_journal_list(struct super_block *s,
 
 	/* if flushall == 0, the lock is already held */
 	if (flushall) {
-		down(&journal->j_flush_sem);
-	} else if (!down_trylock(&journal->j_flush_sem)) {
+		mutex_lock(&journal->j_flush_mutex);
+	} else if (mutex_trylock(&journal->j_flush_mutex)) {
 		BUG();
 	}
 
@@ -1642,7 +1642,7 @@ static int flush_journal_list(struct super_block *s,
 	jl->j_state = 0;
 	put_journal_list(s, jl);
 	if (flushall)
-		up(&journal->j_flush_sem);
+		mutex_unlock(&journal->j_flush_mutex);
 	put_fs_excl();
 	return err;
 }
@@ -1772,12 +1772,12 @@ static int kupdate_transactions(struct super_block *s,
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	chunk.nr = 0;
 
-	down(&journal->j_flush_sem);
+	mutex_lock(&journal->j_flush_mutex);
 	if (!journal_list_still_alive(s, orig_trans_id)) {
 		goto done;
 	}
 
-	/* we've got j_flush_sem held, nobody is going to delete any
+	/* we've got j_flush_mutex held, nobody is going to delete any
 	 * of these lists out from underneath us
 	 */
 	while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1812,7 @@ static int kupdate_transactions(struct super_block *s,
 	}
 
       done:
-	up(&journal->j_flush_sem);
+	mutex_unlock(&journal->j_flush_mutex);
 	return ret;
 }
 
@@ -2838,7 +2838,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
 	journal->j_first = NULL;
 	init_waitqueue_head(&(journal->j_join_wait));
 	mutex_init(&journal->j_mutex);
-	sema_init(&journal->j_flush_sem, 1);
+	mutex_init(&journal->j_flush_mutex);
 
 	journal->j_trans_id = 10;
 	journal->j_mount_id = 10;
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 49b639b88ba..c0751724ee6 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -194,7 +194,7 @@ struct reiserfs_journal {
 
 	time_t j_trans_start_time;	/* time this transaction started */
 	struct mutex j_mutex;
-	struct semaphore j_flush_sem;
+	struct mutex j_flush_mutex;
 	wait_queue_head_t j_join_wait;	/* wait for current transaction to finish before starting new one */
 	atomic_t j_jlock;	/* lock for j_join_wait */
 	int j_list_bitmap_index;	/* number of next list bitmap to use */
-- 
GitLab


From 90415deac75a761a25239af6f56381546f8d2201 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 25 Jul 2008 01:46:40 -0700
Subject: [PATCH 653/853] reiserfs: convert j_commit_lock to mutex

j_commit_lock is a semaphore, but it is used as if it were a mutex.  This
patch converts it to a mutex.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Edward Shishkin <edward.shishkin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/journal.c          | 22 ++++++++++------------
 include/linux/reiserfs_fs_sb.h |  2 +-
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3cb4a562030..c8f60ee183b 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
 **		        from within kupdate, it will ignore the immediate flag
 */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
 #include <linux/time.h>
 #include <linux/semaphore.h>
-
 #include <linux/vmalloc.h>
 #include <linux/reiserfs_fs.h>
-
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/fcntl.h>
@@ -54,6 +49,9 @@
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+
+#include <asm/system.h>
 
 /* gets a struct reiserfs_journal_list * from a list head */
 #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
 	}
 
 	/* make sure nobody is trying to flush this one at the same time */
-	down(&jl->j_commit_lock);
+	mutex_lock(&jl->j_commit_mutex);
 	if (!journal_list_still_alive(s, trans_id)) {
-		up(&jl->j_commit_lock);
+		mutex_unlock(&jl->j_commit_mutex);
 		goto put_jl;
 	}
 	BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
 		if (flushall) {
 			atomic_set(&(jl->j_older_commits_done), 1);
 		}
-		up(&jl->j_commit_lock);
+		mutex_unlock(&jl->j_commit_mutex);
 		goto put_jl;
 	}
 
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
 	if (flushall) {
 		atomic_set(&(jl->j_older_commits_done), 1);
 	}
-	up(&jl->j_commit_lock);
+	mutex_unlock(&jl->j_commit_mutex);
       put_jl:
 	put_journal_list(s, jl);
 
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
 	INIT_LIST_HEAD(&jl->j_working_list);
 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
 	INIT_LIST_HEAD(&jl->j_bh_list);
-	sema_init(&jl->j_commit_lock, 1);
+	mutex_init(&jl->j_commit_mutex);
 	SB_JOURNAL(s)->j_num_lists++;
 	get_journal_list(jl);
 	return jl;
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	 * the new transaction is fully setup, and we've already flushed the
 	 * ordered bh list
 	 */
-	down(&jl->j_commit_lock);
+	mutex_lock(&jl->j_commit_mutex);
 
 	/* save the transaction id in case we need to commit it later */
 	commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 		lock_kernel();
 	}
 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
-	up(&jl->j_commit_lock);
+	mutex_unlock(&jl->j_commit_mutex);
 
 	/* honor the flush wishes from the caller, simple commits can
 	 ** be done outside the journal lock, they are done below
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index c0751724ee6..315517e8bfa 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -152,7 +152,7 @@ struct reiserfs_journal_list {
 	atomic_t j_nonzerolen;
 	atomic_t j_commit_left;
 	atomic_t j_older_commits_done;	/* all commits older than this on disk */
-	struct semaphore j_commit_lock;
+	struct mutex j_commit_mutex;
 	unsigned long j_trans_id;
 	time_t j_timestamp;
 	struct reiserfs_list_bitmap *j_list_bitmap;
-- 
GitLab


From 3264d4ded4d916d294d776b77b72d477c63ac3be Mon Sep 17 00:00:00 2001
From: Shen Feng <shen@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:41 -0700
Subject: [PATCH 654/853] reiserfs: remove double definitions of xattr macros

remove the definitions of macros:
XATTR_SECURITY_PREFIX
XATTR_TRUSTED_PREFIX
XATTR_USER_PREFIX
since they are defined in linux/xattr.h

Signed-off-by: Shen Feng <shen@cn.fujitsu.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/xattr_security.c | 2 --
 fs/reiserfs/xattr_trusted.c  | 2 --
 fs/reiserfs/xattr_user.c     | 2 --
 3 files changed, 6 deletions(-)

diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 5e90a95ad60..056008db137 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
 #include <linux/reiserfs_xattr.h>
 #include <asm/uaccess.h>
 
-#define XATTR_SECURITY_PREFIX "security."
-
 static int
 security_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 024a938ca60..60abe2bb1f9 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
 #include <linux/reiserfs_xattr.h>
 #include <asm/uaccess.h>
 
-#define XATTR_TRUSTED_PREFIX "trusted."
-
 static int
 trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 073f39364b1..1384efcb938 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
 # include <linux/reiserfs_acl.h>
 #endif
 
-#define XATTR_USER_PREFIX "user."
-
 static int
 user_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
-- 
GitLab


From 8d44d9741f6808c107a144f469fb89e6fe7c55e3 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 25 Jul 2008 01:46:41 -0700
Subject: [PATCH 655/853] fat: fix parse_options()

The current parse_options() exits too early.  We need to run the code at
the bottom of this function even if the user doesn't specify any options.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/inode.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 46a4508ffd2..60deb5fd118 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -950,7 +950,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 	*debug = 0;
 
 	if (!options)
-		return 0;
+		goto out;
 
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
@@ -1104,10 +1104,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 			return -EINVAL;
 		}
 	}
+
+out:
 	/* UTF-8 doesn't provide FAT semantics */
 	if (!strcmp(opts->iocharset, "utf8")) {
 		printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
-		       " for FAT filesystems, filesystem will be case sensitive!\n");
+		       " for FAT filesystems, filesystem will be "
+		       "case sensitive!\n");
 	}
 
 	/* If user doesn't specify allow_utime, it's initialized from dmask. */
-- 
GitLab


From 4596c8aaf96e8634ca755c9f34b91420a39bebd4 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 25 Jul 2008 01:46:42 -0700
Subject: [PATCH 656/853] fat: fix VFAT_IOCTL_READDIR_xxx and cleanup for
 userland

"struct dirent" is a kernel type here, but is a **different type** in
userspace!  This means both the structure and the IOCTL number are wrong!

So, this adds a new "struct __fat_dirent" to generate the correct IOCTL
number, and moves the kernel-only definitions under __KERNEL__.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msdos_fs.h | 47 +++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 81cd36b735b..5161394c789 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -2,11 +2,11 @@
 #define _LINUX_MSDOS_FS_H
 
 #include <linux/magic.h>
+#include <asm/byteorder.h>
 
 /*
  * The MS-DOS filesystem constants/structures
  */
-#include <asm/byteorder.h>
 
 #define SECTOR_SIZE	512		/* sector size (bytes) */
 #define SECTOR_BITS	9		/* log2(SECTOR_SIZE) */
@@ -89,24 +89,22 @@
 #define IS_FSINFO(x)	(le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \
 			 && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2)
 
+struct __fat_dirent {
+	long		d_ino;
+	__kernel_off_t	d_off;
+	unsigned short	d_reclen;
+	char		d_name[256]; /* We must not include limits.h! */
+};
+
 /*
  * ioctl commands
  */
-#define VFAT_IOCTL_READDIR_BOTH		_IOR('r', 1, struct dirent [2])
-#define VFAT_IOCTL_READDIR_SHORT	_IOR('r', 2, struct dirent [2])
+#define VFAT_IOCTL_READDIR_BOTH		_IOR('r', 1, struct __fat_dirent[2])
+#define VFAT_IOCTL_READDIR_SHORT	_IOR('r', 2, struct __fat_dirent[2])
 /* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */
 #define FAT_IOCTL_GET_ATTRIBUTES	_IOR('r', 0x10, __u32)
 #define FAT_IOCTL_SET_ATTRIBUTES	_IOW('r', 0x11, __u32)
 
-/*
- * vfat shortname flags
- */
-#define VFAT_SFN_DISPLAY_LOWER	0x0001 /* convert to lowercase for display */
-#define VFAT_SFN_DISPLAY_WIN95	0x0002 /* emulate win95 rule for display */
-#define VFAT_SFN_DISPLAY_WINNT	0x0004 /* emulate winnt rule for display */
-#define VFAT_SFN_CREATE_WIN95	0x0100 /* emulate win95 rule for create */
-#define VFAT_SFN_CREATE_WINNT	0x0200 /* emulate winnt rule for create */
-
 struct fat_boot_sector {
 	__u8	ignored[3];	/* Boot strap short or near jump */
 	__u8	system_id[8];	/* Name - can be used to special case
@@ -168,14 +166,6 @@ struct msdos_dir_slot {
 	__u8    name11_12[4];	/* last 2 characters in name */
 };
 
-struct fat_slot_info {
-	loff_t i_pos;		/* on-disk position of directory entry */
-	loff_t slot_off;	/* offset for slot or de start */
-	int nr_slots;		/* number of slots + 1(de) in filename */
-	struct msdos_dir_entry *de;
-	struct buffer_head *bh;
-};
-
 #ifdef __KERNEL__
 
 #include <linux/buffer_head.h>
@@ -184,6 +174,15 @@ struct fat_slot_info {
 #include <linux/fs.h>
 #include <linux/mutex.h>
 
+/*
+ * vfat shortname flags
+ */
+#define VFAT_SFN_DISPLAY_LOWER	0x0001 /* convert to lowercase for display */
+#define VFAT_SFN_DISPLAY_WIN95	0x0002 /* emulate win95 rule for display */
+#define VFAT_SFN_DISPLAY_WINNT	0x0004 /* emulate winnt rule for display */
+#define VFAT_SFN_CREATE_WIN95	0x0100 /* emulate win95 rule for create */
+#define VFAT_SFN_CREATE_WINNT	0x0200 /* emulate winnt rule for create */
+
 struct fat_mount_options {
 	uid_t fs_uid;
 	gid_t fs_gid;
@@ -267,6 +266,14 @@ struct msdos_inode_info {
 	struct inode vfs_inode;
 };
 
+struct fat_slot_info {
+	loff_t i_pos;		/* on-disk position of directory entry */
+	loff_t slot_off;	/* offset for slot or de start */
+	int nr_slots;		/* number of slots + 1(de) in filename */
+	struct msdos_dir_entry *de;
+	struct buffer_head *bh;
+};
+
 static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
-- 
GitLab


From 531f710f8e68fd2bad7516a090bff372f5f9cf6d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:43 -0700
Subject: [PATCH 657/853] fat/dir.c: switch to struct __fat_dirent

struct __fat_dirent is what was formerly the kernel struct dirent (that
was different from the userspace struct dirent).

Converting all fat users to struct __fat_dirent will allow us to get rid
of the conflicting struct dirent definition.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/dir.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 34541d06e62..b57c4b1db63 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/msdos_fs.h>
-#include <linux/dirent.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/compat.h>
@@ -715,7 +714,7 @@ efault:									   \
 	return -EFAULT;							   \
 }
 
-FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent)
+FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
 
 static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
 			     void __user *dirent, filldir_t filldir,
@@ -741,7 +740,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
 static int fat_dir_ioctl(struct inode *inode, struct file *filp,
 			 unsigned int cmd, unsigned long arg)
 {
-	struct dirent __user *d1 = (struct dirent __user *)arg;
+	struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
 	int short_only, both;
 
 	switch (cmd) {
@@ -757,7 +756,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
 		return fat_generic_ioctl(inode, filp, cmd, arg);
 	}
 
-	if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2])))
+	if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
 		return -EFAULT;
 	/*
 	 * Yes, we don't need this put_user() absolutely. However old
-- 
GitLab


From d688611674cc9c265ee67e89d2ea8bf060c17e8d Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 25 Jul 2008 01:46:43 -0700
Subject: [PATCH 658/853] fat: cleanup fs/fat/dir.c

This makes no logic changes; it just cleans up fs/fat/dir.c.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/dir.c | 131 ++++++++++++++++++++++++++-------------------------
 1 file changed, 67 insertions(+), 64 deletions(-)

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index b57c4b1db63..a4410740627 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -123,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
  * but ignore that right now.
  * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
  */
-static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
+static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
 		       int uni_xlate, struct nls_table *nls)
 {
-	wchar_t *ip, ec;
+	const wchar_t *ip;
+	wchar_t ec;
 	unsigned char *op, nc;
 	int charlen;
 	int k;
@@ -166,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
 	return (op - ascii);
 }
 
+static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+				unsigned char *buf, int size)
+{
+	if (sbi->options.utf8)
+		return utf8_wcstombs(buf, uni, size);
+	else
+		return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
+				   sbi->nls_io);
+}
+
 static inline int
 fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
 {
@@ -226,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
 	return len;
 }
 
+static inline int fat_name_match(struct msdos_sb_info *sbi,
+				 const unsigned char *a, int a_len,
+				 const unsigned char *b, int b_len)
+{
+	if (a_len != b_len)
+		return 0;
+
+	if (sbi->options.name_check != 's')
+		return !nls_strnicmp(sbi->nls_io, a, b, a_len);
+	else
+		return !memcmp(a, b, a_len);
+}
+
 enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
 
 /**
@@ -311,29 +335,24 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	struct buffer_head *bh = NULL;
 	struct msdos_dir_entry *de;
-	struct nls_table *nls_io = sbi->nls_io;
 	struct nls_table *nls_disk = sbi->nls_disk;
-	wchar_t bufuname[14];
 	unsigned char nr_slots;
-	int xlate_len;
+	wchar_t bufuname[14];
 	wchar_t *unicode = NULL;
 	unsigned char work[MSDOS_NAME];
 	unsigned char *bufname = NULL;
-	int uni_xlate = sbi->options.unicode_xlate;
-	int utf8 = sbi->options.utf8;
-	int anycase = (sbi->options.name_check != 's');
 	unsigned short opt_shortname = sbi->options.shortname;
 	loff_t cpos = 0;
-	int chl, i, j, last_u, err;
+	int chl, i, j, last_u, err, len;
 
 	bufname = __getname();
 	if (!bufname)
 		return -ENOMEM;
 
 	err = -ENOENT;
-	while(1) {
+	while (1) {
 		if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
-			goto EODir;
+			goto end_of_dir;
 parse_record:
 		nr_slots = 0;
 		if (de->name[0] == DELETED_FLAG)
@@ -352,7 +371,7 @@ parse_record:
 			else if (status == PARSE_NOT_LONGNAME)
 				goto parse_record;
 			else if (status == PARSE_EOF)
-				goto EODir;
+				goto end_of_dir;
 		}
 
 		memcpy(work, de->name, sizeof(de->name));
@@ -393,30 +412,21 @@ parse_record:
 		if (!last_u)
 			continue;
 
+		/* Compare shortname */
 		bufuname[last_u] = 0x0000;
-		xlate_len = utf8
-			?utf8_wcstombs(bufname, bufuname, PATH_MAX)
-			:uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io);
-		if (xlate_len == name_len)
-			if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
-			    (anycase && !nls_strnicmp(nls_io, name, bufname,
-								xlate_len)))
-				goto Found;
+		len = fat_uni_to_x8(sbi, bufuname, bufname, PATH_MAX);
+		if (fat_name_match(sbi, name, name_len, bufname, len))
+			goto found;
 
 		if (nr_slots) {
-			xlate_len = utf8
-				?utf8_wcstombs(bufname, unicode, PATH_MAX)
-				:uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io);
-			if (xlate_len != name_len)
-				continue;
-			if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
-			    (anycase && !nls_strnicmp(nls_io, name, bufname,
-								xlate_len)))
-				goto Found;
+			/* Compare longname */
+			len = fat_uni_to_x8(sbi, unicode, bufname, PATH_MAX);
+			if (fat_name_match(sbi, name, name_len, bufname, len))
+				goto found;
 		}
 	}
 
-Found:
+found:
 	nr_slots++;	/* include the de */
 	sinfo->slot_off = cpos - nr_slots * sizeof(*de);
 	sinfo->nr_slots = nr_slots;
@@ -424,7 +434,7 @@ Found:
 	sinfo->bh = bh;
 	sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
 	err = 0;
-EODir:
+end_of_dir:
 	if (bufname)
 		__putname(bufname);
 	if (unicode)
@@ -452,23 +462,19 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	struct buffer_head *bh;
 	struct msdos_dir_entry *de;
-	struct nls_table *nls_io = sbi->nls_io;
 	struct nls_table *nls_disk = sbi->nls_disk;
-	unsigned char long_slots;
-	const char *fill_name;
-	int fill_len;
+	unsigned char nr_slots;
 	wchar_t bufuname[14];
 	wchar_t *unicode = NULL;
 	unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
-	unsigned long lpos, dummy, *furrfu = &lpos;
-	int uni_xlate = sbi->options.unicode_xlate;
+	unsigned short opt_shortname = sbi->options.shortname;
 	int isvfat = sbi->options.isvfat;
-	int utf8 = sbi->options.utf8;
 	int nocase = sbi->options.nocase;
-	unsigned short opt_shortname = sbi->options.shortname;
+	const char *fill_name;
 	unsigned long inum;
-	int chi, chl, i, i2, j, last, last_u, dotoffset = 0;
+	unsigned long lpos, dummy, *furrfu = &lpos;
 	loff_t cpos;
+	int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len;
 	int ret = 0;
 
 	lock_super(sb);
@@ -488,43 +494,43 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
 			cpos = 0;
 		}
 	}
-	if (cpos & (sizeof(struct msdos_dir_entry)-1)) {
+	if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
 		ret = -ENOENT;
 		goto out;
 	}
 
 	bh = NULL;
-GetNew:
+get_new:
 	if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
-		goto EODir;
+		goto end_of_dir;
 parse_record:
-	long_slots = 0;
+	nr_slots = 0;
 	/* Check for long filename entry */
 	if (isvfat) {
 		if (de->name[0] == DELETED_FLAG)
-			goto RecEnd;
+			goto record_end;
 		if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
-			goto RecEnd;
+			goto record_end;
 		if (de->attr != ATTR_EXT && IS_FREE(de->name))
-			goto RecEnd;
+			goto record_end;
 	} else {
 		if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
-			goto RecEnd;
+			goto record_end;
 	}
 
 	if (isvfat && de->attr == ATTR_EXT) {
 		int status = fat_parse_long(inode, &cpos, &bh, &de,
-					    &unicode, &long_slots);
+					    &unicode, &nr_slots);
 		if (status < 0) {
 			filp->f_pos = cpos;
 			ret = status;
 			goto out;
 		} else if (status == PARSE_INVALID)
-			goto RecEnd;
+			goto record_end;
 		else if (status == PARSE_NOT_LONGNAME)
 			goto parse_record;
 		else if (status == PARSE_EOF)
-			goto EODir;
+			goto end_of_dir;
 	}
 
 	if (sbi->options.dotsOK) {
@@ -586,12 +592,12 @@ parse_record:
 		}
 	}
 	if (!last)
-		goto RecEnd;
+		goto record_end;
 
 	i = last + dotoffset;
 	j = last_u;
 
-	lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry);
+	lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
 	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
 		inum = inode->i_ino;
 	else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -608,20 +614,17 @@ parse_record:
 
 	if (isvfat) {
 		bufuname[j] = 0x0000;
-		i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
-			 : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
+		i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
 	}
 
 	fill_name = bufname;
 	fill_len = i;
-	if (!short_only && long_slots) {
+	if (!short_only && nr_slots) {
 		/* convert the unicode long name. 261 is maximum size
 		 * of unicode buffer. (13 * slots + nul) */
 		void *longname = unicode + 261;
 		int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
-		int long_len = utf8
-			? utf8_wcstombs(longname, unicode, buf_size)
-			: uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
+		int long_len = fat_uni_to_x8(sbi, unicode, longname, buf_size);
 
 		if (!both) {
 			fill_name = longname;
@@ -640,15 +643,15 @@ parse_record:
 	}
 	if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
 		    (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
-		goto FillFailed;
+		goto fill_failed;
 
-RecEnd:
+record_end:
 	furrfu = &lpos;
 	filp->f_pos = cpos;
-	goto GetNew;
-EODir:
+	goto get_new;
+end_of_dir:
 	filp->f_pos = cpos;
-FillFailed:
+fill_failed:
 	brelse(bh);
 	if (unicode)
 		__putname(unicode);
-- 
GitLab


From 98a15160049fc1a0f822047f33ff513906a35567 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 25 Jul 2008 01:46:44 -0700
Subject: [PATCH 659/853] fat: use same logic in fat_search_long() and
 __fat_readdir()

This uses the stack for the shortname, and uses __getname() for the
longname, in both fat_search_long() and __fat_readdir().  This removes the
unneeded __getname() for the shortname.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/dir.c | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index a4410740627..96a1cad30da 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -324,6 +324,19 @@ parse_long:
 	return 0;
 }
 
+/*
+ * Maximum buffer size of short name.
+ * [(MSDOS_NAME + '.') * max one char + nul]
+ * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
+ */
+#define FAT_MAX_SHORT_SIZE	((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
+/*
+ * Maximum buffer size of unicode chars from slots.
+ * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
+ */
+#define FAT_MAX_UNI_CHARS	((MSDOS_SLOTS - 1) * 13 + 1)
+#define FAT_MAX_UNI_SIZE	(FAT_MAX_UNI_CHARS * sizeof(wchar_t))
+
 /*
  * Return values: negative -> error, 0 -> not found, positive -> found,
  * value is the total amount of slots, including the shortname entry.
@@ -340,15 +353,11 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
 	wchar_t bufuname[14];
 	wchar_t *unicode = NULL;
 	unsigned char work[MSDOS_NAME];
-	unsigned char *bufname = NULL;
+	unsigned char bufname[FAT_MAX_SHORT_SIZE];
 	unsigned short opt_shortname = sbi->options.shortname;
 	loff_t cpos = 0;
 	int chl, i, j, last_u, err, len;
 
-	bufname = __getname();
-	if (!bufname)
-		return -ENOMEM;
-
 	err = -ENOENT;
 	while (1) {
 		if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
@@ -414,14 +423,17 @@ parse_record:
 
 		/* Compare shortname */
 		bufuname[last_u] = 0x0000;
-		len = fat_uni_to_x8(sbi, bufuname, bufname, PATH_MAX);
+		len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
 		if (fat_name_match(sbi, name, name_len, bufname, len))
 			goto found;
 
 		if (nr_slots) {
+			void *longname = unicode + FAT_MAX_UNI_CHARS;
+			int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+
 			/* Compare longname */
-			len = fat_uni_to_x8(sbi, unicode, bufname, PATH_MAX);
-			if (fat_name_match(sbi, name, name_len, bufname, len))
+			len = fat_uni_to_x8(sbi, unicode, longname, size);
+			if (fat_name_match(sbi, name, name_len, longname, len))
 				goto found;
 		}
 	}
@@ -435,8 +447,6 @@ found:
 	sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
 	err = 0;
 end_of_dir:
-	if (bufname)
-		__putname(bufname);
 	if (unicode)
 		__putname(unicode);
 
@@ -466,7 +476,8 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
 	unsigned char nr_slots;
 	wchar_t bufuname[14];
 	wchar_t *unicode = NULL;
-	unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
+	unsigned char c, work[MSDOS_NAME];
+	unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
 	unsigned short opt_shortname = sbi->options.shortname;
 	int isvfat = sbi->options.isvfat;
 	int nocase = sbi->options.nocase;
@@ -620,11 +631,10 @@ parse_record:
 	fill_name = bufname;
 	fill_len = i;
 	if (!short_only && nr_slots) {
-		/* convert the unicode long name. 261 is maximum size
-		 * of unicode buffer. (13 * slots + nul) */
-		void *longname = unicode + 261;
-		int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
-		int long_len = fat_uni_to_x8(sbi, unicode, longname, buf_size);
+		void *longname = unicode + FAT_MAX_UNI_CHARS;
+		int long_len, size = PATH_MAX - FAT_MAX_UNI_SIZE;
+
+		long_len = fat_uni_to_x8(sbi, unicode, longname, size);
 
 		if (!both) {
 			fill_name = longname;
-- 
GitLab


From dcd8c53f13f068ee039589d84fbd0baf686abc41 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 25 Jul 2008 01:46:44 -0700
Subject: [PATCH 660/853] fat: small optimization to __fat_readdir()

This removes unnecessary parsing for directory entries.

If short_only is set, we don't need to parse the longname.  And if !both
and the longname was found, we don't need the shortname.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/dir.c | 71 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 96a1cad30da..4c35477bc94 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -481,11 +481,11 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
 	unsigned short opt_shortname = sbi->options.shortname;
 	int isvfat = sbi->options.isvfat;
 	int nocase = sbi->options.nocase;
-	const char *fill_name;
+	const char *fill_name = NULL;
 	unsigned long inum;
 	unsigned long lpos, dummy, *furrfu = &lpos;
 	loff_t cpos;
-	int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len;
+	int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
 	int ret = 0;
 
 	lock_super(sb);
@@ -516,8 +516,11 @@ get_new:
 		goto end_of_dir;
 parse_record:
 	nr_slots = 0;
-	/* Check for long filename entry */
-	if (isvfat) {
+	/*
+	 * Check for long filename entry, but if short_only, we don't
+	 * need to parse long filename.
+	 */
+	if (isvfat && !short_only) {
 		if (de->name[0] == DELETED_FLAG)
 			goto record_end;
 		if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
@@ -542,6 +545,18 @@ parse_record:
 			goto parse_record;
 		else if (status == PARSE_EOF)
 			goto end_of_dir;
+
+		if (nr_slots) {
+			void *longname = unicode + FAT_MAX_UNI_CHARS;
+			int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+			int len = fat_uni_to_x8(sbi, unicode, longname, size);
+
+			fill_name = longname;
+			fill_len = len;
+			/* !both && !short_only, so we don't need shortname. */
+			if (!both)
+				goto start_filldir;
+		}
 	}
 
 	if (sbi->options.dotsOK) {
@@ -608,6 +623,26 @@ parse_record:
 	i = last + dotoffset;
 	j = last_u;
 
+	if (isvfat) {
+		bufuname[j] = 0x0000;
+		i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+	}
+	if (nr_slots) {
+		/* hack for fat_ioctl_filldir() */
+		struct fat_ioctl_filldir_callback *p = dirent;
+
+		p->longname = fill_name;
+		p->long_len = fill_len;
+		p->shortname = bufname;
+		p->short_len = i;
+		fill_name = NULL;
+		fill_len = 0;
+	} else {
+		fill_name = bufname;
+		fill_len = i;
+	}
+
+start_filldir:
 	lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
 	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
 		inum = inode->i_ino;
@@ -623,34 +658,6 @@ parse_record:
 			inum = iunique(sb, MSDOS_ROOT_INO);
 	}
 
-	if (isvfat) {
-		bufuname[j] = 0x0000;
-		i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
-	}
-
-	fill_name = bufname;
-	fill_len = i;
-	if (!short_only && nr_slots) {
-		void *longname = unicode + FAT_MAX_UNI_CHARS;
-		int long_len, size = PATH_MAX - FAT_MAX_UNI_SIZE;
-
-		long_len = fat_uni_to_x8(sbi, unicode, longname, size);
-
-		if (!both) {
-			fill_name = longname;
-			fill_len = long_len;
-		} else {
-			/* hack for fat_ioctl_filldir() */
-			struct fat_ioctl_filldir_callback *p = dirent;
-
-			p->longname = longname;
-			p->long_len = long_len;
-			p->shortname = bufname;
-			p->short_len = i;
-			fill_name = NULL;
-			fill_len = 0;
-		}
-	}
 	if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
 		    (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
 		goto fill_failed;
-- 
GitLab


From 7557bc66be629d19a402e752673708bfbb8b5e86 Mon Sep 17 00:00:00 2001
From: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Date: Fri, 25 Jul 2008 01:46:45 -0700
Subject: [PATCH 661/853] msdos fs: remove unsettable atari option

It has been impossible to set the option 'atari' of the MSDOS filesystem
for several years.  Since nobody seems to have missed it, let's remove its
remains.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/msdos/namei.c         | 18 ++++++------------
 include/linux/msdos_fs.h |  1 -
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f2956412..e4ad6c6b753 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
 
 /* Characters that are undesirable in an MS-DOS file name */
 static unsigned char bad_chars[] = "*?<>|\"";
-static unsigned char bad_if_strict_pc[] = "+=,; ";
-/* GEMDOS is less restrictive */
-static unsigned char bad_if_strict_atari[] = " ";
-
-#define bad_if_strict(opts) \
-	((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
+static unsigned char bad_if_strict[] = "+=,; ";
 
 /***** Formats an MS-DOS file name. Rejects invalid names. */
 static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
 			/* Get rid of dot - test for it elsewhere */
 			name++;
 			len--;
-		} else if (!opts->atari)
+		} else
 			return -EINVAL;
 	}
 	/*
-	 * disallow names that _really_ start with a dot for MS-DOS,
-	 * GEMDOS does not care
+	 * disallow names that _really_ start with a dot
 	 */
-	space = !opts->atari;
+	space = 1;
 	c = 0;
 	for (walk = res; len && walk - res < 8; walk++) {
 		c = *name++;
 		len--;
 		if (opts->name_check != 'r' && strchr(bad_chars, c))
 			return -EINVAL;
-		if (opts->name_check == 's' && strchr(bad_if_strict(opts), c))
+		if (opts->name_check == 's' && strchr(bad_if_strict, c))
 			return -EINVAL;
 		if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
 			return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
 			if (opts->name_check != 'r' && strchr(bad_chars, c))
 				return -EINVAL;
 			if (opts->name_check == 's' &&
-			    strchr(bad_if_strict(opts), c))
+			    strchr(bad_if_strict, c))
 				return -EINVAL;
 			if (c < ' ' || c == ':' || c == '\\')
 				return -EINVAL;
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 5161394c789..3346c9c8f17 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -201,7 +201,6 @@ struct fat_mount_options {
 		 utf8:1,	  /* Use of UTF-8 character set (Default) */
 		 unicode_xlate:1, /* create escape sequences for unhandled Unicode */
 		 numtail:1,       /* Does first alias have a numeric '~1' type tail? */
-		 atari:1,         /* Use Atari GEMDOS variation of MS-DOS fs */
 		 flush:1,	  /* write things quickly */
 		 nocase:1,	  /* Does this need case conversion? 0=need case conversion*/
 		 usefree:1;	  /* Use free_clusters for FAT32 */
-- 
GitLab


From cf6ae8b50e0ee3f764392dadd1970e3f03c40773 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:46 -0700
Subject: [PATCH 662/853] remove the in-kernel struct dirent{,64}

The kernel struct dirent{,64} were different from the ones in
userspace.

Even worse, we exported the kernel ones to userspace.

But after the fat usages are fixed we can remove the conflicting
kernel versions.

Reviewed-by: H. Peter Anvin <hpa@kernel.org>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild   |  1 -
 include/linux/dirent.h | 20 --------------------
 2 files changed, 21 deletions(-)

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 71d70d1fbce..a18008ce7ab 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -189,7 +189,6 @@ unifdef-y += connector.h
 unifdef-y += cuda.h
 unifdef-y += cyclades.h
 unifdef-y += dccp.h
-unifdef-y += dirent.h
 unifdef-y += dlm.h
 unifdef-y += dlm_plock.h
 unifdef-y += edd.h
diff --git a/include/linux/dirent.h b/include/linux/dirent.h
index 5d6023b8780..f072fb8d10a 100644
--- a/include/linux/dirent.h
+++ b/include/linux/dirent.h
@@ -1,23 +1,6 @@
 #ifndef _LINUX_DIRENT_H
 #define _LINUX_DIRENT_H
 
-struct dirent {
-	long		d_ino;
-	__kernel_off_t	d_off;
-	unsigned short	d_reclen;
-	char		d_name[256]; /* We must not include limits.h! */
-};
-
-struct dirent64 {
-	__u64		d_ino;
-	__s64		d_off;
-	unsigned short	d_reclen;
-	unsigned char	d_type;
-	char		d_name[256];
-};
-
-#ifdef __KERNEL__
-
 struct linux_dirent64 {
 	u64		d_ino;
 	s64		d_off;
@@ -26,7 +9,4 @@ struct linux_dirent64 {
 	char		d_name[0];
 };
 
-#endif	/* __KERNEL__ */
-
-
 #endif
-- 
GitLab


From e8938a62a85d1f487e02c3b01955b47c9598f6d2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:46 -0700
Subject: [PATCH 663/853] remove unused #include <linux/dirent.h>'s

Remove some unused #include <linux/dirent.h>'s.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/kernel/linux32.c | 1 -
 fs/compat_ioctl.c          | 1 -
 fs/smbfs/cache.c           | 1 -
 fs/smbfs/proc.c            | 1 -
 include/linux/nfsd/nfsd.h  | 1 -
 5 files changed, 5 deletions(-)

diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index c266211ed65..2fefb14414b 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -11,7 +11,6 @@
 #include <linux/file.h>
 #include <linux/smp_lock.h>
 #include <linux/highuid.h>
-#include <linux/dirent.h>
 #include <linux/resource.h>
 #include <linux/highmem.h>
 #include <linux/time.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 18e2c548161..5235c67e759 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/raid/md.h>
 #include <linux/kd.h>
-#include <linux/dirent.h>
 #include <linux/route.h>
 #include <linux/in6.h>
 #include <linux/ipv6_route.h>
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f0542a2..8c177eb7e34 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/dirent.h>
 #include <linux/smb_fs.h>
 #include <linux/pagemap.h>
 #include <linux/net.h>
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27b7f4..ee536e8a649 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/dcache.h>
-#include <linux/dirent.h>
 #include <linux/nls.h>
 #include <linux/smp_lock.h>
 #include <linux/net.h>
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index a2861d95ecc..108f47e5fd9 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -12,7 +12,6 @@
 
 #include <linux/types.h>
 #include <linux/unistd.h>
-#include <linux/dirent.h>
 #include <linux/fs.h>
 #include <linux/posix_acl.h>
 #include <linux/mount.h>
-- 
GitLab


From b271e067c896ad4082b15e96077675d08db40625 Mon Sep 17 00:00:00 2001
From: Joe Peterson <joe@skyrush.com>
Date: Fri, 25 Jul 2008 01:46:47 -0700
Subject: [PATCH 664/853] fatfs: add UTC timestamp option

Provide a new mount option ("tz=UTC") for DOS (vfat/msdos) filesystems,
allowing timestamps to be in coordinated universal time (UTC) rather than
local time in applications where doing this is advantageous.

In particular, portable devices that use fat/vfat (such as digital
cameras) can benefit from using UTC in their internal clocks, thus
avoiding daylight saving time errors and general time ambiguity issues.
The user of the device does not have to worry about changing the time when
moving from place to place or when daylight saving changes.

The new mount option, when set, disables the counter-adjustment that Linux
currently makes to FAT timestamp info in anticipation of the normal
userspace time zone correction.  When used in this new mode, all daylight
saving time and time zone handling is done in userspace as is normal for
many other filesystems (like ext3).  The default mode, which remains
unchanged, is still appropriate when mounting volumes written in Windows
(because of its use of local time).

I originally based this patch on one submitted last year by Paul Collins,
but I updated it to work with current source and changed variable/option
naming.  Ogawa Hirofumi (who maintains these filesystems) and I discussed
this patch at length on lkml, and he suggested using the option name in
the attached version of the patch.  Barry Bouwsma pointed out a good
addition to the patch as well.

Signed-off-by: Joe Peterson <joe@skyrush.com>
Signed-off-by: Paul Collins <paul@ondioline.org>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Barry Bouwsma <free_beer_for_all@yahoo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/dir.c             |  2 +-
 fs/fat/inode.c           | 27 ++++++++++++++++++++-------
 fs/fat/misc.c            | 10 ++++++----
 fs/msdos/namei.c         |  3 ++-
 fs/vfat/namei.c          |  2 +-
 include/linux/msdos_fs.h |  8 +++++---
 6 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4c35477bc94..cd4a0162e10 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1101,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
 		goto error_free;
 	}
 
-	fat_date_unix2dos(ts->tv_sec, &time, &date);
+	fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
 
 	de = (struct msdos_dir_entry *)bhs[0]->b_data;
 	/* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 60deb5fd118..23676f9d79c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 	inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
 			   & ~((loff_t)sbi->cluster_size - 1)) >> 9;
 	inode->i_mtime.tv_sec =
-		date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
+		date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
+			      sbi->options.tz_utc);
 	inode->i_mtime.tv_nsec = 0;
 	if (sbi->options.isvfat) {
 		int secs = de->ctime_cs / 100;
 		int csecs = de->ctime_cs % 100;
 		inode->i_ctime.tv_sec  =
 			date_dos2unix(le16_to_cpu(de->ctime),
-				      le16_to_cpu(de->cdate)) + secs;
+				      le16_to_cpu(de->cdate),
+				      sbi->options.tz_utc) + secs;
 		inode->i_ctime.tv_nsec = csecs * 10000000;
 		inode->i_atime.tv_sec =
-			date_dos2unix(0, le16_to_cpu(de->adate));
+			date_dos2unix(0, le16_to_cpu(de->adate),
+				      sbi->options.tz_utc);
 		inode->i_atime.tv_nsec = 0;
 	} else
 		inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -591,11 +594,14 @@ retry:
 	raw_entry->attr = fat_attr(inode);
 	raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
 	raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
-	fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
+	fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
+			  &raw_entry->date, sbi->options.tz_utc);
 	if (sbi->options.isvfat) {
 		__le16 atime;
-		fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
-		fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
+		fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
+				  &raw_entry->cdate, sbi->options.tz_utc);
+		fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
+				  &raw_entry->adate, sbi->options.tz_utc);
 		raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
 			inode->i_ctime.tv_nsec / 10000000;
 	}
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
 	}
 	if (sbi->options.flush)
 		seq_puts(m, ",flush");
+	if (opts->tz_utc)
+		seq_puts(m, ",tz=UTC");
 
 	return 0;
 }
@@ -848,7 +856,7 @@ enum {
 	Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
 	Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
 	Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
-	Opt_obsolate, Opt_flush, Opt_err,
+	Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
 };
 
 static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
 	{Opt_obsolate, "cvf_options=%100s"},
 	{Opt_obsolate, "posix"},
 	{Opt_flush, "flush"},
+	{Opt_tz_utc, "tz=UTC"},
 	{Opt_err, NULL},
 };
 static match_table_t msdos_tokens = {
@@ -947,6 +956,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 	opts->utf8 = opts->unicode_xlate = 0;
 	opts->numtail = 1;
 	opts->usefree = opts->nocase = 0;
+	opts->tz_utc = 0;
 	*debug = 0;
 
 	if (!options)
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 		case Opt_flush:
 			opts->flush = 1;
 			break;
+		case Opt_tz_utc:
+			opts->tz_utc = 1;
+			break;
 
 		/* msdos specific */
 		case Opt_dots:
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f23511eac..79fb98ad36d 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
 };
 
 /* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-int date_dos2unix(unsigned short time, unsigned short date)
+int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
 {
 	int month, year, secs;
 
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
 	    ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
 	    month < 2 ? 1 : 0)+3653);
 			/* days since 1.1.70 plus 80's leap day */
-	secs += sys_tz.tz_minuteswest*60;
+	if (!tz_utc)
+		secs += sys_tz.tz_minuteswest*60;
 	return secs;
 }
 
 /* Convert linear UNIX date to a MS-DOS time/date pair. */
-void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
+void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
 {
 	int day, year, nl_day, month;
 
-	unix_date -= sys_tz.tz_minuteswest*60;
+	if (!tz_utc)
+		unix_date -= sys_tz.tz_minuteswest*60;
 
 	/* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
 	if (unix_date < 315532800)
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index e4ad6c6b753..e844b9809d2 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -237,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 			   int is_dir, int is_hid, int cluster,
 			   struct timespec *ts, struct fat_slot_info *sinfo)
 {
+	struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
 	struct msdos_dir_entry de;
 	__le16 time, date;
 	int err;
@@ -246,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 	if (is_hid)
 		de.attr |= ATTR_HIDDEN;
 	de.lcase = 0;
-	fat_date_unix2dos(ts->tv_sec, &time, &date);
+	fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
 	de.cdate = de.adate = 0;
 	de.ctime = 0;
 	de.ctime_cs = 0;
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba69be8..155c10b4adb 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
 	memcpy(de->name, msdos_name, MSDOS_NAME);
 	de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
 	de->lcase = lcase;
-	fat_date_unix2dos(ts->tv_sec, &time, &date);
+	fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
 	de->time = de->ctime = time;
 	de->date = de->cdate = de->adate = date;
 	de->ctime_cs = 0;
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 3346c9c8f17..ba63858056c 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -203,7 +203,8 @@ struct fat_mount_options {
 		 numtail:1,       /* Does first alias have a numeric '~1' type tail? */
 		 flush:1,	  /* write things quickly */
 		 nocase:1,	  /* Does this need case conversion? 0=need case conversion*/
-		 usefree:1;	  /* Use free_clusters for FAT32 */
+		 usefree:1,	  /* Use free_clusters for FAT32 */
+		 tz_utc:1;	  /* Filesystem timestamps are in UTC */
 };
 
 #define FAT_HASH_BITS	8
@@ -434,8 +435,9 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
 extern void fat_fs_panic(struct super_block *s, const char *fmt, ...);
 extern void fat_clusters_flush(struct super_block *sb);
 extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
-extern int date_dos2unix(unsigned short time, unsigned short date);
-extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date);
+extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc);
+extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date,
+			      int tz_utc);
 extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
 
 int fat_cache_init(void);
-- 
GitLab


From 41003cde95e7e976d3876dbdcdc83dd0a9059279 Mon Sep 17 00:00:00 2001
From: Joe Peterson <joe@skyrush.com>
Date: Fri, 25 Jul 2008 01:46:48 -0700
Subject: [PATCH 665/853] UTC timestamp option for FAT filesystems fix

Signed-off-by: Joe Peterson <joe@skyrush.com>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/vfat.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 2d5e1e582e1..bbac4f1d905 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -96,6 +96,14 @@ shortname=lower|win95|winnt|mixed
 			emulate the Windows 95 rule for create.
 		 Default setting is `lower'.
 
+tz=UTC        -- Interpret timestamps as UTC rather than local time.
+                 This option disables the conversion of timestamps
+                 between local time (as used by Windows on FAT) and UTC
+                 (which Linux uses internally).  This is particuluarly
+                 useful when mounting devices (like digital cameras)
+                 that are set to UTC in order to avoid the pitfalls of
+                 local time.
+
 <bool>: 0,1,yes,no,true,false
 
 TODO
-- 
GitLab


From b48d380541f634663b71766005838edbb7261685 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:49 -0700
Subject: [PATCH 666/853] quota: fix possible infinite loop in quota code

When a quota structure is going to be dropped and it is dirty, the quota code
tries to write it.  If the write fails for some reason (e.g. a transaction
cannot be started because the journal is aborted), we try writing again and
again and again...  Fix the problem by clearing the dirty bit even if the
write failed.

(akpm: for 2.6.27, 2.6.26.x and 2.6.25.x)

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: dingdinghua <dingdinghua85@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dquot.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/dquot.c b/fs/dquot.c
index 5ac77da1995..ad88cf6fcba 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = {
  */
 static void dqput(struct dquot *dquot)
 {
+	int ret;
+
 	if (!dquot)
 		return;
 #ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@ we_slept:
 	if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
 		spin_unlock(&dq_list_lock);
 		/* Commit dquot before releasing */
-		dquot->dq_sb->dq_op->write_dquot(dquot);
+		ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: cannot write quota structure on "
+				"device %s (error %d). Quota may get out of "
+				"sync!\n", dquot->dq_sb->s_id, ret);
+			/*
+			 * We clear dirty bit anyway, so that we avoid
+			 * infinite loop here
+			 */
+			spin_lock(&dq_list_lock);
+			clear_dquot_dirty(dquot);
+			spin_unlock(&dq_list_lock);
+		}
 		goto we_slept;
 	}
 	/* Clear flag in case dquot was inactive (something bad happened) */
-- 
GitLab


From b85f4b87a511bea86dac68c4f0fabaee2cac6c4c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:50 -0700
Subject: [PATCH 667/853] quota: rename quota functions from upper case, make
 bigger ones non-inline

Cleanup quotaops.h: Rename functions from uppercase to lowercase (and
define backward compatibility macros), move larger functions to dquot.c
and make them non-inline.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dquot.c               |  53 +++++++++
 include/linux/quotaops.h | 226 ++++++++++++++++++---------------------
 2 files changed, 160 insertions(+), 119 deletions(-)

diff --git a/fs/dquot.c b/fs/dquot.c
index ad88cf6fcba..0bcaf970bbb 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1153,6 +1153,28 @@ int dquot_drop(struct inode *inode)
 	return 0;
 }
 
+/* Wrapper to remove references to quota structures from inode */
+void vfs_dq_drop(struct inode *inode)
+{
+	/* Here we can get arbitrary inode from clear_inode() so we have
+	 * to be careful. OTOH we don't need locking as quota operations
+	 * are allowed to change only at mount time */
+	if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
+	    && inode->i_sb->dq_op->drop) {
+		int cnt;
+		/* Test before calling to rule out calls from proc and such
+                 * where we are not allowed to block. Note that this is
+		 * actually reliable test even without the lock - the caller
+		 * must assure that nobody can come after the DQUOT_DROP and
+		 * add quota pointers back anyway */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+			if (inode->i_dquot[cnt] != NODQUOT)
+				break;
+		if (cnt < MAXQUOTAS)
+			inode->i_sb->dq_op->drop(inode);
+	}
+}
+
 /*
  * Following four functions update i_blocks+i_bytes fields and
  * quota information (together with appropriate checks)
@@ -1426,6 +1448,18 @@ warn_put_all:
 	return ret;
 }
 
+/* Wrapper for transferring ownership of an inode */
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+{
+	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+		vfs_dq_init(inode);
+		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+			return 1;
+	}
+	return 0;
+}
+
+
 /*
  * Write info of quota file to disk
  */
@@ -1766,6 +1800,22 @@ out:
 	return error;
 }
 
+/* Wrapper to turn on quotas when remounting rw */
+int vfs_dq_quota_on_remount(struct super_block *sb)
+{
+	int cnt;
+	int ret = 0, err;
+
+	if (!sb->s_qcop || !sb->s_qcop->quota_on)
+		return -ENOSYS;
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
+		if (err < 0 && !ret)
+			ret = err;
+	}
+	return ret;
+}
+
 /* Generic routine for getting common part of quota structure */
 static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
 {
@@ -2101,8 +2151,11 @@ EXPORT_SYMBOL(dquot_release);
 EXPORT_SYMBOL(dquot_mark_dquot_dirty);
 EXPORT_SYMBOL(dquot_initialize);
 EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(vfs_dq_drop);
 EXPORT_SYMBOL(dquot_alloc_space);
 EXPORT_SYMBOL(dquot_alloc_inode);
 EXPORT_SYMBOL(dquot_free_space);
 EXPORT_SYMBOL(dquot_free_inode);
 EXPORT_SYMBOL(dquot_transfer);
+EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(vfs_dq_quota_on_remount);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index f8670205385..0c8f9fe462a 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -19,34 +19,38 @@
 /*
  * declaration of quota_function calls in kernel.
  */
-extern void sync_dquots(struct super_block *sb, int type);
-
-extern int dquot_initialize(struct inode *inode, int type);
-extern int dquot_drop(struct inode *inode);
-
-extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-extern int dquot_alloc_inode(const struct inode *inode, unsigned long number);
-
-extern int dquot_free_space(struct inode *inode, qsize_t number);
-extern int dquot_free_inode(const struct inode *inode, unsigned long number);
-
-extern int dquot_transfer(struct inode *inode, struct iattr *iattr);
-extern int dquot_commit(struct dquot *dquot);
-extern int dquot_acquire(struct dquot *dquot);
-extern int dquot_release(struct dquot *dquot);
-extern int dquot_commit_info(struct super_block *sb, int type);
-extern int dquot_mark_dquot_dirty(struct dquot *dquot);
-
-extern int vfs_quota_on(struct super_block *sb, int type, int format_id,
-		char *path, int remount);
-extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
-		int format_id, int type);
-extern int vfs_quota_off(struct super_block *sb, int type, int remount);
-extern int vfs_quota_sync(struct super_block *sb, int type);
-extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
-extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+void sync_dquots(struct super_block *sb, int type);
+
+int dquot_initialize(struct inode *inode, int type);
+int dquot_drop(struct inode *inode);
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
+int dquot_alloc_inode(const struct inode *inode, unsigned long number);
+
+int dquot_free_space(struct inode *inode, qsize_t number);
+int dquot_free_inode(const struct inode *inode, unsigned long number);
+
+int dquot_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_commit(struct dquot *dquot);
+int dquot_acquire(struct dquot *dquot);
+int dquot_release(struct dquot *dquot);
+int dquot_commit_info(struct super_block *sb, int type);
+int dquot_mark_dquot_dirty(struct dquot *dquot);
+
+int vfs_quota_on(struct super_block *sb, int type, int format_id,
+ 	char *path, int remount);
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+ 	int format_id, int type);
+int vfs_quota_off(struct super_block *sb, int type, int remount);
+int vfs_quota_sync(struct super_block *sb, int type);
+int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+
+void vfs_dq_drop(struct inode *inode);
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
+int vfs_dq_quota_on_remount(struct super_block *sb);
 
 /*
  * Operations supported for diskquotas.
@@ -59,38 +63,16 @@ extern struct quotactl_ops vfs_quotactl_ops;
 
 /* It is better to call this function outside of any transaction as it might
  * need a lot of space in journal for dquot structure allocation. */
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
 {
 	BUG_ON(!inode->i_sb);
 	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
 		inode->i_sb->dq_op->initialize(inode, -1);
 }
 
-/* The same as with DQUOT_INIT */
-static inline void DQUOT_DROP(struct inode *inode)
-{
-	/* Here we can get arbitrary inode from clear_inode() so we have
-	 * to be careful. OTOH we don't need locking as quota operations
-	 * are allowed to change only at mount time */
-	if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
-	    && inode->i_sb->dq_op->drop) {
-		int cnt;
-		/* Test before calling to rule out calls from proc and such
-                 * where we are not allowed to block. Note that this is
-		 * actually reliable test even without the lock - the caller
-		 * must assure that nobody can come after the DQUOT_DROP and
-		 * add quota pointers back anyway */
-		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-			if (inode->i_dquot[cnt] != NODQUOT)
-				break;
-		if (cnt < MAXQUOTAS)
-			inode->i_sb->dq_op->drop(inode);
-	}
-}
-
 /* The following allocation/freeing/transfer functions *must* be called inside
  * a transaction (deadlocks possible otherwise) */
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	if (sb_any_quota_enabled(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
@@ -102,15 +84,15 @@ static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 	return 0;
 }
 
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
 {
 	int ret;
-        if (!(ret =  DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr)))
+        if (!(ret =  vfs_dq_prealloc_space_nodirty(inode, nr)))
 		mark_inode_dirty(inode);
 	return ret;
 }
 
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	if (sb_any_quota_enabled(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
@@ -122,25 +104,25 @@ static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 	return 0;
 }
 
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
 {
 	int ret;
-	if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr)))
+	if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr)))
 		mark_inode_dirty(inode);
 	return ret;
 }
 
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
 {
 	if (sb_any_quota_enabled(inode->i_sb)) {
-		DQUOT_INIT(inode);
+		vfs_dq_init(inode);
 		if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
 			return 1;
 	}
 	return 0;
 }
 
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	if (sb_any_quota_enabled(inode->i_sb))
 		inode->i_sb->dq_op->free_space(inode, nr);
@@ -148,35 +130,25 @@ static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 		inode_sub_bytes(inode, nr);
 }
 
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
 {
-	DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+	vfs_dq_free_space_nodirty(inode, nr);
 	mark_inode_dirty(inode);
 }
 
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
 {
 	if (sb_any_quota_enabled(inode->i_sb))
 		inode->i_sb->dq_op->free_inode(inode, 1);
 }
 
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
-{
-	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
-		DQUOT_INIT(inode);
-		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
-			return 1;
-	}
-	return 0;
-}
-
 /* The following two functions cannot be called inside a transaction */
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
 {
 	sync_dquots(sb, -1);
 }
 
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	int ret = -ENOSYS;
 
@@ -185,21 +157,6 @@ static inline int DQUOT_OFF(struct super_block *sb, int remount)
 	return ret;
 }
 
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
-{
-	int cnt;
-	int ret = 0, err;
-
-	if (!sb->s_qcop || !sb->s_qcop->quota_on)
-		return -ENOSYS;
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
-		if (err < 0 && !ret)
-			ret = err;
-	}
-	return ret;
-}
-
 #else
 
 /*
@@ -208,113 +165,144 @@ static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
 #define sb_dquot_ops				(NULL)
 #define sb_quotactl_ops				(NULL)
 
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
 {
 }
 
-static inline void DQUOT_DROP(struct inode *inode)
+static inline void vfs_dq_drop(struct inode *inode)
 {
 }
 
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
 {
 	return 0;
 }
 
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
 {
 }
 
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
 {
 }
 
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	return 0;
 }
 
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
+static inline int vfs_dq_quota_on_remount(struct super_block *sb)
 {
 	return 0;
 }
 
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
+static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
 {
 	return 0;
 }
 
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	inode_add_bytes(inode, nr);
 	return 0;
 }
 
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
 {
-	DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr);
+	vfs_dq_prealloc_space_nodirty(inode, nr);
 	mark_inode_dirty(inode);
 	return 0;
 }
 
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	inode_add_bytes(inode, nr);
 	return 0;
 }
 
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
 {
-	DQUOT_ALLOC_SPACE_NODIRTY(inode, nr);
+	vfs_dq_alloc_space_nodirty(inode, nr);
 	mark_inode_dirty(inode);
 	return 0;
 }
 
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	inode_sub_bytes(inode, nr);
 }
 
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
 {
-	DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+	vfs_dq_free_space_nodirty(inode, nr);
 	mark_inode_dirty(inode);
 }	
 
 #endif /* CONFIG_QUOTA */
 
-static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return DQUOT_PREALLOC_SPACE_NODIRTY(inode,
+	return vfs_dq_prealloc_space_nodirty(inode,
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
 {
-	return DQUOT_PREALLOC_SPACE(inode,
+	return vfs_dq_prealloc_space(inode,
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return DQUOT_ALLOC_SPACE_NODIRTY(inode,
+ 	return vfs_dq_alloc_space_nodirty(inode,
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
 {
-	return DQUOT_ALLOC_SPACE(inode,
+	return vfs_dq_alloc_space(inode,
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits);
+	vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
 {
-	DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits);
+	vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits);
 }
 
+/*
+ * Define uppercase equivalents for compatibility with old function names
+ * Can go away when we think all users have been converted (15/04/2008)
+ */
+#define DQUOT_INIT(inode) vfs_dq_init(inode)
+#define DQUOT_DROP(inode) vfs_dq_drop(inode)
+#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \
+				vfs_dq_prealloc_space_nodirty(inode, nr)
+#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr)
+#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \
+				vfs_dq_alloc_space_nodirty(inode, nr)
+#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr)
+#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \
+				vfs_dq_prealloc_block_nodirty(inode, nr)
+#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr)
+#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \
+				vfs_dq_alloc_block_nodirty(inode, nr)
+#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr)
+#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode)
+#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \
+				vfs_dq_free_space_nodirty(inode, nr)
+#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr)
+#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \
+				vfs_dq_free_block_nodirty(inode, nr)
+#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr)
+#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode)
+#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr)
+#define DQUOT_SYNC(sb) vfs_dq_sync(sb)
+#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount)
+#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb)
+
 #endif /* _LINUX_QUOTAOPS_ */
-- 
GitLab


From 02a55ca87185e114e5d298a8d00608501dbabf67 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:50 -0700
Subject: [PATCH 668/853] quota: cleanup loop in sync_dquots()

Make loop in sync_dquots() checking whether there's something to write
more readable, remove useless variable and macro info_any_dirty() which
is used only in this place.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: "Vegard Nossum" <vegard.nossum@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/quota.c            | 18 ++++++++++++------
 include/linux/quota.h |  2 --
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f3c7a..7f4386ebc23 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
 
 void sync_dquots(struct super_block *sb, int type)
 {
-	int cnt, dirty;
+	int cnt;
 
 	if (sb) {
 		if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
 		/* This test just improves performance so it needn't be reliable... */
-		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
-			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
-			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
-				dirty = 1;
-		if (!dirty)
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (type != -1 && type != cnt)
+				continue;
+			if (!sb_has_quota_enabled(sb, cnt))
+				continue;
+			if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
+			    list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
+				continue;
+			break;
+		}
+		if (cnt == MAXQUOTAS)
 			continue;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index dcddfb20094..6f1d97ddf82 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -224,8 +224,6 @@ struct super_block;
 
 extern void mark_info_dirty(struct super_block *sb, int type);
 #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
-#define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list))
-#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
 
 #define sb_dqopt(sb) (&(sb)->s_dquot)
 #define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
-- 
GitLab


From 74abb9890dafb12a50dc140de215ed477beb1b88 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:51 -0700
Subject: [PATCH 669/853] quota: move function-macros from quota.h to
 quotaops.h

Move declarations of some macros, which should in fact be functions, to
quotaops.h.  This way they can later be converted to inline functions
because we can now use declarations from quota.h.  Also add necessary
includes of quotaops.h to a few files.

[akpm@linux-foundation.org: fix JFS build]
[akpm@linux-foundation.org: fix UFS build]
[vegard.nossum@gmail.com: fix QUOTA=n build]
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Arjen Pool <arjenpool@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext2/super.c          |  1 +
 fs/jfs/super.c           |  1 +
 fs/quota_v1.c            |  1 +
 fs/quota_v2.c            |  1 +
 fs/reiserfs/super.c      |  1 +
 fs/ufs/super.c           |  1 +
 include/linux/quota.h    | 22 +++-------------------
 include/linux/quotaops.h | 26 ++++++++++++++++++++++++++
 8 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc792d..31308a3b0b8 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/log2.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include "ext2.h"
 #include "xattr.h"
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d7936..359c091d896 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
 #include <linux/parser.h>
 #include <linux/completion.h>
 #include <linux/vfs.h>
+#include <linux/quotaops.h>
 #include <linux/mount.h>
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf9269105..5ae15b13eeb 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/quota.h>
+#include <linux/quotaops.h>
 #include <linux/dqblk_v1.h>
 #include <linux/quotaio_v1.h>
 #include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada90363..b53827dc02d 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/quotaops.h>
 
 #include <asm/byteorder.h>
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a10a6d2a887..2ec748ba0bd 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,6 +22,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/exportfs.h>
+#include <linux/quotaops.h>
 #include <linux/vfs.h>
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 506f724055c..227c9d70004 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
 
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/stat.h>
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 6f1d97ddf82..f9983ea0ff8 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -41,9 +41,6 @@
 #define __DQUOT_VERSION__	"dquot_6.5.1"
 #define __DQUOT_NUM_VERSION__	6*10000+5*100+1
 
-typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
-typedef __u64 qsize_t;          /* Type in which we store sizes */
-
 /* Size of blocks in which are counted size limits */
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -172,6 +169,9 @@ enum {
 
 #include <asm/atomic.h>
 
+typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
+typedef __u64 qsize_t;          /* Type in which we store sizes */
+
 extern spinlock_t dq_data_lock;
 
 /* Maximal numbers of writes for quota operation (insert/delete/update)
@@ -225,9 +225,6 @@ struct super_block;
 extern void mark_info_dirty(struct super_block *sb, int type);
 #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
 
-#define sb_dqopt(sb) (&(sb)->s_dquot)
-#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
-
 struct dqstats {
 	int lookups;
 	int drops;
@@ -335,19 +332,6 @@ struct quota_info {
 	struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
 };
 
-#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
-	(sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
-
-#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
-				  sb_has_quota_enabled(sb, GRPQUOTA))
-
-#define sb_has_quota_suspended(sb, type) \
-	((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
-			      (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
-
-#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
-				  sb_has_quota_suspended(sb, GRPQUOTA))
-
 int register_quota_format(struct quota_format_type *fmt);
 void unregister_quota_format(struct quota_format_type *fmt);
 
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 0c8f9fe462a..38218c1334b 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -14,6 +14,8 @@
 
 #include <linux/fs.h>
 
+#define sb_dqopt(sb) (&(sb)->s_dquot)
+
 #if defined(CONFIG_QUOTA)
 
 /*
@@ -52,6 +54,25 @@ void vfs_dq_drop(struct inode *inode);
 int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
 
+#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
+
+/*
+ * Functions for checking status of quota
+ */
+
+#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
+	(sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
+
+#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
+				  sb_has_quota_enabled(sb, GRPQUOTA))
+
+#define sb_has_quota_suspended(sb, type) \
+	((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
+			      (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
+
+#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
+				  sb_has_quota_suspended(sb, GRPQUOTA))
+
 /*
  * Operations supported for diskquotas.
  */
@@ -159,6 +180,11 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
 
 #else
 
+#define sb_has_quota_enabled(sb, type) 0
+#define sb_any_quota_enabled(sb) 0
+#define sb_has_quota_suspended(sb, type) 0
+#define sb_any_quota_suspended(sb) 0
+
 /*
  * NO-OP when quota not configured.
  */
-- 
GitLab


From 03b063436ca1076301de58d9d628f610ab5404ad Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:52 -0700
Subject: [PATCH 670/853] quota: convert macros to inline functions

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/quota.h    |  5 +++-
 include/linux/quotaops.h | 65 ++++++++++++++++++++++++++++++----------
 2 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/include/linux/quota.h b/include/linux/quota.h
index f9983ea0ff8..4e004fef813 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -223,7 +223,10 @@ struct super_block;
 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B)	/* Is info dirty? */
 
 extern void mark_info_dirty(struct super_block *sb, int type);
-#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
+static inline int info_dirty(struct mem_dqinfo *info)
+{
+	return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
+}
 
 struct dqstats {
 	int lookups;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 38218c1334b..742187f7a05 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -11,10 +11,12 @@
 #define _LINUX_QUOTAOPS_
 
 #include <linux/smp_lock.h>
-
 #include <linux/fs.h>
 
-#define sb_dqopt(sb) (&(sb)->s_dquot)
+static inline struct quota_info *sb_dqopt(struct super_block *sb)
+{
+	return &sb->s_dquot;
+}
 
 #if defined(CONFIG_QUOTA)
 
@@ -54,24 +56,40 @@ void vfs_dq_drop(struct inode *inode);
 int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
 
-#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
+static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
+{
+	return sb_dqopt(sb)->info + type;
+}
 
 /*
  * Functions for checking status of quota
  */
 
-#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
-	(sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+{
+	if (type == USRQUOTA)
+		return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
+	return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
+}
 
-#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
-				  sb_has_quota_enabled(sb, GRPQUOTA))
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+	return sb_has_quota_enabled(sb, USRQUOTA) ||
+		sb_has_quota_enabled(sb, GRPQUOTA);
+}
 
-#define sb_has_quota_suspended(sb, type) \
-	((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
-			      (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+	if (type == USRQUOTA)
+		return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
+	return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
+}
 
-#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
-				  sb_has_quota_suspended(sb, GRPQUOTA))
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+	return sb_has_quota_suspended(sb, USRQUOTA) ||
+		sb_has_quota_suspended(sb, GRPQUOTA);
+}
 
 /*
  * Operations supported for diskquotas.
@@ -180,10 +198,25 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
 
 #else
 
-#define sb_has_quota_enabled(sb, type) 0
-#define sb_any_quota_enabled(sb) 0
-#define sb_has_quota_suspended(sb, type) 0
-#define sb_any_quota_suspended(sb) 0
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+	return 0;
+}
 
 /*
  * NO-OP when quota not configured.
-- 
GitLab


From 657d3bfa98e542271b449f8cd84c7501ae2b2255 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:52 -0700
Subject: [PATCH 671/853] quota: implement sending information via netlink
 about user below quota

Sometimes it may be useful for userspace to know (e.g.  for some hosting
guys) that some user stopped exceeding his hardlimit or softlimit in
quotas.  Implement sending of such events to userspace via quota netlink
protocol so that they don't have to poll for such events.  Based on idea
and initial implementation by Vladislav Bogdanov.

Cc: Vladislav Bogdanov <slava@nsys.by>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dquot.c            | 60 ++++++++++++++++++++++++++++++++++++++-----
 include/linux/quota.h |  4 +++
 2 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/fs/dquot.c b/fs/dquot.c
index 0bcaf970bbb..1346eebe74c 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -889,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
 	char *msg = NULL;
 	struct tty_struct *tty;
 
-	if (!need_print_warning(dquot))
+	if (warntype == QUOTA_NL_IHARDBELOW ||
+	    warntype == QUOTA_NL_ISOFTBELOW ||
+	    warntype == QUOTA_NL_BHARDBELOW ||
+	    warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
 		return;
 
 	mutex_lock(&tty_mutex);
@@ -1097,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	return QUOTA_OK;
 }
 
+static int info_idq_free(struct dquot *dquot, ulong inodes)
+{
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+		return QUOTA_NL_NOWARN;
+
+	if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
+		return QUOTA_NL_ISOFTBELOW;
+	if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
+	    dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
+		return QUOTA_NL_IHARDBELOW;
+	return QUOTA_NL_NOWARN;
+}
+
+static int info_bdq_free(struct dquot *dquot, qsize_t space)
+{
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+	    toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+		return QUOTA_NL_NOWARN;
+
+	if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
+	    dquot->dq_dqb.dqb_bsoftlimit)
+		return QUOTA_NL_BSOFTBELOW;
+	if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
+	    toqb(dquot->dq_dqb.dqb_curspace - space) <
+						dquot->dq_dqb.dqb_bhardlimit)
+		return QUOTA_NL_BHARDBELOW;
+	return QUOTA_NL_NOWARN;
+}
 /*
  *	Initialize quota pointers in inode
  *	Transaction must be started at entry
@@ -1284,6 +1316,7 @@ warn_put_all:
 int dquot_free_space(struct inode *inode, qsize_t number)
 {
 	unsigned int cnt;
+	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1292,6 +1325,7 @@ out_sub:
 		inode_sub_bytes(inode, number);
 		return QUOTA_OK;
 	}
+
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	/* Now recheck reliably when holding dqptr_sem */
 	if (IS_NOQUOTA(inode)) {
@@ -1302,6 +1336,7 @@ out_sub:
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
+		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
 		dquot_decr_space(inode->i_dquot[cnt], number);
 	}
 	inode_sub_bytes(inode, number);
@@ -1310,6 +1345,7 @@ out_sub:
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (inode->i_dquot[cnt])
 			mark_dquot_dirty(inode->i_dquot[cnt]);
+	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
@@ -1320,11 +1356,13 @@ out_sub:
 int dquot_free_inode(const struct inode *inode, unsigned long number)
 {
 	unsigned int cnt;
+	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return QUOTA_OK;
+
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	/* Now recheck reliably when holding dqptr_sem */
 	if (IS_NOQUOTA(inode)) {
@@ -1335,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
+		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
 		dquot_decr_inodes(inode->i_dquot[cnt], number);
 	}
 	spin_unlock(&dq_data_lock);
@@ -1342,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (inode->i_dquot[cnt])
 			mark_dquot_dirty(inode->i_dquot[cnt]);
+	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
@@ -1359,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	struct dquot *transfer_to[MAXQUOTAS];
 	int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
 	    chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
-	char warntype[MAXQUOTAS];
+	char warntype_to[MAXQUOTAS];
+	char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1368,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	/* Clear the arrays */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
-		warntype[cnt] = QUOTA_NL_NOWARN;
+		warntype_to[cnt] = QUOTA_NL_NOWARN;
 	}
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	/* Now recheck reliably when holding dqptr_sem */
@@ -1400,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		if (transfer_to[cnt] == NODQUOT)
 			continue;
 		transfer_from[cnt] = inode->i_dquot[cnt];
-		if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA ||
-		    check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA)
+		if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
+		    NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
+		    warntype_to + cnt) == NO_QUOTA)
 			goto warn_put_all;
 	}
 
@@ -1417,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 
 		/* Due to IO error we might not have transfer_from[] structure */
 		if (transfer_from[cnt]) {
+			warntype_from_inodes[cnt] =
+				info_idq_free(transfer_from[cnt], 1);
+			warntype_from_space[cnt] =
+				info_bdq_free(transfer_from[cnt], space);
 			dquot_decr_inodes(transfer_from[cnt], 1);
 			dquot_decr_space(transfer_from[cnt], space);
 		}
@@ -1436,7 +1482,9 @@ warn_put_all:
 		if (transfer_to[cnt])
 			mark_dquot_dirty(transfer_to[cnt]);
 	}
-	flush_warnings(transfer_to, warntype);
+	flush_warnings(transfer_to, warntype_to);
+	flush_warnings(transfer_from, warntype_from_inodes);
+	flush_warnings(transfer_from, warntype_from_space);
 	
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 4e004fef813..376a05048bc 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -135,6 +135,10 @@ struct if_dqinfo {
 #define QUOTA_NL_BHARDWARN 4		/* Block hardlimit reached */
 #define QUOTA_NL_BSOFTLONGWARN 5	/* Block grace time expired */
 #define QUOTA_NL_BSOFTWARN 6		/* Block softlimit reached */
+#define QUOTA_NL_IHARDBELOW 7		/* Usage got below inode hardlimit */
+#define QUOTA_NL_ISOFTBELOW 8		/* Usage got below inode softlimit */
+#define QUOTA_NL_BHARDBELOW 9		/* Usage got below block hardlimit */
+#define QUOTA_NL_BSOFTBELOW 10		/* Usage got below block softlimit */
 
 enum {
 	QUOTA_NL_C_UNSPEC,
-- 
GitLab


From 9d96d82da437ed5f2053821779ed5d7797ed1f81 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 25 Jul 2008 01:46:53 -0700
Subject: [PATCH 672/853] procfs-guide: drop pointless &nbsp; entities

Having trailing &nbsp; entities in a revision number seems pretty pointless
to me.  More so, it's causing me pains, so just drop them since no other
guide is doing this.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DocBook/procfs-guide.tmpl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl
index 1fd6a1ec759..8a5dc6e021f 100644
--- a/Documentation/DocBook/procfs-guide.tmpl
+++ b/Documentation/DocBook/procfs-guide.tmpl
@@ -29,12 +29,12 @@
 
     <revhistory>
       <revision>
-	<revnumber>1.0&nbsp;</revnumber>
+	<revnumber>1.0</revnumber>
 	<date>May 30, 2001</date>
 	<revremark>Initial revision posted to linux-kernel</revremark>
       </revision>
       <revision>
-	<revnumber>1.1&nbsp;</revnumber>
+	<revnumber>1.1</revnumber>
 	<date>June 3, 2001</date>
 	<revremark>Revised after comments from linux-kernel</revremark>
       </revision>
-- 
GitLab


From 7e9abd89cbdf9b73d327d8173343abce9022609b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:54 -0700
Subject: [PATCH 673/853] cgroup: use read lock to guard
 find_existing_css_set()

The function does not modify anything (except the temporary css template), so
it's sufficient to hold read lock.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 15ac0e1e4f4..f50edadfdd8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -415,11 +415,11 @@ static struct css_set *find_css_set(
 
 	/* First see if we already have a cgroup group that matches
 	 * the desired set */
-	write_lock(&css_set_lock);
+	read_lock(&css_set_lock);
 	res = find_existing_css_set(oldcg, cgrp, template);
 	if (res)
 		get_css_set(res);
-	write_unlock(&css_set_lock);
+	read_unlock(&css_set_lock);
 
 	if (res)
 		return res;
-- 
GitLab


From f2992db2a4f7ae10f61d5bc68c7c1528cec639e2 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:46:55 -0700
Subject: [PATCH 674/853] Mark res_counter_charge(_locked) with __must_check

Ignoring their return values may result in counter underflow in the future -
when the value charged will be uncharged (or in "leaks" - when the value is
not uncharged).

This also prevents the charging routines from being used to decrement the
counter value (i.e. uncharge it) ;)

(Current code works OK with res_counter, however :) )

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6d9e1fca098..125660e7793 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -95,8 +95,10 @@ void res_counter_init(struct res_counter *counter);
  * counter->limit _locked call expects the counter->lock to be taken
  */
 
-int res_counter_charge_locked(struct res_counter *counter, unsigned long val);
-int res_counter_charge(struct res_counter *counter, unsigned long val);
+int __must_check res_counter_charge_locked(struct res_counter *counter,
+		unsigned long val);
+int __must_check res_counter_charge(struct res_counter *counter,
+		unsigned long val);
 
 /*
  * uncharge - tell that some portion of the resource is released
-- 
GitLab


From 71cbb949d17d4d776abd547135feb7f3282405c8 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:55 -0700
Subject: [PATCH 675/853] cgroup: list_for_each cleanup

--------------------------
while() {
	list_entry();
	...
}
--------------------------

is equivalent to the following code.

--------------------------
list_for_each_entry(){
	...
}
--------------------------

The latter is easier to review.

This patch is just a cleanup.
It doesn't change any behavior.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 44 +++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f50edadfdd8..6836a906363 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,17 +241,20 @@ static int use_task_css_set_links;
  */
 static void unlink_css_set(struct css_set *cg)
 {
+	struct cg_cgroup_link *link;
+	struct cg_cgroup_link *saved_link;
+
 	write_lock(&css_set_lock);
 	hlist_del(&cg->hlist);
 	css_set_count--;
-	while (!list_empty(&cg->cg_links)) {
-		struct cg_cgroup_link *link;
-		link = list_entry(cg->cg_links.next,
-				  struct cg_cgroup_link, cg_link_list);
+
+	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+				 cg_link_list) {
 		list_del(&link->cg_link_list);
 		list_del(&link->cgrp_link_list);
 		kfree(link);
 	}
+
 	write_unlock(&css_set_lock);
 }
 
@@ -363,15 +366,14 @@ static struct css_set *find_existing_css_set(
 static int allocate_cg_links(int count, struct list_head *tmp)
 {
 	struct cg_cgroup_link *link;
+	struct cg_cgroup_link *saved_link;
 	int i;
 	INIT_LIST_HEAD(tmp);
 	for (i = 0; i < count; i++) {
 		link = kmalloc(sizeof(*link), GFP_KERNEL);
 		if (!link) {
-			while (!list_empty(tmp)) {
-				link = list_entry(tmp->next,
-						  struct cg_cgroup_link,
-						  cgrp_link_list);
+			list_for_each_entry_safe(link, saved_link, tmp,
+						 cgrp_link_list) {
 				list_del(&link->cgrp_link_list);
 				kfree(link);
 			}
@@ -384,11 +386,10 @@ static int allocate_cg_links(int count, struct list_head *tmp)
 
 static void free_cg_links(struct list_head *tmp)
 {
-	while (!list_empty(tmp)) {
-		struct cg_cgroup_link *link;
-		link = list_entry(tmp->next,
-				  struct cg_cgroup_link,
-				  cgrp_link_list);
+	struct cg_cgroup_link *link;
+	struct cg_cgroup_link *saved_link;
+
+	list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
 		list_del(&link->cgrp_link_list);
 		kfree(link);
 	}
@@ -1093,6 +1094,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
 	struct cgroupfs_root *root = sb->s_fs_info;
 	struct cgroup *cgrp = &root->top_cgroup;
 	int ret;
+	struct cg_cgroup_link *link;
+	struct cg_cgroup_link *saved_link;
 
 	BUG_ON(!root);
 
@@ -1112,10 +1115,9 @@ static void cgroup_kill_sb(struct super_block *sb) {
 	 * root cgroup
 	 */
 	write_lock(&css_set_lock);
-	while (!list_empty(&cgrp->css_sets)) {
-		struct cg_cgroup_link *link;
-		link = list_entry(cgrp->css_sets.next,
-				  struct cg_cgroup_link, cgrp_link_list);
+
+	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
+				 cgrp_link_list) {
 		list_del(&link->cg_link_list);
 		list_del(&link->cgrp_link_list);
 		kfree(link);
@@ -1756,15 +1758,11 @@ int cgroup_add_files(struct cgroup *cgrp,
 int cgroup_task_count(const struct cgroup *cgrp)
 {
 	int count = 0;
-	struct list_head *l;
+	struct cg_cgroup_link *link;
 
 	read_lock(&css_set_lock);
-	l = cgrp->css_sets.next;
-	while (l != &cgrp->css_sets) {
-		struct cg_cgroup_link *link =
-			list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
 		count += atomic_read(&link->cg->ref.refcount);
-		l = l->next;
 	}
 	read_unlock(&css_set_lock);
 	return count;
-- 
GitLab


From 8947f9d5b361ce927be6d5c11fed57905b7a4100 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:46:56 -0700
Subject: [PATCH 676/853] cgroups: annotate two variables with __read_mostly

- need_forkexit_callback will be read only after system boot.
- use_task_css_set_links will be read only after it's set.

And these 2 variables are checked when a new process is forked.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6836a906363..70d083c6fb6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -118,7 +118,7 @@ static int root_count;
  * extra work in the fork/exit path if none of the subsystems need to
  * be called.
  */
-static int need_forkexit_callback;
+static int need_forkexit_callback __read_mostly;
 static int need_mm_owner_callback __read_mostly;
 
 /* convenient tests for these bits */
@@ -220,7 +220,7 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
  * task until after the first call to cgroup_iter_start(). This
  * reduces the fork()/exit() overhead for people who have cgroups
  * compiled into their kernel but not actually in use */
-static int use_task_css_set_links;
+static int use_task_css_set_links __read_mostly;
 
 /* When we create or destroy a css_set, the operation simply
  * takes/releases a reference count on all the cgroups referenced
-- 
GitLab


From ce16b49d37e748574f7fabc2726268d542d0aa1a Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:46:57 -0700
Subject: [PATCH 677/853] cgroup files: clean up whitespace in struct cftype

This patch removes some extraneous spaces from method declarations in
struct cftype, to fit in with conventional kernel style.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e155aa78d85..88a734edccb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -205,48 +205,48 @@ struct cftype {
 	 * subsystem, followed by a period */
 	char name[MAX_CFTYPE_NAME];
 	int private;
-	int (*open) (struct inode *inode, struct file *file);
-	ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft,
-			 struct file *file,
-			 char __user *buf, size_t nbytes, loff_t *ppos);
+	int (*open)(struct inode *inode, struct file *file);
+	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+			struct file *file,
+			char __user *buf, size_t nbytes, loff_t *ppos);
 	/*
 	 * read_u64() is a shortcut for the common case of returning a
 	 * single integer. Use it in place of read()
 	 */
-	u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft);
+	u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
 	/*
 	 * read_s64() is a signed version of read_u64()
 	 */
-	s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft);
+	s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
 	/*
 	 * read_map() is used for defining a map of key/value
 	 * pairs. It should call cb->fill(cb, key, value) for each
 	 * entry. The key/value pairs (and their ordering) should not
 	 * change between reboots.
 	 */
-	int (*read_map) (struct cgroup *cont, struct cftype *cft,
-			 struct cgroup_map_cb *cb);
+	int (*read_map)(struct cgroup *cont, struct cftype *cft,
+			struct cgroup_map_cb *cb);
 	/*
 	 * read_seq_string() is used for outputting a simple sequence
 	 * using seqfile.
 	 */
-	int (*read_seq_string) (struct cgroup *cont, struct cftype *cft,
-			 struct seq_file *m);
+	int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
+			       struct seq_file *m);
 
-	ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft,
-			  struct file *file,
-			  const char __user *buf, size_t nbytes, loff_t *ppos);
+	ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+			 struct file *file,
+			 const char __user *buf, size_t nbytes, loff_t *ppos);
 
 	/*
 	 * write_u64() is a shortcut for the common case of accepting
 	 * a single integer (as parsed by simple_strtoull) from
 	 * userspace. Use in place of write(); return 0 or error.
 	 */
-	int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val);
+	int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
 	/*
 	 * write_s64() is a signed version of write_u64()
 	 */
-	int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val);
+	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
 
 	/*
 	 * trigger() callback can be used to get some kick from the
@@ -256,7 +256,7 @@ struct cftype {
 	 */
 	int (*trigger)(struct cgroup *cgrp, unsigned int event);
 
-	int (*release) (struct inode *inode, struct file *file);
+	int (*release)(struct inode *inode, struct file *file);
 };
 
 struct cgroup_scanner {
-- 
GitLab


From db3b14978abc02041046ed8353f0899cb58ffffc Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:46:58 -0700
Subject: [PATCH 678/853] cgroup files: add write_string cgroup control file
 method

This patch adds a write_string() method for cgroups control files. The
semantics are that a buffer is copied from userspace to kernelspace
and the handler function invoked on that buffer.  The buffer is
guaranteed to be nul-terminated, and no longer than max_write_len
(defaulting to 64 bytes if unspecified). Later patches will convert
existing raw file write handlers in control group subsystems to use
this method.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Balbir Singh <balbir@in.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 14 ++++++++++++++
 kernel/cgroup.c        | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 88a734edccb..f5379455bb5 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -205,6 +205,13 @@ struct cftype {
 	 * subsystem, followed by a period */
 	char name[MAX_CFTYPE_NAME];
 	int private;
+
+	/*
+	 * If non-zero, defines the maximum length of string that can
+	 * be passed to write_string; defaults to 64
+	 */
+	size_t max_write_len;
+
 	int (*open)(struct inode *inode, struct file *file);
 	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
 			struct file *file,
@@ -248,6 +255,13 @@ struct cftype {
 	 */
 	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
 
+	/*
+	 * write_string() is passed a nul-terminated kernelspace
+	 * buffer of maximum length determined by max_write_len.
+	 * Returns 0 or -ve error code.
+	 */
+	int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+			    const char *buffer);
 	/*
 	 * trigger() callback can be used to get some kick from the
 	 * userspace, when the actual string written is not important
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 70d083c6fb6..3a99cc2df86 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1363,6 +1363,39 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
 	return retval;
 }
 
+static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
+				   struct file *file,
+				   const char __user *userbuf,
+				   size_t nbytes, loff_t *unused_ppos)
+{
+	char local_buffer[64];
+	int retval = 0;
+	size_t max_bytes = cft->max_write_len;
+	char *buffer = local_buffer;
+
+	if (!max_bytes)
+		max_bytes = sizeof(local_buffer) - 1;
+	if (nbytes >= max_bytes)
+		return -E2BIG;
+	/* Allocate a dynamic buffer if we need one */
+	if (nbytes >= sizeof(local_buffer)) {
+		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+		if (buffer == NULL)
+			return -ENOMEM;
+	}
+	if (nbytes && copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;     /* nul-terminate */
+	strstrip(buffer);
+	retval = cft->write_string(cgrp, cft, buffer);
+	if (!retval)
+		retval = nbytes;
+	if (buffer != local_buffer)
+		kfree(buffer);
+	return retval;
+}
+
 static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
 					   struct cftype *cft,
 					   struct file *file,
@@ -1440,6 +1473,8 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
 		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
 	if (cft->write_u64 || cft->write_s64)
 		return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+	if (cft->write_string)
+		return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
 	if (cft->trigger) {
 		int ret = cft->trigger(cgrp, (unsigned int)cft->private);
 		return ret ? ret : nbytes;
-- 
GitLab


From e788e066c651b1bbf4a927dc95395c1aa13be436 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:46:59 -0700
Subject: [PATCH 679/853] cgroup files: move the release_agent file to use
 typed handlers

Adds cgroup_release_agent_write() and cgroup_release_agent_show()
methods to handle writing/reading the path to a cgroup hierarchy's
release agent. As a result, cgroup_common_file_read() is now unnecessary.

As part of the change, a previously-tolerated race in
cgroup_release_agent() is avoided by copying the current
release_agent_path prior to calling call_usermodehelper().

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |   2 +
 kernel/cgroup.c        | 125 +++++++++++++++++++----------------------
 2 files changed, 59 insertions(+), 68 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f5379455bb5..e78377a91a7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -295,6 +295,8 @@ int cgroup_add_files(struct cgroup *cgrp,
 
 int cgroup_is_removed(const struct cgroup *cgrp);
 
+int cgroup_lock_live_group(struct cgroup *cgrp);
+
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3a99cc2df86..0120b5d67a7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -89,11 +89,7 @@ struct cgroupfs_root {
 	/* Hierarchy-specific flags */
 	unsigned long flags;
 
-	/* The path to use for release notifications. No locking
-	 * between setting and use - so if userspace updates this
-	 * while child cgroups exist, you could miss a
-	 * notification. We ensure that it's always a valid
-	 * NUL-terminated string */
+	/* The path to use for release notifications. */
 	char release_agent_path[PATH_MAX];
 };
 
@@ -1329,6 +1325,45 @@ enum cgroup_filetype {
 	FILE_RELEASE_AGENT,
 };
 
+/**
+ * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
+ * @cgrp: the cgroup to be checked for liveness
+ *
+ * Returns true (with lock held) on success, or false (with no lock
+ * held) on failure.
+ */
+int cgroup_lock_live_group(struct cgroup *cgrp)
+{
+	mutex_lock(&cgroup_mutex);
+	if (cgroup_is_removed(cgrp)) {
+		mutex_unlock(&cgroup_mutex);
+		return false;
+	}
+	return true;
+}
+
+static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
+				      const char *buffer)
+{
+	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+	strcpy(cgrp->root->release_agent_path, buffer);
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
+static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
+				     struct seq_file *seq)
+{
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+	seq_puts(seq, cgrp->root->release_agent_path);
+	seq_putc(seq, '\n');
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
 static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
 				struct file *file,
 				const char __user *userbuf,
@@ -1443,10 +1478,6 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
 		else
 			clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 		break;
-	case FILE_RELEASE_AGENT:
-		BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
-		strcpy(cgrp->root->release_agent_path, buffer);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out2;
@@ -1506,49 +1537,6 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
 	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
-static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
-					  struct cftype *cft,
-					  struct file *file,
-					  char __user *buf,
-					  size_t nbytes, loff_t *ppos)
-{
-	enum cgroup_filetype type = cft->private;
-	char *page;
-	ssize_t retval = 0;
-	char *s;
-
-	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
-		return -ENOMEM;
-
-	s = page;
-
-	switch (type) {
-	case FILE_RELEASE_AGENT:
-	{
-		struct cgroupfs_root *root;
-		size_t n;
-		mutex_lock(&cgroup_mutex);
-		root = cgrp->root;
-		n = strnlen(root->release_agent_path,
-			    sizeof(root->release_agent_path));
-		n = min(n, (size_t) PAGE_SIZE);
-		strncpy(s, root->release_agent_path, n);
-		mutex_unlock(&cgroup_mutex);
-		s += n;
-		break;
-	}
-	default:
-		retval = -EINVAL;
-		goto out;
-	}
-	*s++ = '\n';
-
-	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
-out:
-	free_page((unsigned long)page);
-	return retval;
-}
-
 static ssize_t cgroup_file_read(struct file *file, char __user *buf,
 				   size_t nbytes, loff_t *ppos)
 {
@@ -1606,6 +1594,7 @@ int cgroup_seqfile_release(struct inode *inode, struct file *file)
 
 static struct file_operations cgroup_seqfile_operations = {
 	.read = seq_read,
+	.write = cgroup_file_write,
 	.llseek = seq_lseek,
 	.release = cgroup_seqfile_release,
 };
@@ -2283,8 +2272,9 @@ static struct cftype files[] = {
 
 static struct cftype cft_release_agent = {
 	.name = "release_agent",
-	.read = cgroup_common_file_read,
-	.write = cgroup_common_file_write,
+	.read_seq_string = cgroup_release_agent_show,
+	.write_string = cgroup_release_agent_write,
+	.max_write_len = PATH_MAX,
 	.private = FILE_RELEASE_AGENT,
 };
 
@@ -3111,27 +3101,24 @@ static void cgroup_release_agent(struct work_struct *work)
 	while (!list_empty(&release_list)) {
 		char *argv[3], *envp[3];
 		int i;
-		char *pathbuf;
+		char *pathbuf = NULL, *agentbuf = NULL;
 		struct cgroup *cgrp = list_entry(release_list.next,
 						    struct cgroup,
 						    release_list);
 		list_del_init(&cgrp->release_list);
 		spin_unlock(&release_list_lock);
 		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (!pathbuf) {
-			spin_lock(&release_list_lock);
-			continue;
-		}
-
-		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
-			kfree(pathbuf);
-			spin_lock(&release_list_lock);
-			continue;
-		}
+		if (!pathbuf)
+			goto continue_free;
+		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
+			goto continue_free;
+		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+		if (!agentbuf)
+			goto continue_free;
 
 		i = 0;
-		argv[i++] = cgrp->root->release_agent_path;
-		argv[i++] = (char *)pathbuf;
+		argv[i++] = agentbuf;
+		argv[i++] = pathbuf;
 		argv[i] = NULL;
 
 		i = 0;
@@ -3145,8 +3132,10 @@ static void cgroup_release_agent(struct work_struct *work)
 		 * be a slow process */
 		mutex_unlock(&cgroup_mutex);
 		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-		kfree(pathbuf);
 		mutex_lock(&cgroup_mutex);
+ continue_free:
+		kfree(pathbuf);
+		kfree(agentbuf);
 		spin_lock(&release_list_lock);
 	}
 	spin_unlock(&release_list_lock);
-- 
GitLab


From 84eea842886ac35020be6043e04748ed22014359 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:00 -0700
Subject: [PATCH 680/853] cgroups: misc cleanups to write_string patchset

This patch contains cleanups suggested by reviewers for the recent
write_string() patchset:

- pair cgroup_lock_live_group() with cgroup_unlock() in cgroup.c for
  clarity, rather than directly unlocking cgroup_mutex.

- make the return type of cgroup_lock_live_group() a bool

- use a #define'd constant for the local buffer size in read/write functions

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |  4 ++--
 kernel/cgroup.c        | 21 ++++++++++++---------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e78377a91a7..cc59d3a21d8 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -21,11 +21,13 @@
 struct cgroupfs_root;
 struct cgroup_subsys;
 struct inode;
+struct cgroup;
 
 extern int cgroup_init_early(void);
 extern int cgroup_init(void);
 extern void cgroup_init_smp(void);
 extern void cgroup_lock(void);
+extern bool cgroup_lock_live_group(struct cgroup *cgrp);
 extern void cgroup_unlock(void);
 extern void cgroup_fork(struct task_struct *p);
 extern void cgroup_fork_callbacks(struct task_struct *p);
@@ -295,8 +297,6 @@ int cgroup_add_files(struct cgroup *cgrp,
 
 int cgroup_is_removed(const struct cgroup *cgrp);
 
-int cgroup_lock_live_group(struct cgroup *cgrp);
-
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0120b5d67a7..a14122ecaa5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1329,10 +1329,10 @@ enum cgroup_filetype {
  * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
  * @cgrp: the cgroup to be checked for liveness
  *
- * Returns true (with lock held) on success, or false (with no lock
- * held) on failure.
+ * On success, returns true; the lock should be later released with
+ * cgroup_unlock(). On failure returns false with no lock held.
  */
-int cgroup_lock_live_group(struct cgroup *cgrp)
+bool cgroup_lock_live_group(struct cgroup *cgrp)
 {
 	mutex_lock(&cgroup_mutex);
 	if (cgroup_is_removed(cgrp)) {
@@ -1349,7 +1349,7 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 	strcpy(cgrp->root->release_agent_path, buffer);
-	mutex_unlock(&cgroup_mutex);
+	cgroup_unlock();
 	return 0;
 }
 
@@ -1360,16 +1360,19 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
 		return -ENODEV;
 	seq_puts(seq, cgrp->root->release_agent_path);
 	seq_putc(seq, '\n');
-	mutex_unlock(&cgroup_mutex);
+	cgroup_unlock();
 	return 0;
 }
 
+/* A buffer size big enough for numbers or short strings */
+#define CGROUP_LOCAL_BUFFER_SIZE 64
+
 static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
 				struct file *file,
 				const char __user *userbuf,
 				size_t nbytes, loff_t *unused_ppos)
 {
-	char buffer[64];
+	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
 	int retval = 0;
 	char *end;
 
@@ -1403,7 +1406,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
 				   const char __user *userbuf,
 				   size_t nbytes, loff_t *unused_ppos)
 {
-	char local_buffer[64];
+	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
 	int retval = 0;
 	size_t max_bytes = cft->max_write_len;
 	char *buffer = local_buffer;
@@ -1518,7 +1521,7 @@ static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
 			       char __user *buf, size_t nbytes,
 			       loff_t *ppos)
 {
-	char tmp[64];
+	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
 	u64 val = cft->read_u64(cgrp, cft);
 	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
 
@@ -1530,7 +1533,7 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
 			       char __user *buf, size_t nbytes,
 			       loff_t *ppos)
 {
-	char tmp[64];
+	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
 	s64 val = cft->read_s64(cgrp, cft);
 	int len = sprintf(tmp, "%lld\n", (long long) val);
 
-- 
GitLab


From 6379c106152388f7ea45d6dda63edda0e9181fc8 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:01 -0700
Subject: [PATCH 681/853] cgroup files: move notify_on_release file to separate
 write handler

This patch moves the write handler for the cgroups notify_on_release
file into a separate handler. This handler requires no cgroups locking
since it relies on atomic bitops for synchronization.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a14122ecaa5..d597d301578 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1474,13 +1474,6 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
 	case FILE_TASKLIST:
 		retval = attach_task_by_pid(cgrp, buffer);
 		break;
-	case FILE_NOTIFY_ON_RELEASE:
-		clear_bit(CGRP_RELEASABLE, &cgrp->flags);
-		if (simple_strtoul(buffer, NULL, 10) != 0)
-			set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
-		else
-			clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out2;
@@ -2252,6 +2245,18 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
 	return notify_on_release(cgrp);
 }
 
+static int cgroup_write_notify_on_release(struct cgroup *cgrp,
+					  struct cftype *cft,
+					  u64 val)
+{
+	clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+	if (val)
+		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+	else
+		clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+	return 0;
+}
+
 /*
  * for the common functions, 'private' gives the type of file
  */
@@ -2268,7 +2273,7 @@ static struct cftype files[] = {
 	{
 		.name = "notify_on_release",
 		.read_u64 = cgroup_read_notify_on_release,
-		.write = cgroup_common_file_write,
+		.write_u64 = cgroup_write_notify_on_release,
 		.private = FILE_NOTIFY_ON_RELEASE,
 	},
 };
-- 
GitLab


From af351026aafc8da16518a02b41c66d3e0c1cdef4 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:01 -0700
Subject: [PATCH 682/853] cgroup files: turn attach_task_by_pid directly into a
 cgroup write handler

This patch changes attach_task_by_pid() to take a u64 rather than a
string; as a result it can be called directly as a control groups
write_u64 handler, and cgroup_common_file_write() can be removed.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 80 +++++++++----------------------------------------
 1 file changed, 14 insertions(+), 66 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d597d301578..86b71e714e1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -504,10 +504,6 @@ static struct css_set *find_css_set(
  * knows that the cgroup won't be removed, as cgroup_rmdir()
  * needs that mutex.
  *
- * The cgroup_common_file_write handler for operations that modify
- * the cgroup hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cgroup modifications across the system.
- *
  * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
  * (usually) take cgroup_mutex.  These are the two most performance
  * critical pieces of code here.  The exception occurs on cgroup_exit(),
@@ -1279,18 +1275,14 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 }
 
 /*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
- * cgroup_mutex, may take task_lock of task
+ * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
+ * held. May take task_lock of task
  */
-static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
 {
-	pid_t pid;
 	struct task_struct *tsk;
 	int ret;
 
-	if (sscanf(pidbuf, "%d", &pid) != 1)
-		return -EIO;
-
 	if (pid) {
 		rcu_read_lock();
 		tsk = find_task_by_vpid(pid);
@@ -1316,6 +1308,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
 	return ret;
 }
 
+static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
+{
+	int ret;
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+	ret = attach_task_by_pid(cgrp, pid);
+	cgroup_unlock();
+	return ret;
+}
+
 /* The various types of files and directories in a cgroup file system */
 enum cgroup_filetype {
 	FILE_ROOT,
@@ -1434,60 +1436,6 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
 	return retval;
 }
 
-static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
-					   struct cftype *cft,
-					   struct file *file,
-					   const char __user *userbuf,
-					   size_t nbytes, loff_t *unused_ppos)
-{
-	enum cgroup_filetype type = cft->private;
-	char *buffer;
-	int retval = 0;
-
-	if (nbytes >= PATH_MAX)
-		return -E2BIG;
-
-	/* +1 for nul-terminator */
-	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-	if (buffer == NULL)
-		return -ENOMEM;
-
-	if (copy_from_user(buffer, userbuf, nbytes)) {
-		retval = -EFAULT;
-		goto out1;
-	}
-	buffer[nbytes] = 0;	/* nul-terminate */
-	strstrip(buffer);	/* strip -just- trailing whitespace */
-
-	mutex_lock(&cgroup_mutex);
-
-	/*
-	 * This was already checked for in cgroup_file_write(), but
-	 * check again now we're holding cgroup_mutex.
-	 */
-	if (cgroup_is_removed(cgrp)) {
-		retval = -ENODEV;
-		goto out2;
-	}
-
-	switch (type) {
-	case FILE_TASKLIST:
-		retval = attach_task_by_pid(cgrp, buffer);
-		break;
-	default:
-		retval = -EINVAL;
-		goto out2;
-	}
-
-	if (retval == 0)
-		retval = nbytes;
-out2:
-	mutex_unlock(&cgroup_mutex);
-out1:
-	kfree(buffer);
-	return retval;
-}
-
 static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
 						size_t nbytes, loff_t *ppos)
 {
@@ -2265,7 +2213,7 @@ static struct cftype files[] = {
 		.name = "tasks",
 		.open = cgroup_tasks_open,
 		.read = cgroup_tasks_read,
-		.write = cgroup_common_file_write,
+		.write_u64 = cgroup_tasks_write,
 		.release = cgroup_tasks_release,
 		.private = FILE_TASKLIST,
 	},
-- 
GitLab


From e37123953292146445c8629b3950d0513fd10ae2 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:02 -0700
Subject: [PATCH 683/853] cgroup files: remove cpuset_common_file_write()

This patch tweaks the signatures of the update_cpumask() and
update_nodemask() functions so that they can be called directly as
handlers for the new cgroups write_string() method.

This allows cpuset_common_file_write() to be removed.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 109 ++++++++++++++++--------------------------------
 1 file changed, 35 insertions(+), 74 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d5738910c34..276ce7e4f1a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -227,10 +227,6 @@ static struct cpuset top_cpuset = {
  * The task_struct fields mems_allowed and mems_generation may only
  * be accessed in the context of that task, so require no locks.
  *
- * The cpuset_common_file_write handler for operations that modify
- * the cpuset hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cpuset modifications across the system.
- *
  * The cpuset_common_file_read() handlers only hold callback_mutex across
  * small pieces of code, such as when reading out possibly multi-word
  * cpumasks and nodemasks.
@@ -772,7 +768,7 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
  * @cs: the cpuset to consider
  * @buf: buffer of cpu numbers written to this cpuset
  */
-static int update_cpumask(struct cpuset *cs, char *buf)
+static int update_cpumask(struct cpuset *cs, const char *buf)
 {
 	struct cpuset trialcs;
 	struct cgroup_scanner scan;
@@ -792,7 +788,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	 * that parsing.  The validate_change() call ensures that cpusets
 	 * with tasks have cpus.
 	 */
-	buf = strstrip(buf);
 	if (!*buf) {
 		cpus_clear(trialcs.cpus_allowed);
 	} else {
@@ -902,7 +897,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 
 static void *cpuset_being_rebound;
 
-static int update_nodemask(struct cpuset *cs, char *buf)
+static int update_nodemask(struct cpuset *cs, const char *buf)
 {
 	struct cpuset trialcs;
 	nodemask_t oldmem;
@@ -929,7 +924,6 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	 * that parsing.  The validate_change() call ensures that cpusets
 	 * with tasks have memory.
 	 */
-	buf = strstrip(buf);
 	if (!*buf) {
 		nodes_clear(trialcs.mems_allowed);
 	} else {
@@ -1256,72 +1250,14 @@ typedef enum {
 	FILE_SPREAD_SLAB,
 } cpuset_filetype_t;
 
-static ssize_t cpuset_common_file_write(struct cgroup *cont,
-					struct cftype *cft,
-					struct file *file,
-					const char __user *userbuf,
-					size_t nbytes, loff_t *unused_ppos)
-{
-	struct cpuset *cs = cgroup_cs(cont);
-	cpuset_filetype_t type = cft->private;
-	char *buffer;
-	int retval = 0;
-
-	/* Crude upper limit on largest legitimate cpulist user might write. */
-	if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
-		return -E2BIG;
-
-	/* +1 for nul-terminator */
-	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	if (copy_from_user(buffer, userbuf, nbytes)) {
-		retval = -EFAULT;
-		goto out1;
-	}
-	buffer[nbytes] = 0;	/* nul-terminate */
-
-	cgroup_lock();
-
-	if (cgroup_is_removed(cont)) {
-		retval = -ENODEV;
-		goto out2;
-	}
-
-	switch (type) {
-	case FILE_CPULIST:
-		retval = update_cpumask(cs, buffer);
-		break;
-	case FILE_MEMLIST:
-		retval = update_nodemask(cs, buffer);
-		break;
-	default:
-		retval = -EINVAL;
-		goto out2;
-	}
-
-	if (retval == 0)
-		retval = nbytes;
-out2:
-	cgroup_unlock();
-out1:
-	kfree(buffer);
-	return retval;
-}
-
 static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
 {
 	int retval = 0;
 	struct cpuset *cs = cgroup_cs(cgrp);
 	cpuset_filetype_t type = cft->private;
 
-	cgroup_lock();
-
-	if (cgroup_is_removed(cgrp)) {
-		cgroup_unlock();
+	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
-	}
 
 	switch (type) {
 	case FILE_CPU_EXCLUSIVE:
@@ -1367,12 +1303,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
 	struct cpuset *cs = cgroup_cs(cgrp);
 	cpuset_filetype_t type = cft->private;
 
-	cgroup_lock();
-
-	if (cgroup_is_removed(cgrp)) {
-		cgroup_unlock();
+	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
-	}
+
 	switch (type) {
 	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
 		retval = update_relax_domain_level(cs, val);
@@ -1385,6 +1318,32 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
 	return retval;
 }
 
+/*
+ * Common handling for a write to a "cpus" or "mems" file.
+ */
+static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
+				const char *buf)
+{
+	int retval = 0;
+
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
+	switch (cft->private) {
+	case FILE_CPULIST:
+		retval = update_cpumask(cgroup_cs(cgrp), buf);
+		break;
+	case FILE_MEMLIST:
+		retval = update_nodemask(cgroup_cs(cgrp), buf);
+		break;
+	default:
+		retval = -EINVAL;
+		break;
+	}
+	cgroup_unlock();
+	return retval;
+}
+
 /*
  * These ascii lists should be read in a single call, by using a user
  * buffer large enough to hold the entire map.  If read in smaller
@@ -1504,14 +1463,16 @@ static struct cftype files[] = {
 	{
 		.name = "cpus",
 		.read = cpuset_common_file_read,
-		.write = cpuset_common_file_write,
+		.write_string = cpuset_write_resmask,
+		.max_write_len = (100U + 6 * NR_CPUS),
 		.private = FILE_CPULIST,
 	},
 
 	{
 		.name = "mems",
 		.read = cpuset_common_file_read,
-		.write = cpuset_common_file_write,
+		.write_string = cpuset_write_resmask,
+		.max_write_len = (100U + 6 * MAX_NUMNODES),
 		.private = FILE_MEMLIST,
 	},
 
-- 
GitLab


From f92523e3a7861f5dbd76021e0719a35fe8771f2d Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:03 -0700
Subject: [PATCH 684/853] cgroup files: convert devcgroup_access_write() into a
 cgroup write_string() handler

This patch converts devcgroup_access_write() from a raw file handler
into a handler for the cgroup write_string() method. This allows some
boilerplate copying/locking/checking to be removed and simplifies the
cleanup path, since these functions are performed by the cgroups
framework before calling the handler.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 101 +++++++++++++++------------------------
 1 file changed, 38 insertions(+), 63 deletions(-)

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index ddd92cec78e..236fffa9d05 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -59,6 +59,11 @@ static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
 	return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
 }
 
+static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
+{
+	return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
+}
+
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
@@ -312,10 +317,10 @@ static int may_access_whitelist(struct dev_cgroup *c,
  * when adding a new allow rule to a device whitelist, the rule
  * must be allowed in the parent device
  */
-static int parent_has_perm(struct cgroup *childcg,
+static int parent_has_perm(struct dev_cgroup *childcg,
 				  struct dev_whitelist_item *wh)
 {
-	struct cgroup *pcg = childcg->parent;
+	struct cgroup *pcg = childcg->css.cgroup->parent;
 	struct dev_cgroup *parent;
 	int ret;
 
@@ -341,39 +346,18 @@ static int parent_has_perm(struct cgroup *childcg,
  * new access is only allowed if you're in the top-level cgroup, or your
  * parent cgroup has the access you're asking for.
  */
-static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+				   int filetype, const char *buffer)
 {
-	struct cgroup *cur_cgroup;
-	struct dev_cgroup *devcgroup, *cur_devcgroup;
-	int filetype = cft->private;
-	char *buffer, *b;
+	struct dev_cgroup *cur_devcgroup;
+	const char *b;
 	int retval = 0, count;
 	struct dev_whitelist_item wh;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	devcgroup = cgroup_to_devcgroup(cgroup);
-	cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
-	cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
-
-	buffer = kmalloc(nbytes+1, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	if (copy_from_user(buffer, userbuf, nbytes)) {
-		retval = -EFAULT;
-		goto out1;
-	}
-	buffer[nbytes] = 0;	/* nul-terminate */
-
-	cgroup_lock();
-	if (cgroup_is_removed(cgroup)) {
-		retval = -ENODEV;
-		goto out2;
-	}
+	cur_devcgroup = task_devcgroup(current);
 
 	memset(&wh, 0, sizeof(wh));
 	b = buffer;
@@ -392,14 +376,11 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 		wh.type = DEV_CHAR;
 		break;
 	default:
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
 	b++;
-	if (!isspace(*b)) {
-		retval = -EINVAL;
-		goto out2;
-	}
+	if (!isspace(*b))
+		return -EINVAL;
 	b++;
 	if (*b == '*') {
 		wh.major = ~0;
@@ -411,13 +392,10 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 			b++;
 		}
 	} else {
-		retval = -EINVAL;
-		goto out2;
-	}
-	if (*b != ':') {
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	if (*b != ':')
+		return -EINVAL;
 	b++;
 
 	/* read minor */
@@ -431,13 +409,10 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 			b++;
 		}
 	} else {
-		retval = -EINVAL;
-		goto out2;
-	}
-	if (!isspace(*b)) {
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	if (!isspace(*b))
+		return -EINVAL;
 	for (b++, count = 0; count < 3; count++, b++) {
 		switch (*b) {
 		case 'r':
@@ -454,8 +429,7 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 			count = 3;
 			break;
 		default:
-			retval = -EINVAL;
-			goto out2;
+			return -EINVAL;
 		}
 	}
 
@@ -463,38 +437,39 @@ handle:
 	retval = 0;
 	switch (filetype) {
 	case DEVCG_ALLOW:
-		if (!parent_has_perm(cgroup, &wh))
-			retval = -EPERM;
-		else
-			retval = dev_whitelist_add(devcgroup, &wh);
-		break;
+		if (!parent_has_perm(devcgroup, &wh))
+			return -EPERM;
+		return dev_whitelist_add(devcgroup, &wh);
 	case DEVCG_DENY:
 		dev_whitelist_rm(devcgroup, &wh);
 		break;
 	default:
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	return 0;
+}
 
-	if (retval == 0)
-		retval = nbytes;
-
-out2:
+static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
+				  const char *buffer)
+{
+	int retval;
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+	retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+					 cft->private, buffer);
 	cgroup_unlock();
-out1:
-	kfree(buffer);
 	return retval;
 }
 
 static struct cftype dev_cgroup_files[] = {
 	{
 		.name = "allow",
-		.write  = devcgroup_access_write,
+		.write_string  = devcgroup_access_write,
 		.private = DEVCG_ALLOW,
 	},
 	{
 		.name = "deny",
-		.write = devcgroup_access_write,
+		.write_string = devcgroup_access_write,
 		.private = DEVCG_DENY,
 	},
 	{
-- 
GitLab


From 856c13aa1ff6136c1968414fdea5938ea9d5ebf2 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:04 -0700
Subject: [PATCH 685/853] cgroup files: convert res_counter_write() to be a
 cgroups write_string() handler

Currently res_counter_write() is a raw file handler even though it's
ultimately taking a number, since in some cases it wants to
pre-process the string when converting it to a number.

This patch converts res_counter_write() from a raw file handler to a
write_string() handler; this allows some of the boilerplate
copying/locking/checking to be removed, and simplifies the cleanup path,
since these functions are now performed by the cgroups framework.

[lizf@cn.fujitsu.com: build fix]
Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 11 ++++++---
 kernel/res_counter.c        | 48 ++++++++++++++++---------------------
 mm/memcontrol.c             | 24 ++++---------------
 3 files changed, 34 insertions(+), 49 deletions(-)

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 125660e7793..290205dfe09 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -63,9 +63,14 @@ u64 res_counter_read_u64(struct res_counter *counter, int member);
 ssize_t res_counter_read(struct res_counter *counter, int member,
 		const char __user *buf, size_t nbytes, loff_t *pos,
 		int (*read_strategy)(unsigned long long val, char *s));
-ssize_t res_counter_write(struct res_counter *counter, int member,
-		const char __user *buf, size_t nbytes, loff_t *pos,
-		int (*write_strategy)(char *buf, unsigned long long *val));
+
+typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
+
+int res_counter_memparse_write_strategy(const char *buf,
+					unsigned long long *res);
+
+int res_counter_write(struct res_counter *counter, int member,
+		      const char *buffer, write_strategy_fn write_strategy);
 
 /*
  * the field descriptors. one for each member of res_counter
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index d3c61b4ebef..f275c8eca77 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/res_counter.h>
 #include <linux/uaccess.h>
+#include <linux/mm.h>
 
 void res_counter_init(struct res_counter *counter)
 {
@@ -102,44 +103,37 @@ u64 res_counter_read_u64(struct res_counter *counter, int member)
 	return *res_counter_member(counter, member);
 }
 
-ssize_t res_counter_write(struct res_counter *counter, int member,
-		const char __user *userbuf, size_t nbytes, loff_t *pos,
-		int (*write_strategy)(char *st_buf, unsigned long long *val))
+int res_counter_memparse_write_strategy(const char *buf,
+					unsigned long long *res)
 {
-	int ret;
-	char *buf, *end;
-	unsigned long flags;
-	unsigned long long tmp, *val;
-
-	buf = kmalloc(nbytes + 1, GFP_KERNEL);
-	ret = -ENOMEM;
-	if (buf == NULL)
-		goto out;
+	char *end;
+	/* FIXME - make memparse() take const char* args */
+	*res = memparse((char *)buf, &end);
+	if (*end != '\0')
+		return -EINVAL;
 
-	buf[nbytes] = '\0';
-	ret = -EFAULT;
-	if (copy_from_user(buf, userbuf, nbytes))
-		goto out_free;
+	*res = PAGE_ALIGN(*res);
+	return 0;
+}
 
-	ret = -EINVAL;
+int res_counter_write(struct res_counter *counter, int member,
+		      const char *buf, write_strategy_fn write_strategy)
+{
+	char *end;
+	unsigned long flags;
+	unsigned long long tmp, *val;
 
-	strstrip(buf);
 	if (write_strategy) {
-		if (write_strategy(buf, &tmp)) {
-			goto out_free;
-		}
+		if (write_strategy(buf, &tmp))
+			return -EINVAL;
 	} else {
 		tmp = simple_strtoull(buf, &end, 10);
 		if (*end != '\0')
-			goto out_free;
+			return -EINVAL;
 	}
 	spin_lock_irqsave(&counter->lock, flags);
 	val = res_counter_member(counter, member);
 	*val = tmp;
 	spin_unlock_irqrestore(&counter->lock, flags);
-	ret = nbytes;
-out_free:
-	kfree(buf);
-out:
-	return ret;
+	return 0;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b..7385d58fb06 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -838,32 +838,18 @@ out:
 	return ret;
 }
 
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
-	*tmp = memparse(buf, &buf);
-	if (*buf != '\0')
-		return -EINVAL;
-
-	/*
-	 * Round up the value to the closest page size
-	 */
-	*tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
-	return 0;
-}
-
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
 	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
 				    cft->private);
 }
 
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+			    const char *buffer)
 {
 	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos,
-				mem_cgroup_write_strategy);
+				 cft->private, buffer,
+				 res_counter_memparse_write_strategy);
 }
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -940,7 +926,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "limit_in_bytes",
 		.private = RES_LIMIT,
-		.write = mem_cgroup_write,
+		.write_string = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read,
 	},
 	{
-- 
GitLab


From e885dcde75685e09f23cffae1f6d5169c105b8a0 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Fri, 25 Jul 2008 01:47:06 -0700
Subject: [PATCH 686/853] cgroup_clone: use pid of newly created task for new
 cgroup

cgroup_clone creates a new cgroup with the pid of the task.  This works
correctly for unshare, but for clone cgroup_clone is called from
copy_namespaces inside copy_process, which happens before the new pid is
created.  As a result, the new cgroup was created with current's pid.
This patch:

	1. Moves the call inside copy_process to after the new pid
	   is created
	2. Passes the struct pid into ns_cgroup_clone (as it is not
	   yet attached to the task)
	3. Passes a name from ns_cgroup_clone() into cgroup_clone()
	   so as to keep cgroup_clone() itself simpler
	4. Uses pid_vnr() to get the process id value, so that the
	   pid used to name the new cgroup is always the pid as it
	   would be known to the task which did the cloning or
	   unsharing.  I think that is the most intuitive thing to
	   do.  This way, task t1 does clone(CLONE_NEWPID) to get
	   t2, which does clone(CLONE_NEWPID) to get t3, then the
	   cgroup for t3 will be named for the pid by which t2 knows
	   t3.

(Thanks to Dan Smith for finding the main bug)

Changelog:
	June 11: Incorporate Paul Menage's feedback:  don't pass
	         NULL to ns_cgroup_clone from unshare, and reduce
		 patch size by using 'nodename' in cgroup_clone.
	June 10: Original version

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Serge Hallyn <serge@us.ibm.com>
Acked-by: Paul Menage <menage@google.com>
Tested-by: Dan Smith <danms@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h  | 3 ++-
 include/linux/nsproxy.h | 7 +++++--
 kernel/cgroup.c         | 7 +++----
 kernel/fork.c           | 6 ++++++
 kernel/ns_cgroup.c      | 8 ++++++--
 kernel/nsproxy.c        | 8 +-------
 6 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index cc59d3a21d8..c98dd7cb707 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -364,7 +364,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
 	return task_subsys_state(task, subsys_id)->cgroup;
 }
 
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
+							char *nodename);
 
 /* A cgroup_iter should be treated as an opaque object */
 struct cgroup_iter {
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 0e66b57631f..c8a768e5964 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -82,9 +82,12 @@ static inline void get_nsproxy(struct nsproxy *ns)
 }
 
 #ifdef CONFIG_CGROUP_NS
-int ns_cgroup_clone(struct task_struct *tsk);
+int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid);
 #else
-static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
+static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid)
+{
+	return 0;
+}
 #endif
 
 #endif
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86b71e714e1..66ec9fd21e0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2848,16 +2848,17 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
  * cgroup_clone - clone the cgroup the given subsystem is attached to
  * @tsk: the task to be moved
  * @subsys: the given subsystem
+ * @nodename: the name for the new cgroup
  *
  * Duplicate the current cgroup in the hierarchy that the given
  * subsystem is attached to, and move this task into the new
  * child.
  */
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
+							char *nodename)
 {
 	struct dentry *dentry;
 	int ret = 0;
-	char nodename[MAX_CGROUP_TYPE_NAMELEN];
 	struct cgroup *parent, *child;
 	struct inode *inode;
 	struct css_set *cg;
@@ -2882,8 +2883,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 	cg = tsk->cgroups;
 	parent = task_cgroup(tsk, subsys->subsys_id);
 
-	snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid);
-
 	/* Pin the hierarchy */
 	atomic_inc(&parent->root->sb->s_active);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 5a5d6fef341..228f80c9155 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1107,6 +1107,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD)
 		p->tgid = current->tgid;
 
+	if (current->nsproxy != p->nsproxy) {
+		retval = ns_cgroup_clone(p, pid);
+		if (retval)
+			goto bad_fork_free_pid;
+	}
+
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
 	 * Clear TID on mm_release()?
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 48d7ed6fc3a..43c2111cd54 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/cgroup.h>
 #include <linux/fs.h>
+#include <linux/proc_fs.h>
 #include <linux/slab.h>
 #include <linux/nsproxy.h>
 
@@ -24,9 +25,12 @@ static inline struct ns_cgroup *cgroup_to_ns(
 			    struct ns_cgroup, css);
 }
 
-int ns_cgroup_clone(struct task_struct *task)
+int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
 {
-	return cgroup_clone(task, &ns_subsys);
+	char name[PROC_NUMBUF];
+
+	snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
+	return cgroup_clone(task, &ns_subsys, name);
 }
 
 /*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index adc785146a1..21575fc46d0 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -157,12 +157,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 		goto out;
 	}
 
-	err = ns_cgroup_clone(tsk);
-	if (err) {
-		put_nsproxy(new_ns);
-		goto out;
-	}
-
 	tsk->nsproxy = new_ns;
 
 out:
@@ -209,7 +203,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 		goto out;
 	}
 
-	err = ns_cgroup_clone(current);
+	err = ns_cgroup_clone(current, task_pid(current));
 	if (err)
 		put_nsproxy(*new_nsp);
 
-- 
GitLab


From 4efd1a1b2f09a4b746dd9dc057986c6dadcb1317 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:47:07 -0700
Subject: [PATCH 687/853] devcgroup: relax white-list protection down to RCU

Currently this list is protected with a simple spinlock, even for reading
from one.  This is OK, but can be better.

Actually I want it to be better very much, since after replacing the
OpenVZ device permissions engine with the cgroup-based one I noticed, that
we set 12 default device permissions for each newly created container (for
/dev/null, full, terminals, etc. devices), and people sometimes have up to
20 perms more, so traversing the ~30-40 elements list under a spinlock
doesn't seem very good.

Here's the RCU protection for white-list - dev_whitelist_item-s are added
and removed under the devcg->lock, but are looked up in permissions
checking under the rcu_read_lock.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 236fffa9d05..9da3532726f 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -41,6 +41,7 @@ struct dev_whitelist_item {
 	short type;
 	short access;
 	struct list_head list;
+	struct rcu_head rcu;
 };
 
 struct dev_cgroup {
@@ -133,11 +134,19 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 	}
 
 	if (whcopy != NULL)
-		list_add_tail(&whcopy->list, &dev_cgroup->whitelist);
+		list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
 	spin_unlock(&dev_cgroup->lock);
 	return 0;
 }
 
+static void whitelist_item_free(struct rcu_head *rcu)
+{
+	struct dev_whitelist_item *item;
+
+	item = container_of(rcu, struct dev_whitelist_item, rcu);
+	kfree(item);
+}
+
 /*
  * called under cgroup_lock()
  * since the list is visible to other tasks, we need the spinlock also
@@ -161,8 +170,8 @@ static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
 remove:
 		walk->access &= ~wh->access;
 		if (!walk->access) {
-			list_del(&walk->list);
-			kfree(walk);
+			list_del_rcu(&walk->list);
+			call_rcu(&walk->rcu, whitelist_item_free);
 		}
 	}
 	spin_unlock(&dev_cgroup->lock);
@@ -269,15 +278,15 @@ static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
 	struct dev_whitelist_item *wh;
 	char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 
-	spin_lock(&devcgroup->lock);
-	list_for_each_entry(wh, &devcgroup->whitelist, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
 		set_access(acc, wh->access);
 		set_majmin(maj, wh->major);
 		set_majmin(min, wh->minor);
 		seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
 			   maj, min, acc);
 	}
-	spin_unlock(&devcgroup->lock);
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -510,8 +519,8 @@ int devcgroup_inode_permission(struct inode *inode, int mask)
 	if (!dev_cgroup)
 		return 0;
 
-	spin_lock(&dev_cgroup->lock);
-	list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
 		if (wh->type & DEV_ALL)
 			goto acc_check;
 		if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
@@ -527,10 +536,10 @@ acc_check:
 			continue;
 		if ((mask & MAY_READ) && !(wh->access & ACC_READ))
 			continue;
-		spin_unlock(&dev_cgroup->lock);
+		rcu_read_unlock();
 		return 0;
 	}
-	spin_unlock(&dev_cgroup->lock);
+	rcu_read_unlock();
 
 	return -EPERM;
 }
@@ -545,7 +554,7 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
 	if (!dev_cgroup)
 		return 0;
 
-	spin_lock(&dev_cgroup->lock);
+	rcu_read_lock();
 	list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
 		if (wh->type & DEV_ALL)
 			goto acc_check;
@@ -560,9 +569,9 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
 acc_check:
 		if (!(wh->access & ACC_MKNOD))
 			continue;
-		spin_unlock(&dev_cgroup->lock);
+		rcu_read_unlock();
 		return 0;
 	}
-	spin_unlock(&dev_cgroup->lock);
+	rcu_read_unlock();
 	return -EPERM;
 }
-- 
GitLab


From 7759fc9d10d3559f365cb122d81e0c0a185fe0fe Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:08 -0700
Subject: [PATCH 688/853] devcgroup: code cleanup

- clean up set_majmin()
- use simple_strtoul() to parse major/minor

[akpm@linux-foundation.org: fix simple_strtoul() usage]
[kosaki.motohiro@jp.fujitsu.com: fix warnings]
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 9da3532726f..7bd296cca04 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -202,7 +202,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
 		}
 		wh->minor = wh->major = ~0;
 		wh->type = DEV_ALL;
-		wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE;
+		wh->access = ACC_MASK;
 		list_add(&wh->list, &dev_cgroup->whitelist);
 	} else {
 		parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
@@ -264,11 +264,10 @@ static char type_to_char(short type)
 
 static void set_majmin(char *str, unsigned m)
 {
-	memset(str, 0, MAJMINLEN);
 	if (m == ~0)
-		sprintf(str, "*");
+		strcpy(str, "*");
 	else
-		snprintf(str, MAJMINLEN, "%u", m);
+		sprintf(str, "%u", m);
 }
 
 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
@@ -360,6 +359,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 {
 	struct dev_cgroup *cur_devcgroup;
 	const char *b;
+	char *endp;
 	int retval = 0, count;
 	struct dev_whitelist_item wh;
 
@@ -395,11 +395,8 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 		wh.major = ~0;
 		b++;
 	} else if (isdigit(*b)) {
-		wh.major = 0;
-		while (isdigit(*b)) {
-			wh.major = wh.major*10+(*b-'0');
-			b++;
-		}
+		wh.major = simple_strtoul(b, &endp, 10);
+		b = endp;
 	} else {
 		return -EINVAL;
 	}
@@ -412,11 +409,8 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 		wh.minor = ~0;
 		b++;
 	} else if (isdigit(*b)) {
-		wh.minor = 0;
-		while (isdigit(*b)) {
-			wh.minor = wh.minor*10+(*b-'0');
-			b++;
-		}
+		wh.minor = simple_strtoul(b, &endp, 10);
+		b = endp;
 	} else {
 		return -EINVAL;
 	}
-- 
GitLab


From a181b0e888a1d917edcab57cd73ccf7d8e75a46c Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:08 -0700
Subject: [PATCH 689/853] memcg: make global var read_mostly

mem_cgroup_subsys and page_cgroup_cache should be read_mostly and
MEM_CGROUP_RECLAIM_RETRIES can be just a fixed number.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7385d58fb06..c52c045f515 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
 
 #include <asm/uaccess.h>
 
-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES	5
 
 /*
  * Statistics for memory cgroup.
-- 
GitLab


From 508b7be0a5b06b64203512ed9b34191cddc83f56 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:09 -0700
Subject: [PATCH 690/853] memcg: avoid unnecessary initialization

* remove over-killing initialization (in fast path)
* make the condition for PAGE_CGROUP_FLAG_ACTIVE more obvious.

Signed-off-by: KAMEAZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c52c045f515..90ccc132635 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -296,7 +296,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
 
 	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
-	list_del_init(&pc->lru);
+	list_del(&pc->lru);
 }
 
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -559,7 +559,7 @@ retry:
 	}
 	unlock_page_cgroup(page);
 
-	pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
+	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
 	if (pc == NULL)
 		goto err;
 
@@ -606,9 +606,14 @@ retry:
 	pc->ref_cnt = 1;
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+	/*
+	 * If a page is accounted as a page cache, insert to inactive list.
+	 * If anon, insert to active list.
+	 */
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
 		pc->flags = PAGE_CGROUP_FLAG_CACHE;
+	else
+		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
 	lock_page_cgroup(page);
 	if (page_get_page_cgroup(page)) {
-- 
GitLab


From e8589cc189f96b87348ae83ea4db38eaac624135 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:10 -0700
Subject: [PATCH 691/853] memcg: better migration handling

This patch changes page migration under memory controller to use a
different algorithm.  (thanks to Christoph for new idea.)

Before:
 - page_cgroup is migrated from an old page to a new page.
After:
 - a new page is accounted, no reuse of page_cgroup.

Pros:

 - We can avoid complicated lock dependencies and races in migration.

Cons:

 - new param to mem_cgroup_charge_common().

 - mem_cgroup_getref() is added for handling ref_cnt ping-pong.

This version simplifies complicated lock dependency in page migration
under memory resource controller.

  The new refcnt sequence is as follows.

a mapped page:
  prepare_migration() ..... +1 to NEW page
  try_to_unmap()      ..... all refs to OLD page are gone.
  move_pages()        ..... +1 to NEW page if page cache.
  remap...            ..... all refs from *map* are added to NEW one.
  end_migration()     ..... -1 to NEW page.

  page's mapcount + (page_is_cache) refs are added to NEW one.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  11 ++--
 mm/memcontrol.c            | 128 +++++++++++++++++++------------------
 mm/migrate.c               |  22 +++++--
 3 files changed, 86 insertions(+), 75 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e6608776bc9..84ead2aa6f1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -50,9 +50,10 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 #define mm_match_cgroup(mm, cgroup)	\
 	((cgroup) == mem_cgroup_from_task((mm)->owner))
 
-extern int mem_cgroup_prepare_migration(struct page *page);
+extern int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
 extern void mem_cgroup_end_migration(struct page *page);
-extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
+extern int mem_cgroup_getref(struct page *page);
 
 /*
  * For memory reclaim.
@@ -112,7 +113,8 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 	return 1;
 }
 
-static inline int mem_cgroup_prepare_migration(struct page *page)
+static inline int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
 	return 0;
 }
@@ -121,8 +123,7 @@ static inline void mem_cgroup_end_migration(struct page *page)
 {
 }
 
-static inline void
-mem_cgroup_page_migration(struct page *page, struct page *newpage)
+static inline void mem_cgroup_getref(struct page *page)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 90ccc132635..da5912b8455 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -524,7 +524,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * < 0 if the cgroup is over its limit
  */
 static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask, enum charge_type ctype)
+				gfp_t gfp_mask, enum charge_type ctype,
+				struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
@@ -569,16 +570,21 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!mm)
-		mm = &init_mm;
+	if (!memcg) {
+		if (!mm)
+			mm = &init_mm;
 
-	rcu_read_lock();
-	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	/*
-	 * For every charge from the cgroup, increment reference count
-	 */
-	css_get(&mem->css);
-	rcu_read_unlock();
+		rcu_read_lock();
+		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+		/*
+		 * For every charge from the cgroup, increment reference count
+		 */
+		css_get(&mem->css);
+		rcu_read_unlock();
+	} else {
+		mem = memcg;
+		css_get(&memcg->css);
+	}
 
 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (!(gfp_mask & __GFP_WAIT))
@@ -648,7 +654,7 @@ err:
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED);
+				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -657,7 +663,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (!mm)
 		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_CACHE);
+				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+}
+
+int mem_cgroup_getref(struct page *page)
+{
+	struct page_cgroup *pc;
+
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
+	VM_BUG_ON(!pc);
+	pc->ref_cnt++;
+	unlock_page_cgroup(page);
+	return 0;
 }
 
 /*
@@ -707,65 +728,39 @@ unlock:
 }
 
 /*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
  */
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
 	struct page_cgroup *pc;
+	struct mem_cgroup *mem = NULL;
+	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	int ret = 0;
 
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (pc)
-		pc->ref_cnt++;
+	if (pc) {
+		mem = pc->mem_cgroup;
+		css_get(&mem->css);
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	}
 	unlock_page_cgroup(page);
-	return pc != NULL;
-}
-
-void mem_cgroup_end_migration(struct page *page)
-{
-	mem_cgroup_uncharge_page(page);
+	if (mem) {
+		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+			ctype, mem);
+		css_put(&mem->css);
+	}
+	return ret;
 }
 
-/*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
- */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+/* remove redundant charge */
+void mem_cgroup_end_migration(struct page *newpage)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
-
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (!pc) {
-		unlock_page_cgroup(page);
-		return;
-	}
-
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_remove_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
-
-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
-
-	pc->page = newpage;
-	lock_page_cgroup(newpage);
-	page_assign_page_cgroup(newpage, pc);
-
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
-
-	unlock_page_cgroup(newpage);
+	mem_cgroup_uncharge_page(newpage);
 }
 
 /*
@@ -795,12 +790,19 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		page = pc->page;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
-		mem_cgroup_uncharge_page(page);
-		put_page(page);
-		if (--count <= 0) {
-			count = FORCE_UNCHARGE_BATCH;
+		/*
+		 * Check if this page is on LRU. !LRU page can be found
+		 * if it's under page migration.
+		 */
+		if (PageLRU(page)) {
+			mem_cgroup_uncharge_page(page);
+			put_page(page);
+			if (--count <= 0) {
+				count = FORCE_UNCHARGE_BATCH;
+				cond_resched();
+			}
+		} else
 			cond_resched();
-		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
diff --git a/mm/migrate.c b/mm/migrate.c
index 376cceba82f..f6d7f8efd1a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -358,6 +358,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
 
 	write_unlock_irq(&mapping->tree_lock);
+	if (!PageSwapCache(newpage)) {
+		mem_cgroup_uncharge_page(page);
+		mem_cgroup_getref(newpage);
+	}
 
 	return 0;
 }
@@ -611,7 +615,6 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 		rc = fallback_migrate_page(mapping, newpage, page);
 
 	if (!rc) {
-		mem_cgroup_page_migration(page, newpage);
 		remove_migration_ptes(page, newpage);
 	} else
 		newpage->mapping = NULL;
@@ -641,6 +644,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
 
+	charge = mem_cgroup_prepare_migration(page, newpage);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto move_newpage;
+	}
+	/* prepare cgroup just returns 0 or -ENOMEM */
+	BUG_ON(charge);
+
 	rc = -EAGAIN;
 	if (TestSetPageLocked(page)) {
 		if (!force)
@@ -692,19 +703,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto rcu_unlock;
 	}
 
-	charge = mem_cgroup_prepare_migration(page);
 	/* Establish migration ptes or remove ptes */
 	try_to_unmap(page, 1);
 
 	if (!page_mapped(page))
 		rc = move_to_new_page(newpage, page);
 
-	if (rc) {
+	if (rc)
 		remove_migration_ptes(page, page);
-		if (charge)
-			mem_cgroup_end_migration(page);
-	} else if (charge)
- 		mem_cgroup_end_migration(newpage);
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
@@ -725,6 +731,8 @@ unlock:
 	}
 
 move_newpage:
+	if (!charge)
+		mem_cgroup_end_migration(newpage);
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
-- 
GitLab


From 69029cd550284e32de13d6dd2f77b723c8a0e444 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:14 -0700
Subject: [PATCH 692/853] memcg: remove refcnt from page_cgroup

memcg: performance improvements

Patch Description
 1/5 ... remove refcnt from page_cgroup patch (shmem handling is fixed)
 2/5 ... swapcache handling patch
 3/5 ... add helper function for shmem's memory reclaim patch
 4/5 ... optimize by likely/unlikely patch
 5/5 ... remove redundant check patch (shmem handling is fixed.)

Unix bench result.

== 2.6.26-rc2-mm1 + memory resource controller
Execl Throughput                           2915.4 lps   (29.6 secs, 3 samples)
C Compiler Throughput                      1019.3 lpm   (60.0 secs, 3 samples)
Shell Scripts (1 concurrent)               5796.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (8 concurrent)               1097.7 lpm   (60.0 secs, 3 samples)
Shell Scripts (16 concurrent)               565.3 lpm   (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks    1022128.0 KBps  (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks   544057.0 KBps  (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks    346481.0 KBps  (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks      319325.0 KBps  (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks     148788.0 KBps  (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks       99051.0 KBps  (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks    2058917.0 KBps  (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks   1606109.0 KBps  (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks    854789.0 KBps  (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places         126145.2 lpm   (30.0 secs, 3 samples)

                     INDEX VALUES
TEST                                        BASELINE     RESULT      INDEX

Execl Throughput                                43.0     2915.4      678.0
File Copy 1024 bufsize 2000 maxblocks         3960.0   346481.0      875.0
File Copy 256 bufsize 500 maxblocks           1655.0    99051.0      598.5
File Copy 4096 bufsize 8000 maxblocks         5800.0   854789.0     1473.8
Shell Scripts (8 concurrent)                     6.0     1097.7     1829.5
                                                                 =========
     FINAL SCORE                                                     991.3

== 2.6.26-rc2-mm1 + this set ==
Execl Throughput                           3012.9 lps   (29.9 secs, 3 samples)
C Compiler Throughput                       981.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (1 concurrent)               5872.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (8 concurrent)               1120.3 lpm   (60.0 secs, 3 samples)
Shell Scripts (16 concurrent)               578.0 lpm   (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks    1003993.0 KBps  (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks   550452.0 KBps  (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks    347159.0 KBps  (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks      314644.0 KBps  (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks     151852.0 KBps  (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks      101000.0 KBps  (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks    2033256.0 KBps  (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks   1611814.0 KBps  (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks    847979.0 KBps  (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places         128148.7 lpm   (30.0 secs, 3 samples)

                     INDEX VALUES
TEST                                        BASELINE     RESULT      INDEX

Execl Throughput                                43.0     3012.9      700.7
File Copy 1024 bufsize 2000 maxblocks         3960.0   347159.0      876.7
File Copy 256 bufsize 500 maxblocks           1655.0   101000.0      610.3
File Copy 4096 bufsize 8000 maxblocks         5800.0   847979.0     1462.0
Shell Scripts (8 concurrent)                     6.0     1120.3     1867.2
                                                                 =========
     FINAL SCORE                                                    1004.6

This patch:

Remove refcnt from page_cgroup().

After this,

 * A page is charged only when !page_mapped() && no page_cgroup is assigned.
	* Anon page is newly mapped.
	* File page is added to mapping->tree.

 * A page is uncharged only when
	* Anon page is fully unmapped.
	* File page is removed from LRU.

There is no change in behavior from user's view.

This patch also removes unnecessary calls in rmap.c which were used only for
refcnt management.

[akpm@linux-foundation.org: fix warning]
[hugh@veritas.com: fix shmem_unuse_inode charging]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  10 ++--
 mm/filemap.c               |   6 +-
 mm/memcontrol.c            | 109 +++++++++++++++++++++----------------
 mm/migrate.c               |   3 +-
 mm/rmap.c                  |  14 +----
 mm/shmem.c                 |  35 ++++++++----
 6 files changed, 97 insertions(+), 80 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 84ead2aa6f1..b4980b8f048 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,6 +35,7 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
 extern void mem_cgroup_uncharge_page(struct page *page);
+extern void mem_cgroup_uncharge_cache_page(struct page *page);
 extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
@@ -53,7 +54,6 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern int
 mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
 extern void mem_cgroup_end_migration(struct page *page);
-extern int mem_cgroup_getref(struct page *page);
 
 /*
  * For memory reclaim.
@@ -98,6 +98,10 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
 
+static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+}
+
 static inline void mem_cgroup_move_lists(struct page *page, bool active)
 {
 }
@@ -123,10 +127,6 @@ static inline void mem_cgroup_end_migration(struct page *page)
 {
 }
 
-static inline void mem_cgroup_getref(struct page *page)
-{
-}
-
 static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 {
 	return 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index 5d4c880d7cd..2d3ec1ffc66 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
-	mem_cgroup_uncharge_page(page);
+	mem_cgroup_uncharge_cache_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
@@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
 		} else
-			mem_cgroup_uncharge_page(page);
+			mem_cgroup_uncharge_cache_page(page);
 
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	} else
-		mem_cgroup_uncharge_page(page);
+		mem_cgroup_uncharge_cache_page(page);
 out:
 	return error;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index da5912b8455..a61706193c3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -166,7 +166,6 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
-	int ref_cnt;			/* cached, mapped, migrating */
 	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 };
 
 /*
@@ -552,9 +552,7 @@ retry:
 	 */
 	if (pc) {
 		VM_BUG_ON(pc->page != page);
-		VM_BUG_ON(pc->ref_cnt <= 0);
-
-		pc->ref_cnt++;
+		VM_BUG_ON(!pc->mem_cgroup);
 		unlock_page_cgroup(page);
 		goto done;
 	}
@@ -570,10 +568,7 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!memcg) {
-		if (!mm)
-			mm = &init_mm;
-
+	if (likely(!memcg)) {
 		rcu_read_lock();
 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		/*
@@ -609,7 +604,6 @@ retry:
 		}
 	}
 
-	pc->ref_cnt = 1;
 	pc->mem_cgroup = mem;
 	pc->page = page;
 	/*
@@ -653,6 +647,17 @@ err:
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
+	/*
+	 * If already mapped, we don't have to account.
+	 * If page cache, page->mapping has address_space.
+	 * But page->mapping may have out-of-use anon_vma pointer,
+	 * detecit it by PageAnon() check. newly-mapped-anon's page->mapping
+	 * is NULL.
+  	 */
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+	if (unlikely(!mm))
+		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
@@ -660,32 +665,17 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	if (!mm)
+	if (unlikely(!mm))
 		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }
 
-int mem_cgroup_getref(struct page *page)
-{
-	struct page_cgroup *pc;
-
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	VM_BUG_ON(!pc);
-	pc->ref_cnt++;
-	unlock_page_cgroup(page);
-	return 0;
-}
-
 /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
  */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
@@ -704,29 +694,41 @@ void mem_cgroup_uncharge_page(struct page *page)
 		goto unlock;
 
 	VM_BUG_ON(pc->page != page);
-	VM_BUG_ON(pc->ref_cnt <= 0);
 
-	if (--(pc->ref_cnt) == 0) {
-		mz = page_cgroup_zoneinfo(pc);
-		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_remove_list(mz, pc);
-		spin_unlock_irqrestore(&mz->lru_lock, flags);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+		|| page_mapped(page)))
+		goto unlock;
 
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_remove_list(mz, pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
-		mem = pc->mem_cgroup;
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
+	page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
 
-		kmem_cache_free(page_cgroup_cache, pc);
-		return;
-	}
+	mem = pc->mem_cgroup;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
 
+	kmem_cache_free(page_cgroup_cache, pc);
+	return;
 unlock:
 	unlock_page_cgroup(page);
 }
 
+void mem_cgroup_uncharge_page(struct page *page)
+{
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+	VM_BUG_ON(page_mapped(page));
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
 /*
  * Before starting migration, account against new page.
  */
@@ -757,15 +759,29 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	return ret;
 }
 
-/* remove redundant charge */
+/* remove redundant charge if migration failed*/
 void mem_cgroup_end_migration(struct page *newpage)
 {
-	mem_cgroup_uncharge_page(newpage);
+	/*
+	 * At success, page->mapping is not NULL.
+	 * special rollback care is necessary when
+	 * 1. at migration failure. (newpage->mapping is cleared in this case)
+	 * 2. the newpage was moved but not remapped again because the task
+	 *    exits and the newpage is obsolete. In this case, the new page
+	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+	 *    always for avoiding mess. The  page_cgroup will be removed if
+	 *    unnecessary. File cache pages is still on radix-tree. Don't
+	 *    care it.
+	 */
+	if (!newpage->mapping)
+		__mem_cgroup_uncharge_common(newpage,
+					 MEM_CGROUP_CHARGE_TYPE_FORCE);
+	else if (PageAnon(newpage))
+		mem_cgroup_uncharge_page(newpage);
 }
 
 /*
  * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
@@ -795,7 +811,8 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		 * if it's under page migration.
 		 */
 		if (PageLRU(page)) {
-			mem_cgroup_uncharge_page(page);
+			__mem_cgroup_uncharge_common(page,
+					MEM_CGROUP_CHARGE_TYPE_FORCE);
 			put_page(page);
 			if (--count <= 0) {
 				count = FORCE_UNCHARGE_BATCH;
diff --git a/mm/migrate.c b/mm/migrate.c
index f6d7f8efd1a..d8c65a65c61 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -359,8 +359,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 
 	write_unlock_irq(&mapping->tree_lock);
 	if (!PageSwapCache(newpage)) {
-		mem_cgroup_uncharge_page(page);
-		mem_cgroup_getref(newpage);
+		mem_cgroup_uncharge_cache_page(page);
 	}
 
 	return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index bf0a5b7cfb8..abbd29f7c43 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page,
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (atomic_inc_and_test(&page->_mapcount))
 		__page_set_anon_rmap(page, vma, address);
-	else {
+	else
 		__page_check_anon_rmap(page, vma, address);
-		/*
-		 * We unconditionally charged during prepare, we uncharge here
-		 * This takes care of balancing the reference counts
-		 */
-		mem_cgroup_uncharge_page(page);
-	}
 }
 
 /**
@@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page)
 {
 	if (atomic_inc_and_test(&page->_mapcount))
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
-	else
-		/*
-		 * We unconditionally charged during prepare, we uncharge here
-		 * This takes care of balancing the reference counts
-		 */
-		mem_cgroup_uncharge_page(page);
 }
 
 #ifdef CONFIG_DEBUG_VM
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ffbea9b79e..d58305e8a48 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,20 +922,26 @@ found:
 	error = 1;
 	if (!inode)
 		goto out;
-	/* Precharge page while we can wait, compensate afterwards */
+	/* Precharge page using GFP_KERNEL while we can wait */
 	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 	if (error)
 		goto out;
 	error = radix_tree_preload(GFP_KERNEL);
-	if (error)
-		goto uncharge;
+	if (error) {
+		mem_cgroup_uncharge_cache_page(page);
+		goto out;
+	}
 	error = 1;
 
 	spin_lock(&info->lock);
 	ptr = shmem_swp_entry(info, idx, NULL);
-	if (ptr && ptr->val == entry.val)
+	if (ptr && ptr->val == entry.val) {
 		error = add_to_page_cache(page, inode->i_mapping,
 						idx, GFP_NOWAIT);
+		/* does mem_cgroup_uncharge_cache_page on error */
+	} else	/* we must compensate for our precharge above */
+		mem_cgroup_uncharge_cache_page(page);
+
 	if (error == -EEXIST) {
 		struct page *filepage = find_get_page(inode->i_mapping, idx);
 		error = 1;
@@ -961,8 +967,6 @@ found:
 		shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
 	radix_tree_preload_end();
-uncharge:
-	mem_cgroup_uncharge_page(page);
 out:
 	unlock_page(page);
 	page_cache_release(page);
@@ -1319,7 +1323,7 @@ repeat:
 					page_cache_release(swappage);
 					goto failed;
 				}
-				mem_cgroup_uncharge_page(swappage);
+				mem_cgroup_uncharge_cache_page(swappage);
 			}
 			page_cache_release(swappage);
 			goto repeat;
@@ -1358,6 +1362,8 @@ repeat:
 		}
 
 		if (!filepage) {
+			int ret;
+
 			spin_unlock(&info->lock);
 			filepage = shmem_alloc_page(gfp, info, idx);
 			if (!filepage) {
@@ -1386,10 +1392,18 @@ repeat:
 				swap = *entry;
 				shmem_swp_unmap(entry);
 			}
-			if (error || swap.val || 0 != add_to_page_cache_lru(
-					filepage, mapping, idx, GFP_NOWAIT)) {
+			ret = error || swap.val;
+			if (ret)
+				mem_cgroup_uncharge_cache_page(filepage);
+			else
+				ret = add_to_page_cache_lru(filepage, mapping,
+						idx, GFP_NOWAIT);
+			/*
+			 * At add_to_page_cache_lru() failure, uncharge will
+			 * be done automatically.
+			 */
+			if (ret) {
 				spin_unlock(&info->lock);
-				mem_cgroup_uncharge_page(filepage);
 				page_cache_release(filepage);
 				shmem_unacct_blocks(info->flags, 1);
 				shmem_free_blocks(inode, 1);
@@ -1398,7 +1412,6 @@ repeat:
 					goto failed;
 				goto repeat;
 			}
-			mem_cgroup_uncharge_page(filepage);
 			info->flags |= SHMEM_PAGEIN;
 		}
 
-- 
GitLab


From c9b0ed51483cc2fc42bb801b6675c4231b0e4634 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:15 -0700
Subject: [PATCH 693/853] memcg: helper function for reclaim from shmem.

A new call, mem_cgroup_shrink_usage() is added for shmem handling and
replacing non-standard usage of mem_cgroup_charge/uncharge.

Now, shmem calls mem_cgroup_charge() just to reclaim some pages from
mem_cgroup.  In general, shmem is used by some process group and not for
global resource (like file caches).  So, it's reasonable to reclaim pages
from mem_cgroup where shmem is mainly used.

[hugh@veritas.com: shmem_getpage release page sooner]
[hugh@veritas.com: mem_cgroup_shrink_usage css_put]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  7 +++++++
 mm/memcontrol.c            | 26 ++++++++++++++++++++++++++
 mm/shmem.c                 | 11 ++++-------
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b4980b8f048..fdf3967e139 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -37,6 +37,8 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
 extern void mem_cgroup_move_lists(struct page *page, bool active);
+extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
+
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
@@ -102,6 +104,11 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
+static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+{
+	return 0;
+}
+
 static inline void mem_cgroup_move_lists(struct page *page, bool active)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a61706193c3..f46b8615de6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -780,6 +780,32 @@ void mem_cgroup_end_migration(struct page *newpage)
 		mem_cgroup_uncharge_page(newpage);
 }
 
+/*
+ * A call to try to shrink memory usage under specified resource controller.
+ * This is typically used for page reclaiming for shmem for reducing side
+ * effect of page allocation from shmem, which is used by some mem_cgroup.
+ */
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+{
+	struct mem_cgroup *mem;
+	int progress = 0;
+	int retry = MEM_CGROUP_RECLAIM_RETRIES;
+
+	rcu_read_lock();
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	css_get(&mem->css);
+	rcu_read_unlock();
+
+	do {
+		progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+	} while (!progress && --retry);
+
+	css_put(&mem->css);
+	if (!retry)
+		return -ENOMEM;
+	return 0;
+}
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
diff --git a/mm/shmem.c b/mm/shmem.c
index d58305e8a48..f92fea94d03 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1315,17 +1315,14 @@ repeat:
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			unlock_page(swappage);
+			page_cache_release(swappage);
 			if (error == -ENOMEM) {
 				/* allow reclaim from this memory cgroup */
-				error = mem_cgroup_cache_charge(swappage,
-					current->mm, gfp & ~__GFP_HIGHMEM);
-				if (error) {
-					page_cache_release(swappage);
+				error = mem_cgroup_shrink_usage(current->mm,
+								gfp);
+				if (error)
 					goto failed;
-				}
-				mem_cgroup_uncharge_cache_page(swappage);
 			}
-			page_cache_release(swappage);
 			goto repeat;
 		}
 	} else if (sgp == SGP_READ && !filepage) {
-- 
GitLab


From b76734e5e34e1889ab9fc5f3756570b1129f0f50 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:16 -0700
Subject: [PATCH 694/853] memcg: add hints for branch

Show the expected branch direction for obvious conditions.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f46b8615de6..04ded27f622 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -550,7 +550,7 @@ retry:
 	 * The page_cgroup exists and
 	 * the page has already been accounted.
 	 */
-	if (pc) {
+	if (unlikely(pc)) {
 		VM_BUG_ON(pc->page != page);
 		VM_BUG_ON(!pc->mem_cgroup);
 		unlock_page_cgroup(page);
@@ -559,7 +559,7 @@ retry:
 	unlock_page_cgroup(page);
 
 	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
-	if (pc == NULL)
+	if (unlikely(pc == NULL))
 		goto err;
 
 	/*
@@ -616,7 +616,7 @@ retry:
 		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
 	lock_page_cgroup(page);
-	if (page_get_page_cgroup(page)) {
+	if (unlikely(page_get_page_cgroup(page))) {
 		unlock_page_cgroup(page);
 		/*
 		 * Another charge has been added to this page already.
@@ -690,7 +690,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	 */
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (!pc)
+	if (unlikely(!pc))
 		goto unlock;
 
 	VM_BUG_ON(pc->page != page);
-- 
GitLab


From accf163e6ab729f1fc5fffaa0310e498270bf4e7 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:17 -0700
Subject: [PATCH 695/853] memcg: remove a redundant check

Because of the remove-refcnt patch, it is now very rare for
mem_cgroup_charge_common() to be called against a page which is already
accounted.

mem_cgroup_charge_common() is called when:
 1. a page is added into file cache.
 2. an anon page is _newly_ mapped.

A racy case is that a newly-swapped-in anonymous page is referenced by
multiple threads in do_swap_page() at the same time.
(The page is not locked when mem_cgroup_charge() is called from
do_swap_page().)

Another case is shmem. It charges its page before calling add_to_page_cache().
Then, mem_cgroup_cache_charge() is called twice. This case is handled in
mem_cgroup_cache_charge(). But this check may be too hacky...

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 53 +++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 04ded27f622..5b3759bd549 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -536,28 +536,6 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
-	/*
-	 * Should page_cgroup's go to their own slab?
-	 * One could optimize the performance of the charging routine
-	 * by saving a bit in the page_flags and using it as a lock
-	 * to see if the cgroup page already has a page_cgroup associated
-	 * with it
-	 */
-retry:
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	/*
-	 * The page_cgroup exists and
-	 * the page has already been accounted.
-	 */
-	if (unlikely(pc)) {
-		VM_BUG_ON(pc->page != page);
-		VM_BUG_ON(!pc->mem_cgroup);
-		unlock_page_cgroup(page);
-		goto done;
-	}
-	unlock_page_cgroup(page);
-
 	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
 	if (unlikely(pc == NULL))
 		goto err;
@@ -618,15 +596,10 @@ retry:
 	lock_page_cgroup(page);
 	if (unlikely(page_get_page_cgroup(page))) {
 		unlock_page_cgroup(page);
-		/*
-		 * Another charge has been added to this page already.
-		 * We take lock_page_cgroup(page) again and read
-		 * page->cgroup, increment refcnt.... just retry is OK.
-		 */
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
 		kmem_cache_free(page_cgroup_cache, pc);
-		goto retry;
+		goto done;
 	}
 	page_assign_page_cgroup(page, pc);
 
@@ -665,8 +638,32 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
+	/*
+	 * Corner case handling. This is called from add_to_page_cache()
+	 * in usual. But some FS (shmem) precharges this page before calling it
+	 * and call add_to_page_cache() with GFP_NOWAIT.
+	 *
+	 * For GFP_NOWAIT case, the page may be pre-charged before calling
+	 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
+	 * charge twice. (It works but has to pay a bit larger cost.)
+	 */
+	if (!(gfp_mask & __GFP_WAIT)) {
+		struct page_cgroup *pc;
+
+		lock_page_cgroup(page);
+		pc = page_get_page_cgroup(page);
+		if (pc) {
+			VM_BUG_ON(pc->page != page);
+			VM_BUG_ON(!pc->mem_cgroup);
+			unlock_page_cgroup(page);
+			return 0;
+		}
+		unlock_page_cgroup(page);
+	}
+
 	if (unlikely(!mm))
 		mm = &init_mm;
+
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }
-- 
GitLab


From cede86acd8bd5d2205dec28db8ac86410a3a19e8 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:18 -0700
Subject: [PATCH 696/853] memcg: clean up checking of the disabled flag

Those checks are unnecessary, because when the subsystem is disabled
it can't be mounted, so those functions won't get called.

The check is still needed in functions which can be called from places
other than cgroup.

[hugh@veritas.com: further checking of disabled flag]
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5b3759bd549..0c035647d36 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -354,6 +354,9 @@ void mem_cgroup_move_lists(struct page *page, bool active)
 	struct mem_cgroup_per_zone *mz;
 	unsigned long flags;
 
+	if (mem_cgroup_subsys.disabled)
+		return;
+
 	/*
 	 * We cannot lock_page_cgroup while holding zone's lru_lock,
 	 * because other holders of lock_page_cgroup can be interrupted
@@ -533,9 +536,6 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;
 
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
 	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
 	if (unlikely(pc == NULL))
 		goto err;
@@ -620,6 +620,9 @@ err:
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
 	/*
 	 * If already mapped, we don't have to account.
 	 * If page cache, page->mapping has address_space.
@@ -638,6 +641,9 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
 	/*
 	 * Corner case handling. This is called from add_to_page_cache()
 	 * in usual. But some FS (shmem) precharges this page before calling it
@@ -788,6 +794,9 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 	int progress = 0;
 	int retry = MEM_CGROUP_RECLAIM_RETRIES;
 
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
 	rcu_read_lock();
 	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 	css_get(&mem->css);
@@ -857,9 +866,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 	int ret = -EBUSY;
 	int node, zid;
 
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
 	css_get(&mem->css);
 	/*
 	 * page reclaim code (kswapd etc..) will move pages between
@@ -1103,8 +1109,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
 				struct cgroup *cont)
 {
-	if (mem_cgroup_subsys.disabled)
-		return 0;
 	return cgroup_add_files(cont, ss, mem_cgroup_files,
 					ARRAY_SIZE(mem_cgroup_files));
 }
@@ -1117,9 +1121,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	struct mm_struct *mm;
 	struct mem_cgroup *mem, *old_mem;
 
-	if (mem_cgroup_subsys.disabled)
-		return;
-
 	mm = get_task_mm(p);
 	if (mm == NULL)
 		return;
-- 
GitLab


From 12b9804419cfb1c1bdac413f6c373af3b88d154b Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:19 -0700
Subject: [PATCH 697/853] res_counter: limit change support ebusy

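Add an interface to set the limit.  This is necessary for the memory
resource controller because it shrinks usage when the limit is set.
The new limit is only accepted when the current usage is below it;
otherwise -EBUSY is returned.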

Other controllers may not need this interface to shrink usage because
shrinking is not necessary or impossible.

Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 290205dfe09..fdeadd9740d 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -158,4 +158,20 @@ static inline void res_counter_reset_failcnt(struct res_counter *cnt)
 	cnt->failcnt = 0;
 	spin_unlock_irqrestore(&cnt->lock, flags);
 }
+
+static inline int res_counter_set_limit(struct res_counter *cnt,
+		unsigned long long limit)
+{
+	unsigned long flags;
+	int ret = -EBUSY;
+
+	spin_lock_irqsave(&cnt->lock, flags);
+	if (cnt->usage < limit) {
+		cnt->limit = limit;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&cnt->lock, flags);
+	return ret;
+}
+
 #endif
-- 
GitLab


From 628f42355389cfb596ca3a5a5f64fb9054a2a06a Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:20 -0700
Subject: [PATCH 698/853] memcg: limit change shrink usage

Shrink memory usage when the limit is changed.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/controllers/memory.txt |  3 +-
 mm/memcontrol.c                      | 48 +++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 866b9cd9a95..9b53d582736 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -242,8 +242,7 @@ rmdir() if there are no tasks.
 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
 3. Teach controller to account for shared-pages
-4. Start reclamation when the limit is lowered
-5. Start reclamation in the background when the limit is
+4. Start reclamation in the background when the limit is
    not yet hit but the usage is getting closer
 
 Summary
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0c035647d36..fba566c5132 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -812,6 +812,30 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 	return 0;
 }
 
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
+
+	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int progress;
+	int ret = 0;
+
+	while (res_counter_set_limit(&memcg->res, val)) {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		if (!retry_count) {
+			ret = -EBUSY;
+			break;
+		}
+		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+		if (!progress)
+			retry_count--;
+	}
+	return ret;
+}
+
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
@@ -896,13 +920,29 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
 				    cft->private);
 }
-
+/*
+ * The user of this function is...
+ * RES_LIMIT.
+ */
 static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 			    const char *buffer)
 {
-	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				 cft->private, buffer,
-				 res_counter_memparse_write_strategy);
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	unsigned long long val;
+	int ret;
+
+	switch (cft->private) {
+	case RES_LIMIT:
+		/* This function does all necessary parse...reuse it */
+		ret = res_counter_memparse_write_strategy(buffer, &val);
+		if (!ret)
+			ret = mem_cgroup_resize_limit(memcg, val);
+		break;
+	default:
+		ret = -EINVAL; /* should be BUG() ? */
+		break;
+	}
+	return ret;
 }
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
-- 
GitLab


From 0b2f630a28d53b5a2082a5275bc3334b10373508 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:21 -0700
Subject: [PATCH 699/853] cpusets: restructure the function update_cpumask()
 and update_nodemask()

Extract two functions from update_cpumask() and update_nodemask().  They
will be used later for updating tasks' cpus_allowed and mems_allowed after
CPU/NODE offline/online.

[lizf@cn.fujitsu.com: build fix]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Acked-by: Paul Jackson <pj@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Cc:  Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 181 +++++++++++++++++++++++++++++-------------------
 1 file changed, 109 insertions(+), 72 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 276ce7e4f1a..7326d51eefe 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -763,6 +763,37 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
 	set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
 }
 
+/**
+ * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ *
+ * Called with cgroup_mutex held
+ *
+ * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
+ * calling callback functions for each.
+ *
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_cpumask(struct cpuset *cs)
+{
+	struct cgroup_scanner scan;
+	struct ptr_heap heap;
+	int retval;
+
+	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
+	if (retval)
+		return retval;
+
+	scan.cg = cs->css.cgroup;
+	scan.test_task = cpuset_test_cpumask;
+	scan.process_task = cpuset_change_cpumask;
+	scan.heap = &heap;
+	retval = cgroup_scan_tasks(&scan);
+
+	heap_free(&heap);
+	return retval;
+}
+
 /**
  * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
  * @cs: the cpuset to consider
@@ -771,8 +802,6 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
 static int update_cpumask(struct cpuset *cs, const char *buf)
 {
 	struct cpuset trialcs;
-	struct cgroup_scanner scan;
-	struct ptr_heap heap;
 	int retval;
 	int is_load_balanced;
 
@@ -806,10 +835,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
 	if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
 		return 0;
 
-	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
-	if (retval)
-		return retval;
-
 	is_load_balanced = is_sched_load_balance(&trialcs);
 
 	mutex_lock(&callback_mutex);
@@ -820,12 +845,9 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
 	 * Scan tasks in the cpuset, and update the cpumasks of any
 	 * that need an update.
 	 */
-	scan.cg = cs->css.cgroup;
-	scan.test_task = cpuset_test_cpumask;
-	scan.process_task = cpuset_change_cpumask;
-	scan.heap = &heap;
-	cgroup_scan_tasks(&scan);
-	heap_free(&heap);
+	retval = update_tasks_cpumask(cs);
+	if (retval < 0)
+		return retval;
 
 	if (is_load_balanced)
 		rebuild_sched_domains();
@@ -881,73 +903,25 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	mutex_unlock(&callback_mutex);
 }
 
-/*
- * Handle user request to change the 'mems' memory placement
- * of a cpuset.  Needs to validate the request, update the
- * cpusets mems_allowed and mems_generation, and for each
- * task in the cpuset, rebind any vma mempolicies and if
- * the cpuset is marked 'memory_migrate', migrate the tasks
- * pages to the new memory.
- *
- * Call with cgroup_mutex held.  May take callback_mutex during call.
- * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
- * lock each such tasks mm->mmap_sem, scan its vma's and rebind
- * their mempolicies to the cpusets new mems_allowed.
- */
-
 static void *cpuset_being_rebound;
 
-static int update_nodemask(struct cpuset *cs, const char *buf)
+/**
+ * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
+ * @oldmem: old mems_allowed of cpuset cs
+ *
+ * Called with cgroup_mutex held
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
 {
-	struct cpuset trialcs;
-	nodemask_t oldmem;
 	struct task_struct *p;
 	struct mm_struct **mmarray;
 	int i, n, ntasks;
 	int migrate;
 	int fudge;
-	int retval;
 	struct cgroup_iter it;
-
-	/*
-	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
-	 * it's read-only
-	 */
-	if (cs == &top_cpuset)
-		return -EACCES;
-
-	trialcs = *cs;
-
-	/*
-	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
-	 * Since nodelist_parse() fails on an empty mask, we special case
-	 * that parsing.  The validate_change() call ensures that cpusets
-	 * with tasks have memory.
-	 */
-	if (!*buf) {
-		nodes_clear(trialcs.mems_allowed);
-	} else {
-		retval = nodelist_parse(buf, trialcs.mems_allowed);
-		if (retval < 0)
-			goto done;
-
-		if (!nodes_subset(trialcs.mems_allowed,
-				node_states[N_HIGH_MEMORY]))
-			return -EINVAL;
-	}
-	oldmem = cs->mems_allowed;
-	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
-		retval = 0;		/* Too easy - nothing to do */
-		goto done;
-	}
-	retval = validate_change(cs, &trialcs);
-	if (retval < 0)
-		goto done;
-
-	mutex_lock(&callback_mutex);
-	cs->mems_allowed = trialcs.mems_allowed;
-	cs->mems_generation = cpuset_mems_generation++;
-	mutex_unlock(&callback_mutex);
+	int retval;
 
 	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
 
@@ -1014,7 +988,7 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
 
 		mpol_rebind_mm(mm, &cs->mems_allowed);
 		if (migrate)
-			cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
+			cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
 		mmput(mm);
 	}
 
@@ -1026,6 +1000,70 @@ done:
 	return retval;
 }
 
+/*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset.  Needs to validate the request, update the
+ * cpusets mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the tasks
+ * pages to the new memory.
+ *
+ * Call with cgroup_mutex held.  May take callback_mutex during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such tasks mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpusets new mems_allowed.
+ */
+static int update_nodemask(struct cpuset *cs, const char *buf)
+{
+	struct cpuset trialcs;
+	nodemask_t oldmem;
+	int retval;
+
+	/*
+	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
+	 * it's read-only
+	 */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
+	trialcs = *cs;
+
+	/*
+	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+	 * Since nodelist_parse() fails on an empty mask, we special case
+	 * that parsing.  The validate_change() call ensures that cpusets
+	 * with tasks have memory.
+	 */
+	if (!*buf) {
+		nodes_clear(trialcs.mems_allowed);
+	} else {
+		retval = nodelist_parse(buf, trialcs.mems_allowed);
+		if (retval < 0)
+			goto done;
+
+		if (!nodes_subset(trialcs.mems_allowed,
+				node_states[N_HIGH_MEMORY]))
+			return -EINVAL;
+	}
+	oldmem = cs->mems_allowed;
+	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+		retval = 0;		/* Too easy - nothing to do */
+		goto done;
+	}
+	retval = validate_change(cs, &trialcs);
+	if (retval < 0)
+		goto done;
+
+	mutex_lock(&callback_mutex);
+	cs->mems_allowed = trialcs.mems_allowed;
+	cs->mems_generation = cpuset_mems_generation++;
+	mutex_unlock(&callback_mutex);
+
+	retval = update_tasks_nodemask(cs, &oldmem);
+done:
+	return retval;
+}
+
 int current_cpuset_is_being_rebound(void)
 {
 	return task_cs(current) == cpuset_being_rebound;
@@ -1935,7 +1973,6 @@ void __init cpuset_init_smp(void)
 }
 
 /**
-
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
  * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
-- 
GitLab


From f9b4fb8dabf38fb456c97f01aace07cb6e7c1723 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:22 -0700
Subject: [PATCH 700/853] cpusets: update task's cpus_allowed and mems_allowed
 after CPU/NODE offline/online

The bug is that a task may run on the cpu/node which is not in its
cpuset.cpus/ cpuset.mems.

It can be reproduced by the following commands:
-----------------------------------
# mkdir /dev/cpuset
# mount -t cpuset xxx /dev/cpuset
# mkdir /dev/cpuset/0
# echo 0-1 > /dev/cpuset/0/cpus
# echo 0 > /dev/cpuset/0/mems
# echo $$ > /dev/cpuset/0/tasks
# echo 0 > /sys/devices/system/cpu/cpu1/online
# echo 1 > /sys/devices/system/cpu/cpu1/online
-----------------------------------

There is only CPU0 in cpuset.cpus, but the task in this cpuset runs on
both CPU0 and CPU1.

It is because the task's cpus_allowed didn't get updated after we did CPU
offline/online manipulation.  Similar for mems_allowed.

This patch fixes this bug except for the root cpuset, because there is an
open question about the root cpuset: whether or not it is necessary to
update all the tasks in the root cpuset after cpu/node offline/online.

If updating, some kernel threads which are bound to a specific cpu will
be unbound.

If not updating, there is a bug in the root cpuset.  This bug is also caused
by offline/online manipulation.  For example, on a dual-cpu machine, we
create a sub cpuset in the root cpuset and assign 1 to its cpus.  Then we
attach some tasks to this sub cpuset.  After this, we offline CPU1.  Now the
tasks in this new cpuset are moved into the root cpuset automatically,
because there is no cpu left in the sub cpuset.  Then we online CPU1, and
find that all the tasks which didn't originally belong to the root cpuset
just run on CPU0.

Maybe we need to add a flag in the task_struct to mark which task can't be
unbound?

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Acked-by: Paul Jackson <pj@sgi.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7326d51eefe..6eae6639e85 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1851,6 +1851,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 	struct cpuset *child;	/* scans child cpusets of cp */
 	struct list_head queue;
 	struct cgroup *cont;
+	nodemask_t oldmems;
 
 	INIT_LIST_HEAD(&queue);
 
@@ -1870,6 +1871,8 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
+		oldmems = cp->mems_allowed;
+
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
@@ -1881,6 +1884,10 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 		if (cpus_empty(cp->cpus_allowed) ||
 		     nodes_empty(cp->mems_allowed))
 			remove_tasks_in_empty_cpuset(cp);
+		else {
+			update_tasks_cpumask(cp);
+			update_tasks_nodemask(cp, &oldmems);
+		}
 	}
 }
 
-- 
GitLab


From c372e817afc629fea9ff6321313325ed0b4a855b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:23 -0700
Subject: [PATCH 701/853] cpuset: avoid unnecessary sched domains rebuilding

When changing 'sched_relax_domain_level', don't rebuild sched domains if
'cpus' is empty or 'sched_load_balance' is not set.

Also make the comments of rebuild_sched_domains() more readable.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6eae6639e85..60d2c4702c6 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -496,11 +496,16 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
 /*
  * rebuild_sched_domains()
  *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
+ * This routine will be called to rebuild the scheduler's dynamic
+ * sched domains:
+ * - if the flag 'sched_load_balance' of any cpuset with non-empty
+ *   'cpus' changes,
+ * - or if the 'cpus' allowed changes in any cpuset which has that
+ *   flag enabled,
+ * - or if the 'sched_relax_domain_level' of any cpuset which has
+ *   that flag enabled and with non-empty 'cpus' changes,
+ * - or if any cpuset with non-empty 'cpus' is removed,
+ * - or if a cpu gets offlined.
  *
  * This routine builds a partial partition of the systems CPUs
  * (the set of non-overlappping cpumask_t's in the array 'part'
@@ -1076,7 +1081,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 
 	if (val != cs->relax_domain_level) {
 		cs->relax_domain_level = val;
-		rebuild_sched_domains();
+		if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+			rebuild_sched_domains();
 	}
 
 	return 0;
-- 
GitLab


From 489a5393a20dcbf91104052120eb2eff8791b61b Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:23 -0700
Subject: [PATCH 702/853] cpuset: don't pass empty cpumasks to
 partition_sched_domains()

I created lots of empty cpusets (with empty cpumasks) and turned off
"sched_load_balance" in the top cpuset.

I found that all these empty cpumasks are passed to
partition_sched_domains() in rebuild_sched_domains(), which is very
time-consuming for partition_sched_domains() and is not needed.

This patch also reduces the memory consumed and some of the work done in
rebuild_sched_domains().

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 60d2c4702c6..531b235e546 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -610,8 +610,13 @@ void rebuild_sched_domains(void)
 	while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
 		struct cgroup *cont;
 		struct cpuset *child;   /* scans child cpusets of cp */
+
+		if (cpus_empty(cp->cpus_allowed))
+			continue;
+
 		if (is_sched_load_balance(cp))
 			csa[csn++] = cp;
+
 		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
 			child = cgroup_cs(cont);
 			__kfifo_put(q, (void *)&child, sizeof(cp));
-- 
GitLab


From 02412483777651a26b19a75e49c2a451a174ca9c Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:24 -0700
Subject: [PATCH 703/853] cpuset: code-cleanup for started_after

cgroup(cgroup_scan_tasks) will initialize heap->gt for us.  This patch
removes started_after() and its helper-function.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 37 ++++++-------------------------------
 1 file changed, 6 insertions(+), 31 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 531b235e546..ebbc9b082e4 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -709,36 +709,6 @@ done:
 	/* Don't kfree(dattr) -- partition_sched_domains() does that. */
 }
 
-static inline int started_after_time(struct task_struct *t1,
-				     struct timespec *time,
-				     struct task_struct *t2)
-{
-	int start_diff = timespec_compare(&t1->start_time, time);
-	if (start_diff > 0) {
-		return 1;
-	} else if (start_diff < 0) {
-		return 0;
-	} else {
-		/*
-		 * Arbitrarily, if two processes started at the same
-		 * time, we'll say that the lower pointer value
-		 * started first. Note that t2 may have exited by now
-		 * so this may not be a valid pointer any longer, but
-		 * that's fine - it still serves to distinguish
-		 * between two tasks started (effectively)
-		 * simultaneously.
-		 */
-		return t1 > t2;
-	}
-}
-
-static inline int started_after(void *p1, void *p2)
-{
-	struct task_struct *t1 = p1;
-	struct task_struct *t2 = p2;
-	return started_after_time(t1, &t2->start_time, t2);
-}
-
 /**
  * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
  * @tsk: task to test
@@ -790,7 +760,12 @@ static int update_tasks_cpumask(struct cpuset *cs)
 	struct ptr_heap heap;
 	int retval;
 
-	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
+	/*
+	 * cgroup_scan_tasks() will initialize heap->gt for us.
+	 * heap_init() is still needed here for we should not change
+	 * cs->cpus_allowed when heap_init() fails.
+	 */
+	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
 	if (retval)
 		return retval;
 
-- 
GitLab


From da5ef6bb96158b0fc0d808704237a453af449124 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:25 -0700
Subject: [PATCH 704/853] cpuset: two minor code-cleanups

In cpuset_update_task_memory_state() local variable struct task_struct
*tsk = current;

And local variable tsk is used 14 times and statement task_cs(tsk) is used
twice in this function.  So using task_cs(tsk) instead of task_cs(current)
is better for readability.

And "(struct cgroup_scanner *)&scan" is also not good for readability.
(and "container_of" is used in cpuset_do_move_task(), not
"(cpuset_hotplug_scanner *)scan")

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ebbc9b082e4..91cf85b36dd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -365,7 +365,7 @@ void cpuset_update_task_memory_state(void)
 		my_cpusets_mem_gen = top_cpuset.mems_generation;
 	} else {
 		rcu_read_lock();
-		my_cpusets_mem_gen = task_cs(current)->mems_generation;
+		my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
 		rcu_read_unlock();
 	}
 
@@ -1777,7 +1777,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
 	scan.scan.heap = NULL;
 	scan.to = to->css.cgroup;
 
-	if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
+	if (cgroup_scan_tasks(&scan.scan))
 		printk(KERN_ERR "move_member_tasks_to_cpuset: "
 				"cgroup_scan_tasks failed\n");
 }
-- 
GitLab


From 4b7a1304267bff68260ae861784b27130e805be3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:26 -0700
Subject: [PATCH 705/853] posix timers: timer_delete: remove the bogus
 "->it_process != NULL" check

sys_timer_delete() and itimer_delete() check "timer->it_process != NULL",
this looks completely bogus.  ->it_process == NULL means that this timer
is already under destruction or it is not fully initialized, this must not
happen.

	sys_timer_delete: the timer is locked, and lock_timer() can't succeed
	if ->it_process == NULL.

	itimer_delete: it is called by exit_itimers() when there are no other
	threads which can play with signal_struct->posix_timers.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/posix-timers.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index dbd8398ddb0..17f53266fb6 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -856,11 +856,10 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	if (timer->it_process) {
-		if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-			put_task_struct(timer->it_process);
-		timer->it_process = NULL;
-	}
+	if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+		put_task_struct(timer->it_process);
+	timer->it_process = NULL;
+
 	unlock_timer(timer, flags);
 	release_posix_timer(timer, IT_ID_SET);
 	return 0;
@@ -885,11 +884,10 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	if (timer->it_process) {
-		if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-			put_task_struct(timer->it_process);
-		timer->it_process = NULL;
-	}
+	if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+		put_task_struct(timer->it_process);
+	timer->it_process = NULL;
+
 	unlock_timer(timer, flags);
 	release_posix_timer(timer, IT_ID_SET);
 }
-- 
GitLab


From 96347e7759e2e433c427defa0fa1adfc8cce6226 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:27 -0700
Subject: [PATCH 706/853] posix timers: release_posix_timer: kill the bogus
 put_task_struct(->it_process);

release_posix_timer() can't be called with ->it_process != NULL.  Once
sys_timer_create() sets ->it_process it must not call
release_posix_timer(), otherwise we can race with another thread doing
sys_timer_delete(), this timer is visible to idr_find() and unlocked.

The same is true for two other callers (actually, for any possible
caller), sys_timer_delete() and itimer_delete().  They must clear
->it_process before unlock_timer() + release_posix_timer().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/posix-timers.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 17f53266fb6..9a21681aa80 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -449,9 +449,6 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 		spin_unlock_irqrestore(&idr_lock, flags);
 	}
 	sigqueue_free(tmr->sigq);
-	if (unlikely(tmr->it_process) &&
-	    tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-		put_task_struct(tmr->it_process);
 	kmem_cache_free(posix_timers_cache, tmr);
 }
 
-- 
GitLab


From 6715ca451cfff1c9ce4b33ad9918a1dacf43997c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:27 -0700
Subject: [PATCH 707/853] signals: collect_signal: remove the unneeded
 sigismember() check

collect_signal() checks sigismember(&list->signal, sig), this is not
needed.  This "sig" was just found by next_signal(), so it must be valid.

We have a (completely broken) call to ->notifier in between, but it must
not play with sigpending->signal bits or unlock ->siglock.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index 6c0958e52ea..c5b9aabb155 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -343,9 +343,6 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 	struct sigqueue *q, *first = NULL;
 	int still_pending = 0;
 
-	if (unlikely(!sigismember(&list->signal, sig)))
-		return 0;
-
 	/*
 	 * Collect the siginfo appropriate to this signal.  Check if
 	 * there is another siginfo for the same signal.
-- 
GitLab


From d4434207616980885205c605697868c0f07e4378 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:28 -0700
Subject: [PATCH 708/853] signals: collect_signal: simplify the "still_pending"
 logic

Factor out sigdelset() calls and remove the "still_pending" variable.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index c5b9aabb155..50ad439377b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -341,7 +341,6 @@ unblock_all_signals(void)
 static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 {
 	struct sigqueue *q, *first = NULL;
-	int still_pending = 0;
 
 	/*
 	 * Collect the siginfo appropriate to this signal.  Check if
@@ -349,26 +348,24 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 	*/
 	list_for_each_entry(q, &list->list, list) {
 		if (q->info.si_signo == sig) {
-			if (first) {
-				still_pending = 1;
-				break;
-			}
+			if (first)
+				goto still_pending;
 			first = q;
 		}
 	}
+
+	sigdelset(&list->signal, sig);
+
 	if (first) {
+still_pending:
 		list_del_init(&first->list);
 		copy_siginfo(info, &first->info);
 		__sigqueue_free(first);
-		if (!still_pending)
-			sigdelset(&list->signal, sig);
 	} else {
-
 		/* Ok, it wasn't in the queue.  This must be
 		   a fast-pathed signal or we must have been
 		   out of queue space.  So zero out the info.
 		 */
-		sigdelset(&list->signal, sig);
 		info->si_signo = sig;
 		info->si_errno = 0;
 		info->si_code = 0;
-- 
GitLab


From 100360f03077663b7bef3af44805b6cf700c3bee Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:29 -0700
Subject: [PATCH 709/853] signals: change collect_signal() to return void

With the recent changes collect_signal() always returns true.  Change it
to return void and update the single caller.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index 50ad439377b..fea236fe0b5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -338,7 +338,7 @@ unblock_all_signals(void)
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 }
 
-static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 {
 	struct sigqueue *q, *first = NULL;
 
@@ -372,7 +372,6 @@ still_pending:
 		info->si_pid = 0;
 		info->si_uid = 0;
 	}
-	return 1;
 }
 
 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
@@ -390,8 +389,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
 			}
 		}
 
-		if (!collect_signal(sig, pending, info))
-			sig = 0;
+		collect_signal(sig, pending, info);
 	}
 
 	return sig;
-- 
GitLab


From 3854a771821c970065e3203a0b40ddc4101538cc Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:29 -0700
Subject: [PATCH 710/853] __exit_signal: don't take rcu lock

There is no reason for rcu_read_lock() in __exit_signal().  tsk->sighand
can only be changed if tsk does exec, obviously this is not possible.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/exit.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 93d2711b938..a7799d8a640 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,6 @@ static void __exit_signal(struct task_struct *tsk)
 	BUG_ON(!sig);
 	BUG_ON(!atomic_read(&sig->count));
 
-	rcu_read_lock();
 	sighand = rcu_dereference(tsk->sighand);
 	spin_lock(&sighand->siglock);
 
@@ -136,7 +135,6 @@ static void __exit_signal(struct task_struct *tsk)
 	tsk->signal = NULL;
 	tsk->sighand = NULL;
 	spin_unlock(&sighand->siglock);
-	rcu_read_unlock();
 
 	__cleanup_sighand(sighand);
 	clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
-- 
GitLab


From 92413d771e7123304fb4b9efd2a00cccc946e383 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:30 -0700
Subject: [PATCH 711/853] signals: dequeue_signal: don't check
 SIGNAL_GROUP_EXIT when setting SIGNAL_STOP_DEQUEUED

dequeue_signal() checks SIGNAL_GROUP_EXIT before setting
SIGNAL_STOP_DEQUEUED.  This was added by
788e05a67c343fa22f2ae1d3ca264e7f15c25eaf long ago to avoid the
coredump/SIGSTOP race.

Since then the related code was changed, and now this subtle check is both
incomplete and unneeded at the same time.  It is incomplete because
nowadays exec() doesn't set SIGNAL_GROUP_EXIT, so in fact we should check
signal_group_exit() to avoid a similar race.  Fortunately, we don't need
the check at all.  The only function which relies on SIGNAL_STOP_DEQUEUED
is do_signal_stop(), and it ignores this flag if signal_group_exit() == T,
this covers the SIGNAL_GROUP_EXIT case.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index fea236fe0b5..15f901a26ec 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -454,8 +454,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		 * is to alert stop-signal processing code when another
 		 * processor has come along and cleared the flag.
 		 */
-		if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
-			tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+		tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
 	}
 	if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
 		/*
-- 
GitLab


From 2b201a9eddf509e8e935b45e573648e36f4b623f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:31 -0700
Subject: [PATCH 712/853] signals: do_signal_stop: kill the SIGNAL_UNKILLABLE
 check

fae5fa44f1fd079ffbed8e0add929dd7bbd1347f changed do_signal_stop() to check
SIGNAL_UNKILLABLE, this wasn't needed.  If signal_group_exit() == F, the
signal sent to SIGNAL_UNKILLABLE task must be already filtered out by the
caller, get_signal_to_deliver().  And if signal_group_exit() == T we are
not going to stop.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index 15f901a26ec..0514da573f2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1649,8 +1649,7 @@ static int do_signal_stop(int signr)
 	} else {
 		struct task_struct *t;
 
-		if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE))
-					 != SIGNAL_STOP_DEQUEUED) ||
+		if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
 		    unlikely(signal_group_exit(sig)))
 			return 0;
 		/*
-- 
GitLab


From e4901f92a8dbe843e76651a50f7a2a6dd3d53474 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:31 -0700
Subject: [PATCH 713/853] coredump: zap_threads: comments && use
 while_each_thread()

No changes in fs/exec.o

The for_each_process() loop in zap_threads() is very subtle, it is not
clear why we don't race with fork/exit/exec.  Add the fat comment.

Also, change the code to use while_each_thread().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e41aef0fb35..af249af4cca 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1517,7 +1517,7 @@ static void zap_process(struct task_struct *start)
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
 		}
-	} while ((t = next_thread(t)) != start);
+	} while_each_thread(start, t);
 }
 
 static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
@@ -1539,7 +1539,36 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 
 	if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
 		goto done;
-
+	/*
+	 * We should find and kill all tasks which use this mm, and we should
+	 * count them correctly into mm->core_waiters. We don't take tasklist
+	 * lock, but this is safe wrt:
+	 *
+	 * fork:
+	 *	None of sub-threads can fork after zap_process(leader). All
+	 *	processes which were created before this point should be
+	 *	visible to zap_threads() because copy_process() adds the new
+	 *	process to the tail of init_task.tasks list, and lock/unlock
+	 *	of ->siglock provides a memory barrier.
+	 *
+	 * do_exit:
+	 *	The caller holds mm->mmap_sem. This means that the task which
+	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
+	 *	its ->mm.
+	 *
+	 * de_thread:
+	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
+	 *	we must see either old or new leader, this does not matter.
+	 *	However, it can change p->sighand, so lock_task_sighand(p)
+	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
+	 *	it can't fail.
+	 *
+	 *	Note also that "g" can be the old leader with ->mm == NULL
+	 *	and already unhashed and thus removed from ->thread_group.
+	 *	This is OK, __unhash_process()->list_del_rcu() does not
+	 *	clear the ->next pointer, we will find the new leader via
+	 *	next_thread().
+	 */
 	rcu_read_lock();
 	for_each_process(g) {
 		if (g == tsk->group_leader)
@@ -1549,17 +1578,13 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 		do {
 			if (p->mm) {
 				if (p->mm == mm) {
-					/*
-					 * p->sighand can't disappear, but
-					 * may be changed by de_thread()
-					 */
 					lock_task_sighand(p, &flags);
 					zap_process(p);
 					unlock_task_sighand(p, &flags);
 				}
 				break;
 			}
-		} while ((p = next_thread(p)) != g);
+		} while_each_thread(g, p);
 	}
 	rcu_read_unlock();
 done:
-- 
GitLab


From d8878ba3f05ae5bbfad5a6e72e5121c0ea35f989 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@googlemail.com>
Date: Fri, 25 Jul 2008 01:47:32 -0700
Subject: [PATCH 714/853] signals: make siginfo_t si_utime + si_stime report
 times in USER_HZ, not HZ

In the switch to configurable HZ in 2.6, the treatment of the si_utime and
si_stime fields that are exposed to userland via the siginfo structure
looks to have been botched.  As things stand, these fields report times in
units of HZ, so that userland gets information that varies depending on
the HZ that the kernel was configured with.  This patch changes the
reported values to use USER_HZ units.

Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index 0514da573f2..ba60eeeb63a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1370,10 +1370,9 @@ void do_notify_parent(struct task_struct *tsk, int sig)
 
 	info.si_uid = tsk->uid;
 
-	/* FIXME: find out whether or not this is supposed to be c*time. */
-	info.si_utime = cputime_to_jiffies(cputime_add(tsk->utime,
+	info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
 						       tsk->signal->utime));
-	info.si_stime = cputime_to_jiffies(cputime_add(tsk->stime,
+	info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
 						       tsk->signal->stime));
 
 	info.si_status = tsk->exit_code & 0x7f;
@@ -1441,9 +1440,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
 
 	info.si_uid = tsk->uid;
 
-	/* FIXME: find out whether or not this is supposed to be c*time. */
-	info.si_utime = cputime_to_jiffies(tsk->utime);
-	info.si_stime = cputime_to_jiffies(tsk->stime);
+	info.si_utime = cputime_to_clock_t(tsk->utime);
+	info.si_stime = cputime_to_clock_t(tsk->stime);
 
  	info.si_code = why;
  	switch (why) {
-- 
GitLab


From bc64efd220dcd4449aef8dd2564d73127b583b09 Mon Sep 17 00:00:00 2001
From: Gustavo Fernando Padovan <gustavo@las.ic.unicamp.br>
Date: Fri, 25 Jul 2008 01:47:33 -0700
Subject: [PATCH 715/853] kernel/signal.c: change vars pid and tgid types to
 pid_t

Change the type of pid and tgid variables from int to the POSIX type
pid_t.

Signed-off-by: Gustavo F. Padovan <gustavo@las.ic.unicamp.br>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index ba60eeeb63a..fdab7b363fa 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1116,7 +1116,7 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
  * is probably wrong.  Should make it like BSD or SYSV.
  */
 
-static int kill_something_info(int sig, struct siginfo *info, int pid)
+static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
 {
 	int ret;
 
@@ -2184,7 +2184,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese,
 }
 
 asmlinkage long
-sys_kill(int pid, int sig)
+sys_kill(pid_t pid, int sig)
 {
 	struct siginfo info;
 
@@ -2197,7 +2197,7 @@ sys_kill(int pid, int sig)
 	return kill_something_info(sig, &info, pid);
 }
 
-static int do_tkill(int tgid, int pid, int sig)
+static int do_tkill(pid_t tgid, pid_t pid, int sig)
 {
 	int error;
 	struct siginfo info;
@@ -2243,7 +2243,7 @@ static int do_tkill(int tgid, int pid, int sig)
  *  exists but it's not belonging to the target process anymore. This
  *  method solves the problem of threads exiting and PIDs getting reused.
  */
-asmlinkage long sys_tgkill(int tgid, int pid, int sig)
+asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig)
 {
 	/* This is only valid for single tasks */
 	if (pid <= 0 || tgid <= 0)
@@ -2256,7 +2256,7 @@ asmlinkage long sys_tgkill(int tgid, int pid, int sig)
  *  Send a signal to only one task, even if it's a CLONE_THREAD task.
  */
 asmlinkage long
-sys_tkill(int pid, int sig)
+sys_tkill(pid_t pid, int sig)
 {
 	/* This is only valid for single tasks */
 	if (pid <= 0)
@@ -2266,7 +2266,7 @@ sys_tkill(int pid, int sig)
 }
 
 asmlinkage long
-sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
+sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
 {
 	siginfo_t info;
 
-- 
GitLab


From f22ab814a24e654b1de24db0c5f8b57b5ab2026a Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:47:34 -0700
Subject: [PATCH 716/853] include/asm/ptrace.h userspace headers cleanup

This patch contains the following cleanups for the asm/ptrace.h
userspace headers:

- include/asm-generic/Kbuild.asm already lists ptrace.h, remove
  the superfluous listings in the Kbuild files of the following
  architectures:
  - cris
  - frv
  - powerpc
  - x86
- don't expose function prototypes and macros to userspace:
  - arm
  - blackfin
  - cris
  - mn10300
  - parisc
- remove #ifdef CONFIG_'s around #define's:
  - blackfin
  - m68knommu
- sh: AFAIK __SH5__ should work in both kernel and userspace,
      no need to leak CONFIG_SUPERH64 to userspace
- xtensa: cosmetic change to remove empty
            #ifndef __ASSEMBLY__ #else #endif
          from the userspace headers

Not changed by this patch is the fact that the following architectures
have a different struct pt_regs depending on CONFIG_ variables:
- h8300
- m68knommu
- mips

This does not work in userspace.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: <linux-arch@vger.kernel.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Grant Grundler <grundler@parisc-linux.org>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Chris Zankel <chris@zankel.net>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-arm/ptrace.h           |  6 ++----
 include/asm-blackfin/ptrace.h      |  6 ++++--
 include/asm-cris/arch-v10/Kbuild   |  1 -
 include/asm-cris/arch-v10/ptrace.h |  4 ++++
 include/asm-cris/arch-v32/Kbuild   |  1 -
 include/asm-cris/arch-v32/ptrace.h |  4 ++++
 include/asm-cris/ptrace.h          |  4 +++-
 include/asm-frv/Kbuild             |  1 -
 include/asm-m68knommu/ptrace.h     |  2 --
 include/asm-mn10300/ptrace.h       |  8 ++++++--
 include/asm-parisc/ptrace.h        |  4 +++-
 include/asm-powerpc/Kbuild         |  1 -
 include/asm-sh/ptrace.h            |  2 +-
 include/asm-x86/Kbuild             |  1 -
 include/asm-xtensa/ptrace.h        | 10 +++++-----
 15 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/include/asm-arm/ptrace.h b/include/asm-arm/ptrace.h
index 7aaa206cb54..8382b7510f9 100644
--- a/include/asm-arm/ptrace.h
+++ b/include/asm-arm/ptrace.h
@@ -139,8 +139,6 @@ static inline int valid_user_regs(struct pt_regs *regs)
 	return 0;
 }
 
-#endif	/* __KERNEL__ */
-
 #define pc_pointer(v) \
 	((v) & ~PCMASK)
 
@@ -153,10 +151,10 @@ extern unsigned long profile_pc(struct pt_regs *regs);
 #define profile_pc(regs) instruction_pointer(regs)
 #endif
 
-#ifdef __KERNEL__
 #define predicate(x)		((x) & 0xf0000000)
 #define PREDICATE_ALWAYS	0xe0000000
-#endif
+
+#endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-blackfin/ptrace.h b/include/asm-blackfin/ptrace.h
index b8346cd3a6f..a45a80e54ad 100644
--- a/include/asm-blackfin/ptrace.h
+++ b/include/asm-blackfin/ptrace.h
@@ -83,14 +83,14 @@ struct pt_regs {
 #define PTRACE_GETREGS            12
 #define PTRACE_SETREGS            13	/* ptrace signal  */
 
-#ifdef CONFIG_BINFMT_ELF_FDPIC
 #define PTRACE_GETFDPIC           31
 #define PTRACE_GETFDPIC_EXEC      0
 #define PTRACE_GETFDPIC_INTERP    1
-#endif
 
 #define PS_S  (0x0002)
 
+#ifdef __KERNEL__
+
 /* user_mode returns true if only one bit is set in IPEND, other than the
    master interrupt enable.  */
 #define user_mode(regs) (!(((regs)->ipend & ~0x10) & (((regs)->ipend & ~0x10) - 1)))
@@ -98,6 +98,8 @@ struct pt_regs {
 #define profile_pc(regs) instruction_pointer(regs)
 extern void show_regs(struct pt_regs *);
 
+#endif  /*  __KERNEL__  */
+
 #endif				/* __ASSEMBLY__ */
 
 /*
diff --git a/include/asm-cris/arch-v10/Kbuild b/include/asm-cris/arch-v10/Kbuild
index 60e7e1b73ce..7a192e1290b 100644
--- a/include/asm-cris/arch-v10/Kbuild
+++ b/include/asm-cris/arch-v10/Kbuild
@@ -1,4 +1,3 @@
-header-y += ptrace.h
 header-y += user.h
 header-y += svinto.h
 header-y += sv_addr_ag.h
diff --git a/include/asm-cris/arch-v10/ptrace.h b/include/asm-cris/arch-v10/ptrace.h
index fb14c5ee37f..2f464eab3a5 100644
--- a/include/asm-cris/arch-v10/ptrace.h
+++ b/include/asm-cris/arch-v10/ptrace.h
@@ -106,10 +106,14 @@ struct switch_stack {
 	unsigned long return_ip; /* ip that _resume will return to */
 };
 
+#ifdef __KERNEL__
+
 /* bit 8 is user-mode flag */
 #define user_mode(regs) (((regs)->dccr & 0x100) != 0)
 #define instruction_pointer(regs) ((regs)->irp)
 #define profile_pc(regs) instruction_pointer(regs)
 extern void show_regs(struct pt_regs *);
 
+#endif  /*  __KERNEL__  */
+
 #endif
diff --git a/include/asm-cris/arch-v32/Kbuild b/include/asm-cris/arch-v32/Kbuild
index a0ec545e242..35f2fc4f993 100644
--- a/include/asm-cris/arch-v32/Kbuild
+++ b/include/asm-cris/arch-v32/Kbuild
@@ -1,3 +1,2 @@
-header-y += ptrace.h
 header-y += user.h
 header-y += cryptocop.h
diff --git a/include/asm-cris/arch-v32/ptrace.h b/include/asm-cris/arch-v32/ptrace.h
index 516cc7062d9..41f4e8662bc 100644
--- a/include/asm-cris/arch-v32/ptrace.h
+++ b/include/asm-cris/arch-v32/ptrace.h
@@ -106,9 +106,13 @@ struct switch_stack {
 	unsigned long return_ip; /* ip that _resume will return to */
 };
 
+#ifdef __KERNEL__
+
 #define user_mode(regs) (((regs)->ccs & (1 << (U_CCS_BITNR + CCS_SHIFT))) != 0)
 #define instruction_pointer(regs) ((regs)->erp)
 extern void show_regs(struct pt_regs *);
 #define profile_pc(regs) instruction_pointer(regs)
 
+#endif  /*  __KERNEL__  */
+
 #endif
diff --git a/include/asm-cris/ptrace.h b/include/asm-cris/ptrace.h
index 1ec69a7ea83..d910925e317 100644
--- a/include/asm-cris/ptrace.h
+++ b/include/asm-cris/ptrace.h
@@ -4,11 +4,13 @@
 #include <asm/arch/ptrace.h>
 
 #ifdef __KERNEL__
+
 /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
 #define PTRACE_GETREGS            12
 #define PTRACE_SETREGS            13
-#endif
 
 #define profile_pc(regs) instruction_pointer(regs)
 
+#endif /* __KERNEL__ */
+
 #endif /* _CRIS_PTRACE_H */
diff --git a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild
index bc3f12c5b7e..0f8956def73 100644
--- a/include/asm-frv/Kbuild
+++ b/include/asm-frv/Kbuild
@@ -3,4 +3,3 @@ include include/asm-generic/Kbuild.asm
 header-y += registers.h
 
 unifdef-y += termios.h
-unifdef-y += ptrace.h
diff --git a/include/asm-m68knommu/ptrace.h b/include/asm-m68knommu/ptrace.h
index 47258e86e8c..8c9194b9854 100644
--- a/include/asm-m68knommu/ptrace.h
+++ b/include/asm-m68knommu/ptrace.h
@@ -68,10 +68,8 @@ struct switch_stack {
 /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
 #define PTRACE_GETREGS            12
 #define PTRACE_SETREGS            13
-#ifdef CONFIG_FPU
 #define PTRACE_GETFPREGS          14
 #define PTRACE_SETFPREGS          15
-#endif
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-mn10300/ptrace.h b/include/asm-mn10300/ptrace.h
index b3684689fcc..7b06cc623d8 100644
--- a/include/asm-mn10300/ptrace.h
+++ b/include/asm-mn10300/ptrace.h
@@ -88,12 +88,16 @@ extern struct pt_regs *__frame; /* current frame pointer */
 /* options set using PTRACE_SETOPTIONS */
 #define PTRACE_O_TRACESYSGOOD     0x00000001
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+
+#if !defined(__ASSEMBLY__)
 #define user_mode(regs)			(((regs)->epsw & EPSW_nSL) == EPSW_nSL)
 #define instruction_pointer(regs)	((regs)->pc)
 extern void show_regs(struct pt_regs *);
-#endif
+#endif  /*  !__ASSEMBLY  */
 
 #define profile_pc(regs) ((regs)->pc)
 
+#endif  /*  __KERNEL__  */
+
 #endif /* _ASM_PTRACE_H */
diff --git a/include/asm-parisc/ptrace.h b/include/asm-parisc/ptrace.h
index 93f990e418f..3e94c5d85ff 100644
--- a/include/asm-parisc/ptrace.h
+++ b/include/asm-parisc/ptrace.h
@@ -33,7 +33,6 @@ struct pt_regs {
 	unsigned long ipsw;	/* CR22 */
 };
 
-#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
 /*
  * The numbers chosen here are somewhat arbitrary but absolutely MUST
  * not overlap with any of the number assigned in <linux/ptrace.h>.
@@ -43,8 +42,11 @@ struct pt_regs {
  * since we have taken branch traps too)
  */
 #define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+
 #ifdef __KERNEL__
 
+#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
+
 /* XXX should we use iaoq[1] or iaoq[0] ? */
 #define user_mode(regs)			(((regs)->iaoq[0] & 3) ? 1 : 0)
 #define user_space(regs)		(((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild
index 04ce8f8a2ee..5ab7d7fe198 100644
--- a/include/asm-powerpc/Kbuild
+++ b/include/asm-powerpc/Kbuild
@@ -29,7 +29,6 @@ unifdef-y += elf.h
 unifdef-y += nvram.h
 unifdef-y += param.h
 unifdef-y += posix_types.h
-unifdef-y += ptrace.h
 unifdef-y += seccomp.h
 unifdef-y += signal.h
 unifdef-y += spu_info.h
diff --git a/include/asm-sh/ptrace.h b/include/asm-sh/ptrace.h
index 8d6c92b3e77..7d36dc3bee6 100644
--- a/include/asm-sh/ptrace.h
+++ b/include/asm-sh/ptrace.h
@@ -5,7 +5,7 @@
  * Copyright (C) 1999, 2000  Niibe Yutaka
  *
  */
-#if defined(__SH5__) || defined(CONFIG_SUPERH64)
+#if defined(__SH5__)
 struct pt_regs {
 	unsigned long long pc;
 	unsigned long long sr;
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 1e3554596f7..00473f7dd81 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -19,7 +19,6 @@ unifdef-y += msr.h
 unifdef-y += mtrr.h
 unifdef-y += posix_types_32.h
 unifdef-y += posix_types_64.h
-unifdef-y += ptrace.h
 unifdef-y += unistd_32.h
 unifdef-y += unistd_64.h
 unifdef-y += vm86.h
diff --git a/include/asm-xtensa/ptrace.h b/include/asm-xtensa/ptrace.h
index 422c73e2693..089b0db4481 100644
--- a/include/asm-xtensa/ptrace.h
+++ b/include/asm-xtensa/ptrace.h
@@ -73,10 +73,10 @@
 #define PTRACE_GETXTREGS	18
 #define PTRACE_SETXTREGS	19
 
-#ifndef __ASSEMBLY__
-
 #ifdef __KERNEL__
 
+#ifndef __ASSEMBLY__
+
 /*
  * This struct defines the way the registers are stored on the
  * kernel stack during a system call or other kernel entry.
@@ -122,14 +122,14 @@ extern void show_regs(struct pt_regs *);
 # ifndef CONFIG_SMP
 #  define profile_pc(regs) instruction_pointer(regs)
 # endif
-#endif /* __KERNEL__ */
 
 #else	/* __ASSEMBLY__ */
 
-#ifdef __KERNEL__
 # include <asm/asm-offsets.h>
 #define PT_REGS_OFFSET	  (KERNEL_STACK_SIZE - PT_USER_SIZE)
-#endif
 
 #endif	/* !__ASSEMBLY__ */
+
+#endif  /* __KERNEL__ */
+
 #endif	/* _XTENSA_PTRACE_H */
-- 
GitLab


From 364d3c13c17f45da6d638011078d4c4d3070d719 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:36 -0700
Subject: [PATCH 717/853] ptrace: give more respect to SIGKILL

ptrace_stop() has some complicated checks to prevent the scheduling in the
TASK_TRACED state with the pending SIGKILL, but these checks are racy, and
they depend on arch_ptrace_stop_needed().

This patch assumes that the traced task should die asap if it was killed by
SIGKILL, in that case schedule()->signal_pending_state() has no reason to
ignore the TASK_WAKEKILL part of TASK_TRACED, and we can kill this nasty
special case.

Note: do_exit()->ptrace_notify() is special: the killed task may already
have dequeued SIGKILL at this point.  Another indication that
fatal_signal_pending() is not exactly right.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6aca4a16e37..79e749dbf81 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2054,9 +2054,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 	if (!signal_pending(p))
 		return 0;
 
-	if (state & (__TASK_STOPPED | __TASK_TRACED))
-		return 0;
-
 	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
 }
 
-- 
GitLab


From 3d749b9e676b26584a47e75c235aa6f69d0697ae Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:37 -0700
Subject: [PATCH 718/853] ptrace: simplify ptrace_stop()->sigkill_pending()
 path

1. SIGKILL can't be blocked, remove this check from sigkill_pending().

2. When ptrace_stop() sees sigkill_pending() == T, it can just return.
   Kill "int killed" and simplify the code. This also is more correct,
   the tracer shouldn't see us in TASK_TRACED if we are not going to
   stop.

I strongly believe this code needs further changes.  We should do the "was
this task killed" check unconditionally; currently it depends on
arch_ptrace_stop_needed().  On the other hand, sigkill_pending() isn't
very clever.  If the task was killed by tkill(SIGKILL), the signal may
already have been dequeued if the caller is do_exit().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index fdab7b363fa..39c1706edf0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1496,9 +1496,8 @@ static inline int may_ptrace_stop(void)
  */
 static int sigkill_pending(struct task_struct *tsk)
 {
-	return ((sigismember(&tsk->pending.signal, SIGKILL) ||
-		 sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) &&
-		!unlikely(sigismember(&tsk->blocked, SIGKILL)));
+	return	sigismember(&tsk->pending.signal, SIGKILL) ||
+		sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
 }
 
 /*
@@ -1514,8 +1513,6 @@ static int sigkill_pending(struct task_struct *tsk)
  */
 static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
 {
-	int killed = 0;
-
 	if (arch_ptrace_stop_needed(exit_code, info)) {
 		/*
 		 * The arch code has something special to do before a
@@ -1531,7 +1528,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
 		spin_unlock_irq(&current->sighand->siglock);
 		arch_ptrace_stop(exit_code, info);
 		spin_lock_irq(&current->sighand->siglock);
-		killed = sigkill_pending(current);
+		if (sigkill_pending(current))
+			return;
 	}
 
 	/*
@@ -1548,7 +1546,7 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
 	__set_current_state(TASK_TRACED);
 	spin_unlock_irq(&current->sighand->siglock);
 	read_lock(&tasklist_lock);
-	if (!unlikely(killed) && may_ptrace_stop()) {
+	if (may_ptrace_stop()) {
 		do_notify_parent_cldstop(current, CLD_TRAPPED);
 		read_unlock(&tasklist_lock);
 		schedule();
-- 
GitLab


From 7b34e4283c685f5cc6ba6d30e939906eee0d4bcf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:37 -0700
Subject: [PATCH 719/853] introduce PF_KTHREAD flag

Introduce the new PF_KTHREAD flag to mark the kernel threads.  It is set
by INIT_TASK() and copied to the forked children (we could set it in
kthreadd() along with PF_NOFREEZE instead).

daemonize() was changed as well.  In that case testing of PF_KTHREAD is
racy, but daemonize() is hopeless anyway.

This flag is cleared in do_execve(), before search_binary_handler().
Probably not the best place, we can do this in exec_mmap() or in
start_thread(), or clear it along with PF_FORKNOEXEC.  But I think this
doesn't matter in practice, and if do_execve() fails kthread should die
soon.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                 | 1 +
 include/linux/init_task.h | 2 +-
 include/linux/sched.h     | 1 +
 kernel/exit.c             | 2 +-
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index af249af4cca..cd2e8c9b124 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1326,6 +1326,7 @@ int do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
+	current->flags &= ~PF_KTHREAD;
 	retval = search_binary_handler(bprm,regs);
 	if (retval >= 0) {
 		/* execve success */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 93c45acf249..021d8e720c7 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -122,7 +122,7 @@ extern struct group_info init_groups;
 	.state		= 0,						\
 	.stack		= &init_thread_info,				\
 	.usage		= ATOMIC_INIT(2),				\
-	.flags		= 0,						\
+	.flags		= PF_KTHREAD,					\
 	.lock_depth	= -1,						\
 	.prio		= MAX_PRIO-20,					\
 	.static_prio	= MAX_PRIO-20,					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 79e749dbf81..eec64a4adb9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1483,6 +1483,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
+#define PF_KTHREAD	0x00000020	/* I am a kernel thread */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
 #define PF_DUMPCORE	0x00000200	/* dumped core */
diff --git a/kernel/exit.c b/kernel/exit.c
index a7799d8a640..28a44a2612d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -430,7 +430,7 @@ void daemonize(const char *name, ...)
 	 * We don't want to have TIF_FREEZE set if the system-wide hibernation
 	 * or suspend transition begins right now.
 	 */
-	current->flags |= PF_NOFREEZE;
+	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
 
 	if (current->nsproxy != &init_nsproxy) {
 		get_nsproxy(&init_nsproxy);
-- 
GitLab


From 246bb0b1deb29726990620d8b5e55ca29f331362 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:38 -0700
Subject: [PATCH 720/853] kill PF_BORROWED_MM in favour of PF_KTHREAD

Kill PF_BORROWED_MM.  Change use_mm/unuse_mm to not play with ->flags, and
do s/PF_BORROWED_MM/PF_KTHREAD/ for a couple of other users.

No functional changes yet.  But this allows us to do further
fixes/cleanups.

oom_kill/ptrace/etc often check "p->mm != NULL" to filter out the
kthreads, this is wrong because of use_mm().  The problem with
PF_BORROWED_MM is that we need task_lock() to avoid races.  With this
patch we can check PF_KTHREAD directly, or use a simple lockless helper:

	/* The result must not be dereferenced !!! */
	struct mm_struct *__get_task_mm(struct task_struct *tsk)
	{
		if (tsk->flags & PF_KTHREAD)
			return NULL;
		return tsk->mm;
	}

Note also ecard_task().  It runs with ->mm != NULL, but it's the kernel
thread without PF_BORROWED_MM.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c              | 2 --
 include/linux/sched.h | 3 +--
 kernel/fork.c         | 4 ++--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117ddd9..0051fd94b44 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
 	struct task_struct *tsk = current;
 
 	task_lock(tsk);
-	tsk->flags |= PF_BORROWED_MM;
 	active_mm = tsk->active_mm;
 	atomic_inc(&mm->mm_count);
 	tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
 	struct task_struct *tsk = current;
 
 	task_lock(tsk);
-	tsk->flags &= ~PF_BORROWED_MM;
 	tsk->mm = NULL;
 	/* active_mm is still 'mm' */
 	enter_lazy_tlb(mm, tsk);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eec64a4adb9..0560999eb1d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1483,7 +1483,6 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
-#define PF_KTHREAD	0x00000020	/* I am a kernel thread */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
 #define PF_DUMPCORE	0x00000200	/* dumped core */
@@ -1497,7 +1496,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
 #define PF_SWAPOFF	0x00080000	/* I am in swapoff */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
-#define PF_BORROWED_MM	0x00200000	/* I am a kthread doing use_mm */
+#define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
 #define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
diff --git a/kernel/fork.c b/kernel/fork.c
index 228f80c9155..eeaec6893b0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(mmput);
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
- * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
+ * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
  * this kernel workthread has transiently adopted a user mm with use_mm,
  * to do its AIO) is not set and if so returns a reference to it, after
  * bumping up the use count.  User must release the mm via mmput()
@@ -487,7 +487,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 	task_lock(task);
 	mm = task->mm;
 	if (mm) {
-		if (task->flags & PF_BORROWED_MM)
+		if (task->flags & PF_KTHREAD)
 			mm = NULL;
 		else
 			atomic_inc(&mm->mm_users);
-- 
GitLab


From 15b9f360c0316c06d37c09b02d85565edbaf9dd3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:39 -0700
Subject: [PATCH 721/853] coredump: zap_threads() must skip kernel threads

The main loop in zap_threads() must skip kthreads which may use the same
mm.  Otherwise we "kill" this thread erroneously (for example, it cannot
fork or exec after that), and the coredumping task gets stuck in the
TASK_UNINTERRUPTIBLE state forever because of the wrong ->core_waiters
count.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index cd2e8c9b124..e347e6ed161 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1574,11 +1574,12 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	for_each_process(g) {
 		if (g == tsk->group_leader)
 			continue;
-
+		if (g->flags & PF_KTHREAD)
+			continue;
 		p = g;
 		do {
 			if (p->mm) {
-				if (p->mm == mm) {
+				if (unlikely(p->mm == mm)) {
 					lock_task_sighand(p, &flags);
 					zap_process(p);
 					unlock_task_sighand(p, &flags);
-- 
GitLab


From 24d5288f06ed8b3a368eba967d587cdb012dfdf7 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:40 -0700
Subject: [PATCH 722/853] coredump: elf_core_dump: skip kernel threads

linux_binfmt->core_dump() runs before the process does exit_aio(), this
means that we can hit the kernel thread which shares the same ->mm.
Afaics, nothing really bad can happen, but perhaps it makes sense to fix
this minor bug.

It is sad we have to iterate over all threads in the system and use
GFP_ATOMIC.  Hopefully we can kill these ugly do_each_thread()s, but this
needs some nontrivial changes in mm_struct and do_coredump.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c       | 6 ++++++
 fs/binfmt_elf_fdpic.c | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 639d2d8b571..bad7d8770d7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1520,6 +1520,9 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	rcu_read_lock();
 	do_each_thread(g, p)
 		if (p->mm == dump_task->mm) {
+			if (p->flags & PF_KTHREAD)
+				continue;
+
 			t = kzalloc(offsetof(struct elf_thread_core_info,
 					     notes[info->thread_notes]),
 				    GFP_ATOMIC);
@@ -1724,6 +1727,9 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 		rcu_read_lock();
 		do_each_thread(g, p)
 			if (current->mm == p->mm && current != p) {
+				if (p->flags & PF_KTHREAD)
+					continue;
+
 				ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
 				if (!ets) {
 					rcu_read_unlock();
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d051a32e627..71bcc4b4d08 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1626,6 +1626,9 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 		rcu_read_lock();
 		do_each_thread(g,p)
 			if (current->mm == p->mm && current != p) {
+				if (p->flags & PF_KTHREAD)
+					continue;
+
 				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
 				if (!tmp) {
 					rcu_read_unlock();
-- 
GitLab


From 32ecb1f26dd50eeaac4e3f4dea4541c97848e459 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:41 -0700
Subject: [PATCH 723/853] coredump: turn mm->core_startup_done into the pointer
 to struct core_state

mm->core_startup_done points to "struct completion startup_done" allocated
on the coredump_wait()'s stack.  Introduce the new structure, core_state,
which holds this "struct completion".  This way we can add more info
visible to the threads participating in coredump without enlarging
mm_struct.

No changes in affected .o files.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 8 ++++----
 include/linux/mm_types.h | 7 ++++++-
 kernel/exit.c            | 2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e347e6ed161..71734568f01 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1597,13 +1597,13 @@ static int coredump_wait(int exit_code)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
-	struct completion startup_done;
+	struct core_state core_state;
 	struct completion *vfork_done;
 	int core_waiters;
 
 	init_completion(&mm->core_done);
-	init_completion(&startup_done);
-	mm->core_startup_done = &startup_done;
+	init_completion(&core_state.startup);
+	mm->core_state = &core_state;
 
 	core_waiters = zap_threads(tsk, mm, exit_code);
 	up_write(&mm->mmap_sem);
@@ -1622,7 +1622,7 @@ static int coredump_wait(int exit_code)
 	}
 
 	if (core_waiters)
-		wait_for_completion(&startup_done);
+		wait_for_completion(&core_state.startup);
 fail:
 	BUG_ON(mm->core_waiters);
 	return core_waiters;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02a27ae7853..97819efd233 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,6 +159,10 @@ struct vm_area_struct {
 #endif
 };
 
+struct core_state {
+	struct completion startup;
+};
+
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -220,7 +224,8 @@ struct mm_struct {
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	/* coredumping support */
-	struct completion *core_startup_done, core_done;
+	struct core_state *core_state;
+	struct completion core_done;
 
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;	/* aio lock */
diff --git a/kernel/exit.c b/kernel/exit.c
index 28a44a2612d..f7fa21dbced 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -680,7 +680,7 @@ static void exit_mm(struct task_struct * tsk)
 		up_read(&mm->mmap_sem);
 		down_write(&mm->mmap_sem);
 		if (!--mm->core_waiters)
-			complete(mm->core_startup_done);
+			complete(&mm->core_state->startup);
 		up_write(&mm->mmap_sem);
 
 		wait_for_completion(&mm->core_done);
-- 
GitLab


From 999d9fc1670bc082928b93b11d1f2e0e417d973c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:41 -0700
Subject: [PATCH 724/853] coredump: move mm->core_waiters into struct
 core_state

Move mm->core_waiters into "struct core_state" allocated on stack.  This
shrinks mm_struct a little bit and allows further changes.

This patch mostly does s/core_waiters/core_state.  The only essential
change is that coredump_wait() must clear mm->core_state before return.

The coredump_wait()'s path is uglified and .text grows by 30 bytes, this
is fixed by the next patch.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 21 +++++++++++----------
 include/linux/mm_types.h |  2 +-
 kernel/exit.c            |  8 ++++----
 kernel/fork.c            |  2 +-
 kernel/signal.c          |  4 ++--
 5 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 71734568f01..50de3aaff4d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -722,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm)
 		 * Make sure that if there is a core dump in progress
 		 * for the old mm, we get out and die instead of going
 		 * through with the exec.  We must hold mmap_sem around
-		 * checking core_waiters and changing tsk->mm.  The
-		 * core-inducing thread will increment core_waiters for
-		 * each thread whose ->mm == old_mm.
+		 * checking core_state and changing tsk->mm.
 		 */
 		down_read(&old_mm->mmap_sem);
-		if (unlikely(old_mm->core_waiters)) {
+		if (unlikely(old_mm->core_state)) {
 			up_read(&old_mm->mmap_sem);
 			return -EINTR;
 		}
@@ -1514,7 +1512,7 @@ static void zap_process(struct task_struct *start)
 	t = start;
 	do {
 		if (t != current && t->mm) {
-			t->mm->core_waiters++;
+			t->mm->core_state->nr_threads++;
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
 		}
@@ -1538,11 +1536,11 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	if (err)
 		return err;
 
-	if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+	if (atomic_read(&mm->mm_users) == mm->core_state->nr_threads + 1)
 		goto done;
 	/*
 	 * We should find and kill all tasks which use this mm, and we should
-	 * count them correctly into mm->core_waiters. We don't take tasklist
+	 * count them correctly into ->nr_threads. We don't take tasklist
 	 * lock, but this is safe wrt:
 	 *
 	 * fork:
@@ -1590,7 +1588,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	}
 	rcu_read_unlock();
 done:
-	return mm->core_waiters;
+	return mm->core_state->nr_threads;
 }
 
 static int coredump_wait(int exit_code)
@@ -1603,9 +1601,12 @@ static int coredump_wait(int exit_code)
 
 	init_completion(&mm->core_done);
 	init_completion(&core_state.startup);
+	core_state.nr_threads = 0;
 	mm->core_state = &core_state;
 
 	core_waiters = zap_threads(tsk, mm, exit_code);
+	if (core_waiters < 0)
+		mm->core_state = NULL;
 	up_write(&mm->mmap_sem);
 
 	if (unlikely(core_waiters < 0))
@@ -1623,8 +1624,8 @@ static int coredump_wait(int exit_code)
 
 	if (core_waiters)
 		wait_for_completion(&core_state.startup);
+	mm->core_state = NULL;
 fail:
-	BUG_ON(mm->core_waiters);
 	return core_waiters;
 }
 
@@ -1702,7 +1703,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	/*
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
-	if (mm->core_waiters || !get_dumpable(mm)) {
+	if (mm->core_state || !get_dumpable(mm)) {
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 97819efd233..c0b1747b61a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -160,6 +160,7 @@ struct vm_area_struct {
 };
 
 struct core_state {
+	int nr_threads;
 	struct completion startup;
 };
 
@@ -179,7 +180,6 @@ struct mm_struct {
 	atomic_t mm_users;			/* How many users with user space? */
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
 	int map_count;				/* number of VMAs */
-	int core_waiters;
 	struct rw_semaphore mmap_sem;
 	spinlock_t page_table_lock;		/* Protects page tables and some counters */
 
diff --git a/kernel/exit.c b/kernel/exit.c
index f7fa21dbced..988e232254e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -670,16 +670,16 @@ static void exit_mm(struct task_struct * tsk)
 		return;
 	/*
 	 * Serialize with any possible pending coredump.
-	 * We must hold mmap_sem around checking core_waiters
+	 * We must hold mmap_sem around checking core_state
 	 * and clearing tsk->mm.  The core-inducing thread
-	 * will increment core_waiters for each thread in the
+	 * will increment ->nr_threads for each thread in the
 	 * group with ->mm != NULL.
 	 */
 	down_read(&mm->mmap_sem);
-	if (mm->core_waiters) {
+	if (mm->core_state) {
 		up_read(&mm->mmap_sem);
 		down_write(&mm->mmap_sem);
-		if (!--mm->core_waiters)
+		if (!--mm->core_state->nr_threads)
 			complete(&mm->core_state->startup);
 		up_write(&mm->mmap_sem);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index eeaec6893b0..813d5c89b9d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -400,7 +400,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->flags = (current->mm) ? current->mm->flags
 				  : MMF_DUMP_FILTER_DEFAULT;
-	mm->core_waiters = 0;
+	mm->core_state = NULL;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
diff --git a/kernel/signal.c b/kernel/signal.c
index 39c1706edf0..5c7b7eaa0dc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1480,10 +1480,10 @@ static inline int may_ptrace_stop(void)
 	 * is a deadlock situation, and pointless because our tracer
 	 * is dead so don't allow us to stop.
 	 * If SIGKILL was already sent before the caller unlocked
-	 * ->siglock we must see ->core_waiters != 0. Otherwise it
+	 * ->siglock we must see ->core_state != NULL. Otherwise it
 	 * is safe to enter schedule().
 	 */
-	if (unlikely(current->mm->core_waiters) &&
+	if (unlikely(current->mm->core_state) &&
 	    unlikely(current->mm == current->parent->mm))
 		return 0;
 
-- 
GitLab


From 8cd9c249128a59e8e833d454a784b0cbd338d468 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:42 -0700
Subject: [PATCH 725/853] coredump: simplify core_state->nr_threads calculation

Change zap_process() to return int instead of incrementing
mm->core_state->nr_threads directly.  Change zap_threads() to set
mm->core_state only on success.

This patch restores the original size of .text, and more importantly now
->nr_threads is used in two places only.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 50de3aaff4d..c74bb34eeef 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1502,9 +1502,10 @@ out:
 	return ispipe;
 }
 
-static void zap_process(struct task_struct *start)
+static int zap_process(struct task_struct *start)
 {
 	struct task_struct *t;
+	int nr = 0;
 
 	start->signal->flags = SIGNAL_GROUP_EXIT;
 	start->signal->group_stop_count = 0;
@@ -1512,31 +1513,33 @@ static void zap_process(struct task_struct *start)
 	t = start;
 	do {
 		if (t != current && t->mm) {
-			t->mm->core_state->nr_threads++;
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
+			nr++;
 		}
 	} while_each_thread(start, t);
+
+	return nr;
 }
 
 static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
-				int exit_code)
+				struct core_state *core_state, int exit_code)
 {
 	struct task_struct *g, *p;
 	unsigned long flags;
-	int err = -EAGAIN;
+	int nr = -EAGAIN;
 
 	spin_lock_irq(&tsk->sighand->siglock);
 	if (!signal_group_exit(tsk->signal)) {
+		mm->core_state = core_state;
 		tsk->signal->group_exit_code = exit_code;
-		zap_process(tsk);
-		err = 0;
+		nr = zap_process(tsk);
 	}
 	spin_unlock_irq(&tsk->sighand->siglock);
-	if (err)
-		return err;
+	if (unlikely(nr < 0))
+		return nr;
 
-	if (atomic_read(&mm->mm_users) == mm->core_state->nr_threads + 1)
+	if (atomic_read(&mm->mm_users) == nr + 1)
 		goto done;
 	/*
 	 * We should find and kill all tasks which use this mm, and we should
@@ -1579,7 +1582,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 			if (p->mm) {
 				if (unlikely(p->mm == mm)) {
 					lock_task_sighand(p, &flags);
-					zap_process(p);
+					nr += zap_process(p);
 					unlock_task_sighand(p, &flags);
 				}
 				break;
@@ -1588,7 +1591,8 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	}
 	rcu_read_unlock();
 done:
-	return mm->core_state->nr_threads;
+	core_state->nr_threads = nr;
+	return nr;
 }
 
 static int coredump_wait(int exit_code)
@@ -1601,12 +1605,7 @@ static int coredump_wait(int exit_code)
 
 	init_completion(&mm->core_done);
 	init_completion(&core_state.startup);
-	core_state.nr_threads = 0;
-	mm->core_state = &core_state;
-
-	core_waiters = zap_threads(tsk, mm, exit_code);
-	if (core_waiters < 0)
-		mm->core_state = NULL;
+	core_waiters = zap_threads(tsk, mm, &core_state, exit_code);
 	up_write(&mm->mmap_sem);
 
 	if (unlikely(core_waiters < 0))
-- 
GitLab


From c5f1cc8c1828486a61ab3e575da6e2c62b34d399 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:42 -0700
Subject: [PATCH 726/853] coredump: turn core_state->nr_threads into atomic_t

Turn core_state->nr_threads into atomic_t and kill now unneeded
down_write(&mm->mmap_sem) in exit_mm().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 2 +-
 include/linux/mm_types.h | 2 +-
 kernel/exit.c            | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index c74bb34eeef..15d493fe8aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1591,7 +1591,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	}
 	rcu_read_unlock();
 done:
-	core_state->nr_threads = nr;
+	atomic_set(&core_state->nr_threads, nr);
 	return nr;
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c0b1747b61a..ae99a28ba6a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -160,7 +160,7 @@ struct vm_area_struct {
 };
 
 struct core_state {
-	int nr_threads;
+	atomic_t nr_threads;
 	struct completion startup;
 };
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 988e232254e..63d82957baa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -678,10 +678,9 @@ static void exit_mm(struct task_struct * tsk)
 	down_read(&mm->mmap_sem);
 	if (mm->core_state) {
 		up_read(&mm->mmap_sem);
-		down_write(&mm->mmap_sem);
-		if (!--mm->core_state->nr_threads)
+
+		if (atomic_dec_and_test(&mm->core_state->nr_threads))
 			complete(&mm->core_state->startup);
-		up_write(&mm->mmap_sem);
 
 		wait_for_completion(&mm->core_done);
 		down_read(&mm->mmap_sem);
-- 
GitLab


From 9d5b327bf198d2720666de958dcc2ae219d86952 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:43 -0700
Subject: [PATCH 727/853] coredump: make mm->core_state visible to
 ->core_dump()

Move the "struct core_state core_state" from coredump_wait() to
do_coredump(), this makes mm->core_state visible to binfmt->core_dump().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 15d493fe8aa..b8ee842d93c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1595,17 +1595,16 @@ done:
 	return nr;
 }
 
-static int coredump_wait(int exit_code)
+static int coredump_wait(int exit_code, struct core_state *core_state)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
-	struct core_state core_state;
 	struct completion *vfork_done;
 	int core_waiters;
 
 	init_completion(&mm->core_done);
-	init_completion(&core_state.startup);
-	core_waiters = zap_threads(tsk, mm, &core_state, exit_code);
+	init_completion(&core_state->startup);
+	core_waiters = zap_threads(tsk, mm, core_state, exit_code);
 	up_write(&mm->mmap_sem);
 
 	if (unlikely(core_waiters < 0))
@@ -1622,8 +1621,7 @@ static int coredump_wait(int exit_code)
 	}
 
 	if (core_waiters)
-		wait_for_completion(&core_state.startup);
-	mm->core_state = NULL;
+		wait_for_completion(&core_state->startup);
 fail:
 	return core_waiters;
 }
@@ -1679,6 +1677,7 @@ int get_dumpable(struct mm_struct *mm)
 
 int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 {
+	struct core_state core_state;
 	char corename[CORENAME_MAX_SIZE + 1];
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
@@ -1717,7 +1716,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		current->fsuid = 0;	/* Dump root private */
 	}
 
-	retval = coredump_wait(exit_code);
+	retval = coredump_wait(exit_code, &core_state);
 	if (retval < 0)
 		goto fail;
 
@@ -1812,6 +1811,7 @@ fail_unlock:
 
 	current->fsuid = fsuid;
 	complete_all(&mm->core_done);
+	mm->core_state = NULL;
 fail:
 	return retval;
 }
-- 
GitLab


From b564daf806d492dd4f7afe9b6c83b8d35d137669 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:44 -0700
Subject: [PATCH 728/853] coredump: construct the list of coredumping threads
 at startup time

binfmt->core_dump() has to iterate over all the threads in the system in
order to find the coredumping threads and construct the list using
GFP_ATOMIC allocations.

With this patch each thread allocates the list node on exit_mm()'s stack and
adds itself to the list.

This allows us to do further changes:

	- simplify ->core_dump()

	- change exit_mm() to clear ->mm first, then wait for ->core_done.
	  this makes the coredumping process visible to oom_kill

	- kill mm->core_done

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                |  2 ++
 include/linux/mm_types.h |  6 ++++++
 kernel/exit.c            | 15 ++++++++++++---
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index b8ee842d93c..fe2873b8037 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1604,6 +1604,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 
 	init_completion(&mm->core_done);
 	init_completion(&core_state->startup);
+	core_state->dumper.task = tsk;
+	core_state->dumper.next = NULL;
 	core_waiters = zap_threads(tsk, mm, core_state, exit_code);
 	up_write(&mm->mmap_sem);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ae99a28ba6a..4d0d0abc79f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,8 +159,14 @@ struct vm_area_struct {
 #endif
 };
 
+struct core_thread {
+	struct task_struct *task;
+	struct core_thread *next;
+};
+
 struct core_state {
 	atomic_t nr_threads;
+	struct core_thread dumper;
 	struct completion startup;
 };
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 63d82957baa..b66f0d55c79 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -664,6 +664,7 @@ assign_new_owner:
 static void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
+	struct core_state *core_state;
 
 	mm_release(tsk, mm);
 	if (!mm)
@@ -676,11 +677,19 @@ static void exit_mm(struct task_struct * tsk)
 	 * group with ->mm != NULL.
 	 */
 	down_read(&mm->mmap_sem);
-	if (mm->core_state) {
+	core_state = mm->core_state;
+	if (core_state) {
+		struct core_thread self;
 		up_read(&mm->mmap_sem);
 
-		if (atomic_dec_and_test(&mm->core_state->nr_threads))
-			complete(&mm->core_state->startup);
+		self.task = tsk;
+		self.next = xchg(&core_state->dumper.next, &self);
+		/*
+		 * Implies mb(), the result of xchg() must be visible
+		 * to core_state->dumper.
+		 */
+		if (atomic_dec_and_test(&core_state->nr_threads))
+			complete(&core_state->startup);
 
 		wait_for_completion(&mm->core_done);
 		down_read(&mm->mmap_sem);
-- 
GitLab


From 83914441f94c6f2cd468ca97365f6c34f418706e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:45 -0700
Subject: [PATCH 729/853] coredump: elf_core_dump: use core_state->dumper list

Kill the nasty rcu_read_lock() + do_each_thread() loop, use the list
encoded in mm->core_state instead, s/GFP_ATOMIC/GFP_KERNEL/.

This patch allows further cleanups in binfmt_elf.c, in particular we can
kill the parallel info->threads list.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 77 ++++++++++++++++++++-----------------------------
 1 file changed, 32 insertions(+), 45 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bad7d8770d7..88d180306cf 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1478,7 +1478,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	const struct user_regset_view *view = task_user_regset_view(dump_task);
 	struct elf_thread_core_info *t;
 	struct elf_prpsinfo *psinfo;
-	struct task_struct *g, *p;
+	struct core_thread *ct;
 	unsigned int i;
 
 	info->size = 0;
@@ -1517,34 +1517,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	/*
 	 * Allocate a structure for each thread.
 	 */
-	rcu_read_lock();
-	do_each_thread(g, p)
-		if (p->mm == dump_task->mm) {
-			if (p->flags & PF_KTHREAD)
-				continue;
-
-			t = kzalloc(offsetof(struct elf_thread_core_info,
-					     notes[info->thread_notes]),
-				    GFP_ATOMIC);
-			if (unlikely(!t)) {
-				rcu_read_unlock();
-				return 0;
-			}
-			t->task = p;
-			if (p == dump_task || !info->thread) {
-				t->next = info->thread;
-				info->thread = t;
-			} else {
-				/*
-				 * Make sure to keep the original task at
-				 * the head of the list.
-				 */
-				t->next = info->thread->next;
-				info->thread->next = t;
-			}
+	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
+		t = kzalloc(offsetof(struct elf_thread_core_info,
+				     notes[info->thread_notes]),
+			    GFP_KERNEL);
+		if (unlikely(!t))
+			return 0;
+
+		t->task = ct->task;
+		if (ct->task == dump_task || !info->thread) {
+			t->next = info->thread;
+			info->thread = t;
+		} else {
+			/*
+			 * Make sure to keep the original task at
+			 * the head of the list.
+			 */
+			t->next = info->thread->next;
+			info->thread->next = t;
 		}
-	while_each_thread(g, p);
-	rcu_read_unlock();
+	}
 
 	/*
 	 * Now fill in each thread's information.
@@ -1691,7 +1683,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 {
 #define	NUM_NOTES	6
 	struct list_head *t;
-	struct task_struct *g, *p;
 
 	info->notes = NULL;
 	info->prstatus = NULL;
@@ -1723,23 +1714,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 
 	info->thread_status_size = 0;
 	if (signr) {
+		struct core_thread *ct;
 		struct elf_thread_status *ets;
-		rcu_read_lock();
-		do_each_thread(g, p)
-			if (current->mm == p->mm && current != p) {
-				if (p->flags & PF_KTHREAD)
-					continue;
-
-				ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
-				if (!ets) {
-					rcu_read_unlock();
-					return 0;
-				}
-				ets->thread = p;
-				list_add(&ets->list, &info->thread_list);
-			}
-		while_each_thread(g, p);
-		rcu_read_unlock();
+
+		for (ct = current->mm->core_state->dumper.next;
+						ct; ct = ct->next) {
+			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+			if (!ets)
+				return 0;
+
+			ets->thread = ct->task;
+			list_add(&ets->list, &info->thread_list);
+		}
+
 		list_for_each(t, &info->thread_list) {
 			int sz;
 
-- 
GitLab


From 182c515fd2a942623aed4e4e0e0b37fe96571b05 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:45 -0700
Subject: [PATCH 730/853] coredump: elf_fdpic_core_dump: use core_state->dumper
 list

Kill the nasty rcu_read_lock() + do_each_thread() loop, use the list
encoded in mm->core_state instead, s/GFP_ATOMIC/GFP_KERNEL/.

This patch allows further cleanups in binfmt_elf_fdpic.c.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf_fdpic.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 71bcc4b4d08..1b59b1edf26 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1573,7 +1573,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 	struct memelfnote *notes = NULL;
 	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
 	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
- 	struct task_struct *g, *p;
  	LIST_HEAD(thread_list);
  	struct list_head *t;
 	elf_fpregset_t *fpu = NULL;
@@ -1622,23 +1621,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 #endif
 
 	if (signr) {
+		struct core_thread *ct;
 		struct elf_thread_status *tmp;
-		rcu_read_lock();
-		do_each_thread(g,p)
-			if (current->mm == p->mm && current != p) {
-				if (p->flags & PF_KTHREAD)
-					continue;
-
-				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
-				if (!tmp) {
-					rcu_read_unlock();
-					goto cleanup;
-				}
-				tmp->thread = p;
-				list_add(&tmp->list, &thread_list);
-			}
-		while_each_thread(g,p);
-		rcu_read_unlock();
+
+		for (ct = current->mm->core_state->dumper.next;
+						ct; ct = ct->next) {
+			tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+			if (!tmp)
+				goto cleanup;
+
+			tmp->thread = ct->task;
+			list_add(&tmp->list, &thread_list);
+		}
+
 		list_for_each(t, &thread_list) {
 			struct elf_thread_status *tmp;
 			int sz;
-- 
GitLab


From a94e2d408eaedbd85aae259621d46fafc10479a2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:46 -0700
Subject: [PATCH 731/853] coredump: kill mm->core_done

Now that we have core_state->dumper list we can use it to wake up the
sub-threads waiting for the coredump completion.

This uglifies the code and .text grows by 47 bytes, but otoh mm_struct
lessens by sizeof(struct completion).  Also, with this change we can
decouple exit_mm() from the coredumping code.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 25 ++++++++++++++++++++++---
 include/linux/mm_types.h |  4 +---
 kernel/exit.c            |  8 +++++++-
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index fe2873b8037..bff43aeb235 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1602,7 +1602,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 	struct completion *vfork_done;
 	int core_waiters;
 
-	init_completion(&mm->core_done);
 	init_completion(&core_state->startup);
 	core_state->dumper.task = tsk;
 	core_state->dumper.next = NULL;
@@ -1628,6 +1627,27 @@ fail:
 	return core_waiters;
 }
 
+static void coredump_finish(struct mm_struct *mm)
+{
+	struct core_thread *curr, *next;
+	struct task_struct *task;
+
+	next = mm->core_state->dumper.next;
+	while ((curr = next) != NULL) {
+		next = curr->next;
+		task = curr->task;
+		/*
+		 * see exit_mm(), curr->task must not see
+		 * ->task == NULL before we read ->next.
+		 */
+		smp_mb();
+		curr->task = NULL;
+		wake_up_process(task);
+	}
+
+	mm->core_state = NULL;
+}
+
 /*
  * set_dumpable converts traditional three-value dumpable to two flags and
  * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
@@ -1812,8 +1832,7 @@ fail_unlock:
 		argv_free(helper_argv);
 
 	current->fsuid = fsuid;
-	complete_all(&mm->core_done);
-	mm->core_state = NULL;
+	coredump_finish(mm);
 fail:
 	return retval;
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4d0d0abc79f..746f975b58e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -229,9 +229,7 @@ struct mm_struct {
 
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
-	/* coredumping support */
-	struct core_state *core_state;
-	struct completion core_done;
+	struct core_state *core_state; /* coredumping support */
 
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;	/* aio lock */
diff --git a/kernel/exit.c b/kernel/exit.c
index b66f0d55c79..8a4d4d12e29 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -691,7 +691,13 @@ static void exit_mm(struct task_struct * tsk)
 		if (atomic_dec_and_test(&core_state->nr_threads))
 			complete(&core_state->startup);
 
-		wait_for_completion(&mm->core_done);
+		for (;;) {
+			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			if (!self.task) /* see coredump_finish() */
+				break;
+			schedule();
+		}
+		__set_task_state(tsk, TASK_RUNNING);
 		down_read(&mm->mmap_sem);
 	}
 	atomic_inc(&mm->mm_count);
-- 
GitLab


From 565b9b14e7f48131bca58840aa404bbef058fa89 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:47 -0700
Subject: [PATCH 732/853] coredump: format_corename: fix the "core_uses_pid"
 logic

I don't understand why the multi-thread coredump implies the core_uses_pid
behaviour, but we shouldn't use mm->mm_users for that.  This counter can
be incremented by get_task_mm().  Use the value returned by
coredump_wait() instead.

Also, remove the "const char *pattern" argument, format_corename() can use
core_pattern directly.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index bff43aeb235..5e559013e30 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1379,17 +1379,14 @@ EXPORT_SYMBOL(set_binfmt);
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, int nr_threads, long signr)
 {
-	const char *pat_ptr = pattern;
+	const char *pat_ptr = core_pattern;
+	int ispipe = (*pat_ptr == '|');
 	char *out_ptr = corename;
 	char *const out_end = corename + CORENAME_MAX_SIZE;
 	int rc;
 	int pid_in_pattern = 0;
-	int ispipe = 0;
-
-	if (*pattern == '|')
-		ispipe = 1;
 
 	/* Repeat as long as we have more pattern to process and more output
 	   space */
@@ -1490,7 +1487,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
 	 * and core_uses_pid is set, then .%pid will be appended to
 	 * the filename. Do not do this for piped commands. */
 	if (!ispipe && !pid_in_pattern
-            && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
+	    && (core_uses_pid || nr_threads)) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
 			      ".%d", task_tgid_vnr(current));
 		if (rc > out_end - out_ptr)
@@ -1753,7 +1750,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 * uses lock_kernel()
 	 */
  	lock_kernel();
-	ispipe = format_corename(corename, core_pattern, signr);
+	ispipe = format_corename(corename, retval, signr);
 	unlock_kernel();
 	/*
 	 * Don't bother to check the RLIMIT_CORE value if core_pattern points
-- 
GitLab


From 1a4d9b0aa0d3c50314e57525a5e5ec2cfc48b4c8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:47 -0700
Subject: [PATCH 733/853] workqueues: insert_work: use "list_head *" instead of
 "int tail"

insert_work() inserts the new work_struct before or after cwq->worklist,
depending on the "int tail" parameter. Change it to accept "list_head *"
instead, this shrinks .text a bit and allows us to insert the barrier
after a specific work_struct.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/workqueue.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6fd158b2102..d9a2d65cc63 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -125,7 +125,7 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 }
 
 static void insert_work(struct cpu_workqueue_struct *cwq,
-				struct work_struct *work, int tail)
+			struct work_struct *work, struct list_head *head)
 {
 	set_wq_data(work, cwq);
 	/*
@@ -133,10 +133,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 	 * result of list_add() below, see try_to_grab_pending().
 	 */
 	smp_wmb();
-	if (tail)
-		list_add_tail(&work->entry, &cwq->worklist);
-	else
-		list_add(&work->entry, &cwq->worklist);
+	list_add_tail(&work->entry, head);
 	wake_up(&cwq->more_work);
 }
 
@@ -146,7 +143,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
 	unsigned long flags;
 
 	spin_lock_irqsave(&cwq->lock, flags);
-	insert_work(cwq, work, 1);
+	insert_work(cwq, work, &cwq->worklist);
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -361,14 +358,14 @@ static void wq_barrier_func(struct work_struct *work)
 }
 
 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
-					struct wq_barrier *barr, int tail)
+			struct wq_barrier *barr, struct list_head *head)
 {
 	INIT_WORK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
 
 	init_completion(&barr->done);
 
-	insert_work(cwq, &barr->work, tail);
+	insert_work(cwq, &barr->work, head);
 }
 
 static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -388,7 +385,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 		active = 0;
 		spin_lock_irq(&cwq->lock);
 		if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
-			insert_wq_barrier(cwq, &barr, 1);
+			insert_wq_barrier(cwq, &barr, &cwq->worklist);
 			active = 1;
 		}
 		spin_unlock_irq(&cwq->lock);
@@ -473,7 +470,7 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
 
 	spin_lock_irq(&cwq->lock);
 	if (unlikely(cwq->current_work == work)) {
-		insert_wq_barrier(cwq, &barr, 0);
+		insert_wq_barrier(cwq, &barr, cwq->worklist.next);
 		running = 1;
 	}
 	spin_unlock_irq(&cwq->lock);
-- 
GitLab


From db700897224b5ebdf852f2d38920ce428940d059 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:49 -0700
Subject: [PATCH 734/853] workqueues: implement flush_work()

Most users of flush_workqueue() can be changed to use cancel_work_sync(),
but sometimes we really need to wait for the completion and cancelling is not
an option. schedule_on_each_cpu() is a good example.

Add the new helper, flush_work(work), which waits for the completion of the
specific work_struct. More precisely, it "flushes" the result of the last
queue_work() which is visible to the caller.

For example, this code

	queue_work(wq, work);
	/* WINDOW */
	queue_work(wq, work);

	flush_work(work);

doesn't necessarily work "as expected". What can happen in the WINDOW above is

	- wq starts the execution of work->func()

	- the caller migrates to another CPU

now, after the 2nd queue_work() this work is active on the previous CPU, and
at the same time it is queued on another. In this case flush_work(work) may
return before the first work->func() completes.

It is trivial to add another helper

	int flush_work_sync(struct work_struct *work)
	{
		return flush_work(work) || wait_on_work(work);
	}

which works "more correctly", but it has to iterate over all CPUs and thus
it is much slower than flush_work().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Max Krasnyansky <maxk@qualcomm.com>
Acked-by: Jarek Poplawski <jarkao2@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h |  2 ++
 kernel/workqueue.c        | 46 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 14d47120682..5c158c477ac 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -201,6 +201,8 @@ extern int keventd_up(void);
 extern void init_workqueues(void);
 int execute_in_process_context(work_func_t fn, struct execute_work *);
 
+extern int flush_work(struct work_struct *work);
+
 extern int cancel_work_sync(struct work_struct *work);
 
 /*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d9a2d65cc63..ee41cf857d5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -423,6 +423,52 @@ void flush_workqueue(struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @work: the work which is to be flushed
+ *
+ * It is expected that, prior to calling flush_work(), the caller has
+ * arranged for the work to not be requeued, otherwise it doesn't make
+ * sense to use this function.
+ */
+int flush_work(struct work_struct *work)
+{
+	struct cpu_workqueue_struct *cwq;
+	struct list_head *prev;
+	struct wq_barrier barr;
+
+	might_sleep();
+	cwq = get_wq_data(work);
+	if (!cwq)
+		return 0;
+
+	prev = NULL;
+	spin_lock_irq(&cwq->lock);
+	if (!list_empty(&work->entry)) {
+		/*
+		 * See the comment near try_to_grab_pending()->smp_rmb().
+		 * If it was re-queued under us we are not going to wait.
+		 */
+		smp_rmb();
+		if (unlikely(cwq != get_wq_data(work)))
+			goto out;
+		prev = &work->entry;
+	} else {
+		if (cwq->current_work != work)
+			goto out;
+		prev = &cwq->worklist;
+	}
+	insert_wq_barrier(cwq, &barr, prev->next);
+out:
+	spin_unlock_irq(&cwq->lock);
+	if (!prev)
+		return 0;
+
+	wait_for_completion(&barr.done);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(flush_work);
+
 /*
  * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
  * so this work can't be re-armed in any way.
-- 
GitLab


From 8616a89ab761239c963eea3a63be383f127cc7e8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:49 -0700
Subject: [PATCH 735/853] workqueues: schedule_on_each_cpu: use flush_work()

Change schedule_on_each_cpu() to use flush_work() instead of
flush_workqueue(), this way we don't wait for other work_struct's which
can be queued meanwhile.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/workqueue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ee41cf857d5..5fbffd302eb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -690,7 +690,8 @@ int schedule_on_each_cpu(work_func_t func)
 		set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
 		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
 	}
-	flush_workqueue(keventd_wq);
+	for_each_online_cpu(cpu)
+		flush_work(per_cpu_ptr(works, cpu));
 	put_online_cpus();
 	free_percpu(works);
 	return 0;
-- 
GitLab


From 3da1c84c00c7e5fa8348336bd8c342f9128b0f14 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:50 -0700
Subject: [PATCH 736/853] workqueues: make get_online_cpus() useable for
 work->func()

workqueue_cpu_callback(CPU_DEAD) flushes cwq->thread under
cpu_maps_update_begin().  This means that the multithreaded workqueues
can't use get_online_cpus() due to the possible deadlock, very bad and
very old problem.

Introduce the new state, CPU_POST_DEAD, which is called after
cpu_hotplug_done() but before cpu_maps_update_done().

Change workqueue_cpu_callback() to use CPU_POST_DEAD instead of CPU_DEAD.
This means that create/destroy functions can't rely on get_online_cpus()
any longer and should take cpu_add_remove_lock instead.

[akpm@linux-foundation.org: fix CONFIG_SMP=n]
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Gautham R Shenoy <ego@in.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h      | 15 +++++++++++----
 include/linux/notifier.h |  2 ++
 kernel/cpu.c             |  5 +++++
 kernel/workqueue.c       | 18 +++++++++---------
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7464ba3b433..d7faf880849 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,10 +69,11 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
 #endif
 
 int cpu_up(unsigned int cpu);
-
 extern void cpu_hotplug_init(void);
+extern void cpu_maps_update_begin(void);
+extern void cpu_maps_update_done(void);
 
-#else
+#else	/* CONFIG_SMP */
 
 static inline int register_cpu_notifier(struct notifier_block *nb)
 {
@@ -87,10 +88,16 @@ static inline void cpu_hotplug_init(void)
 {
 }
 
+static inline void cpu_maps_update_begin(void)
+{
+}
+
+static inline void cpu_maps_update_done(void)
+{
+}
+
 #endif /* CONFIG_SMP */
 extern struct sysdev_class cpu_sysdev_class;
-extern void cpu_maps_update_begin(void);
-extern void cpu_maps_update_done(void);
 
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index bd3d72ddf33..da2698b0fdd 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -214,6 +214,8 @@ static inline int notifier_to_errno(int ret)
 #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
 #define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
 				        * not handling interrupts, soon dead */
+#define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+					* lock is dropped */
 
 /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
  * operation in progress
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2cc409ce0a8..10ba5f1004a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -285,6 +285,11 @@ out_allowed:
 	set_cpus_allowed_ptr(current, &old_allowed);
 out_release:
 	cpu_hotplug_done();
+	if (!err) {
+		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
+					    hcpu) == NOTIFY_BAD)
+			BUG();
+	}
 	return err;
 }
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5fbffd302eb..828e58230cb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -828,7 +828,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 		err = create_workqueue_thread(cwq, singlethread_cpu);
 		start_workqueue_thread(cwq, -1);
 	} else {
-		get_online_cpus();
+		cpu_maps_update_begin();
 		spin_lock(&workqueue_lock);
 		list_add(&wq->list, &workqueues);
 		spin_unlock(&workqueue_lock);
@@ -840,7 +840,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 			err = create_workqueue_thread(cwq, cpu);
 			start_workqueue_thread(cwq, cpu);
 		}
-		put_online_cpus();
+		cpu_maps_update_done();
 	}
 
 	if (err) {
@@ -854,8 +854,8 @@ EXPORT_SYMBOL_GPL(__create_workqueue_key);
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
 	/*
-	 * Our caller is either destroy_workqueue() or CPU_DEAD,
-	 * get_online_cpus() protects cwq->thread.
+	 * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
+	 * cpu_add_remove_lock protects cwq->thread.
 	 */
 	if (cwq->thread == NULL)
 		return;
@@ -865,7 +865,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 
 	flush_cpu_workqueue(cwq);
 	/*
-	 * If the caller is CPU_DEAD and cwq->worklist was not empty,
+	 * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
 	 * a concurrent flush_workqueue() can insert a barrier after us.
 	 * However, in that case run_workqueue() won't return and check
 	 * kthread_should_stop() until it flushes all work_struct's.
@@ -889,14 +889,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	const cpumask_t *cpu_map = wq_cpu_map(wq);
 	int cpu;
 
-	get_online_cpus();
+	cpu_maps_update_begin();
 	spin_lock(&workqueue_lock);
 	list_del(&wq->list);
 	spin_unlock(&workqueue_lock);
 
 	for_each_cpu_mask_nr(cpu, *cpu_map)
 		cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
-	put_online_cpus();
+ 	cpu_maps_update_done();
 
 	free_percpu(wq->cpu_wq);
 	kfree(wq);
@@ -935,7 +935,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 		case CPU_UP_CANCELED:
 			start_workqueue_thread(cwq, -1);
-		case CPU_DEAD:
+		case CPU_POST_DEAD:
 			cleanup_workqueue_thread(cwq);
 			break;
 		}
@@ -943,7 +943,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 	switch (action) {
 	case CPU_UP_CANCELED:
-	case CPU_DEAD:
+	case CPU_POST_DEAD:
 		cpu_clear(cpu, cpu_populated_map);
 	}
 
-- 
GitLab


From 69b895fd13d73aebf62b75502eb6513d43057ba3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:51 -0700
Subject: [PATCH 737/853] S390 topology: don't use kthread() for
 arch_reinit_sched_domains()

Now that it is safe to use get_online_cpus() we can revert

	[S390] cpu topology: Fix possible deadlock.
	commit: fd781fa25c9e9c6fd1599df060b05e7c4ad724e5

and call arch_reinit_sched_domains() directly from topology_work_fn().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Gautham R Shenoy <ego@in.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/kernel/topology.c | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 212d618b009..632b13e1005 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -9,7 +9,6 @@
 #include <linux/device.h>
 #include <linux/bootmem.h>
 #include <linux/sched.h>
-#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
@@ -230,20 +229,9 @@ void arch_update_cpu_topology(void)
 	}
 }
 
-static int topology_kthread(void *data)
-{
-	arch_reinit_sched_domains();
-	return 0;
-}
-
 static void topology_work_fn(struct work_struct *work)
 {
-	/* We can't call arch_reinit_sched_domains() from a multi-threaded
-	 * workqueue context since it may deadlock in case of cpu hotplug.
-	 * So we have to create a kernel thread in order to call
-	 * arch_reinit_sched_domains().
-	 */
-	kthread_run(topology_kthread, NULL, "topology_update");
+	arch_reinit_sched_domains();
 }
 
 void topology_schedule_update(void)
-- 
GitLab


From a67da70dc0955580665f5444f318b92e69a3c272 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:52 -0700
Subject: [PATCH 738/853] workqueues: lockdep annotations for flush_work()

Add lockdep annotations to flush_work() and update the comment.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Jarek Poplawski <jarkao2@o2.pl>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/workqueue.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 828e58230cb..4fcb75b9844 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -427,6 +427,8 @@ EXPORT_SYMBOL_GPL(flush_workqueue);
  * flush_work - block until a work_struct's callback has terminated
  * @work: the work which is to be flushed
  *
+ * Returns false if @work has already terminated.
+ *
  * It is expected that, prior to calling flush_work(), the caller has
  * arranged for the work to not be requeued, otherwise it doesn't make
  * sense to use this function.
@@ -442,6 +444,9 @@ int flush_work(struct work_struct *work)
 	if (!cwq)
 		return 0;
 
+	lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
 	prev = NULL;
 	spin_lock_irq(&cwq->lock);
 	if (!list_empty(&work->entry)) {
-- 
GitLab


From ef1ca236b8d645349ed6569598ae3f6c1b9511c0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:53 -0700
Subject: [PATCH 739/853] workqueues: queue_work() can use queue_work_on()

queue_work() can use queue_work_on() to avoid the code duplication.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/workqueue.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4fcb75b9844..fe08a8512dd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -159,14 +159,11 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
  */
 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-	int ret = 0;
+	int ret;
+
+	ret = queue_work_on(get_cpu(), wq, work);
+	put_cpu();
 
-	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
-		BUG_ON(!list_empty(&work->entry));
-		__queue_work(wq_per_cpu(wq, get_cpu()), work);
-		put_cpu();
-		ret = 1;
-	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(queue_work);
-- 
GitLab


From 8de6d308bab4f67fcf953562f9f08f9527cad72d Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:53 -0700
Subject: [PATCH 740/853] workqueues: schedule_on_each_cpu() can use
 schedule_work_on()

schedule_on_each_cpu() can use schedule_work_on() to avoid the code
duplication.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/workqueue.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fe08a8512dd..7cf430372f8 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -689,8 +689,7 @@ int schedule_on_each_cpu(work_func_t func)
 		struct work_struct *work = per_cpu_ptr(works, cpu);
 
 		INIT_WORK(work, func);
-		set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
-		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
+		schedule_work_on(cpu, work);
 	}
 	for_each_online_cpu(cpu)
 		flush_work(per_cpu_ptr(works, cpu));
-- 
GitLab


From 8448502cfc915f70e3f8923849ade27d472044cb Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:54 -0700
Subject: [PATCH 741/853] workqueues: do CPU_UP_CANCELED if CPU_UP_PREPARE
 fails

The bug was pointed out by Akinobu Mita <akinobu.mita@gmail.com>, and this
patch is based on his original patch.

workqueue_cpu_callback(CPU_UP_PREPARE) expects that if it returns
NOTIFY_BAD, _cpu_up() will send CPU_UP_CANCELED then.

However, this is not true since

	"cpu hotplug: cpu: deliver CPU_UP_CANCELED only to NOTIFY_OKed callbacks with CPU_UP_PREPARE"
	commit: a0d8cdb652d35af9319a9e0fb7134de2a276c636

The callback which has returned NOTIFY_BAD will not receive
CPU_UP_CANCELED.  Change the code to fulfil the CPU_UP_CANCELED logic if
CPU_UP_PREPARE fails.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Reported-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/workqueue.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7cf430372f8..ec7e4f62aaf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -911,6 +911,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 	unsigned int cpu = (unsigned long)hcpu;
 	struct cpu_workqueue_struct *cwq;
 	struct workqueue_struct *wq;
+	int ret = NOTIFY_OK;
 
 	action &= ~CPU_TASKS_FROZEN;
 
@@ -918,7 +919,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 	case CPU_UP_PREPARE:
 		cpu_set(cpu, cpu_populated_map);
 	}
-
+undo:
 	list_for_each_entry(wq, &workqueues, list) {
 		cwq = per_cpu_ptr(wq->cpu_wq, cpu);
 
@@ -928,7 +929,9 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 				break;
 			printk(KERN_ERR "workqueue [%s] for %i failed\n",
 				wq->name, cpu);
-			return NOTIFY_BAD;
+			action = CPU_UP_CANCELED;
+			ret = NOTIFY_BAD;
+			goto undo;
 
 		case CPU_ONLINE:
 			start_workqueue_thread(cwq, cpu);
@@ -948,7 +951,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		cpu_clear(cpu, cpu_populated_map);
 	}
 
-	return NOTIFY_OK;
+	return ret;
 }
 
 void __init init_workqueues(void)
-- 
GitLab


From 95b68dec0d52c7b8fea3698b3938cf3ab936436b Mon Sep 17 00:00:00 2001
From: Chandru <chandru@in.ibm.com>
Date: Fri, 25 Jul 2008 01:47:55 -0700
Subject: [PATCH 742/853] calgary iommu: use the first kernels TCE tables in
 kdump

kdump kernel fails to boot with calgary iommu and aacraid driver on a x366
box.  The ongoing dma's of aacraid from the first kernel continue to exist
until the driver is loaded in the kdump kernel.  Calgary is initialized
prior to aacraid and creation of new tce tables causes wrong dma's to
occur.  Here we try to get the tce tables of the first kernel in kdump
kernel and use them.  While in the kdump kernel we do not allocate new tce
tables but instead read the base address register contents of calgary
iommu and use the tables that the registers point to.  With these changes
the kdump kernel and hence aacraid now boots normally.

Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/pci-calgary_64.c | 85 +++++++++++++++++++++++++++++---
 include/linux/crash_dump.h       |  8 +++
 2 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 151f2d171f7..19e7fc7c2c4 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/crash_dump.h>
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
 #include <linux/pci_ids.h>
@@ -167,6 +168,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl);
 static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
 static void calioc2_tce_cache_blast(struct iommu_table *tbl);
 static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
 
 static struct cal_chipset_ops calgary_chip_ops = {
 	.handle_quirks = calgary_handle_quirks,
@@ -830,7 +833,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
 
 	tbl = pci_iommu(dev->bus);
 	tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-	tce_free(tbl, 0, tbl->it_size);
+
+	if (is_kdump_kernel())
+		calgary_init_bitmap_from_tce_table(tbl);
+	else
+		tce_free(tbl, 0, tbl->it_size);
 
 	if (is_calgary(dev->device))
 		tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1216,10 @@ static int __init calgary_init(void)
 	if (ret)
 		return ret;
 
+	/* Purely for kdump kernel case */
+	if (is_kdump_kernel())
+		get_tce_space_from_tar();
+
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
 		if (!dev)
@@ -1339,6 +1350,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
 	return (val != 0xffffffff);
 }
 
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Function for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+	u64 *tp;
+	unsigned int index;
+	tp = ((u64 *)tbl->it_base);
+	for (index = 0 ; index < tbl->it_size; index++) {
+		if (*tp != 0x0)
+			set_bit(index, tbl->it_map);
+		tp++;
+	}
+}
+
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base address register of calgary iommu
+ */
+static void get_tce_space_from_tar()
+{
+	int bus;
+	void __iomem *target;
+	unsigned long tce_space;
+
+	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+		struct calgary_bus_info *info = &bus_info[bus];
+		unsigned short pci_device;
+		u32 val;
+
+		val = read_pci_config(bus, 0, 0, 0);
+		pci_device = (val & 0xFFFF0000) >> 16;
+
+		if (!is_cal_pci_dev(pci_device))
+			continue;
+		if (info->translation_disabled)
+			continue;
+
+		if (calgary_bus_has_devices(bus, pci_device) ||
+						translate_empty_slots) {
+			target = calgary_reg(bus_info[bus].bbar,
+						tar_offset(bus));
+			tce_space = be64_to_cpu(readq(target));
+			tce_space = tce_space & TAR_SW_BITS;
+
+			tce_space = tce_space & (~specified_table_size);
+			info->tce_space = (u64 *)__va(tce_space);
+		}
+	}
+	return;
+}
+
 void __init detect_calgary(void)
 {
 	int bus;
@@ -1394,7 +1460,8 @@ void __init detect_calgary(void)
 		return;
 	}
 
-	specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
+	specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
+					saved_max_pfn : max_pfn) * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1479,16 @@ void __init detect_calgary(void)
 
 		if (calgary_bus_has_devices(bus, pci_device) ||
 		    translate_empty_slots) {
-			tbl = alloc_tce_table();
-			if (!tbl)
-				goto cleanup;
-			info->tce_space = tbl;
+			/*
+			 * If it is kdump kernel, find and use tce tables
+			 * from first kernel, else allocate tce tables here
+			 */
+			if (!is_kdump_kernel()) {
+				tbl = alloc_tce_table();
+				if (!tbl)
+					goto cleanup;
+				info->tce_space = tbl;
+			}
 			calgary_found = 1;
 		}
 	}
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 22c7ac5cd80..6cd39a927e1 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcore;
 
 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
 
+static inline int is_kdump_kernel(void)
+{
+	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
+}
+#else /* !CONFIG_CRASH_DUMP */
+static inline int is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
+
+extern unsigned long saved_max_pfn;
 #endif /* LINUX_CRASHDUMP_H */
-- 
GitLab


From 2027d1abc25ff770cc3bc936abd33570ce85d85a Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:57 -0700
Subject: [PATCH 743/853] idr: change the idr structure

After scalability problems have been detected when using the sysV ipcs, I have
proposed to use an RCU based implementation of the IDR api instead (see
threads http://lkml.org/lkml/2008/4/11/212 and
http://lkml.org/lkml/2008/4/29/295).

This resulted in many people asking to convert the idr API and make it rcu
safe (because most of the code was duplicated and thus unmaintainable and
unreviewable).

So here is a first attempt.

The important change with respect to the idr API itself is in idr removal:
idr layers are freed after a grace period, instead of being moved to the
free list.

The important change with respect to ipcs is that idr_find() can now be
called locklessly inside an RCU read-side critical section.

Here are the results I've got for the pmsg test sent by Manfred:

   2.6.25-rc3-mm1   2.6.25-rc3-mm1+   2.6.25-mm1   Patched 2.6.25-mm1
1         1168441           1064021       876000               947488
2         1094264            921059      1549592              1730685
3         2082520           1738165      1694370              2324880
4         2079929           1695521       404553              2400408
5         2898758            406566       391283              3246580
6         2921417            261275       263249              3752148
7         3308761            126056       191742              4243142
8         3329456            100129       141722              4275780

1st column: stock 2.6.25-rc3-mm1
2nd column: 2.6.25-rc3-mm1 + ipc patches (store ipcs into idrs)
3rd column: stock 2.6.25-mm1
4th column: 2.6.25-mm1 + this patch series.

This patch:

Add an rcu_head to the idr_layer structure in order to free it after a grace
period.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 9a2d762124d..1af61d23be3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/init.h>
+#include <linux/rcupdate.h>
 
 #if BITS_PER_LONG == 32
 # define IDR_BITS 5
@@ -51,6 +52,7 @@ struct idr_layer {
 	unsigned long		 bitmap; /* A zero bit means "space here" */
 	struct idr_layer	*ary[1<<IDR_BITS];
 	int			 count;	 /* When zero, we can release it */
+	struct rcu_head		 rcu_head;
 };
 
 struct idr {
-- 
GitLab


From 4ae537892ab9858f71c78701f4651ad1ca531a1b Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:58 -0700
Subject: [PATCH 744/853] idr: rename some of the idr APIs internal routines

This is a trivial patch that renames:

   . alloc_layer to get_from_free_list since it is idr_pre_get that actually
     allocates memory.
   . free_layer to move_to_free_list since memory is not actually freed there.

This makes things more clear for the next patches.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/lib/idr.c b/lib/idr.c
index 7a02e173f02..8170ace154f 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -35,7 +35,7 @@
 
 static struct kmem_cache *idr_layer_cache;
 
-static struct idr_layer *alloc_layer(struct idr *idp)
+static struct idr_layer *get_from_free_list(struct idr *idp)
 {
 	struct idr_layer *p;
 	unsigned long flags;
@@ -51,14 +51,14 @@ static struct idr_layer *alloc_layer(struct idr *idp)
 }
 
 /* only called when idp->lock is held */
-static void __free_layer(struct idr *idp, struct idr_layer *p)
+static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
 {
 	p->ary[0] = idp->id_free;
 	idp->id_free = p;
 	idp->id_free_cnt++;
 }
 
-static void free_layer(struct idr *idp, struct idr_layer *p)
+static void move_to_free_list(struct idr *idp, struct idr_layer *p)
 {
 	unsigned long flags;
 
@@ -66,7 +66,7 @@ static void free_layer(struct idr *idp, struct idr_layer *p)
 	 * Depends on the return element being zeroed.
 	 */
 	spin_lock_irqsave(&idp->lock, flags);
-	__free_layer(idp, p);
+	__move_to_free_list(idp, p);
 	spin_unlock_irqrestore(&idp->lock, flags);
 }
 
@@ -109,7 +109,7 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
 		new = kmem_cache_alloc(idr_layer_cache, gfp_mask);
 		if (new == NULL)
 			return (0);
-		free_layer(idp, new);
+		move_to_free_list(idp, new);
 	}
 	return 1;
 }
@@ -167,7 +167,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 		 * Create the layer below if it is missing.
 		 */
 		if (!p->ary[m]) {
-			if (!(new = alloc_layer(idp)))
+			new = get_from_free_list(idp);
+			if (!new)
 				return -1;
 			p->ary[m] = new;
 			p->count++;
@@ -192,7 +193,7 @@ build_up:
 	p = idp->top;
 	layers = idp->layers;
 	if (unlikely(!p)) {
-		if (!(p = alloc_layer(idp)))
+		if (!(p = get_from_free_list(idp)))
 			return -1;
 		layers = 1;
 	}
@@ -204,7 +205,7 @@ build_up:
 		layers++;
 		if (!p->count)
 			continue;
-		if (!(new = alloc_layer(idp))) {
+		if (!(new = get_from_free_list(idp))) {
 			/*
 			 * The allocation failed.  If we built part of
 			 * the structure tear it down.
@@ -214,7 +215,7 @@ build_up:
 				p = p->ary[0];
 				new->ary[0] = NULL;
 				new->bitmap = new->count = 0;
-				__free_layer(idp, new);
+				__move_to_free_list(idp, new);
 			}
 			spin_unlock_irqrestore(&idp->lock, flags);
 			return -1;
@@ -351,7 +352,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
 		__clear_bit(n, &p->bitmap);
 		p->ary[n] = NULL;
 		while(*paa && ! --((**paa)->count)){
-			free_layer(idp, **paa);
+			move_to_free_list(idp, **paa);
 			**paa-- = NULL;
 		}
 		if (!*paa)
@@ -378,12 +379,12 @@ void idr_remove(struct idr *idp, int id)
 
 		p = idp->top->ary[0];
 		idp->top->bitmap = idp->top->count = 0;
-		free_layer(idp, idp->top);
+		move_to_free_list(idp, idp->top);
 		idp->top = p;
 		--idp->layers;
 	}
 	while (idp->id_free_cnt >= IDR_FREE_MAX) {
-		p = alloc_layer(idp);
+		p = get_from_free_list(idp);
 		kmem_cache_free(idr_layer_cache, p);
 	}
 	return;
@@ -426,7 +427,7 @@ void idr_remove_all(struct idr *idp)
 		while (n < fls(id)) {
 			if (p) {
 				memset(p, 0, sizeof *p);
-				free_layer(idp, p);
+				move_to_free_list(idp, p);
 			}
 			n += IDR_BITS;
 			p = *--paa;
@@ -444,7 +445,7 @@ EXPORT_SYMBOL(idr_remove_all);
 void idr_destroy(struct idr *idp)
 {
 	while (idp->id_free_cnt) {
-		struct idr_layer *p = alloc_layer(idp);
+		struct idr_layer *p = get_from_free_list(idp);
 		kmem_cache_free(idr_layer_cache, p);
 	}
 }
@@ -749,7 +750,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 	 * allocation.
 	 */
 	if (ida->idr.id_free_cnt || ida->free_bitmap) {
-		struct idr_layer *p = alloc_layer(&ida->idr);
+		struct idr_layer *p = get_from_free_list(&ida->idr);
 		if (p)
 			kmem_cache_free(idr_layer_cache, p);
 	}
-- 
GitLab


From f098ad655f4dd8e3da98ffbeda9cedcc4459c01a Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:59 -0700
Subject: [PATCH 745/853] idr: fix a printk call

Fix the incomplete printk call.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/idr.c b/lib/idr.c
index 8170ace154f..9d905b131ec 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -326,7 +326,8 @@ EXPORT_SYMBOL(idr_get_new);
 
 static void idr_remove_warning(int id)
 {
-	printk("idr_remove called for id=%d which is not allocated.\n", id);
+	printk(KERN_WARNING
+		"idr_remove called for id=%d which is not allocated.\n", id);
 	dump_stack();
 }
 
-- 
GitLab


From 944ca05c7b4972f2ebf37262e0f4933d178ad6db Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:59 -0700
Subject: [PATCH 746/853] idr: error checking factorization

Do some code factorization in the return code analysis.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h |  6 ++++++
 lib/idr.c           | 30 +++++++++---------------------
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 1af61d23be3..762c3f2c631 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -73,6 +73,12 @@ struct idr {
 }
 #define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
+/* Actions to be taken after a call to _idr_sub_alloc */
+#define IDR_NEED_TO_GROW -2
+#define IDR_NOMORE_SPACE -3
+
+#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
+
 /*
  * This is what we export.
  */
diff --git a/lib/idr.c b/lib/idr.c
index 9d905b131ec..80ba06f29d3 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -143,7 +143,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			/* if already at the top layer, we need to grow */
 			if (!(p = pa[l])) {
 				*starting_id = id;
-				return -2;
+				return IDR_NEED_TO_GROW;
 			}
 
 			/* If we need to go up one layer, continue the
@@ -160,7 +160,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			id = ((id >> sh) ^ n ^ m) << sh;
 		}
 		if ((id >= MAX_ID_BIT) || (id < 0))
-			return -3;
+			return IDR_NOMORE_SPACE;
 		if (l == 0)
 			break;
 		/*
@@ -229,7 +229,7 @@ build_up:
 	idp->top = p;
 	idp->layers = layers;
 	v = sub_alloc(idp, &id, pa);
-	if (v == -2)
+	if (v == IDR_NEED_TO_GROW)
 		goto build_up;
 	return(v);
 }
@@ -278,12 +278,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 	 * This is a cheap hack until the IDR code can be fixed to
 	 * return proper error values.
 	 */
-	if (rv < 0) {
-		if (rv == -1)
-			return -EAGAIN;
-		else /* Will be -3 */
-			return -ENOSPC;
-	}
+	if (rv < 0)
+		return _idr_rc_to_errno(rv);
 	*id = rv;
 	return 0;
 }
@@ -313,12 +309,8 @@ int idr_get_new(struct idr *idp, void *ptr, int *id)
 	 * This is a cheap hack until the IDR code can be fixed to
 	 * return proper error values.
 	 */
-	if (rv < 0) {
-		if (rv == -1)
-			return -EAGAIN;
-		else /* Will be -3 */
-			return -ENOSPC;
-	}
+	if (rv < 0)
+		return _idr_rc_to_errno(rv);
 	*id = rv;
 	return 0;
 }
@@ -696,12 +688,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
  restart:
 	/* get vacant slot */
 	t = idr_get_empty_slot(&ida->idr, idr_id, pa);
-	if (t < 0) {
-		if (t == -1)
-			return -EAGAIN;
-		else /* will be -3 */
-			return -ENOSPC;
-	}
+	if (t < 0)
+		return _idr_rc_to_errno(t);
 
 	if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
 		return -ENOSPC;
-- 
GitLab


From 3219b3b7456d5cf15ba7b1fe7b1bcf15ce8840e2 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:00 -0700
Subject: [PATCH 747/853] idr: make idr_get_new* rcu-safe

Make the idr_get_new* routines rcu-safe.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/lib/idr.c b/lib/idr.c
index 80ba06f29d3..44ab3b2a4eb 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,6 +6,8 @@
  * Modified by George Anzinger to reuse immediately and to use
  * find bit instructions.  Also removed _irq on spinlocks.
  *
+ * Modified by Nadia Derbey to make it RCU safe.
+ *
  * Small id to pointer translation service.
  *
  * It uses a radix tree like structure as a sparse array indexed
@@ -96,7 +98,7 @@ static void idr_mark_full(struct idr_layer **pa, int id)
  * @gfp_mask:	memory allocation flags
  *
  * This function should be called prior to locking and calling the
- * following function.  It preallocates enough memory to satisfy
+ * idr_get_new* functions. It preallocates enough memory to satisfy
  * the worst possible allocation.
  *
  * If the system is REALLY out of memory this function returns 0,
@@ -170,7 +172,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			new = get_from_free_list(idp);
 			if (!new)
 				return -1;
-			p->ary[m] = new;
+			rcu_assign_pointer(p->ary[m], new);
 			p->count++;
 		}
 		pa[l--] = p;
@@ -226,7 +228,7 @@ build_up:
 			__set_bit(0, &new->bitmap);
 		p = new;
 	}
-	idp->top = p;
+	rcu_assign_pointer(idp->top, p);
 	idp->layers = layers;
 	v = sub_alloc(idp, &id, pa);
 	if (v == IDR_NEED_TO_GROW)
@@ -245,7 +247,8 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
 		 * Successfully found an empty slot.  Install the user
 		 * pointer and mark the slot full.
 		 */
-		pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr;
+		rcu_assign_pointer(pa[0]->ary[id & IDR_MASK],
+				(struct idr_layer *)ptr);
 		pa[0]->count++;
 		idr_mark_full(pa, id);
 	}
@@ -710,7 +713,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 			return -EAGAIN;
 
 		memset(bitmap, 0, sizeof(struct ida_bitmap));
-		pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap;
+		rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
+				(void *)bitmap);
 		pa[0]->count++;
 	}
 
-- 
GitLab


From f9c46d6ea5ce138a886c3a0f10a46130afab75f5 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:01 -0700
Subject: [PATCH 748/853] idr: make idr_find rcu-safe

Make idr_find rcu-safe: it can now be called inside an rcu_read critical
section.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 16 ++++++++++++++++
 lib/idr.c           | 11 ++++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 762c3f2c631..fa035f96f2a 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -79,6 +79,22 @@ struct idr {
 
 #define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
 
+/**
+ * idr synchronization (stolen from radix-tree.h)
+ *
+ * idr_find() is able to be called locklessly, using RCU. The caller must
+ * ensure calls to this function are made within rcu_read_lock() regions.
+ * Other readers (lock-free or otherwise) and modifications may be running
+ * concurrently.
+ *
+ * It is still required that the caller manage the synchronization and
+ * lifetimes of the items. So if RCU lock-free lookups are used, typically
+ * this would mean that the items have their own locks, or are amenable to
+ * lock-free access; and that the items are freed by RCU (or only freed after
+ * having been deleted from the idr tree *and* a synchronize_rcu() grace
+ * period).
+ */
+
 /*
  * This is what we export.
  */
diff --git a/lib/idr.c b/lib/idr.c
index 44ab3b2a4eb..21e12af1f23 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -456,7 +456,8 @@ EXPORT_SYMBOL(idr_destroy);
  * return indicates that @id is not valid or you passed %NULL in
  * idr_get_new().
  *
- * The caller must serialize idr_find() vs idr_get_new() and idr_remove().
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
  */
 void *idr_find(struct idr *idp, int id)
 {
@@ -464,7 +465,7 @@ void *idr_find(struct idr *idp, int id)
 	struct idr_layer *p;
 
 	n = idp->layers * IDR_BITS;
-	p = idp->top;
+	p = rcu_dereference(idp->top);
 
 	/* Mask off upper bits we don't use for the search. */
 	id &= MAX_ID_MASK;
@@ -474,7 +475,7 @@ void *idr_find(struct idr *idp, int id)
 
 	while (n > 0 && p) {
 		n -= IDR_BITS;
-		p = p->ary[(id >> n) & IDR_MASK];
+		p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
 	}
 	return((void *)p);
 }
@@ -507,7 +508,7 @@ int idr_for_each(struct idr *idp,
 	struct idr_layer **paa = &pa[0];
 
 	n = idp->layers * IDR_BITS;
-	p = idp->top;
+	p = rcu_dereference(idp->top);
 	max = 1 << n;
 
 	id = 0;
@@ -515,7 +516,7 @@ int idr_for_each(struct idr *idp,
 		while (n > 0 && p) {
 			n -= IDR_BITS;
 			*paa++ = p;
-			p = p->ary[(id >> n) & IDR_MASK];
+			p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
 		}
 
 		if (p) {
-- 
GitLab


From cf481c20c476ad2c0febdace9ce23f5a4db19582 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:02 -0700
Subject: [PATCH 749/853] idr: make idr_remove rcu-safe

Introduce the free_layer() routine: it is the one that actually frees memory
after a grace period has elapsed.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 57 +++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/lib/idr.c b/lib/idr.c
index 21e12af1f23..3476f8203e9 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -52,6 +52,19 @@ static struct idr_layer *get_from_free_list(struct idr *idp)
 	return(p);
 }
 
+static void idr_layer_rcu_free(struct rcu_head *head)
+{
+	struct idr_layer *layer;
+
+	layer = container_of(head, struct idr_layer, rcu_head);
+	kmem_cache_free(idr_layer_cache, layer);
+}
+
+static inline void free_layer(struct idr_layer *p)
+{
+	call_rcu(&p->rcu_head, idr_layer_rcu_free);
+}
+
 /* only called when idp->lock is held */
 static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
 {
@@ -331,6 +344,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
 	struct idr_layer *p = idp->top;
 	struct idr_layer **pa[MAX_LEVEL];
 	struct idr_layer ***paa = &pa[0];
+	struct idr_layer *to_free;
 	int n;
 
 	*paa = NULL;
@@ -346,13 +360,18 @@ static void sub_remove(struct idr *idp, int shift, int id)
 	n = id & IDR_MASK;
 	if (likely(p != NULL && test_bit(n, &p->bitmap))){
 		__clear_bit(n, &p->bitmap);
-		p->ary[n] = NULL;
+		rcu_assign_pointer(p->ary[n], NULL);
+		to_free = NULL;
 		while(*paa && ! --((**paa)->count)){
-			move_to_free_list(idp, **paa);
+			if (to_free)
+				free_layer(to_free);
+			to_free = **paa;
 			**paa-- = NULL;
 		}
 		if (!*paa)
 			idp->layers = 0;
+		if (to_free)
+			free_layer(to_free);
 	} else
 		idr_remove_warning(id);
 }
@@ -365,22 +384,34 @@ static void sub_remove(struct idr *idp, int shift, int id)
 void idr_remove(struct idr *idp, int id)
 {
 	struct idr_layer *p;
+	struct idr_layer *to_free;
 
 	/* Mask off upper bits we don't use for the search. */
 	id &= MAX_ID_MASK;
 
 	sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
 	if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
-	    idp->top->ary[0]) {  // We can drop a layer
-
+	    idp->top->ary[0]) {
+		/*
+		 * Single child at leftmost slot: we can shrink the tree.
+		 * This level is not needed anymore since when layers are
+		 * inserted, they are inserted at the top of the existing
+		 * tree.
+		 */
+		to_free = idp->top;
 		p = idp->top->ary[0];
-		idp->top->bitmap = idp->top->count = 0;
-		move_to_free_list(idp, idp->top);
-		idp->top = p;
+		rcu_assign_pointer(idp->top, p);
 		--idp->layers;
+		to_free->bitmap = to_free->count = 0;
+		free_layer(to_free);
 	}
 	while (idp->id_free_cnt >= IDR_FREE_MAX) {
 		p = get_from_free_list(idp);
+		/*
+		 * Note: we don't call the rcu callback here, since the only
+		 * layers that fall into the freelist are those that have been
+		 * preallocated.
+		 */
 		kmem_cache_free(idr_layer_cache, p);
 	}
 	return;
@@ -421,15 +452,13 @@ void idr_remove_all(struct idr *idp)
 
 		id += 1 << n;
 		while (n < fls(id)) {
-			if (p) {
-				memset(p, 0, sizeof *p);
-				move_to_free_list(idp, p);
-			}
+			if (p)
+				free_layer(p);
 			n += IDR_BITS;
 			p = *--paa;
 		}
 	}
-	idp->top = NULL;
+	rcu_assign_pointer(idp->top, NULL);
 	idp->layers = 0;
 }
 EXPORT_SYMBOL(idr_remove_all);
@@ -546,7 +575,7 @@ EXPORT_SYMBOL(idr_for_each);
  * A -ENOENT return indicates that @id was not found.
  * A -EINVAL return indicates that @id was not within valid constraints.
  *
- * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove().
+ * The caller must serialize with writers.
  */
 void *idr_replace(struct idr *idp, void *ptr, int id)
 {
@@ -572,7 +601,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
 		return ERR_PTR(-ENOENT);
 
 	old_p = p->ary[n];
-	p->ary[n] = ptr;
+	rcu_assign_pointer(p->ary[n], ptr);
 
 	return old_p;
 }
-- 
GitLab


From 983bfb7db303cfde56ae5bbf4e0f2f46e38c9576 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:03 -0700
Subject: [PATCH 750/853] ipc: call idr_find() without locking in ipc_lock()

Call idr_find() locklessly from ipc_lock(), since the idr tree is now RCU
protected.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Acked-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/util.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/ipc/util.c b/ipc/util.c
index 3339177b336..0f468c34e83 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -688,10 +688,6 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
  * Look for an id in the ipc ids idr and lock the associated ipc object.
  *
  * The ipc object is locked on exit.
- *
- * This is the routine that should be called when the rw_mutex is not already
- * held, i.e. idr tree not protected: it protects the idr tree in read mode
- * during the idr_find().
  */
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
@@ -699,18 +695,13 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 	struct kern_ipc_perm *out;
 	int lid = ipcid_to_idx(id);
 
-	down_read(&ids->rw_mutex);
-
 	rcu_read_lock();
 	out = idr_find(&ids->ipcs_idr, lid);
 	if (out == NULL) {
 		rcu_read_unlock();
-		up_read(&ids->rw_mutex);
 		return ERR_PTR(-EINVAL);
 	}
 
-	up_read(&ids->rw_mutex);
-
 	spin_lock(&out->lock);
 	
 	/* ipc_rmid() may have already freed the ID while ipc_lock
-- 
GitLab


From 00c2bf85d8febfcfddde63822043462b026134ff Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:03 -0700
Subject: [PATCH 751/853] ipc: get rid of ipc_lock_down()

Remove the ipc_lock_down() routines: they existed so that idr_find() could be
called locklessly when the ipc ids rw_mutex was already held.  Now that
ipc_lock() itself calls idr_find() locklessly, they are not needed anymore.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Acked-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/shm.c  | 21 +++------------------
 ipc/util.c | 52 +---------------------------------------------------
 ipc/util.h |  6 ------
 3 files changed, 4 insertions(+), 75 deletions(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index a726aebce7d..e77ec698cf4 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -111,24 +111,9 @@ void __init shm_init (void)
 				IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
-/*
- * shm_lock_(check_)down routines are called in the paths where the rw_mutex
- * is held to protect access to the idr tree.
- */
-static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
-						int id)
-{
-	struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
-
-	if (IS_ERR(ipcp))
-		return (struct shmid_kernel *)ipcp;
-
-	return container_of(ipcp, struct shmid_kernel, shm_perm);
-}
-
 /*
  * shm_lock_(check_) routines are called in the paths where the rw_mutex
- * is not held.
+ * is not necessarily held.
  */
 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
 {
@@ -211,7 +196,7 @@ static void shm_close(struct vm_area_struct *vma)
 
 	down_write(&shm_ids(ns).rw_mutex);
 	/* remove from the list of attaches of the shm segment */
-	shp = shm_lock_down(ns, sfd->id);
+	shp = shm_lock(ns, sfd->id);
 	BUG_ON(IS_ERR(shp));
 	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_dtim = get_seconds();
@@ -932,7 +917,7 @@ invalid:
 
 out_nattch:
 	down_write(&shm_ids(ns).rw_mutex);
-	shp = shm_lock_down(ns, shmid);
+	shp = shm_lock(ns, shmid);
 	BUG_ON(IS_ERR(shp));
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
diff --git a/ipc/util.c b/ipc/util.c
index 0f468c34e83..49b3ea615dc 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -716,56 +716,6 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 	return out;
 }
 
-/**
- * ipc_lock_down - Lock an ipc structure with rw_sem held
- * @ids: IPC identifier set
- * @id: ipc id to look for
- *
- * Look for an id in the ipc ids idr and lock the associated ipc object.
- *
- * The ipc object is locked on exit.
- *
- * This is the routine that should be called when the rw_mutex is already
- * held, i.e. idr tree protected.
- */
-
-struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
-{
-	struct kern_ipc_perm *out;
-	int lid = ipcid_to_idx(id);
-
-	rcu_read_lock();
-	out = idr_find(&ids->ipcs_idr, lid);
-	if (out == NULL) {
-		rcu_read_unlock();
-		return ERR_PTR(-EINVAL);
-	}
-
-	spin_lock(&out->lock);
-
-	/*
-	 * No need to verify that the structure is still valid since the
-	 * rw_mutex is held.
-	 */
-	return out;
-}
-
-struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id)
-{
-	struct kern_ipc_perm *out;
-
-	out = ipc_lock_down(ids, id);
-	if (IS_ERR(out))
-		return out;
-
-	if (ipc_checkid(out, id)) {
-		ipc_unlock(out);
-		return ERR_PTR(-EIDRM);
-	}
-
-	return out;
-}
-
 struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
 {
 	struct kern_ipc_perm *out;
@@ -837,7 +787,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
 	int err;
 
 	down_write(&ids->rw_mutex);
-	ipcp = ipc_lock_check_down(ids, id);
+	ipcp = ipc_lock_check(ids, id);
 	if (IS_ERR(ipcp)) {
 		err = PTR_ERR(ipcp);
 		goto out_up;
diff --git a/ipc/util.h b/ipc/util.h
index cdb966aebe0..3646b45a03c 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -102,11 +102,6 @@ void* ipc_rcu_alloc(int size);
 void ipc_rcu_getref(void *ptr);
 void ipc_rcu_putref(void *ptr);
 
-/*
- * ipc_lock_down: called with rw_mutex held
- * ipc_lock: called without that lock held
- */
-struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 
 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
@@ -155,7 +150,6 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
 	rcu_read_unlock();
 }
 
-struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id);
 struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
 int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
 			struct ipc_ops *ops, struct ipc_params *params);
-- 
GitLab


From 4daa28f6d8f5cda8ea0f55048e3c8811c384cbdd Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 25 Jul 2008 01:48:04 -0700
Subject: [PATCH 752/853] ipc/sem.c: convert undo structures to struct
 list_head

The undo structures contain two linked lists; the attached patch replaces
them with generic struct list_head lists.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h |  12 ++--
 ipc/sem.c           | 163 ++++++++++++++++++++++++--------------------
 2 files changed, 95 insertions(+), 80 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index c8eaad9e4b7..6a1af1b49a1 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -95,7 +95,7 @@ struct sem_array {
 	struct sem		*sem_base;	/* ptr to first semaphore in array */
 	struct sem_queue	*sem_pending;	/* pending operations to be processed */
 	struct sem_queue	**sem_pending_last; /* last pending operation */
-	struct sem_undo		*undo;		/* undo requests on this array */
+	struct list_head	list_id;	/* undo requests on this array */
 	unsigned long		sem_nsems;	/* no. of semaphores in array */
 };
 
@@ -118,8 +118,8 @@ struct sem_queue {
  * when the process exits.
  */
 struct sem_undo {
-	struct sem_undo *	proc_next;	/* next entry on this process */
-	struct sem_undo *	id_next;	/* next entry on this semaphore set */
+	struct list_head	list_proc;	/* per-process list: all undos from one process */
+	struct list_head	list_id;	/* per semaphore array list: all undos for one array */
 	int			semid;		/* semaphore set identifier */
 	short *			semadj;		/* array of adjustments, one per semaphore */
 };
@@ -128,9 +128,9 @@ struct sem_undo {
  * that may be shared among all a CLONE_SYSVSEM task group.
  */ 
 struct sem_undo_list {
-	atomic_t	refcnt;
-	spinlock_t	lock;
-	struct sem_undo	*proc_list;
+	atomic_t		refcnt;
+	spinlock_t		lock;
+	struct list_head	list_proc;
 };
 
 struct sysv_sem {
diff --git a/ipc/sem.c b/ipc/sem.c
index e9418df5ff3..4f26c715735 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -274,7 +274,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_base = (struct sem *) &sma[1];
 	/* sma->sem_pending = NULL; */
 	sma->sem_pending_last = &sma->sem_pending;
-	/* sma->undo = NULL; */
+	INIT_LIST_HEAD(&sma->list_id);
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
 	sem_unlock(sma);
@@ -536,7 +536,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	 * (They will be freed without any further action in exit_sem()
 	 * or during the next semop.)
 	 */
-	for (un = sma->undo; un; un = un->id_next)
+	assert_spin_locked(&sma->sem_perm.lock);
+	list_for_each_entry(un, &sma->list_id, list_id)
 		un->semid = -1;
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
@@ -763,9 +764,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 
 		for (i = 0; i < nsems; i++)
 			sma->sem_base[i].semval = sem_io[i];
-		for (un = sma->undo; un; un = un->id_next)
+
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_for_each_entry(un, &sma->list_id, list_id) {
 			for (i = 0; i < nsems; i++)
 				un->semadj[i] = 0;
+		}
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -797,12 +801,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	{
 		int val = arg.val;
 		struct sem_undo *un;
+
 		err = -ERANGE;
 		if (val > SEMVMX || val < 0)
 			goto out_unlock;
 
-		for (un = sma->undo; un; un = un->id_next)
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_for_each_entry(un, &sma->list_id, list_id)
 			un->semadj[semnum] = 0;
+
 		curr->semval = val;
 		curr->sempid = task_tgid_vnr(current);
 		sma->sem_ctime = get_seconds();
@@ -952,6 +959,8 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 			return -ENOMEM;
 		spin_lock_init(&undo_list->lock);
 		atomic_set(&undo_list->refcnt, 1);
+		INIT_LIST_HEAD(&undo_list->list_proc);
+
 		current->sysvsem.undo_list = undo_list;
 	}
 	*undo_listp = undo_list;
@@ -960,25 +969,30 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 
 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 {
-	struct sem_undo **last, *un;
-
-	last = &ulp->proc_list;
-	un = *last;
-	while(un != NULL) {
-		if(un->semid==semid)
-			break;
-		if(un->semid==-1) {
-			*last=un->proc_next;
-			kfree(un);
-		} else {
-			last=&un->proc_next;
+	struct sem_undo *walk, *tmp;
+
+	assert_spin_locked(&ulp->lock);
+	list_for_each_entry_safe(walk, tmp, &ulp->list_proc, list_proc) {
+		if (walk->semid == semid)
+			return walk;
+		if (walk->semid == -1) {
+			list_del(&walk->list_proc);
+			kfree(walk);
 		}
-		un=*last;
 	}
-	return un;
+	return NULL;
 }
 
-static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
+/**
+ * find_alloc_undo - Lookup (and if not present create) undo array
+ * @ns: namespace
+ * @semid: semaphore array id
+ *
+ * The function looks up (and if not present creates) the undo structure.
+ * The size of the undo structure depends on the size of the semaphore
+ * array, thus the alloc path is not that straightforward.
+ */
+static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 {
 	struct sem_array *sma;
 	struct sem_undo_list *ulp;
@@ -997,6 +1011,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 		goto out;
 
 	/* no undo structure around - allocate one. */
+	/* step 1: figure out the size of the semaphore array */
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma))
 		return ERR_PTR(PTR_ERR(sma));
@@ -1004,15 +1019,19 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	nsems = sma->sem_nsems;
 	sem_getref_and_unlock(sma);
 
+	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
 		sem_putref(sma);
 		return ERR_PTR(-ENOMEM);
 	}
-	new->semadj = (short *) &new[1];
-	new->semid = semid;
 
+	/* step 3: Acquire the lock on the undo list pointer */
 	spin_lock(&ulp->lock);
+
+	/* step 4: check for races: someone else allocated the undo struct,
+	 *         semaphore array was destroyed.
+	 */
 	un = lookup_undo(ulp, semid);
 	if (un) {
 		spin_unlock(&ulp->lock);
@@ -1028,13 +1047,17 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 		un = ERR_PTR(-EIDRM);
 		goto out;
 	}
-	new->proc_next = ulp->proc_list;
-	ulp->proc_list = new;
-	new->id_next = sma->undo;
-	sma->undo = new;
+	/* step 5: initialize & link new undo structure */
+	new->semadj = (short *) &new[1];
+	new->semid = semid;
+	assert_spin_locked(&ulp->lock);
+	list_add(&new->list_proc, &ulp->list_proc);
+	assert_spin_locked(&sma->sem_perm.lock);
+	list_add(&new->list_id, &sma->list_id);
+
 	sem_unlock(sma);
-	un = new;
 	spin_unlock(&ulp->lock);
+	un = new;
 out:
 	return un;
 }
@@ -1090,9 +1113,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 			alter = 1;
 	}
 
-retry_undos:
 	if (undos) {
-		un = find_undo(ns, semid);
+		un = find_alloc_undo(ns, semid);
 		if (IS_ERR(un)) {
 			error = PTR_ERR(un);
 			goto out_free;
@@ -1107,14 +1129,14 @@ retry_undos:
 	}
 
 	/*
-	 * semid identifiers are not unique - find_undo may have
+	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
-	 * and now a new array with received the same id. Check and retry.
+	 * and now a new array with received the same id. Check and fail.
 	 */
-	if (un && un->semid == -1) {
-		sem_unlock(sma);
-		goto retry_undos;
-	}
+	error = -EIDRM;
+	if (un && un->semid == -1)
+		goto out_unlock_free;
+
 	error = -EFBIG;
 	if (max >= sma->sem_nsems)
 		goto out_unlock_free;
@@ -1243,56 +1265,44 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
  */
 void exit_sem(struct task_struct *tsk)
 {
-	struct sem_undo_list *undo_list;
-	struct sem_undo *u, **up;
-	struct ipc_namespace *ns;
+	struct sem_undo_list *ulp;
+	struct sem_undo *un, *tmp;
 
-	undo_list = tsk->sysvsem.undo_list;
-	if (!undo_list)
+	ulp = tsk->sysvsem.undo_list;
+	if (!ulp)
 		return;
 	tsk->sysvsem.undo_list = NULL;
 
-	if (!atomic_dec_and_test(&undo_list->refcnt))
+	if (!atomic_dec_and_test(&ulp->refcnt))
 		return;
 
-	ns = tsk->nsproxy->ipc_ns;
-	/* There's no need to hold the semundo list lock, as current
-         * is the last task exiting for this undo list.
-	 */
-	for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
+	spin_lock(&ulp->lock);
+
+	list_for_each_entry_safe(un, tmp, &ulp->list_proc, list_proc) {
 		struct sem_array *sma;
-		int nsems, i;
-		struct sem_undo *un, **unp;
-		int semid;
-	       
-		semid = u->semid;
-
-		if(semid == -1)
-			continue;
-		sma = sem_lock(ns, semid);
+		int i;
+
+		if (un->semid == -1)
+			goto free;
+
+		sma = sem_lock(tsk->nsproxy->ipc_ns, un->semid);
 		if (IS_ERR(sma))
-			continue;
+			goto free;
 
-		if (u->semid == -1)
-			goto next_entry;
+		if (un->semid == -1)
+			goto unlock_free;
 
-		BUG_ON(sem_checkid(sma, u->semid));
+		BUG_ON(sem_checkid(sma, un->semid));
 
-		/* remove u from the sma->undo list */
-		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
-			if (u == un)
-				goto found;
-		}
-		printk ("exit_sem undo list error id=%d\n", u->semid);
-		goto next_entry;
-found:
-		*unp = un->id_next;
-		/* perform adjustments registered in u */
-		nsems = sma->sem_nsems;
-		for (i = 0; i < nsems; i++) {
+		/* remove un from sma->list_id */
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_del(&un->list_id);
+
+		/* perform adjustments registered in un */
+		for (i = 0; i < sma->sem_nsems; i++) {
 			struct sem * semaphore = &sma->sem_base[i];
-			if (u->semadj[i]) {
-				semaphore->semval += u->semadj[i];
+			if (un->semadj[i]) {
+				semaphore->semval += un->semadj[i];
 				/*
 				 * Range checks of the new semaphore value,
 				 * not defined by sus:
@@ -1316,10 +1326,15 @@ found:
 		sma->sem_otime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
-next_entry:
+unlock_free:
 		sem_unlock(sma);
+free:
+		assert_spin_locked(&ulp->lock);
+		list_del(&un->list_proc);
+		kfree(un);
 	}
-	kfree(undo_list);
+	spin_unlock(&ulp->lock);
+	kfree(ulp);
 }
 
 #ifdef CONFIG_PROC_FS
-- 
GitLab


From 2c0c29d414087f3b021059673c20a7088f5f1fff Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 25 Jul 2008 01:48:05 -0700
Subject: [PATCH 753/853] ipc/sem.c: remove unused entries from struct
 sem_queue

sem_queue.sma and sem_queue.id were never used; the attached patch removes
them.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reviewed-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h | 2 --
 ipc/sem.c           | 2 --
 2 files changed, 4 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 6a1af1b49a1..87756ef1198 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -107,8 +107,6 @@ struct sem_queue {
 	struct sem_undo *	undo;	 /* undo structure */
 	int    			pid;	 /* process id of requesting process */
 	int    			status;	 /* completion status of operation */
-	struct sem_array *	sma;	 /* semaphore array for operations */
-	int			id;	 /* internal sem id */
 	struct sembuf *		sops;	 /* array of pending operations */
 	int			nsops;	 /* number of operations */
 	int			alter;   /* does the operation alter the array? */
diff --git a/ipc/sem.c b/ipc/sem.c
index 4f26c715735..d5ce4000ca1 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1160,12 +1160,10 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	 * task into the pending queue and go to sleep.
 	 */
 		
-	queue.sma = sma;
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
 	queue.pid = task_tgid_vnr(current);
-	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
 		append_to_queue(sma ,&queue);
-- 
GitLab


From a1193f8ec091cd8fd309cc2982abe4499f6f2b4d Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 25 Jul 2008 01:48:06 -0700
Subject: [PATCH 754/853] ipc/sem.c: convert sem_array.sem_pending to struct
 list_head

sem_array.sem_pending is a doubly linked list; the attached patch converts
it to struct list_head.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reviewed-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h | 12 +++---
 ipc/sem.c           | 92 +++++++++++++++++----------------------------
 2 files changed, 40 insertions(+), 64 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 87756ef1198..d42599395d7 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -93,21 +93,19 @@ struct sem_array {
 	time_t			sem_otime;	/* last semop time */
 	time_t			sem_ctime;	/* last change time */
 	struct sem		*sem_base;	/* ptr to first semaphore in array */
-	struct sem_queue	*sem_pending;	/* pending operations to be processed */
-	struct sem_queue	**sem_pending_last; /* last pending operation */
+	struct list_head	sem_pending;	/* pending operations to be processed */
 	struct list_head	list_id;	/* undo requests on this array */
 	unsigned long		sem_nsems;	/* no. of semaphores in array */
 };
 
 /* One queue for each sleeping process in the system. */
 struct sem_queue {
-	struct sem_queue *	next;	 /* next entry in the queue */
-	struct sem_queue **	prev;	 /* previous entry in the queue, *(q->prev) == q */
-	struct task_struct*	sleeper; /* this process */
-	struct sem_undo *	undo;	 /* undo structure */
+	struct list_head	list;	 /* queue of pending operations */
+	struct task_struct	*sleeper; /* this process */
+	struct sem_undo		*undo;	 /* undo structure */
 	int    			pid;	 /* process id of requesting process */
 	int    			status;	 /* completion status of operation */
-	struct sembuf *		sops;	 /* array of pending operations */
+	struct sembuf		*sops;	 /* array of pending operations */
 	int			nsops;	 /* number of operations */
 	int			alter;   /* does the operation alter the array? */
 };
diff --git a/ipc/sem.c b/ipc/sem.c
index d5ce4000ca1..3ca232736b3 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -272,8 +272,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	ns->used_sems += nsems;
 
 	sma->sem_base = (struct sem *) &sma[1];
-	/* sma->sem_pending = NULL; */
-	sma->sem_pending_last = &sma->sem_pending;
+	INIT_LIST_HEAD(&sma->sem_pending);
 	INIT_LIST_HEAD(&sma->list_id);
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
@@ -331,38 +330,6 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg)
 	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
 }
 
-/* Manage the doubly linked list sma->sem_pending as a FIFO:
- * insert new queue elements at the tail sma->sem_pending_last.
- */
-static inline void append_to_queue (struct sem_array * sma,
-				    struct sem_queue * q)
-{
-	*(q->prev = sma->sem_pending_last) = q;
-	*(sma->sem_pending_last = &q->next) = NULL;
-}
-
-static inline void prepend_to_queue (struct sem_array * sma,
-				     struct sem_queue * q)
-{
-	q->next = sma->sem_pending;
-	*(q->prev = &sma->sem_pending) = q;
-	if (q->next)
-		q->next->prev = &q->next;
-	else /* sma->sem_pending_last == &sma->sem_pending */
-		sma->sem_pending_last = &q->next;
-}
-
-static inline void remove_from_queue (struct sem_array * sma,
-				      struct sem_queue * q)
-{
-	*(q->prev) = q->next;
-	if (q->next)
-		q->next->prev = q->prev;
-	else /* sma->sem_pending_last == &q->next */
-		sma->sem_pending_last = q->prev;
-	q->prev = NULL; /* mark as removed */
-}
-
 /*
  * Determine whether a sequence of semaphore operations would succeed
  * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
@@ -438,16 +405,15 @@ static void update_queue (struct sem_array * sma)
 	int error;
 	struct sem_queue * q;
 
-	q = sma->sem_pending;
-	while(q) {
+	q = list_entry(sma->sem_pending.next, struct sem_queue, list);
+	while (&q->list != &sma->sem_pending) {
 		error = try_atomic_semop(sma, q->sops, q->nsops,
 					 q->undo, q->pid);
 
 		/* Does q->sleeper still need to sleep? */
 		if (error <= 0) {
 			struct sem_queue *n;
-			remove_from_queue(sma,q);
-			q->status = IN_WAKEUP;
+
 			/*
 			 * Continue scanning. The next operation
 			 * that must be checked depends on the type of the
@@ -458,11 +424,26 @@ static void update_queue (struct sem_array * sma)
 			 *   for semaphore values to become 0.
 			 * - if the operation didn't modify the array,
 			 *   then just continue.
+			 * The order of list_del() and reading ->next
+			 * is crucial: In the former case, the list_del()
+			 * must be done first [because we might be the
+			 * first entry in ->sem_pending], in the latter
+			 * case the list_del() must be done last
+			 * [because the list is invalid after the list_del()]
 			 */
-			if (q->alter)
-				n = sma->sem_pending;
-			else
-				n = q->next;
+			if (q->alter) {
+				list_del(&q->list);
+				n = list_entry(sma->sem_pending.next,
+						struct sem_queue, list);
+			} else {
+				n = list_entry(q->list.next, struct sem_queue,
+						list);
+				list_del(&q->list);
+			}
+
+			/* wake up the waiting thread */
+			q->status = IN_WAKEUP;
+
 			wake_up_process(q->sleeper);
 			/* hands-off: q will disappear immediately after
 			 * writing q->status.
@@ -471,7 +452,7 @@ static void update_queue (struct sem_array * sma)
 			q->status = error;
 			q = n;
 		} else {
-			q = q->next;
+			q = list_entry(q->list.next, struct sem_queue, list);
 		}
 	}
 }
@@ -491,7 +472,7 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
 	struct sem_queue * q;
 
 	semncnt = 0;
-	for (q = sma->sem_pending; q; q = q->next) {
+	list_for_each_entry(q, &sma->sem_pending, list) {
 		struct sembuf * sops = q->sops;
 		int nsops = q->nsops;
 		int i;
@@ -503,13 +484,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
 	}
 	return semncnt;
 }
+
 static int count_semzcnt (struct sem_array * sma, ushort semnum)
 {
 	int semzcnt;
 	struct sem_queue * q;
 
 	semzcnt = 0;
-	for (q = sma->sem_pending; q; q = q->next) {
+	list_for_each_entry(q, &sma->sem_pending, list) {
 		struct sembuf * sops = q->sops;
 		int nsops = q->nsops;
 		int i;
@@ -529,7 +511,7 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
 	struct sem_undo *un;
-	struct sem_queue *q;
+	struct sem_queue *q, *t;
 	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 
 	/* Invalidate the existing undo structures for this semaphore set.
@@ -541,17 +523,14 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		un->semid = -1;
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
-	q = sma->sem_pending;
-	while(q) {
-		struct sem_queue *n;
-		/* lazy remove_from_queue: we are killing the whole queue */
-		q->prev = NULL;
-		n = q->next;
+
+	list_for_each_entry_safe(q, t, &sma->sem_pending, list) {
+		list_del(&q->list);
+
 		q->status = IN_WAKEUP;
 		wake_up_process(q->sleeper); /* doesn't sleep */
 		smp_wmb();
 		q->status = -EIDRM;	/* hands-off q */
-		q = n;
 	}
 
 	/* Remove the semaphore set from the IDR */
@@ -1166,9 +1145,9 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	queue.pid = task_tgid_vnr(current);
 	queue.alter = alter;
 	if (alter)
-		append_to_queue(sma ,&queue);
+		list_add_tail(&queue.list, &sma->sem_pending);
 	else
-		prepend_to_queue(sma ,&queue);
+		list_add(&queue.list, &sma->sem_pending);
 
 	queue.status = -EINTR;
 	queue.sleeper = current;
@@ -1194,7 +1173,6 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 
 	sma = sem_lock(ns, semid);
 	if (IS_ERR(sma)) {
-		BUG_ON(queue.prev != NULL);
 		error = -EIDRM;
 		goto out_free;
 	}
@@ -1212,7 +1190,7 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	 */
 	if (timeout && jiffies_left == 0)
 		error = -EAGAIN;
-	remove_from_queue(sma,&queue);
+	list_del(&queue.list);
 	goto out_unlock_free;
 
 out_unlock_free:
-- 
GitLab


From 380af1b33b3ff92df5cda96329b58f5d1b6b5a53 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 25 Jul 2008 01:48:06 -0700
Subject: [PATCH 755/853] ipc/sem.c: rewrite undo list locking

The attached patch:
- reverses the locking order of ulp->lock and sem_lock:
  Previously, it was first ulp->lock, then inside sem_lock.
  Now it's the other way around.
- converts the undo structure to rcu.

Benefits:
- With the old locking order, IPC_RMID could not kfree the undo structures.
  The stale entries remained in the linked lists and were released later.
- The patch fixes a race in semtimedop(): if both IPC_RMID and a semget() that
  recreates exactly the same id happen between find_alloc_undo() and sem_lock,
  then semtimedop() would access already kfree'd memory.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reviewed-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h |   6 +-
 ipc/sem.c           | 147 ++++++++++++++++++++++++++++----------------
 2 files changed, 98 insertions(+), 55 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index d42599395d7..1b191c176bc 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -78,6 +78,7 @@ struct  seminfo {
 
 #ifdef __KERNEL__
 #include <asm/atomic.h>
+#include <linux/rcupdate.h>
 
 struct task_struct;
 
@@ -114,7 +115,10 @@ struct sem_queue {
  * when the process exits.
  */
 struct sem_undo {
-	struct list_head	list_proc;	/* per-process list: all undos from one process */
+	struct list_head	list_proc;	/* per-process list: all undos from one process. */
+						/* rcu protected */
+	struct rcu_head		rcu;		/* rcu struct for sem_undo() */
+	struct sem_undo_list	*ulp;		/* sem_undo_list for the process */
 	struct list_head	list_id;	/* per semaphore array list: all undos for one array */
 	int			semid;		/* semaphore set identifier */
 	short *			semadj;		/* array of adjustments, one per semaphore */
diff --git a/ipc/sem.c b/ipc/sem.c
index 3ca232736b3..bf1bc36cb7e 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -504,27 +504,35 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	return semzcnt;
 }
 
+void free_un(struct rcu_head *head)
+{
+	struct sem_undo *un = container_of(head, struct sem_undo, rcu);
+	kfree(un);
+}
+
 /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
  * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
  * remains locked on exit.
  */
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
-	struct sem_undo *un;
-	struct sem_queue *q, *t;
+	struct sem_undo *un, *tu;
+	struct sem_queue *q, *tq;
 	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 
-	/* Invalidate the existing undo structures for this semaphore set.
-	 * (They will be freed without any further action in exit_sem()
-	 * or during the next semop.)
-	 */
+	/* Free the existing undo structures for this semaphore set.  */
 	assert_spin_locked(&sma->sem_perm.lock);
-	list_for_each_entry(un, &sma->list_id, list_id)
+	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
+		list_del(&un->list_id);
+		spin_lock(&un->ulp->lock);
 		un->semid = -1;
+		list_del_rcu(&un->list_proc);
+		spin_unlock(&un->ulp->lock);
+		call_rcu(&un->rcu, free_un);
+	}
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
-
-	list_for_each_entry_safe(q, t, &sma->sem_pending, list) {
+	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
 		list_del(&q->list);
 
 		q->status = IN_WAKEUP;
@@ -948,16 +956,11 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 
 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 {
-	struct sem_undo *walk, *tmp;
+	struct sem_undo *walk;
 
-	assert_spin_locked(&ulp->lock);
-	list_for_each_entry_safe(walk, tmp, &ulp->list_proc, list_proc) {
+	list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
 		if (walk->semid == semid)
 			return walk;
-		if (walk->semid == -1) {
-			list_del(&walk->list_proc);
-			kfree(walk);
-		}
 	}
 	return NULL;
 }
@@ -970,6 +973,8 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
  * The function looks up (and if not present creates) the undo structure.
  * The size of the undo structure depends on the size of the semaphore
  * array, thus the alloc path is not that straightforward.
+ * Lifetime-rules: sem_undo is rcu-protected, on success, the function
+ * performs a rcu_read_lock().
  */
 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 {
@@ -983,11 +988,13 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	if (error)
 		return ERR_PTR(error);
 
+	rcu_read_lock();
 	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
 	spin_unlock(&ulp->lock);
 	if (likely(un!=NULL))
 		goto out;
+	rcu_read_unlock();
 
 	/* no undo structure around - allocate one. */
 	/* step 1: figure out the size of the semaphore array */
@@ -1005,38 +1012,38 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	/* step 3: Acquire the lock on the undo list pointer */
-	spin_lock(&ulp->lock);
-
-	/* step 4: check for races: someone else allocated the undo struct,
-	 *         semaphore array was destroyed.
-	 */
-	un = lookup_undo(ulp, semid);
-	if (un) {
-		spin_unlock(&ulp->lock);
-		kfree(new);
-		sem_putref(sma);
-		goto out;
-	}
+	/* step 3: Acquire the lock on semaphore array */
 	sem_lock_and_putref(sma);
 	if (sma->sem_perm.deleted) {
 		sem_unlock(sma);
-		spin_unlock(&ulp->lock);
 		kfree(new);
 		un = ERR_PTR(-EIDRM);
 		goto out;
 	}
+	spin_lock(&ulp->lock);
+
+	/*
+	 * step 4: check for races: did someone else allocate the undo struct?
+	 */
+	un = lookup_undo(ulp, semid);
+	if (un) {
+		kfree(new);
+		goto success;
+	}
 	/* step 5: initialize & link new undo structure */
 	new->semadj = (short *) &new[1];
+	new->ulp = ulp;
 	new->semid = semid;
 	assert_spin_locked(&ulp->lock);
-	list_add(&new->list_proc, &ulp->list_proc);
+	list_add_rcu(&new->list_proc, &ulp->list_proc);
 	assert_spin_locked(&sma->sem_perm.lock);
 	list_add(&new->list_id, &sma->list_id);
+	un = new;
 
-	sem_unlock(sma);
+success:
 	spin_unlock(&ulp->lock);
-	un = new;
+	rcu_read_lock();
+	sem_unlock(sma);
 out:
 	return un;
 }
@@ -1103,6 +1110,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma)) {
+		if (un)
+			rcu_read_unlock();
 		error = PTR_ERR(sma);
 		goto out_free;
 	}
@@ -1111,10 +1120,26 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
 	 * and now a new array with received the same id. Check and fail.
+	 * This case can be detected checking un->semid. The existance of
+	 * "un" itself is guaranteed by rcu.
 	 */
 	error = -EIDRM;
-	if (un && un->semid == -1)
-		goto out_unlock_free;
+	if (un) {
+		if (un->semid == -1) {
+			rcu_read_unlock();
+			goto out_unlock_free;
+		} else {
+			/*
+			 * rcu lock can be released, "un" cannot disappear:
+			 * - sem_lock is acquired, thus IPC_RMID is
+			 *   impossible.
+			 * - exit_sem is impossible, it always operates on
+			 *   current (or a dead task).
+			 */
+
+			rcu_read_unlock();
+		}
+	}
 
 	error = -EFBIG;
 	if (max >= sma->sem_nsems)
@@ -1242,7 +1267,6 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
 void exit_sem(struct task_struct *tsk)
 {
 	struct sem_undo_list *ulp;
-	struct sem_undo *un, *tmp;
 
 	ulp = tsk->sysvsem.undo_list;
 	if (!ulp)
@@ -1252,28 +1276,47 @@ void exit_sem(struct task_struct *tsk)
 	if (!atomic_dec_and_test(&ulp->refcnt))
 		return;
 
-	spin_lock(&ulp->lock);
-
-	list_for_each_entry_safe(un, tmp, &ulp->list_proc, list_proc) {
+	for (;;) {
 		struct sem_array *sma;
+		struct sem_undo *un;
+		int semid;
 		int i;
 
-		if (un->semid == -1)
-			goto free;
+		rcu_read_lock();
+		un = list_entry(rcu_dereference(ulp->list_proc.next),
+					struct sem_undo, list_proc);
+		if (&un->list_proc == &ulp->list_proc)
+			semid = -1;
+		 else
+			semid = un->semid;
+		rcu_read_unlock();
 
-		sma = sem_lock(tsk->nsproxy->ipc_ns, un->semid);
-		if (IS_ERR(sma))
-			goto free;
+		if (semid == -1)
+			break;
 
-		if (un->semid == -1)
-			goto unlock_free;
+		sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
 
-		BUG_ON(sem_checkid(sma, un->semid));
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (IS_ERR(sma))
+			continue;
 
-		/* remove un from sma->list_id */
+		un = lookup_undo(ulp, semid);
+		if (un == NULL) {
+			/* exit_sem raced with IPC_RMID+semget() that created
+			 * exactly the same semid. Nothing to do.
+			 */
+			sem_unlock(sma);
+			continue;
+		}
+
+		/* remove un from the linked lists */
 		assert_spin_locked(&sma->sem_perm.lock);
 		list_del(&un->list_id);
 
+		spin_lock(&ulp->lock);
+		list_del_rcu(&un->list_proc);
+		spin_unlock(&ulp->lock);
+
 		/* perform adjustments registered in un */
 		for (i = 0; i < sma->sem_nsems; i++) {
 			struct sem * semaphore = &sma->sem_base[i];
@@ -1302,14 +1345,10 @@ void exit_sem(struct task_struct *tsk)
 		sma->sem_otime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
-unlock_free:
 		sem_unlock(sma);
-free:
-		assert_spin_locked(&ulp->lock);
-		list_del(&un->list_proc);
-		kfree(un);
+
+		call_rcu(&un->rcu, free_un);
 	}
-	spin_unlock(&ulp->lock);
 	kfree(ulp);
 }
 
-- 
GitLab


From f1a43f93f0f3bab418800eaccb9e2e3b5427e173 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 25 Jul 2008 01:48:07 -0700
Subject: [PATCH 756/853] ipc: use simple_read_from_buffer()

This patch also removes the unnecessary trailing NUL character.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/mqueue.c | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 3e84b958186..1fdc2eb2f6d 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -314,15 +314,11 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 *	through std routines)
 */
 static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
-				size_t count, loff_t * off)
+				size_t count, loff_t *off)
 {
 	struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
 	char buffer[FILENT_SIZE];
-	size_t slen;
-	loff_t o;
-
-	if (!count)
-		return 0;
+	ssize_t ret;
 
 	spin_lock(&info->lock);
 	snprintf(buffer, sizeof(buffer),
@@ -335,21 +331,14 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 			pid_vnr(info->notify_owner));
 	spin_unlock(&info->lock);
 	buffer[sizeof(buffer)-1] = '\0';
-	slen = strlen(buffer)+1;
-
-	o = *off;
-	if (o > slen)
-		return 0;
-
-	if (o + count > slen)
-		count = slen - o;
 
-	if (copy_to_user(u_data, buffer + o, count))
-		return -EFAULT;
+	ret = simple_read_from_buffer(u_data, count, off, buffer,
+				strlen(buffer));
+	if (ret <= 0)
+		return ret;
 
-	*off = o + count;
 	filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME;
-	return count;
+	return ret;
 }
 
 static int mqueue_flush_file(struct file *filp, fl_owner_t id)
-- 
GitLab


From 9eefe520c814f6f62c5d36a2ddcd3fb99dfdb30e Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:08 -0700
Subject: [PATCH 757/853] ipc: do not use a negative value to re-enable msgmni
 automatic recomputing

This patch proposes an alternative to the "magical
positive-versus-negative number trick" Andrew complained about last week
in http://lkml.org/lkml/2008/6/24/418.

This had been introduced with the patches that scale msgmni to the amount
of lowmem.  With these patches, msgmni has a registered notification
routine that recomputes the msgmni value upon memory add/remove or ipc
namespace creation/removal.

When msgmni is changed from user space (i.e.  value written to the proc
file), that notification routine is unregistered, and the way to make it
registered back is to write a negative value into the proc file.  This is
the "magical positive-versus-negative number trick".

To fix this, a new proc file is introduced: /proc/sys/kernel/auto_msgmni.
This file acts as ON/OFF for msgmni automatic recomputing.

With this patch, the process is the following:
1) kernel boots in "automatic recomputing mode"
   /proc/sys/kernel/msgmni contains the value that has been computed (depends
                           on lowmem)
   /proc/sys/kernel/auto_msgmni contains "1"

2) echo <val> > /proc/sys/kernel/msgmni
   . sets msg_ctlmni to <val>
   . de-activates automatic recomputing (i.e. if, say, some memory is added
     msgmni won't be recomputed anymore)
   . /proc/sys/kernel/auto_msgmni now contains "0"

3) echo "0" > /proc/sys/kernel/auto_msgmni
   . de-activates msgmni automatic recomputing
     (this has the same effect as step 2, except that msg_ctlmni's value
     stays fixed at its current value)

4) echo "1" > /proc/sys/kernel/auto_msgmni
   . recomputes msgmni's value based on the current available memory size
     and number of ipc namespaces
   . re-activates automatic recomputing for msgmni.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Solofo Ramangalahy <Solofo.Ramangalahy@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  3 +-
 ipc/ipc_sysctl.c              | 72 ++++++++++++++++++++++++++++-------
 ipc/ipcns_notifier.c          | 20 +++++++---
 3 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index ea6c18a8b0d..ea330f9e710 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -36,6 +36,7 @@ struct ipc_namespace {
 	int		msg_ctlmni;
 	atomic_t	msg_bytes;
 	atomic_t	msg_hdrs;
+	int		auto_msgmni;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
@@ -53,7 +54,7 @@ extern atomic_t nr_ipc_ns;
 
 extern int register_ipcns_notifier(struct ipc_namespace *);
 extern int cond_register_ipcns_notifier(struct ipc_namespace *);
-extern int unregister_ipcns_notifier(struct ipc_namespace *);
+extern void unregister_ipcns_notifier(struct ipc_namespace *);
 extern int ipcns_notify(unsigned long);
 
 #else /* CONFIG_SYSVIPC */
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index d3497465cc0..69bc85978ba 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table)
 }
 
 /*
- * Routine that is called when a tunable has successfully been changed by
- * hand and it has a callback routine registered on the ipc namespace notifier
- * chain: we don't want such tunables to be recomputed anymore upon memory
- * add/remove or ipc namespace creation/removal.
- * They can come back to a recomputable state by being set to a <0 value.
+ * Routine that is called when the file "auto_msgmni" has successfully been
+ * written.
+ * Two values are allowed:
+ * 0: unregister msgmni's callback routine from the ipc namespace notifier
+ *    chain. This means that msgmni won't be recomputed anymore upon memory
+ *    add/remove or ipc namespace creation/removal.
+ * 1: register back the callback routine.
  */
-static void tunable_set_callback(int val)
+static void ipc_auto_callback(int val)
 {
-	if (val >= 0)
+	if (!val)
 		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
 	else {
 		/*
@@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
 	rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
 
 	if (write && !rc && lenp_bef == *lenp)
-		tunable_set_callback(*((int *)(ipc_table.data)));
+		/*
+		 * Tunable has successfully been changed by hand. Disable its
+		 * automatic adjustment. This simply requires unregistering
+		 * the notifiers that trigger recalculation.
+		 */
+		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
 
 	return rc;
 }
@@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
 					lenp, ppos);
 }
 
+static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
+	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table ipc_table;
+	size_t lenp_bef = *lenp;
+	int oldval;
+	int rc;
+
+	memcpy(&ipc_table, table, sizeof(ipc_table));
+	ipc_table.data = get_ipc(table);
+	oldval = *((int *)(ipc_table.data));
+
+	rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+
+	if (write && !rc && lenp_bef == *lenp) {
+		int newval = *((int *)(ipc_table.data));
+		/*
+		 * The file "auto_msgmni" has correctly been set.
+		 * React by (un)registering the corresponding tunable, if the
+		 * value has changed.
+		 */
+		if (newval != oldval)
+			ipc_auto_callback(newval);
+	}
+
+	return rc;
+}
+
 #else
 #define proc_ipc_doulongvec_minmax NULL
 #define proc_ipc_dointvec	   NULL
 #define proc_ipc_callback_dointvec NULL
+#define proc_ipcauto_dointvec_minmax NULL
 #endif
 
 #ifdef CONFIG_SYSCTL_SYSCALL
@@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
 	rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval,
 		newlen);
 
-	if (newval && newlen && rc > 0) {
+	if (newval && newlen && rc > 0)
 		/*
 		 * Tunable has successfully been changed from userland
 		 */
-		int *data = get_ipc(table);
-
-		tunable_set_callback(*data);
-	}
+		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
 
 	return rc;
 }
@@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
 #define sysctl_ipc_registered_data NULL
 #endif
 
+static int zero;
+static int one = 1;
+
 static struct ctl_table ipc_kern_table[] = {
 	{
 		.ctl_name	= KERN_SHMMAX,
@@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = {
 		.proc_handler	= proc_ipc_dointvec,
 		.strategy	= sysctl_ipc_data,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "auto_msgmni",
+		.data		= &init_ipc_ns.auto_msgmni,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_ipcauto_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{}
 };
 
diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c
index 70ff09183f7..b9b31a4f77e 100644
--- a/ipc/ipcns_notifier.c
+++ b/ipc/ipcns_notifier.c
@@ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self,
 
 int register_ipcns_notifier(struct ipc_namespace *ns)
 {
+	int rc;
+
 	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
 	ns->ipcns_nb.notifier_call = ipcns_callback;
 	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
-	return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+	rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+	if (!rc)
+		ns->auto_msgmni = 1;
+	return rc;
 }
 
 int cond_register_ipcns_notifier(struct ipc_namespace *ns)
 {
+	int rc;
+
 	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
 	ns->ipcns_nb.notifier_call = ipcns_callback;
 	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
-	return blocking_notifier_chain_cond_register(&ipcns_chain,
+	rc = blocking_notifier_chain_cond_register(&ipcns_chain,
 							&ns->ipcns_nb);
+	if (!rc)
+		ns->auto_msgmni = 1;
+	return rc;
 }
 
-int unregister_ipcns_notifier(struct ipc_namespace *ns)
+void unregister_ipcns_notifier(struct ipc_namespace *ns)
 {
-	return blocking_notifier_chain_unregister(&ipcns_chain,
-						&ns->ipcns_nb);
+	blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb);
+	ns->auto_msgmni = 0;
 }
 
 int ipcns_notify(unsigned long val)
-- 
GitLab


From 7833351b5260b3a58b54a0c2e7065001d986d749 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:09 -0700
Subject: [PATCH 758/853] pty: remove unused UNIX98_PTY_COUNT options

The h8300 and sparc options somehow survived when the code stopped using
CONFIG_UNIX98_PTY_COUNT.

Reviewed-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/h8300/Kconfig | 14 --------------
 arch/sparc/Kconfig | 14 --------------
 2 files changed, 28 deletions(-)

diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 085dc6ec152..396ab059efa 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -203,20 +203,6 @@ config UNIX98_PTYS
 	  Read the instructions in <file:Documentation/Changes> pertaining to
 	  pseudo terminals. It's safe to say N.
 
-config UNIX98_PTY_COUNT
-	int "Maximum number of Unix98 PTYs in use (0-2048)"
-	depends on UNIX98_PTYS
-	default "256"
-	help
-	  The maximum number of Unix98 PTYs that can be used at any one time.
-	  The default is 256, and should be enough for desktop systems. Server
-	  machines which support incoming telnet/rlogin/ssh connections and/or
-	  serve several X terminals may want to increase this: every incoming
-	  connection and every xterm uses up one PTY.
-
-	  When not in use, each additional set of 256 PTYs occupy
-	  approximately 8 KB of kernel memory on 32-bit architectures.
-
 source "drivers/char/pcmcia/Kconfig"
 
 source "drivers/serial/Kconfig"
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 789724e61e8..375de7c6d08 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -298,20 +298,6 @@ config UNIX98_PTYS
 	  Read the instructions in <file:Documentation/Changes> pertaining to
 	  pseudo terminals. It's safe to say N.
 
-config UNIX98_PTY_COUNT
-	int "Maximum number of Unix98 PTYs in use (0-2048)"
-	depends on UNIX98_PTYS
-	default "256"
-	help
-	  The maximum number of Unix98 PTYs that can be used at any one time.
-	  The default is 256, and should be enough for desktop systems. Server
-	  machines which support incoming telnet/rlogin/ssh connections and/or
-	  serve several X terminals may want to increase this: every incoming
-	  connection and every xterm uses up one PTY.
-
-	  When not in use, each additional set of 256 PTYs occupy
-	  approximately 8 KB of kernel memory on 32-bit architectures.
-
 endmenu
 
 source "fs/Kconfig"
-- 
GitLab


From 79885b227740b9c7d3057f2de556f4098d37cc8f Mon Sep 17 00:00:00 2001
From: "Edgar E. Iglesias" <edgar.iglesias@axis.com>
Date: Fri, 25 Jul 2008 01:48:10 -0700
Subject: [PATCH 759/853] elf: use ELF_CORE_EFLAGS for kcore ELF header flags

ELF_CORE_EFLAGS is already used by the binfmt_elf coredumper to set correct
arch specific ELF header flags on coredumps.  Use it for kcore dumps as well.
At the moment, this affects the CRIS and the H8300 arch.

Signed-off-by: Edgar E. Iglesias <edgar@axis.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/kcore.c         | 10 +++++-----
 include/asm-h8300/elf.h |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf54..c2370c76fb7 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
 
 #define CORE_STR "CORE"
 
+#ifndef ELF_CORE_EFLAGS
+#define ELF_CORE_EFLAGS	0
+#endif
+
 static int open_kcore(struct inode * inode, struct file * filp)
 {
 	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	elf->e_entry	= 0;
 	elf->e_phoff	= sizeof(struct elfhdr);
 	elf->e_shoff	= 0;
-#if defined(CONFIG_H8300)
-	elf->e_flags	= ELF_FLAGS;
-#else
-	elf->e_flags	= 0;
-#endif
+	elf->e_flags	= ELF_CORE_EFLAGS;
 	elf->e_ehsize	= sizeof(struct elfhdr);
 	elf->e_phentsize= sizeof(struct elf_phdr);
 	elf->e_phnum	= nphdr;
diff --git a/include/asm-h8300/elf.h b/include/asm-h8300/elf.h
index 26bfc7e641d..a8b57d1f412 100644
--- a/include/asm-h8300/elf.h
+++ b/include/asm-h8300/elf.h
@@ -26,10 +26,10 @@ typedef unsigned long elf_fpregset_t;
 #define ELF_DATA	ELFDATA2MSB
 #define ELF_ARCH	EM_H8_300
 #if defined(__H8300H__)
-#define ELF_FLAGS       0x810000
+#define ELF_CORE_EFLAGS 0x810000
 #endif
 #if defined(__H8300S__)
-#define ELF_FLAGS       0x820000
+#define ELF_CORE_EFLAGS 0x820000
 #endif
 
 #define ELF_PLAT_INIT(_r)	_r->er1 = 0
-- 
GitLab


From 8d1e120f695e9bcf01585e052577dc1e099033f9 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:11 -0700
Subject: [PATCH 760/853] proper extern for mwave_s_mdd

This patch adds a proper extern for mwave_s_mdd in
drivers/char/mwave/mwavedd.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mwave/mwavedd.h | 2 ++
 drivers/char/mwave/tp3780i.c | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h
index 8eca61e0a19..7e0d530e2e0 100644
--- a/drivers/char/mwave/mwavedd.h
+++ b/drivers/char/mwave/mwavedd.h
@@ -147,4 +147,6 @@ typedef struct _MWAVE_DEVICE_DATA {
 
 } MWAVE_DEVICE_DATA, *pMWAVE_DEVICE_DATA;
 
+extern MWAVE_DEVICE_DATA mwave_s_mdd;
+
 #endif
diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c
index f282976daaa..c6896970806 100644
--- a/drivers/char/mwave/tp3780i.c
+++ b/drivers/char/mwave/tp3780i.c
@@ -57,8 +57,6 @@
 #include "3780i.h"
 #include "mwavepub.h"
 
-extern MWAVE_DEVICE_DATA mwave_s_mdd;
-
 static unsigned short s_ausThinkpadIrqToField[16] =
 	{ 0xFFFF, 0xFFFF, 0xFFFF, 0x0001, 0x0002, 0x0003, 0xFFFF, 0x0004,
 	0xFFFF, 0xFFFF, 0x0005, 0x0006, 0xFFFF, 0xFFFF, 0xFFFF, 0x0007 };
-- 
GitLab


From 372572e9b1dcc5e36091199be63766d13e5a8ae0 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:11 -0700
Subject: [PATCH 761/853] #if 0 hpet_unregister()

This patch #if 0's the unused hpet_unregister().

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/hpet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index fb0a85a1eb3..b3f5dbc6d88 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -623,6 +623,7 @@ static inline int hpet_tpcheck(struct hpet_task *tp)
 	return -ENXIO;
 }
 
+#if 0
 int hpet_unregister(struct hpet_task *tp)
 {
 	struct hpet_dev *devp;
@@ -652,6 +653,7 @@ int hpet_unregister(struct hpet_task *tp)
 
 	return 0;
 }
+#endif  /*  0  */
 
 static ctl_table hpet_table[] = {
 	{
-- 
GitLab


From 76528a42e2c5199a1208909318a9c9948d25d0b7 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:12 -0700
Subject: [PATCH 762/853] efirtc: push down the BKL

Push it down as far as the EFI method calls.  Someone who knows EFI can do
the other bits.  Also fix another incorrect return code for unknown ioctls
(-EINVAL becomes -ENOTTY).

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/efirtc.c | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c
index d57ca3e4e53..67fbd7aab5d 100644
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -37,8 +37,9 @@
 #include <linux/rtc.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
 
 #define EFI_RTC_VERSION		"0.4"
@@ -51,8 +52,8 @@
 
 static DEFINE_SPINLOCK(efi_rtc_lock);
 
-static int efi_rtc_ioctl(struct inode *inode, struct file *file,
-		     unsigned int cmd, unsigned long arg);
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+							unsigned long arg);
 
 #define is_leap(year) \
           ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
@@ -146,9 +147,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
 	}
 }
 
-static int
-efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-		     unsigned long arg)
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+							unsigned long arg)
 {
 
 	efi_status_t	status;
@@ -175,13 +175,13 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			return -EINVAL;
 
 		case RTC_RD_TIME:
-
+			lock_kernel();
 			spin_lock_irqsave(&efi_rtc_lock, flags);
 
 			status = efi.get_time(&eft, &cap);
 
 			spin_unlock_irqrestore(&efi_rtc_lock,flags);
-
+			unlock_kernel();
 			if (status != EFI_SUCCESS) {
 				/* should never happen */
 				printk(KERN_ERR "efitime: can't read time\n");
@@ -203,11 +203,13 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 
 			convert_to_efi_time(&wtime, &eft);
 
+			lock_kernel();
 			spin_lock_irqsave(&efi_rtc_lock, flags);
 
 			status = efi.set_time(&eft);
 
 			spin_unlock_irqrestore(&efi_rtc_lock,flags);
+			unlock_kernel();
 
 			return status == EFI_SUCCESS ? 0 : -EINVAL;
 
@@ -223,6 +225,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 
 			convert_to_efi_time(&wtime, &eft);
 
+			lock_kernel();
 			spin_lock_irqsave(&efi_rtc_lock, flags);
 			/*
 			 * XXX Fixme:
@@ -233,16 +236,19 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			status = efi.set_wakeup_time((efi_bool_t)enabled, &eft);
 
 			spin_unlock_irqrestore(&efi_rtc_lock,flags);
+			unlock_kernel();
 
 			return status == EFI_SUCCESS ? 0 : -EINVAL;
 
 		case RTC_WKALM_RD:
 
+			lock_kernel();
 			spin_lock_irqsave(&efi_rtc_lock, flags);
 
 			status = efi.get_wakeup_time((efi_bool_t *)&enabled, (efi_bool_t *)&pending, &eft);
 
 			spin_unlock_irqrestore(&efi_rtc_lock,flags);
+			unlock_kernel();
 
 			if (status != EFI_SUCCESS) return -EINVAL;
 
@@ -256,7 +262,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			return copy_to_user(&ewp->time, &wtime,
 					    sizeof(struct rtc_time)) ? -EFAULT : 0;
 	}
-	return -EINVAL;
+	return -ENOTTY;
 }
 
 /*
@@ -265,8 +271,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
  *	up things on a close.
  */
 
-static int
-efi_rtc_open(struct inode *inode, struct file *file)
+static int efi_rtc_open(struct inode *inode, struct file *file)
 {
 	/*
 	 * nothing special to do here
@@ -277,8 +282,7 @@ efi_rtc_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int
-efi_rtc_close(struct inode *inode, struct file *file)
+static int efi_rtc_close(struct inode *inode, struct file *file)
 {
 	return 0;
 }
@@ -289,13 +293,12 @@ efi_rtc_close(struct inode *inode, struct file *file)
 
 static const struct file_operations efi_rtc_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= efi_rtc_ioctl,
+	.unlocked_ioctl	= efi_rtc_ioctl,
 	.open		= efi_rtc_open,
 	.release	= efi_rtc_close,
 };
 
-static struct miscdevice efi_rtc_dev=
-{
+static struct miscdevice efi_rtc_dev= {
 	EFI_RTC_MINOR,
 	"efirtc",
 	&efi_rtc_fops
-- 
GitLab


From 47be36a24defbd19aea1354c416ec99f291c7ab8 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:13 -0700
Subject: [PATCH 763/853] ip2: push BKL down for the firmware interface

(The tty side is already done)

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/ip2/ip2main.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index 9cb48fcd316..689f9dcd3b8 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -203,7 +203,7 @@ static int set_serial_info(i2ChanStrPtr, struct serial_struct __user *);
 
 static ssize_t ip2_ipl_read(struct file *, char __user *, size_t, loff_t *);
 static ssize_t ip2_ipl_write(struct file *, const char __user *, size_t, loff_t *);
-static int ip2_ipl_ioctl(struct inode *, struct file *, UINT, ULONG);
+static long ip2_ipl_ioctl(struct file *, UINT, ULONG);
 static int ip2_ipl_open(struct inode *, struct file *);
 
 static int DumpTraceBuffer(char __user *, int);
@@ -236,7 +236,7 @@ static const struct file_operations ip2_ipl = {
 	.owner		= THIS_MODULE,
 	.read		= ip2_ipl_read,
 	.write		= ip2_ipl_write,
-	.ioctl		= ip2_ipl_ioctl,
+	.unlocked_ioctl	= ip2_ipl_ioctl,
 	.open		= ip2_ipl_open,
 }; 
 
@@ -2845,10 +2845,10 @@ ip2_ipl_write(struct file *pFile, const char __user *pData, size_t count, loff_t
 /*                                                                            */
 /*                                                                            */
 /******************************************************************************/
-static int
-ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
+static long
+ip2_ipl_ioctl (struct file *pFile, UINT cmd, ULONG arg )
 {
-	unsigned int iplminor = iminor(pInode);
+	unsigned int iplminor = iminor(pFile->f_path.dentry->d_inode);
 	int rc = 0;
 	void __user *argp = (void __user *)arg;
 	ULONG __user *pIndex = argp;
@@ -2859,6 +2859,8 @@ ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
 	printk (KERN_DEBUG "IP2IPL: ioctl cmd %d, arg %ld\n", cmd, arg );
 #endif
 
+	lock_kernel();
+
 	switch ( iplminor ) {
 	case 0:	    // IPL device
 		rc = -EINVAL;
@@ -2919,6 +2921,7 @@ ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
 		rc = -ENODEV;
 		break;
 	}
+	unlock_kernel();
 	return rc;
 }
 
-- 
GitLab


From 909d145f0decbc4f17955e1fc4122a669a51fbc0 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:14 -0700
Subject: [PATCH 764/853] mwave: ioctl BKL pushdown

Push the BKL down to the point where it wraps the actual mwave method handlers.

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Eric Sesterhenn <snakebyte@gmx.de>
Cc: Yani Ioannou <yani.ioannou@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mwave/mwavedd.c | 39 ++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 50243fcd87e..4f8d67fed29 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -86,8 +86,8 @@ module_param(mwave_uart_io, int, 0);
 
 static int mwave_open(struct inode *inode, struct file *file);
 static int mwave_close(struct inode *inode, struct file *file);
-static int mwave_ioctl(struct inode *inode, struct file *filp,
-                       unsigned int iocmd, unsigned long ioarg);
+static long mwave_ioctl(struct file *filp, unsigned int iocmd,
+							unsigned long ioarg);
 
 MWAVE_DEVICE_DATA mwave_s_mdd;
 
@@ -119,16 +119,16 @@ static int mwave_close(struct inode *inode, struct file *file)
 	return retval;
 }
 
-static int mwave_ioctl(struct inode *inode, struct file *file,
-                       unsigned int iocmd, unsigned long ioarg)
+static long mwave_ioctl(struct file *file, unsigned int iocmd,
+							unsigned long ioarg)
 {
 	unsigned int retval = 0;
 	pMWAVE_DEVICE_DATA pDrvData = &mwave_s_mdd;
 	void __user *arg = (void __user *)ioarg;
 
-	PRINTK_5(TRACE_MWAVE,
-		"mwavedd::mwave_ioctl, entry inode %p file %p cmd %x arg %x\n",
-		 inode,  file, iocmd, (int) ioarg);
+	PRINTK_4(TRACE_MWAVE,
+		"mwavedd::mwave_ioctl, entry file %p cmd %x arg %x\n",
+		file, iocmd, (int) ioarg);
 
 	switch (iocmd) {
 
@@ -136,7 +136,9 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 			PRINTK_1(TRACE_MWAVE,
 				"mwavedd::mwave_ioctl, IOCTL_MW_RESET"
 				" calling tp3780I_ResetDSP\n");
+			lock_kernel();
 			retval = tp3780I_ResetDSP(&pDrvData->rBDData);
+			unlock_kernel();
 			PRINTK_2(TRACE_MWAVE,
 				"mwavedd::mwave_ioctl, IOCTL_MW_RESET"
 				" retval %x from tp3780I_ResetDSP\n",
@@ -147,7 +149,9 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 			PRINTK_1(TRACE_MWAVE,
 				"mwavedd::mwave_ioctl, IOCTL_MW_RUN"
 				" calling tp3780I_StartDSP\n");
+			lock_kernel();
 			retval = tp3780I_StartDSP(&pDrvData->rBDData);
+			unlock_kernel();
 			PRINTK_2(TRACE_MWAVE,
 				"mwavedd::mwave_ioctl, IOCTL_MW_RUN"
 				" retval %x from tp3780I_StartDSP\n",
@@ -161,8 +165,10 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 				"mwavedd::mwave_ioctl,"
 				" IOCTL_MW_DSP_ABILITIES calling"
 				" tp3780I_QueryAbilities\n");
+			lock_kernel();
 			retval = tp3780I_QueryAbilities(&pDrvData->rBDData,
 					&rAbilities);
+			unlock_kernel();
 			PRINTK_2(TRACE_MWAVE,
 				"mwavedd::mwave_ioctl, IOCTL_MW_DSP_ABILITIES"
 				" retval %x from tp3780I_QueryAbilities\n",
@@ -193,11 +199,13 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 				"mwavedd::mwave_ioctl IOCTL_MW_READ_DATA,"
 				" size %lx, ioarg %lx pusBuffer %p\n",
 				rReadData.ulDataLength, ioarg, pusBuffer);
+			lock_kernel();
 			retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
 					iocmd,
 					pusBuffer,
 					rReadData.ulDataLength,
 					rReadData.usDspAddress);
+			unlock_kernel();
 		}
 			break;
 	
@@ -215,10 +223,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 				" size %lx, ioarg %lx pusBuffer %p\n",
 				rReadData.ulDataLength / 2, ioarg,
 				pusBuffer);
+			lock_kernel();
 			retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
 				iocmd, pusBuffer,
 				rReadData.ulDataLength / 2,
 				rReadData.usDspAddress);
+			unlock_kernel();
 		}
 			break;
 	
@@ -236,10 +246,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 				" size %lx, ioarg %lx pusBuffer %p\n",
 				rWriteData.ulDataLength, ioarg,
 				pusBuffer);
+			lock_kernel();
 			retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
 					iocmd, pusBuffer,
 					rWriteData.ulDataLength,
 					rWriteData.usDspAddress);
+			unlock_kernel();
 		}
 			break;
 	
@@ -257,10 +269,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 				" size %lx, ioarg %lx pusBuffer %p\n",
 				rWriteData.ulDataLength, ioarg,
 				pusBuffer);
+			lock_kernel();
 			retval = tp3780I_ReadWriteDspIStore(&pDrvData->rBDData,
 					iocmd, pusBuffer,
 					rWriteData.ulDataLength,
 					rWriteData.usDspAddress);
+			unlock_kernel();
 		}
 			break;
 	
@@ -281,8 +295,10 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 						ipcnum);
 				return -EINVAL;
 			}
+			lock_kernel();
 			pDrvData->IPCs[ipcnum].bIsHere = FALSE;
 			pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
+			unlock_kernel();
 	
 			PRINTK_2(TRACE_MWAVE,
 				"mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
@@ -307,6 +323,7 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 				return -EINVAL;
 			}
 	
+			lock_kernel();
 			if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
 				DECLARE_WAITQUEUE(wait, current);
 
@@ -347,6 +364,7 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 					" processing\n",
 					ipcnum);
 			}
+			unlock_kernel();
 		}
 			break;
 	
@@ -365,19 +383,18 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
 						ipcnum);
 				return -EINVAL;
 			}
+			lock_kernel();
 			if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
 				pDrvData->IPCs[ipcnum].bIsEnabled = FALSE;
 				if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) {
 					wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue);
 				}
 			}
+			unlock_kernel();
 		}
 			break;
 	
 		default:
-			PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:"
-					" Error: Unrecognized iocmd %x\n",
-					iocmd);
 			return -ENOTTY;
 			break;
 	} /* switch */
@@ -460,7 +477,7 @@ static const struct file_operations mwave_fops = {
 	.owner		= THIS_MODULE,
 	.read		= mwave_read,
 	.write		= mwave_write,
-	.ioctl		= mwave_ioctl,
+	.unlocked_ioctl	= mwave_ioctl,
 	.open		= mwave_open,
 	.release	= mwave_close
 };
-- 
GitLab


From f6759fdcfd79ff1827fd5d4ddfe876164466d30d Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:14 -0700
Subject: [PATCH 765/853] rio: push down the BKL into the firmware ioctl
 handler

TTY side is already done.

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/rio/rio_linux.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index 0cdfee15291..a8f68a3f14d 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -179,7 +179,7 @@ static int rio_set_real_termios(void *ptr);
 static void rio_hungup(void *ptr);
 static void rio_close(void *ptr);
 static int rio_chars_in_buffer(void *ptr);
-static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 static int rio_init_drivers(void);
 
 static void my_hd(void *addr, int len);
@@ -240,7 +240,7 @@ static struct real_driver rio_real_driver = {
 
 static const struct file_operations rio_fw_fops = {
 	.owner = THIS_MODULE,
-	.ioctl = rio_fw_ioctl,
+	.unlocked_ioctl = rio_fw_ioctl,
 };
 
 static struct miscdevice rio_fw_device = {
@@ -560,13 +560,15 @@ static void rio_close(void *ptr)
 
 
-static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	int rc = 0;
 	func_enter();
 
 	/* The "dev" argument isn't used. */
+	lock_kernel();
 	rc = riocontrol(p, 0, cmd, arg, capable(CAP_SYS_ADMIN));
+	unlock_kernel();
 
 	func_exit();
 	return rc;
-- 
GitLab


From 11af7478addd34c42999b3b84095903ed9e67038 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:15 -0700
Subject: [PATCH 766/853] sx: push BKL down into the firmware ioctl handler

Also fix the capability checking for firmware load.

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/sx.c | 73 ++++++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 30 deletions(-)

diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 2162439bbe4..c385206f9db 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -286,8 +286,8 @@ static void sx_close(void *ptr);
 static int sx_chars_in_buffer(void *ptr);
 static int sx_init_board(struct sx_board *board);
 static int sx_init_portstructs(int nboards, int nports);
-static int sx_fw_ioctl(struct inode *inode, struct file *filp,
-		unsigned int cmd, unsigned long arg);
+static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
+						unsigned long arg);
 static int sx_init_drivers(void);
 
 static struct tty_driver *sx_driver;
@@ -396,7 +396,7 @@ static struct real_driver sx_real_driver = {
 
 static const struct file_operations sx_fw_fops = {
 	.owner = THIS_MODULE,
-	.ioctl = sx_fw_ioctl,
+	.unlocked_ioctl = sx_fw_ioctl,
 };
 
 static struct miscdevice sx_fw_device = {
@@ -1686,10 +1686,10 @@ static int do_memtest_w(struct sx_board *board, int min, int max)
 }
 #endif
 
-static int sx_fw_ioctl(struct inode *inode, struct file *filp,
-		unsigned int cmd, unsigned long arg)
+static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
+							unsigned long arg)
 {
-	int rc = 0;
+	long rc = 0;
 	int __user *descr = (int __user *)arg;
 	int i;
 	static struct sx_board *board = NULL;
@@ -1699,13 +1699,10 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
 
 	func_enter();
 
-#if 0
-	/* Removed superuser check: Sysops can use the permissions on the device
-	   file to restrict access. Recommendation: Root only. (root.root 600) */
-	if (!capable(CAP_SYS_ADMIN)) {
+	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
-	}
-#endif
+
+	lock_kernel();
 
 	sx_dprintk(SX_DEBUG_FIRMWARE, "IOCTL %x: %lx\n", cmd, arg);
 
@@ -1720,19 +1717,23 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
 		for (i = 0; i < SX_NBOARDS; i++)
 			sx_dprintk(SX_DEBUG_FIRMWARE, "<%x> ", boards[i].flags);
 		sx_dprintk(SX_DEBUG_FIRMWARE, "\n");
+		unlock_kernel();
 		return -EIO;
 	}
 
 	switch (cmd) {
 	case SXIO_SET_BOARD:
 		sx_dprintk(SX_DEBUG_FIRMWARE, "set board to %ld\n", arg);
+		rc = -EIO;
 		if (arg >= SX_NBOARDS)
-			return -EIO;
+			break;
 		sx_dprintk(SX_DEBUG_FIRMWARE, "not out of range\n");
 		if (!(boards[arg].flags & SX_BOARD_PRESENT))
-			return -EIO;
+			break;
 		sx_dprintk(SX_DEBUG_FIRMWARE, ".. and present!\n");
 		board = &boards[arg];
+		rc = 0;
+		/* FIXME: And this does ... nothing?? */
 		break;
 	case SXIO_GET_TYPE:
 		rc = -ENOENT;	/* If we manage to miss one, return error. */
@@ -1746,7 +1747,7 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
 			rc = SX_TYPE_SI;
 		if (IS_EISA_BOARD(board))
 			rc = SX_TYPE_SI;
-		sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %d\n", rc);
+		sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %ld\n", rc);
 		break;
 	case SXIO_DO_RAMTEST:
 		if (sx_initialized)	/* Already initialized: better not ramtest the board.  */
@@ -1760,19 +1761,26 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
 			rc = do_memtest(board, 0, 0x7ff8);
 			/* if (!rc) rc = do_memtest_w (board, 0, 0x7ff8); */
 		}
-		sx_dprintk(SX_DEBUG_FIRMWARE, "returning memtest result= %d\n",
-			   rc);
+		sx_dprintk(SX_DEBUG_FIRMWARE,
+				"returning memtest result= %ld\n", rc);
 		break;
 	case SXIO_DOWNLOAD:
-		if (sx_initialized)	/* Already initialized */
-			return -EEXIST;
-		if (!sx_reset(board))
-			return -EIO;
+		if (sx_initialized) {/* Already initialized */
+			rc = -EEXIST;
+			break;
+		}
+		if (!sx_reset(board)) {
+			rc = -EIO;
+			break;
+		}
 		sx_dprintk(SX_DEBUG_INIT, "reset the board...\n");
 
 		tmp = kmalloc(SX_CHUNK_SIZE, GFP_USER);
-		if (!tmp)
-			return -ENOMEM;
+		if (!tmp) {
+			rc = -ENOMEM;
+			break;
+		}
+		/* FIXME: check returns */
 		get_user(nbytes, descr++);
 		get_user(offset, descr++);
 		get_user(data, descr++);
@@ -1782,7 +1790,8 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
 						(i + SX_CHUNK_SIZE > nbytes) ?
 						nbytes - i : SX_CHUNK_SIZE)) {
 					kfree(tmp);
-					return -EFAULT;
+					rc = -EFAULT;
+					break;
 				}
 				memcpy_toio(board->base2 + offset + i, tmp,
 						(i + SX_CHUNK_SIZE > nbytes) ?
@@ -1798,13 +1807,17 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
 		rc = sx_nports;
 		break;
 	case SXIO_INIT:
-		if (sx_initialized)	/* Already initialized */
-			return -EEXIST;
+		if (sx_initialized) {	/* Already initialized */
+			rc = -EEXIST;
+			break;
+		}
 		/* This is not allowed until all boards are initialized... */
 		for (i = 0; i < SX_NBOARDS; i++) {
 			if ((boards[i].flags & SX_BOARD_PRESENT) &&
-				!(boards[i].flags & SX_BOARD_INITIALIZED))
-				return -EIO;
+				!(boards[i].flags & SX_BOARD_INITIALIZED)) {
+				rc = -EIO;
+				break;
+			}
 		}
 		for (i = 0; i < SX_NBOARDS; i++)
 			if (!(boards[i].flags & SX_BOARD_PRESENT))
@@ -1832,10 +1845,10 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
 		rc = sx_nports;
 		break;
 	default:
-		printk(KERN_WARNING "Unknown ioctl on firmware device (%x).\n",
-				cmd);
+		rc = -ENOTTY;
 		break;
 	}
+	unlock_kernel();
 	func_exit();
 	return rc;
 }
-- 
GitLab


From e05e9f7c4aeb82eaa23e46b29580ff514590c641 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:16 -0700
Subject: [PATCH 767/853] ixj: push BKL into driver and wrap ioctls

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Domen Puncer <domen@coderock.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/telephony/ixj.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c
index 49cd9793404..ec7aeb502d1 100644
--- a/drivers/telephony/ixj.c
+++ b/drivers/telephony/ixj.c
@@ -6095,15 +6095,15 @@ static int capabilities_check(IXJ *j, struct phone_capability *pcreq)
 	return retval;
 }
 
-static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd, unsigned long arg)
+static long do_ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
 {
 	IXJ_TONE ti;
 	IXJ_FILTER jf;
 	IXJ_FILTER_RAW jfr;
 	void __user *argp = (void __user *)arg;
-
-	unsigned int raise, mant;
+	struct inode *inode = file_p->f_path.dentry->d_inode;
 	unsigned int minor = iminor(inode);
+	unsigned int raise, mant;
 	int board = NUM(inode);
 
 	IXJ *j = get_ixj(NUM(inode));
@@ -6661,6 +6661,15 @@ static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd,
 	return retval;
 }
 
+static long ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
+{
+	long ret;
+	lock_kernel();
+	ret = do_ixj_ioctl(file_p, cmd, arg);
+	unlock_kernel();
+	return ret;
+}
+
 static int ixj_fasync(int fd, struct file *file_p, int mode)
 {
 	IXJ *j = get_ixj(NUM(file_p->f_path.dentry->d_inode));
@@ -6674,7 +6683,7 @@ static const struct file_operations ixj_fops =
         .read           = ixj_enhanced_read,
         .write          = ixj_enhanced_write,
         .poll           = ixj_poll,
-        .ioctl          = ixj_ioctl,
+        .unlocked_ioctl = ixj_ioctl,
         .release        = ixj_release,
         .fasync         = ixj_fasync
 };
-- 
GitLab


From 6d535d3e6ad395345750c361bd2b7f1b9429455d Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:16 -0700
Subject: [PATCH 768/853] ppdev: wrap ioctl handler in driver and push lock
 down

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/ppdev.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 7af7a7e6b9c..bee39fdfba7 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -67,7 +67,7 @@
 #include <linux/major.h>
 #include <linux/ppdev.h>
 #include <linux/smp_lock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define PP_VERSION "ppdev: user-space parallel port driver"
 #define CHRDEV "ppdev"
@@ -328,10 +328,9 @@ static enum ieee1284_phase init_phase (int mode)
 	return IEEE1284_PH_FWD_IDLE;
 }
 
-static int pp_ioctl(struct inode *inode, struct file *file,
-		    unsigned int cmd, unsigned long arg)
+static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	unsigned int minor = iminor(inode);
+	unsigned int minor = iminor(file->f_path.dentry->d_inode);
 	struct pp_struct *pp = file->private_data;
 	struct parport * port;
 	void __user *argp = (void __user *)arg;
@@ -634,6 +633,15 @@ static int pp_ioctl(struct inode *inode, struct file *file,
 	return 0;
 }
 
+static long pp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long ret;
+	lock_kernel();
+	ret = pp_do_ioctl(file, cmd, arg);
+	unlock_kernel();
+	return ret;
+}
+
 static int pp_open (struct inode * inode, struct file * file)
 {
 	unsigned int minor = iminor(inode);
@@ -745,7 +753,7 @@ static const struct file_operations pp_fops = {
 	.read		= pp_read,
 	.write		= pp_write,
 	.poll		= pp_poll,
-	.ioctl		= pp_ioctl,
+	.unlocked_ioctl	= pp_ioctl,
 	.open		= pp_open,
 	.release	= pp_release,
 };
-- 
GitLab


From b8e35919653d76e7dceb8d3b8569c4ec1004d546 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:17 -0700
Subject: [PATCH 769/853] ds1302: push down the BKL into the driver ioctl code

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/ds1302.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index fada6ddefba..c5e67a62395 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -20,10 +20,11 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <linux/bcd.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/rtc.h>
 #if defined(CONFIG_M32R)
 #include <asm/m32r.h>
@@ -153,9 +154,7 @@ static unsigned char days_in_mo[] =
 
 /* ioctl that supports RTC_RD_TIME and RTC_SET_TIME (read and set time/date). */
 
-static int
-rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-	  unsigned long arg)
+static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	unsigned long flags;
 
@@ -165,7 +164,9 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			struct rtc_time rtc_tm;
 
 			memset(&rtc_tm, 0, sizeof (struct rtc_time));
+			lock_kernel();
 			get_rtc_time(&rtc_tm);
+			unlock_kernel();
 			if (copy_to_user((struct rtc_time*)arg, &rtc_tm, sizeof(struct rtc_time)))
 				return -EFAULT;
 			return 0;
@@ -217,6 +218,7 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			BIN_TO_BCD(mon);
 			BIN_TO_BCD(yrs);
 
+			lock_kernel();
 			local_irq_save(flags);
 			CMOS_WRITE(yrs, RTC_YEAR);
 			CMOS_WRITE(mon, RTC_MONTH);
@@ -225,6 +227,7 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			CMOS_WRITE(min, RTC_MINUTES);
 			CMOS_WRITE(sec, RTC_SECONDS);
 			local_irq_restore(flags);
+			unlock_kernel();
 
 			/* Notice that at this point, the RTC is updated but
 			 * the kernel is still running with the old time.
@@ -244,8 +247,10 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			if(copy_from_user(&tcs_val, (int*)arg, sizeof(int)))
 				return -EFAULT;
 
+			lock_kernel();
 			tcs_val = RTC_TCR_PATTERN | (tcs_val & 0x0F);
 			ds1302_writereg(RTC_TRICKLECHARGER, tcs_val);
+			unlock_kernel();
 			return 0;
 		}
 		default:
@@ -282,7 +287,7 @@ get_rtc_status(char *buf)
 
 static const struct file_operations rtc_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= rtc_ioctl,
+	.unlocked_ioctl	= rtc_ioctl,
 };
 
 /* Probe for the chip by writing something to its RAM and try reading it back. */
-- 
GitLab


From 236b8756a2b6f90498d45b2c36d43e5372f2d4b8 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 25 Jul 2008 01:48:17 -0700
Subject: [PATCH 770/853] dsp56k: BKL pushdown

Push the BKL down into the driver ioctl methods

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/dsp56k.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 33c466a4888..19b88504e96 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -36,10 +36,10 @@
 #include <linux/smp_lock.h>
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
+#include <linux/uaccess.h>	/* For put_user and get_user */
 
 #include <asm/atarihw.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>	/* For put_user and get_user */
 
 #include <asm/dsp56k.h>
 
@@ -303,8 +303,8 @@ static ssize_t dsp56k_write(struct file *file, const char __user *buf, size_t co
 	}
 }
 
-static int dsp56k_ioctl(struct inode *inode, struct file *file,
-			unsigned int cmd, unsigned long arg)
+static long dsp56k_ioctl(struct file *file, unsigned int cmd,
+							unsigned long arg)
 {
 	int dev = iminor(inode) & 0x0f;
 	void __user *argp = (void __user *)arg;
@@ -331,8 +331,9 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
 			if (len > DSP56K_MAX_BINARY_LENGTH) {
 				return -EINVAL;
 			}
-    
+			lock_kernel();
 			r = dsp56k_upload(bin, len);
+			unlock_kernel();
 			if (r < 0) {
 				return r;
 			}
@@ -342,12 +343,16 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
 		case DSP56K_SET_TX_WSIZE:
 			if (arg > 4 || arg < 1)
 				return -EINVAL;
+			lock_kernel();
 			dsp56k.tx_wsize = (int) arg;
+			unlock_kernel();
 			break;
 		case DSP56K_SET_RX_WSIZE:
 			if (arg > 4 || arg < 1)
 				return -EINVAL;
+			lock_kernel();
 			dsp56k.rx_wsize = (int) arg;
+			unlock_kernel();
 			break;
 		case DSP56K_HOST_FLAGS:
 		{
@@ -359,6 +364,7 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
 			if(get_user(out, &hf->out) < 0)
 				return -EFAULT;
 
+			lock_kernel();
 			if ((dir & 0x1) && (out & 0x1))
 				dsp56k_host_interface.icr |= DSP56K_ICR_HF0;
 			else if (dir & 0x1)
@@ -373,14 +379,16 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
 			if (dsp56k_host_interface.icr & DSP56K_ICR_HF1) status |= 0x2;
 			if (dsp56k_host_interface.isr & DSP56K_ISR_HF2) status |= 0x4;
 			if (dsp56k_host_interface.isr & DSP56K_ISR_HF3) status |= 0x8;
-
+			unlock_kernel();
 			return put_user(status, &hf->status);
 		}
 		case DSP56K_HOST_CMD:
 			if (arg > 31 || arg < 0)
 				return -EINVAL;
+			lock_kernel();
 			dsp56k_host_interface.cvr = (u_char)((arg & DSP56K_CVR_HV_MASK) |
 							     DSP56K_CVR_HC);
+			unlock_kernel();
 			break;
 		default:
 			return -EINVAL;
@@ -472,7 +480,7 @@ static const struct file_operations dsp56k_fops = {
 	.owner		= THIS_MODULE,
 	.read		= dsp56k_read,
 	.write		= dsp56k_write,
-	.ioctl		= dsp56k_ioctl,
+	.unlocked_ioctl	= dsp56k_ioctl,
 	.open		= dsp56k_open,
 	.release	= dsp56k_release,
 };
-- 
GitLab


From 6ee8928d94841aa764aeaf645ad16daff811dc26 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 25 Jul 2008 01:48:18 -0700
Subject: [PATCH 771/853] nwflash: use simple_read_from_buffer()

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Tim Schmielau <tim@physik3.uni-rostock.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nwflash.c | 31 ++++++++-----------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index ba012c2bdf7..f9f72a21129 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -122,35 +122,20 @@ static int flash_ioctl(struct inode *inodep, struct file *filep, unsigned int cm
 static ssize_t flash_read(struct file *file, char __user *buf, size_t size,
 			  loff_t *ppos)
 {
-	unsigned long p = *ppos;
-	unsigned int count = size;
-	int ret = 0;
+	ssize_t ret;
 
 	if (flashdebug)
 		printk(KERN_DEBUG "flash_read: flash_read: offset=0x%lX, "
 		       "buffer=%p, count=0x%X.\n", p, buf, count);
+	/*
+	 * We now lock against reads and writes. --rmk
+	 */
+	if (mutex_lock_interruptible(&nwflash_mutex))
+		return -ERESTARTSYS;
 
-	if (count)
-		ret = -ENXIO;
-
-	if (p < gbFlashSize) {
-		if (count > gbFlashSize - p)
-			count = gbFlashSize - p;
+	ret = simple_read_from_buffer(buf, size, ppos, FLASH_BASE, gbFlashSize);
+	mutex_unlock(&nwflash_mutex);
 
-		/*
-		 * We now lock against reads and writes. --rmk
-		 */
-		if (mutex_lock_interruptible(&nwflash_mutex))
-			return -ERESTARTSYS;
-
-		ret = copy_to_user(buf, (void *)(FLASH_BASE + p), count);
-		if (ret == 0) {
-			ret = count;
-			*ppos += count;
-		} else
-			ret = -EFAULT;
-		mutex_unlock(&nwflash_mutex);
-	}
 	return ret;
 }
 
-- 
GitLab


From 41aee9a121fd0c31ae22dfe57e8f9ee9d6d85c25 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 25 Jul 2008 01:48:19 -0700
Subject: [PATCH 772/853] Char: mxser, ioctl cleanup

- remove break ctl from ioctl handler, it's never reached, since
  tty_ops->break_ctl is defined (mxser break handling is done in software)
- mark MOXA_GET_MAJOR as deprecated
- fix TIOCGICOUNT (some put_user return values were not checked). Use
  copy_to_user on the whole structure instead.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mxser.c | 42 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 4c756bbba94..e5029b149c5 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -16,7 +16,6 @@
  *	Fed through a cleanup, indent and remove of non 2.6 code by Alan Cox
  *	<alan@redhat.com>. The original 1.8 code is available on www.moxa.com.
  *	- Fixed x86_64 cleanness
- *	- Fixed sleep with spinlock held in mxser_send_break
  */
 
 #include <linux/module.h>
@@ -1634,6 +1633,8 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
 
 	switch (cmd) {
 	case MOXA_GET_MAJOR:
+		printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix "
+				"your userspace\n", current->comm, cmd);
 		return put_user(ttymajor, (int __user *)argp);
 
 	case MOXA_CHKPORTENABLE:
@@ -1804,7 +1805,6 @@ static int mxser_ioctl(struct tty_struct *tty, struct file *file,
 {
 	struct mxser_port *info = tty->driver_data;
 	struct async_icount cnow;
-	struct serial_icounter_struct __user *p_cuser;
 	unsigned long flags;
 	void __user *argp = (void __user *)arg;
 	int retval;
@@ -1884,30 +1884,26 @@ static int mxser_ioctl(struct tty_struct *tty, struct file *file,
 	 * NB: both 1->0 and 0->1 transitions are counted except for
 	 *     RI where only 0->1 is counted.
 	 */
-	case TIOCGICOUNT:
+	case TIOCGICOUNT: {
+		struct serial_icounter_struct icnt = { 0 };
 		spin_lock_irqsave(&info->slock, flags);
 		cnow = info->icount;
 		spin_unlock_irqrestore(&info->slock, flags);
-		p_cuser = argp;
-		if (put_user(cnow.frame, &p_cuser->frame))
-			return -EFAULT;
-		if (put_user(cnow.brk, &p_cuser->brk))
-			return -EFAULT;
-		if (put_user(cnow.overrun, &p_cuser->overrun))
-			return -EFAULT;
-		if (put_user(cnow.buf_overrun, &p_cuser->buf_overrun))
-			return -EFAULT;
-		if (put_user(cnow.parity, &p_cuser->parity))
-			return -EFAULT;
-		if (put_user(cnow.rx, &p_cuser->rx))
-			return -EFAULT;
-		if (put_user(cnow.tx, &p_cuser->tx))
-			return -EFAULT;
-		put_user(cnow.cts, &p_cuser->cts);
-		put_user(cnow.dsr, &p_cuser->dsr);
-		put_user(cnow.rng, &p_cuser->rng);
-		put_user(cnow.dcd, &p_cuser->dcd);
-		return 0;
+
+		icnt.frame = cnow.frame;
+		icnt.brk = cnow.brk;
+		icnt.overrun = cnow.overrun;
+		icnt.buf_overrun = cnow.buf_overrun;
+		icnt.parity = cnow.parity;
+		icnt.rx = cnow.rx;
+		icnt.tx = cnow.tx;
+		icnt.cts = cnow.cts;
+		icnt.dsr = cnow.dsr;
+		icnt.rng = cnow.rng;
+		icnt.dcd = cnow.dcd;
+
+		return copy_to_user(argp, &icnt, sizeof(icnt)) ? -EFAULT : 0;
+	}
 	case MOXA_HighSpeedOn:
 		return put_user(info->baud_base != 115200 ? 1 : 0, (int __user *)argp);
 	case MOXA_SDS_RSTICOUNTER:
-- 
GitLab


From 72800df9ba3199df02a95b3830c49fbf16ec4a6d Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 25 Jul 2008 01:48:20 -0700
Subject: [PATCH 773/853] Char: mxser, globals cleanup

- remove unused mxvar_diagflag
- move mxser_msr into the only user/function
- GMStatus: fix race-prone access to it. We really need only one instance,
  not MXSER_PORTS of them. Move it into the MOXA_GETMSTATUS ioctl.
- mxser_mon_ext: almost the same, but allocate it on the heap, since it is
  more than 2 KB in size.
- fix indexing, `i' is not the index value, `i * MXSER_PORTS_PER_BOARD + j' is

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mxser.c | 130 ++++++++++++++++++-------------------------
 1 file changed, 55 insertions(+), 75 deletions(-)

diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index e5029b149c5..3d7f2a97049 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -286,8 +286,6 @@ struct mxser_mstatus {
 	int dcd;
 };
 
-static struct mxser_mstatus GMStatus[MXSER_PORTS];
-
 static int mxserBoardCAP[MXSER_BOARDS] = {
 	0, 0, 0, 0
 	/*  0x180, 0x280, 0x200, 0x320 */
@@ -296,9 +294,6 @@ static int mxserBoardCAP[MXSER_BOARDS] = {
 static struct mxser_board mxser_boards[MXSER_BOARDS];
 static struct tty_driver *mxvar_sdriver;
 static struct mxser_log mxvar_log;
-static int mxvar_diagflag;
-static unsigned char mxser_msr[MXSER_PORTS + 1];
-static struct mxser_mon_ext mon_data_ext;
 static int mxser_set_baud_method[MXSER_PORTS + 1];
 
 static void mxser_enable_must_enchance_mode(unsigned long baseio)
@@ -542,6 +537,7 @@ static void process_txrx_fifo(struct mxser_port *info)
 
 static unsigned char mxser_get_msr(int baseaddr, int mode, int port)
 {
+	static unsigned char mxser_msr[MXSER_PORTS + 1];
 	unsigned char status = 0;
 
 	status = inb(baseaddr + UART_MSR);
@@ -1652,62 +1648,60 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
 			ret = -EFAULT;
 		unlock_kernel();
 		return ret;
-	case MOXA_GETMSTATUS:
+	case MOXA_GETMSTATUS: {
+		struct mxser_mstatus ms, __user *msu = argp;
 		lock_kernel();
 		for (i = 0; i < MXSER_BOARDS; i++)
 			for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
 				port = &mxser_boards[i].ports[j];
+				memset(&ms, 0, sizeof(ms));
 
-				GMStatus[i].ri = 0;
-				if (!port->ioaddr) {
-					GMStatus[i].dcd = 0;
-					GMStatus[i].dsr = 0;
-					GMStatus[i].cts = 0;
-					continue;
-				}
+				if (!port->ioaddr)
+					goto copy;
 
 				if (!port->port.tty || !port->port.tty->termios)
-					GMStatus[i].cflag =
-						port->normal_termios.c_cflag;
+					ms.cflag = port->normal_termios.c_cflag;
 				else
-					GMStatus[i].cflag =
-						port->port.tty->termios->c_cflag;
+					ms.cflag = port->port.tty->termios->c_cflag;
 
 				status = inb(port->ioaddr + UART_MSR);
-				if (status & 0x80 /*UART_MSR_DCD */ )
-					GMStatus[i].dcd = 1;
-				else
-					GMStatus[i].dcd = 0;
-
-				if (status & 0x20 /*UART_MSR_DSR */ )
-					GMStatus[i].dsr = 1;
-				else
-					GMStatus[i].dsr = 0;
-
-
-				if (status & 0x10 /*UART_MSR_CTS */ )
-					GMStatus[i].cts = 1;
-				else
-					GMStatus[i].cts = 0;
+				if (status & UART_MSR_DCD)
+					ms.dcd = 1;
+				if (status & UART_MSR_DSR)
+					ms.dsr = 1;
+				if (status & UART_MSR_CTS)
+					ms.cts = 1;
+			copy:
+				if (copy_to_user(msu, &ms, sizeof(ms))) {
+					unlock_kernel();
+					return -EFAULT;
+				}
+				msu++;
 			}
 		unlock_kernel();
-		if (copy_to_user(argp, GMStatus,
-				sizeof(struct mxser_mstatus) * MXSER_PORTS))
-			return -EFAULT;
 		return 0;
+	}
 	case MOXA_ASPP_MON_EXT: {
-		int p, shiftbit;
-		unsigned long opmode;
-		unsigned cflag, iflag;
+		struct mxser_mon_ext *me; /* it's 2k, stack unfriendly */
+		unsigned int cflag, iflag, p;
+		u8 opmode;
+
+		me = kzalloc(sizeof(*me), GFP_KERNEL);
+		if (!me)
+			return -ENOMEM;
 
 		lock_kernel();
-		for (i = 0; i < MXSER_BOARDS; i++) {
-			for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
+		for (i = 0, p = 0; i < MXSER_BOARDS; i++) {
+			for (j = 0; j < MXSER_PORTS_PER_BOARD; j++, p++) {
+				if (p >= ARRAY_SIZE(me->rx_cnt)) {
+					i = MXSER_BOARDS;
+					break;
+				}
 				port = &mxser_boards[i].ports[j];
 				if (!port->ioaddr)
 					continue;
 
-				status = mxser_get_msr(port->ioaddr, 0, i);
+				status = mxser_get_msr(port->ioaddr, 0, p);
 
 				if (status & UART_MSR_TERI)
 					port->icount.rng++;
@@ -1719,16 +1713,13 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
 					port->icount.cts++;
 
 				port->mon_data.modem_status = status;
-				mon_data_ext.rx_cnt[i] = port->mon_data.rxcnt;
-				mon_data_ext.tx_cnt[i] = port->mon_data.txcnt;
-				mon_data_ext.up_rxcnt[i] =
-					port->mon_data.up_rxcnt;
-				mon_data_ext.up_txcnt[i] =
-					port->mon_data.up_txcnt;
-				mon_data_ext.modem_status[i] =
+				me->rx_cnt[p] = port->mon_data.rxcnt;
+				me->tx_cnt[p] = port->mon_data.txcnt;
+				me->up_rxcnt[p] = port->mon_data.up_rxcnt;
+				me->up_txcnt[p] = port->mon_data.up_txcnt;
+				me->modem_status[p] =
 					port->mon_data.modem_status;
-				mon_data_ext.baudrate[i] =
-					tty_get_baud_rate(port->port.tty);
+				me->baudrate[p] = tty_get_baud_rate(port->port.tty);
 
 				if (!port->port.tty || !port->port.tty->termios) {
 					cflag = port->normal_termios.c_cflag;
@@ -1738,40 +1729,31 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
 					iflag = port->port.tty->termios->c_iflag;
 				}
 
-				mon_data_ext.databits[i] = cflag & CSIZE;
-
-				mon_data_ext.stopbits[i] = cflag & CSTOPB;
-
-				mon_data_ext.parity[i] =
-					cflag & (PARENB | PARODD | CMSPAR);
-
-				mon_data_ext.flowctrl[i] = 0x00;
+				me->databits[p] = cflag & CSIZE;
+				me->stopbits[p] = cflag & CSTOPB;
+				me->parity[p] = cflag & (PARENB | PARODD |
+						CMSPAR);
 
 				if (cflag & CRTSCTS)
-					mon_data_ext.flowctrl[i] |= 0x03;
+					me->flowctrl[p] |= 0x03;
 
 				if (iflag & (IXON | IXOFF))
-					mon_data_ext.flowctrl[i] |= 0x0C;
+					me->flowctrl[p] |= 0x0C;
 
 				if (port->type == PORT_16550A)
-					mon_data_ext.fifo[i] = 1;
-				else
-					mon_data_ext.fifo[i] = 0;
+					me->fifo[p] = 1;
 
-				p = i % 4;
-				shiftbit = p * 2;
-				opmode = inb(port->opmode_ioaddr) >> shiftbit;
+				opmode = inb(port->opmode_ioaddr) >>
+						((p % 4) * 2);
 				opmode &= OP_MODE_MASK;
-
-				mon_data_ext.iftype[i] = opmode;
-
+				me->iftype[p] = opmode;
 			}
 		}
 		unlock_kernel();
-		if (copy_to_user(argp, &mon_data_ext,
-					sizeof(mon_data_ext)))
-			return -EFAULT;
-		return 0;
+		if (copy_to_user(argp, me, sizeof(*me)))
+			ret = -EFAULT;
+		kfree(me);
+		return ret;
 	}
 	default:
 		return -ENOIOCTLCMD;
@@ -2802,8 +2784,6 @@ static int __init mxser_module_init(void)
 		goto err_put;
 	}
 
-	mxvar_diagflag = 0;
-
 	m = 0;
 	/* Start finding ISA boards here */
 	for (isaloop = 0; isaloop < 2; isaloop++)
-- 
GitLab


From 729f0edbecd0c59c82ee9bf92009acc7e984c425 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 25 Jul 2008 01:48:20 -0700
Subject: [PATCH 774/853] Char: mxser, update documentation

Update Documentation/moxa-smartio to the later document from the mxser
package.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/moxa-smartio | 392 ++++++++++++++++++++++++-------------
 1 file changed, 252 insertions(+), 140 deletions(-)

diff --git a/Documentation/moxa-smartio b/Documentation/moxa-smartio
index fe24ecc6372..5337e80a5b9 100644
--- a/Documentation/moxa-smartio
+++ b/Documentation/moxa-smartio
@@ -1,14 +1,22 @@
 =============================================================================
-
-	MOXA Smartio Family Device Driver Ver 1.1 Installation Guide
-		    for Linux Kernel 2.2.x and 2.0.3x
-	       Copyright (C) 1999, Moxa Technologies Co, Ltd.
+          MOXA Smartio/Industio Family Device Driver Installation Guide
+		    for Linux Kernel 2.4.x, 2.6.x
+	       Copyright (C) 2008, Moxa Inc.
 =============================================================================
+Date: 01/21/2008
+
 Content
 
 1. Introduction
 2. System Requirement
 3. Installation
+   3.1 Hardware installation
+   3.2 Driver files
+   3.3 Device naming convention
+   3.4 Module driver configuration
+   3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x.
+   3.6 Custom configuration
+   3.7 Verify driver installation
 4. Utilities
 5. Setserial
 6. Troubleshooting
@@ -16,27 +24,48 @@ Content
 -----------------------------------------------------------------------------
 1. Introduction
 
-   The Smartio family Linux driver, Ver. 1.1, supports following multiport
+   The Smartio/Industio/UPCI family Linux driver supports following multiport
    boards.
 
-    -C104P/H/HS, C104H/PCI, C104HS/PCI, CI-104J 4 port multiport board.
-    -C168P/H/HS, C168H/PCI 8 port multiport board.
-
-   This driver has been modified a little and cleaned up from the Moxa
-   contributed driver code and merged into Linux 2.2.14pre. In particular
-   official major/minor numbers have been assigned which are different to
-   those the original Moxa supplied driver used.
+    - 2 ports multiport board
+	CP-102U, CP-102UL, CP-102UF
+	CP-132U-I, CP-132UL,
+	CP-132, CP-132I, CP132S, CP-132IS,
+	CI-132, CI-132I, CI-132IS,
+	(C102H, C102HI, C102HIS, C102P, CP-102, CP-102S)
+
+    - 4 ports multiport board
+	CP-104EL,
+	CP-104UL, CP-104JU,
+	CP-134U, CP-134U-I,
+	C104H/PCI, C104HS/PCI,
+	CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL,
+	C104H, C104HS,
+	CI-104J, CI-104JS,
+	CI-134, CI-134I, CI-134IS,
+	(C114HI, CT-114I, C104P)
+	POS-104UL,
+	CB-114,
+	CB-134I
+
+    - 8 ports multiport board
+	CP-118EL, CP-168EL,
+	CP-118U, CP-168U,
+	C168H/PCI,
+	C168H, C168HS,
+	(C168P),
+	CB-108
 
    This driver and installation procedure have been developed upon Linux Kernel
-   2.2.5 and backward compatible to 2.0.3x. This driver supports Intel x86 and
-   Alpha hardware platform. In order to maintain compatibility, this version
-   has also been properly tested with RedHat, OpenLinux, TurboLinux and
-   S.u.S.E Linux. However, if compatibility problem occurs, please contact
-   Moxa at support@moxa.com.tw.
+   2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order
+   to maintain compatibility, this version has also been properly tested with
+   RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem
+   occurs, please contact Moxa at support@moxa.com.tw.
 
    In addition to device driver, useful utilities are also provided in this
    version. They are
-    - msdiag     Diagnostic program for detecting installed Moxa Smartio boards.
+    - msdiag     Diagnostic program for displaying installed Moxa
+                 Smartio/Industio boards.
     - msmon      Monitor program to observe data count and line status signals.
     - msterm     A simple terminal program which is useful in testing serial
 	         ports.
@@ -47,8 +76,7 @@ Content
    GNU General Public License in this version. Please refer to GNU General
    Public License announcement in each source code file for more detail.
 
-   In Moxa's ftp sites, you may always find latest driver at
-   ftp://ftp.moxa.com  or ftp://ftp.moxa.com.tw.
+   In Moxa's Web sites, you may always find latest driver at http://web.moxa.com.
 
    This version of driver can be installed as Loadable Module (Module driver)
    or built-in into kernel (Static driver). You may refer to following
@@ -61,18 +89,27 @@ Content
 
 -----------------------------------------------------------------------------
 2. System Requirement
-   - Hardware platform: Intel x86 or Alpha machine
-   - Kernel version: 2.0.3x or 2.2.x
+   - Hardware platform: Intel x86 machine
+   - Kernel version: 2.4.x or 2.6.x
    - gcc version 2.72 or later
    - Maximum 4 boards can be installed in combination
 
 -----------------------------------------------------------------------------
 3. Installation
 
+   3.1 Hardware installation
+   3.2 Driver files
+   3.3 Device naming convention
+   3.4 Module driver configuration
+   3.5 Static driver configuration for Linux kernel 2.4.x, 2.6.x.
+   3.6 Custom configuration
+   3.7 Verify driver installation
+
+
    3.1 Hardware installation
 
-       There are two types of buses, ISA and PCI, for Smartio family multiport
-       board.
+       There are two types of buses, ISA and PCI, for Smartio/Industio
+       family multiport board.
 
        ISA board
        ---------
@@ -81,47 +118,57 @@ Content
        installation procedure in User's Manual before proceed any further.
        Please make sure the JP1 is open after the ISA board is set properly.
 
-       PCI board
-       ---------
+       PCI/UPCI board
+       --------------
        You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict
        with other ISA devices. Please refer to hardware installation
        procedure in User's Manual in advance.
 
-       IRQ Sharing
+       PCI IRQ Sharing
        -----------
        Each port within the same multiport board shares the same IRQ. Up to
-       4 Moxa Smartio Family multiport boards can be installed together on
-       one system and they can share the same IRQ.
+       4 Moxa Smartio/Industio PCI Family multiport boards can be installed
+       together on one system and they can share the same IRQ.
+
 
-   3.2 Driver files and device naming convention
+   3.2 Driver files
 
        The driver file may be obtained from ftp, CD-ROM or floppy disk. The
        first step, anyway, is to copy driver file "mxser.tgz" into specified
        directory. e.g. /moxa. The execute commands as below.
 
+       # cd /
+       # mkdir moxa
        # cd /moxa
-       # tar xvf /dev/fd0 
+       # tar xvf /dev/fd0
+
        or
+
+       # cd /
+       # mkdir moxa
        # cd /moxa
        # cp /mnt/cdrom/<driver directory>/mxser.tgz .
        # tar xvfz mxser.tgz
 
+
+   3.3 Device naming convention
+
        You may find all the driver and utilities files in /moxa/mxser.
        Following installation procedure depends on the model you'd like to
-       run the driver. If you prefer module driver, please refer to 3.3.
-       If static driver is required, please refer to 3.4.
+       run the driver. If you prefer module driver, please refer to 3.4.
+       If static driver is required, please refer to 3.5.
 
        Dialin and callout port
        -----------------------
-       This driver remains traditional serial device properties. There're
+       This driver remains traditional serial device properties. There are
        two special file name for each serial port. One is dial-in port
        which is named "ttyMxx". For callout port, the naming convention
        is "cumxx".
 
        Device naming when more than 2 boards installed
        -----------------------------------------------
-       Naming convention for each Smartio multiport board is pre-defined
-       as below.
+       Naming convention for each Smartio/Industio multiport board is
+       pre-defined as below.
 
        Board Num.	 Dial-in Port	      Callout port
        1st board	ttyM0  - ttyM7	      cum0  - cum7
@@ -129,6 +176,12 @@ Content
        3rd board	ttyM16 - ttyM23       cum16 - cum23
        4th board	ttyM24 - ttym31       cum24 - cum31
 
+
+       !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+       Under Kernel 2.6 the cum Device is Obsolete. So use ttyM*
+       device instead.
+       !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
        Board sequence
        --------------
        This driver will activate ISA boards according to the parameter set
@@ -138,69 +191,131 @@ Content
        For PCI boards, their sequence will be after ISA boards and C168H/PCI
        has higher priority than C104H/PCI boards.
 
-   3.3 Module driver configuration
+   3.4 Module driver configuration
        Module driver is easiest way to install. If you prefer static driver
        installation, please skip this paragraph.
-       1. Find "Makefile" in /moxa/mxser, then run
 
-	  # make install
+
+       ------------- Prepare to use the MOXA driver--------------------
+       3.4.1 Create tty device with correct major number
+          Before using MOXA driver, your system must have the tty devices
+          which are created with driver's major number. We offer one shell
+          script "msmknod" to simplify the procedure.
+          This step is only needed to be executed once. But you still
+          need to do this procedure when:
+          a. You change the driver's major number. Please refer to the
+             "3.6" section.
+          b. Your total installed MOXA boards number is changed. Maybe you
+             add/delete one MOXA board.
+          c. You want to change the tty name. This needs to modify the
+             shell script "msmknod"
+
+          The procedure is:
+	  # cd /moxa/mxser/driver
+	  # ./msmknod
+
+          This shell script will require the major number for dial-in
+          device and callout device to create tty device. You also need
+          to specify the total installed MOXA board number. Default major
+          numbers for dial-in device and callout device are 30, 35. If
+          you need to change to other number, please refer to section "3.6"
+          for more detailed procedure.
+          Msmknod will delete any special files occupying the same device
+          naming.
+
+       3.4.2 Build the MOXA driver and utilities
+          Before using the MOXA driver and utilities, you need to compile
+          all the source code. This step only needs to be executed once,
+          but you still need to re-compile if you modify the source
+          code. For example, if you change the driver's major number (see
+          "3.6" section), then you need to do this step again.
+
+          Find "Makefile" in /moxa/mxser, then run
+
+	  # make clean; make install
+
+          !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
+	  For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1:
+	  # make clean; make installsp1
+
+	  For Red Hat Enterprise Linux AS4/ES4/WS4:
+	  # make clean; make installsp2
+          !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
 
 	  The driver files "mxser.o" and utilities will be properly compiled
-	  and copied to system directories respectively.Then run
+	  and copied to system directories respectively.
 
-	  # insmod mxser
+       ------------- Load MOXA driver--------------------
+       3.4.3 Load the MOXA driver
 
-	  to activate the modular driver. You may run "lsmod" to check
-	  if "mxser.o" is activated.
+	  # modprobe mxser <argument>
 
-       2. Create special files by executing "msmknod".
-	  # cd /moxa/mxser/driver
-	  # ./msmknod
+	  will activate the module driver. You may run "lsmod" to check
+	  if "mxser" is activated. If the MOXA board is ISA board, the
+          <argument> is needed. Please refer to section "3.4.5" for more
+          information.
+
+
+       ------------- Load MOXA driver on boot --------------------
+       3.4.4 For the above description, you may manually execute
+          "modprobe mxser" to activate this driver and run
+	  "rmmod mxser" to remove it.
+          However, it's better to have a boot time configuration to
+          eliminate manual operation. Boot time configuration can be
+          achieved by rc file. We offer one "rc.mxser" file to simplify
+          the procedure under "moxa/mxser/driver".
 
-	  Default major numbers for dial-in device and callout device are
-	  174, 175. Msmknod will delete any special files occupying the same
-	  device naming.
+          But if you use ISA board, please modify the "modprobe ..." command
+          to add the argument (see "3.4.5" section). After modifying the
+          rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser"
+          manually to make sure the modification is ok. If any error
+          encountered, please try to modify again. If the modification is
+          completed, follow the below step.
 
-       3. Up to now, you may manually execute "insmod mxser" to activate
-	  this driver and run "rmmod mxser" to remove it. However, it's
-	  better to have a boot time configuration to eliminate manual
-	  operation.
-	  Boot time configuration can be achieved by rc file. Run following
-	  command for setting rc files.
+	  Run following command for setting rc files.
 
 	  # cd /moxa/mxser/driver
 	  # cp ./rc.mxser /etc/rc.d
 	  # cd /etc/rc.d
 
-	  You may have to modify part of the content in rc.mxser to specify
-          parameters for ISA board. Please refer to rc.mxser for more detail.
-          Find "rc.serial". If "rc.serial" doesn't exist, create it by vi.
-	  Add "rc.mxser" in last line. Next, open rc.local by vi
-	  and append following content.
+	  Check whether "rc.serial" exists. If "rc.serial" doesn't exist,
+	  create it by vi, run "chmod 755 rc.serial" to change the permission.
+	  Add "/etc/rc.d/rc.mxser" as the last line.
 
-	  if [ -f /etc/rc.d/rc.serial ]; then
-	     sh /etc/rc.d/rc.serial
-	  fi
+          Reboot and check if "mxser" is activated by "lsmod" command.
 
-       4. Reboot and check if mxser.o activated by "lsmod" command.
-       5. If you'd like to drive Smartio ISA boards in the system, you'll
-	  have to add parameter to specify CAP address of given board while
-          activating "mxser.o". The format for parameters are as follows.
+       3.4.5. If you'd like to drive Smartio/Industio ISA boards in the system,
+          you'll have to add parameter to specify CAP address of given
+	  board while activating "mxser.o". The format for parameters are
+	  as follows.
 
-	  insmod mxser ioaddr=0x???,0x???,0x???,0x???
+	  modprobe mxser ioaddr=0x???,0x???,0x???,0x???
 				|      |     |	  |
 				|      |     |	  +- 4th ISA board
 				|      |     +------ 3rd ISA board
 				|      +------------ 2nd ISA board
 				+------------------- 1st ISA board
 
-   3.4 Static driver configuration
+   3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x
+
+       Note: To use static driver, you must install the linux kernel
+             source package.
+
+       3.5.1 Backup the built-in driver in the kernel.
+          # cd /usr/src/linux/drivers/char
+          # mv mxser.c mxser.c.old
+
+          For Red Hat 7.x user, you need to create link:
+          # cd /usr/src
+          # ln -s linux-2.4 linux
 
-       1. Create link
+       3.5.2 Create link
 	  # cd /usr/src/linux/drivers/char
 	  # ln -s /moxa/mxser/driver/mxser.c mxser.c
 
-       2. Add CAP address list for ISA boards
+       3.5.3 Add CAP address list for ISA boards. For PCI boards user,
+          please skip this step.
+
 	  In module mode, the CAP address for ISA board is given by
 	  parameter. In static driver configuration, you'll have to
 	  assign it within driver's source code. If you will not
@@ -222,73 +337,55 @@ Content
 	     static int mxserBoardCAP[]
 	     = {0x280, 0x180, 0x00, 0x00};
 
-       3. Modify tty_io.c
-	  # cd /usr/src/linux/drivers/char/
-	  # vi tty_io.c
-	    Find pty_init(), insert "mxser_init()" as
+       3.5.4 Setup kernel configuration
 
-	    pty_init();
-	    mxser_init();
+          Configure the kernel:
 
-       4. Modify tty.h
-	  # cd /usr/src/linux/include/linux
-	  # vi tty.h
-	    Find extern int tty_init(void), insert "mxser_init()" as
+            # cd /usr/src/linux
+            # make menuconfig
 
-	    extern int tty_init(void);
-	    extern int mxser_init(void);
-     
-       5. Modify Makefile
-	  # cd /usr/src/linux/drivers/char
-	  # vi Makefile
-	    Find L_OBJS := tty_io.o ...... random.o, add
-	    "mxser.o" at last of this line as
-	    L_OBJS := tty_io.o ....... mxser.o
+          You will go into a menu-driven system. Please select [Character
+          devices][Non-standard serial port support], enable the [Moxa
+          SmartIO support] driver with "[*]" for built-in (not "[M]"), then
+          select [Exit] to exit this program.
 
-       6. Rebuild kernel
-	  The following are for Linux kernel rebuilding,for your reference only.
+       3.5.5 Rebuild kernel
+	  The following are for Linux kernel rebuilding, for your
+          reference only.
 	  For appropriate details, please refer to the Linux document.
 
-	  If 'lilo' utility is installed, please use 'make zlilo' to rebuild
-	  kernel. If 'lilo' is not installed, please follow the following steps.
-
 	   a. cd /usr/src/linux
-	   b. make clean			     /* take a few minutes */
-	   c. make bzImage		   /* take probably 10-20 minutes */
-	   d. Backup original boot kernel.		  /* optional step */
-	   e. cp /usr/src/linux/arch/i386/boot/bzImage /boot/vmlinuz
+	   b. make clean	     /* take a few minutes */
+	   c. make dep		     /* take a few minutes */
+	   d. make bzImage	     /* take probably 10-20 minutes */
+	   e. make install	     /* copy boot image to correct position */
 	   f. Please make sure the boot kernel (vmlinuz) is in the
-	      correct position. If you use 'lilo' utility, you should
-	      check /etc/lilo.conf 'image' item specified the path
-	      which is the 'vmlinuz' path, or you will load wrong
-	      (or old) boot kernel image (vmlinuz).
-	   g. chmod 400 /vmlinuz
-	   h. lilo
-	   i. rdev -R /vmlinuz 1
-	   j. sync
-
-	  Note that if the result of "make zImage" is ERROR, then you have to
-	  go back to Linux configuration Setup. Type "make config" in directory
-	  /usr/src/linux or "setup".
-
-	  Since system include file, /usr/src/linux/include/linux/interrupt.h,
-	  is modified each time the MOXA driver is installed, kernel rebuilding
-	  is inevitable. And it takes about 10 to 20 minutes depends on the
-	  machine.
-
-       7. Make utility
-	  # cd /moxa/mxser/utility
-	  # make install
-       
-       8. Make special file
+	      correct position.
+	   g. If you use 'lilo' utility, you should check /etc/lilo.conf
+	      'image' item specified the path which is the 'vmlinuz' path,
+	      or you will load wrong (or old) boot kernel image (vmlinuz).
+	      After checking /etc/lilo.conf, please run "lilo".
+
+	  Note that if the result of "make bzImage" is ERROR, then you have to
+	  go back to Linux configuration Setup. Type "make menuconfig" in
+          directory /usr/src/linux.
+
+
+       3.5.6 Make tty device and special file
           # cd /moxa/mxser/driver
           # ./msmknod
 
-       9. Reboot
+       3.5.7 Make utility
+	  # cd /moxa/mxser/utility
+	  # make clean; make install
+
+       3.5.8 Reboot
 
-   3.5 Custom configuration
+
+
+   3.6 Custom configuration
        Although this driver already provides you default configuration, you
-       still can change the device name and major number.The instruction to
+       still can change the device name and major number. The instruction to
        change these parameters are shown as below.
 
        Change Device name
@@ -306,33 +403,37 @@ Content
        2 free major numbers for this driver. There are 3 steps to change
        major numbers.
 
-       1. Find free major numbers
+       3.6.1 Find free major numbers
 	  In /proc/devices, you may find all the major numbers occupied
 	  in the system. Please select 2 major numbers that are available.
 	  e.g. 40, 45.
-       2. Create special files
+       3.6.2 Create special files
 	  Run /moxa/mxser/driver/msmknod to create special files with
 	  specified major numbers.
-       3. Modify driver with new major number
+       3.6.3 Modify driver with new major number
 	  Run vi to open /moxa/mxser/driver/mxser.c. Locate the line
 	  contains "MXSERMAJOR". Change the content as below.
 	  #define	  MXSERMAJOR		  40
 	  #define	  MXSERCUMAJOR		  45
-       4. Run # make install in /moxa/mxser/driver.
+       3.6.4 Run "make clean; make install" in /moxa/mxser/driver.
 
-   3.6 Verify driver installation
+   3.7 Verify driver installation
        You may refer to /var/log/messages to check the latest status
        log reported by this driver whenever it's activated.
+
 -----------------------------------------------------------------------------
 4. Utilities
    There are 3 utilities contained in this driver. They are msdiag, msmon and
    msterm. These 3 utilities are released in form of source code. They should
    be compiled into executable file and copied into /usr/bin.
 
+   Before using these utilities, please load driver (refer 3.4 & 3.5) and
+   make sure you had run the "msmknod" utility.
+
    msdiag - Diagnostic
    --------------------
-   This utility provides the function to detect what Moxa Smartio multiport
-   board exists in the system.
+   This utility provides the function to display what Moxa Smartio/Industio
+   board found by driver in the system.
 
    msmon - Port Monitoring
    -----------------------
@@ -353,12 +454,13 @@ Content
    application, for example, sending AT command to a modem connected to the
    port or used as a terminal for login purpose. Note that this is only a
    dumb terminal emulation without handling full screen operation.
+
 -----------------------------------------------------------------------------
 5. Setserial
 
    Supported Setserial parameters are listed as below.
 
-   uart 	  set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
+   uart		  set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
    close_delay	  set the amount of time(in 1/100 of a second) that DTR
 		  should be kept low while being closed.
    closing_wait   set the amount of time(in 1/100 of a second) that the
@@ -366,7 +468,13 @@ Content
 		  being closed, before the receiver is disable.
    spd_hi	  Use  57.6kb  when  the application requests 38.4kb.
    spd_vhi	  Use  115.2kb	when  the application requests 38.4kb.
+   spd_shi	  Use  230.4kb	when  the application requests 38.4kb.
+   spd_warp	  Use  460.8kb	when  the application requests 38.4kb.
    spd_normal	  Use  38.4kb  when  the application requests 38.4kb.
+   spd_cust	  Use  the custom divisor to set the speed when  the
+		  application requests 38.4kb.
+   divisor	  This option set the custom divison.
+   baud_base	  This option set the base baud rate.
 
 -----------------------------------------------------------------------------
 6. Troubleshooting
@@ -375,8 +483,9 @@ Content
    possible. If all the possible solutions fail, please contact our technical
    support team to get more help.
 
-   Error msg: More than 4 Moxa Smartio family boards found. Fifth board and
-	      after are ignored.
+
+   Error msg: More than 4 Moxa Smartio/Industio family boards found. Fifth board
+              and after are ignored.
    Solution:
    To avoid this problem, please unplug fifth and after board, because Moxa
    driver supports up to 4 boards.
@@ -384,7 +493,7 @@ Content
    Error msg: Request_irq fail, IRQ(?) may be conflict with another device.
    Solution:
    Other PCI or ISA devices occupy the assigned IRQ. If you are not sure
-   which device causes the situation,please check /proc/interrupts to find
+   which device causes the situation, please check /proc/interrupts to find
    free IRQ and simply change another free IRQ for Moxa board.
 
    Error msg: Board #: C1xx Series(CAP=xxx) interrupt number invalid.
@@ -397,15 +506,18 @@ Content
    Moxa ISA board needs an interrupt vector.Please refer to user's manual
    "Hardware Installation" chapter to set interrupt vector.
 
-   Error msg: Couldn't install MOXA Smartio family driver!
+   Error msg: Couldn't install MOXA Smartio/Industio family driver!
    Solution:
    Load Moxa driver fail, the major number may conflict with other devices.
-   Please refer to previous section 3.5 to change a free major number for
+   Please refer to previous section 3.6 to change a free major number for
    Moxa driver.
 
-   Error msg: Couldn't install MOXA Smartio family callout driver!
+   Error msg: Couldn't install MOXA Smartio/Industio family callout driver!
    Solution:
    Load Moxa callout driver fail, the callout device major number may
-   conflict with other devices. Please refer to previous section 3.5 to
+   conflict with other devices. Please refer to previous section 3.6 to
    change a free callout device major number for Moxa driver.
+
+
 -----------------------------------------------------------------------------
+
-- 
GitLab


From 83766bc63f7e49b0215811026e7802bd09a9c7e1 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 25 Jul 2008 01:48:21 -0700
Subject: [PATCH 775/853] Char: mxser, prints cleanup

- use dev_* for printing in pci probe function
- move ISA p[rints directly into isa find function, do not postpone it.
  Remove macros bound to it then.
- prepend some prints by "mxser: " to know what it belongs to

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mxser.c | 80 ++++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 3d7f2a97049..57570f7db2b 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -55,11 +55,6 @@
 #define MXSER_PORTS		(MXSER_BOARDS * MXSER_PORTS_PER_BOARD)
 #define MXSER_ISR_PASS_LIMIT	100
 
-#define	MXSER_ERR_IOADDR	-1
-#define	MXSER_ERR_IRQ		-2
-#define	MXSER_ERR_IRQ_CONFLIT	-3
-#define	MXSER_ERR_VECTOR	-4
-
 /*CheckIsMoxaMust return value*/
 #define MOXA_OTHER_UART		0x00
 #define MOXA_MUST_MU150_HWID	0x01
@@ -2481,7 +2476,8 @@ static int __devinit mxser_initbrd(struct mxser_board *brd,
 	unsigned int i;
 	int retval;
 
-	printk(KERN_INFO "max. baud rate = %d bps.\n", brd->ports[0].max_baud);
+	printk(KERN_INFO "mxser: max. baud rate = %d bps\n",
+			brd->ports[0].max_baud);
 
 	for (i = 0; i < brd->info->nports; i++) {
 		info = &brd->ports[i];
@@ -2564,28 +2560,32 @@ static int __init mxser_get_ISA_conf(int cap, struct mxser_board *brd)
 		irq = regs[9] & 0xF000;
 		irq = irq | (irq >> 4);
 		if (irq != (regs[9] & 0xFF00))
-			return MXSER_ERR_IRQ_CONFLIT;
+			goto err_irqconflict;
 	} else if (brd->info->nports == 4) {
 		irq = regs[9] & 0xF000;
 		irq = irq | (irq >> 4);
 		irq = irq | (irq >> 8);
 		if (irq != regs[9])
-			return MXSER_ERR_IRQ_CONFLIT;
+			goto err_irqconflict;
 	} else if (brd->info->nports == 8) {
 		irq = regs[9] & 0xF000;
 		irq = irq | (irq >> 4);
 		irq = irq | (irq >> 8);
 		if ((irq != regs[9]) || (irq != regs[10]))
-			return MXSER_ERR_IRQ_CONFLIT;
+			goto err_irqconflict;
 	}
 
-	if (!irq)
-		return MXSER_ERR_IRQ;
+	if (!irq) {
+		printk(KERN_ERR "mxser: interrupt number unset\n");
+		return -EIO;
+	}
 	brd->irq = ((int)(irq & 0xF000) >> 12);
 	for (i = 0; i < 8; i++)
 		brd->ports[i].ioaddr = (int) regs[i + 1] & 0xFFF8;
-	if ((regs[12] & 0x80) == 0)
-		return MXSER_ERR_VECTOR;
+	if ((regs[12] & 0x80) == 0) {
+		printk(KERN_ERR "mxser: invalid interrupt vector\n");
+		return -EIO;
+	}
 	brd->vector = (int)regs[11];	/* interrupt vector */
 	if (id == 1)
 		brd->vector_mask = 0x00FF;
@@ -2612,13 +2612,26 @@ static int __init mxser_get_ISA_conf(int cap, struct mxser_board *brd)
 	else
 		brd->uart_type = PORT_16450;
 	if (!request_region(brd->ports[0].ioaddr, 8 * brd->info->nports,
-			"mxser(IO)"))
-		return MXSER_ERR_IOADDR;
+			"mxser(IO)")) {
+		printk(KERN_ERR "mxser: can't request ports I/O region: "
+				"0x%.8lx-0x%.8lx\n",
+				brd->ports[0].ioaddr, brd->ports[0].ioaddr +
+				8 * brd->info->nports - 1);
+		return -EIO;
+	}
 	if (!request_region(brd->vector, 1, "mxser(vector)")) {
 		release_region(brd->ports[0].ioaddr, 8 * brd->info->nports);
-		return MXSER_ERR_VECTOR;
+		printk(KERN_ERR "mxser: can't request interrupt vector region: "
+				"0x%.8lx-0x%.8lx\n",
+				brd->ports[0].ioaddr, brd->ports[0].ioaddr +
+				8 * brd->info->nports - 1);
+		return -EIO;
 	}
 	return brd->info->nports;
+
+err_irqconflict:
+	printk(KERN_ERR "mxser: invalid interrupt number\n");
+	return -EIO;
 }
 
 static int __devinit mxser_probe(struct pci_dev *pdev,
@@ -2635,20 +2648,20 @@ static int __devinit mxser_probe(struct pci_dev *pdev,
 			break;
 
 	if (i >= MXSER_BOARDS) {
-		printk(KERN_ERR "Too many Smartio/Industio family boards found "
-			"(maximum %d), board not configured\n", MXSER_BOARDS);
+		dev_err(&pdev->dev, "too many boards found (maximum %d), board "
+				"not configured\n", MXSER_BOARDS);
 		goto err;
 	}
 
 	brd = &mxser_boards[i];
 	brd->idx = i * MXSER_PORTS_PER_BOARD;
-	printk(KERN_INFO "Found MOXA %s board (BusNo=%d, DevNo=%d)\n",
+	dev_info(&pdev->dev, "found MOXA %s board (BusNo=%d, DevNo=%d)\n",
 		mxser_cards[ent->driver_data].name,
 		pdev->bus->number, PCI_SLOT(pdev->devfn));
 
 	retval = pci_enable_device(pdev);
 	if (retval) {
-		printk(KERN_ERR "Moxa SmartI/O PCI enable fail !\n");
+		dev_err(&pdev->dev, "PCI enable failed\n");
 		goto err;
 	}
 
@@ -2798,33 +2811,14 @@ static int __init mxser_module_init(void)
 
 			brd = &mxser_boards[m];
 			retval = mxser_get_ISA_conf(cap, brd);
-
-			if (retval != 0)
-				printk(KERN_INFO "Found MOXA %s board "
-					"(CAP=0x%x)\n",
-					brd->info->name, ioaddr[b]);
-
 			if (retval <= 0) {
-				if (retval == MXSER_ERR_IRQ)
-					printk(KERN_ERR "Invalid interrupt "
-						"number, board not "
-						"configured\n");
-				else if (retval == MXSER_ERR_IRQ_CONFLIT)
-					printk(KERN_ERR "Invalid interrupt "
-						"number, board not "
-						"configured\n");
-				else if (retval == MXSER_ERR_VECTOR)
-					printk(KERN_ERR "Invalid interrupt "
-						"vector, board not "
-						"configured\n");
-				else if (retval == MXSER_ERR_IOADDR)
-					printk(KERN_ERR "Invalid I/O address, "
-						"board not configured\n");
-
 				brd->info = NULL;
 				continue;
 			}
 
+			printk(KERN_INFO "mxser: found MOXA %s board "
+				"(CAP=0x%x)\n",	brd->info->name, ioaddr[b]);
+
 			/* mxser_initbrd will hook ISR. */
 			if (mxser_initbrd(brd, NULL) < 0) {
 				brd->info = NULL;
@@ -2841,7 +2835,7 @@ static int __init mxser_module_init(void)
 
 	retval = pci_register_driver(&mxser_driver);
 	if (retval) {
-		printk(KERN_ERR "Can't register pci driver\n");
+		printk(KERN_ERR "mxser: can't register pci driver\n");
 		if (!m) {
 			retval = -ENODEV;
 			goto err_unr;
-- 
GitLab


From 1df0092477b8b2df605812e298624f5c35bb4805 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 25 Jul 2008 01:48:22 -0700
Subject: [PATCH 776/853] Char: mxser, remove predefined isa support

Remove support for ISA addresses predefined at compile time.  It is
unused (filled with zeroes) and lengthens the code.  Don't initialize the
global array, and add an `ioaddr' module param description.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mxser.c | 67 ++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 40 deletions(-)

diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 57570f7db2b..9b4d03cf4e1 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -173,14 +173,15 @@ static struct pci_device_id mxser_pcibrds[] = {
 };
 MODULE_DEVICE_TABLE(pci, mxser_pcibrds);
 
-static int ioaddr[MXSER_BOARDS] = { 0, 0, 0, 0 };
+static unsigned long ioaddr[MXSER_BOARDS];
 static int ttymajor = MXSERMAJOR;
 
 /* Variables for insmod */
 
 MODULE_AUTHOR("Casper Yang");
 MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver");
-module_param_array(ioaddr, int, NULL, 0);
+module_param_array(ioaddr, ulong, NULL, 0);
+MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board");
 module_param(ttymajor, int, 0);
 MODULE_LICENSE("GPL");
 
@@ -281,11 +282,6 @@ struct mxser_mstatus {
 	int dcd;
 };
 
-static int mxserBoardCAP[MXSER_BOARDS] = {
-	0, 0, 0, 0
-	/*  0x180, 0x280, 0x200, 0x320 */
-};
-
 static struct mxser_board mxser_boards[MXSER_BOARDS];
 static struct tty_driver *mxvar_sdriver;
 static struct mxser_log mxvar_log;
@@ -2763,9 +2759,8 @@ static struct pci_driver mxser_driver = {
 static int __init mxser_module_init(void)
 {
 	struct mxser_board *brd;
-	unsigned long cap;
-	unsigned int i, m, isaloop;
-	int retval, b;
+	unsigned int b, i, m;
+	int retval;
 
 	pr_debug("Loading module mxser ...\n");
 
@@ -2797,41 +2792,33 @@ static int __init mxser_module_init(void)
 		goto err_put;
 	}
 
-	m = 0;
 	/* Start finding ISA boards here */
-	for (isaloop = 0; isaloop < 2; isaloop++)
-		for (b = 0; b < MXSER_BOARDS && m < MXSER_BOARDS; b++) {
-			if (!isaloop)
-				cap = mxserBoardCAP[b]; /* predefined */
-			else
-				cap = ioaddr[b]; /* module param */
-
-			if (!cap)
-				continue;
-
-			brd = &mxser_boards[m];
-			retval = mxser_get_ISA_conf(cap, brd);
-			if (retval <= 0) {
-				brd->info = NULL;
-				continue;
-			}
+	for (m = 0, b = 0; b < MXSER_BOARDS; b++) {
+		if (!ioaddr[b])
+			continue;
+
+		brd = &mxser_boards[m];
+		retval = mxser_get_ISA_conf(!ioaddr[b], brd);
+		if (retval <= 0) {
+			brd->info = NULL;
+			continue;
+		}
 
-			printk(KERN_INFO "mxser: found MOXA %s board "
-				"(CAP=0x%x)\n",	brd->info->name, ioaddr[b]);
+		printk(KERN_INFO "mxser: found MOXA %s board (CAP=0x%lx)\n",
+				brd->info->name, ioaddr[b]);
 
-			/* mxser_initbrd will hook ISR. */
-			if (mxser_initbrd(brd, NULL) < 0) {
-				brd->info = NULL;
-				continue;
-			}
+		/* mxser_initbrd will hook ISR. */
+		if (mxser_initbrd(brd, NULL) < 0) {
+			brd->info = NULL;
+			continue;
+		}
 
-			brd->idx = m * MXSER_PORTS_PER_BOARD;
-			for (i = 0; i < brd->info->nports; i++)
-				tty_register_device(mxvar_sdriver, brd->idx + i,
-						NULL);
+		brd->idx = m * MXSER_PORTS_PER_BOARD;
+		for (i = 0; i < brd->info->nports; i++)
+			tty_register_device(mxvar_sdriver, brd->idx + i, NULL);
 
-			m++;
-		}
+		m++;
+	}
 
 	retval = pci_register_driver(&mxser_driver);
 	if (retval) {
-- 
GitLab


From ace7dd96695769f9d76980c7e52139e73228221c Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 25 Jul 2008 01:48:22 -0700
Subject: [PATCH 777/853] Char: mxser, various cleanups

- remove unused macro
- some whitespace cleanup
- useless debug prints removal

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mxser.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 9b4d03cf4e1..e30575e8764 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -48,7 +48,6 @@
 
 #define	MXSER_VERSION	"2.0.4"		/* 1.12 */
 #define	MXSERMAJOR	 174
-#define	MXSERCUMAJOR	 175
 
 #define MXSER_BOARDS		4	/* Max. boards */
 #define MXSER_PORTS_PER_BOARD	8	/* Max. ports per board */
@@ -191,7 +190,6 @@ struct mxser_log {
 	unsigned long txcnt[MXSER_PORTS];
 };
 
-
 struct mxser_mon {
 	unsigned long rxcnt;
 	unsigned long txcnt;
@@ -1305,13 +1303,9 @@ static void mxser_flush_chars(struct tty_struct *tty)
 	struct mxser_port *info = tty->driver_data;
 	unsigned long flags;
 
-	if (info->xmit_cnt <= 0 ||
-			tty->stopped ||
-			!info->port.xmit_buf ||
-			(tty->hw_stopped &&
-			 (info->type != PORT_16550A) &&
-			 (!info->board->chip_flag)
-			))
+	if (info->xmit_cnt <= 0 || tty->stopped || !info->port.xmit_buf ||
+			(tty->hw_stopped && info->type != PORT_16550A &&
+			 !info->board->chip_flag))
 		return;
 
 	spin_lock_irqsave(&info->slock, flags);
@@ -1329,9 +1323,7 @@ static int mxser_write_room(struct tty_struct *tty)
 	int ret;
 
 	ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1;
-	if (ret < 0)
-		ret = 0;
-	return ret;
+	return ret < 0 ? 0 : ret;
 }
 
 static int mxser_chars_in_buffer(struct tty_struct *tty)
@@ -2762,8 +2754,6 @@ static int __init mxser_module_init(void)
 	unsigned int b, i, m;
 	int retval;
 
-	pr_debug("Loading module mxser ...\n");
-
 	mxvar_sdriver = alloc_tty_driver(MXSER_PORTS + 1);
 	if (!mxvar_sdriver)
 		return -ENOMEM;
@@ -2829,8 +2819,6 @@ static int __init mxser_module_init(void)
 		} /* else: we have some ISA cards under control */
 	}
 
-	pr_debug("Done.\n");
-
 	return 0;
 err_unr:
 	tty_unregister_driver(mxvar_sdriver);
@@ -2843,8 +2831,6 @@ static void __exit mxser_module_exit(void)
 {
 	unsigned int i, j;
 
-	pr_debug("Unloading module mxser ...\n");
-
 	pci_unregister_driver(&mxser_driver);
 
 	for (i = 0; i < MXSER_BOARDS; i++) /* ISA remains */
@@ -2858,8 +2844,6 @@ static void __exit mxser_module_exit(void)
 	for (i = 0; i < MXSER_BOARDS; i++)
 		if (mxser_boards[i].info != NULL)
 			mxser_release_res(&mxser_boards[i], NULL, 1);
-
-	pr_debug("Done.\n");
 }
 
 module_init(mxser_module_init);
-- 
GitLab


From ec905a18656daa4d9300bad2bebc02d5dba7883d Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 25 Jul 2008 01:48:23 -0700
Subject: [PATCH 778/853] drivers/misc/phantom: note PCI

Tell users that the driver is only for PCI devices to stop asking for
support of firewire and parallel devices.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/Kconfig   | 4 +++-
 drivers/misc/phantom.c | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 7e37ba5afe3..321eb913463 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -77,11 +77,13 @@ config IBM_ASM
 	  for your IBM server.
 
 config PHANTOM
-	tristate "Sensable PHANToM"
+	tristate "Sensable PHANToM (PCI)"
 	depends on PCI
 	help
 	  Say Y here if you want to build a driver for Sensable PHANToM device.
 
+	  This driver is only for PCI PHANToMs.
+
 	  If you choose to build module, its name will be phantom. If unsure,
 	  say N here.
 
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 4ce3bdc2f95..daf585689ce 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -563,6 +563,6 @@ module_init(phantom_init);
 module_exit(phantom_exit);
 
 MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>");
-MODULE_DESCRIPTION("Sensable Phantom driver");
+MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(PHANTOM_VERSION);
-- 
GitLab


From f37e66173e0cc09b4e5a89eb0294abbefc15f435 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 25 Jul 2008 01:48:23 -0700
Subject: [PATCH 779/853] firmware: use memory_read_from_buffer()

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Markus Rechberger <markus.rechberger@amd.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/firmware_class.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index b0be1d18fee..c9c92b00fd5 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -184,7 +184,7 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 	struct device *dev = to_dev(kobj);
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
 	struct firmware *fw;
-	ssize_t ret_count = count;
+	ssize_t ret_count;
 
 	mutex_lock(&fw_lock);
 	fw = fw_priv->fw;
@@ -192,14 +192,8 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 		ret_count = -ENODEV;
 		goto out;
 	}
-	if (offset > fw->size) {
-		ret_count = 0;
-		goto out;
-	}
-	if (offset + ret_count > fw->size)
-		ret_count = fw->size - offset;
-
-	memcpy(buffer, fw->data + offset, ret_count);
+	ret_count = memory_read_from_buffer(buffer, count, &offset,
+						fw->data, fw->size);
 out:
 	mutex_unlock(&fw_lock);
 	return ret_count;
-- 
GitLab


From abe19b7b822a8fdbe3dbfd6e066d0698b4eefb06 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 25 Jul 2008 01:48:24 -0700
Subject: [PATCH 780/853] dcdbas: use memory_read_from_buffer()

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Doug Warzecha <Douglas_Warzecha@dell.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/dcdbas.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 0b624e927a6..c66817e7717 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -152,20 +152,11 @@ static ssize_t smi_data_read(struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t pos, size_t count)
 {
-	size_t max_read;
 	ssize_t ret;
 
 	mutex_lock(&smi_data_lock);
-
-	if (pos >= smi_data_buf_size) {
-		ret = 0;
-		goto out;
-	}
-
-	max_read = smi_data_buf_size - pos;
-	ret = min(max_read, count);
-	memcpy(buf, smi_data_buf + pos, ret);
-out:
+	ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf,
+					smi_data_buf_size);
 	mutex_unlock(&smi_data_lock);
 	return ret;
 }
-- 
GitLab


From d805dda412346225a50af2d399d958a4bc676c38 Mon Sep 17 00:00:00 2001
From: Abdel Benamrouche <draconux@gmail.com>
Date: Fri, 25 Jul 2008 01:48:25 -0700
Subject: [PATCH 781/853] fs/partition/check.c: fix return value warning

fs/partitions/check.c:381: warning: ignoring return value of 'device_add',
  declared with attribute warn_unused_result

[akpm@linux-foundation.org: multiple-return-statements-per-function are evil]
Signed-off-by: Abdel Benamrouche <draconux@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/partitions/check.c | 28 ++++++++++++++++++++++------
 include/linux/genhd.h |  2 +-
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index efef715135d..2e6413fbd2d 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
 static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
 		   whole_disk_show, NULL);
 
-void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
 {
 	struct hd_struct *p;
 	int err;
 
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
-		return;
+		return -ENOMEM;
 
 	if (!init_part_stats(p)) {
-		kfree(p);
-		return;
+		err = -ENOMEM;
+		goto out0;
 	}
 	p->start_sect = start;
 	p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
 
 	/* delay uevent until 'holders' subdir is created */
 	p->dev.uevent_suppress = 1;
-	device_add(&p->dev);
+	err = device_add(&p->dev);
+	if (err)
+		goto out1;
 	partition_sysfs_add_subdir(p);
 	p->dev.uevent_suppress = 0;
-	if (flags & ADDPART_FLAG_WHOLEDISK)
+	if (flags & ADDPART_FLAG_WHOLEDISK) {
 		err = device_create_file(&p->dev, &dev_attr_whole_disk);
+		if (err)
+			goto out2;
+	}
 
 	/* suppress uevent if the disk supresses it */
 	if (!disk->dev.uevent_suppress)
 		kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+
+	return 0;
+
+out2:
+	device_del(&p->dev);
+out1:
+	put_device(&p->dev);
+	free_part_stats(p);
+out0:
+	kfree(p);
+	return err;
 }
 
 /* Not exported, helper to add_disk(). */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e8787417f65..118216f1bd3 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -541,7 +541,7 @@ extern dev_t blk_lookup_devt(const char *name, int part);
 extern char *disk_name (struct gendisk *hd, int part, char *buf);
 
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
-extern void add_partition(struct gendisk *, int, sector_t, sector_t, int);
+extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
 extern void delete_partition(struct gendisk *, int);
 extern void printk_all_partitions(void);
 
-- 
GitLab


From 04ebd4aee52b06a2c38127d9208546e5b96f3a19 Mon Sep 17 00:00:00 2001
From: Abdel Benamrouche <draconux@gmail.com>
Date: Fri, 25 Jul 2008 01:48:26 -0700
Subject: [PATCH 782/853] block/ioctl.c and fs/partition/check.c: check value
 returned by add_partition()

Now that add_partition() has been taught to propagate errors, let's check them.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Abdel Benamrouche <draconux@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/ioctl.c         |  5 +++--
 fs/partitions/check.c | 10 ++++++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/block/ioctl.c b/block/ioctl.c
index 52d6385216a..77185e5c026 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -17,6 +17,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 	long long start, length;
 	int part;
 	int i;
+	int err;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -61,9 +62,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 				}
 			}
 			/* all seems OK */
-			add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
+			err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
 			mutex_unlock(&bdev->bd_mutex);
-			return 0;
+			return err;
 		case BLKPG_DEL_PARTITION:
 			if (!disk->part[part-1])
 				return -ENXIO;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 2e6413fbd2d..7d6b34e201d 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -499,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 		if (!size)
 			continue;
 		if (from + size > get_capacity(disk)) {
-			printk(" %s: p%d exceeds device capacity\n",
+			printk(KERN_ERR " %s: p%d exceeds device capacity\n",
 				disk->disk_name, p);
+			continue;
+		}
+		res = add_partition(disk, p, from, size, state->parts[p].flags);
+		if (res) {
+			printk(KERN_ERR " %s: p%d could not be added: %d\n",
+				disk->disk_name, p, -res);
+			continue;
 		}
-		add_partition(disk, p, from, size, state->parts[p].flags);
 #ifdef CONFIG_BLK_DEV_MD
 		if (state->parts[p].flags & ADDPART_FLAG_RAID)
 			md_autodetect_dev(bdev->bd_dev+p);
-- 
GitLab


From d991696263a704be7f41ac186f1a0ed17963c260 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 25 Jul 2008 01:48:26 -0700
Subject: [PATCH 783/853] fs/partitions/efi: convert to pr_debug

convert the local Dprintk() compile time debug printk wrappers to the
generic pr_debug() wrapper.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/partitions/efi.c | 42 +++++++++++++++++-------------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index e7b07006bc4..038a6022152 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
 #include "check.h"
 #include "efi.h"
 
-#undef EFI_DEBUG
-#ifdef EFI_DEBUG
-#define Dprintk(x...) printk(KERN_DEBUG x)
-#else
-#define Dprintk(x...)
-#endif
-
 /* This allows a kernel command line option 'gpt' to override
  * the test for invalid PMBR.  Not __initdata because reloading
  * the partition tables happens after init too.
@@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
 
 	/* Check the GUID Partition Table signature */
 	if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
-		Dprintk("GUID Partition Table Header signature is wrong:"
-			"%lld != %lld\n",
-			(unsigned long long)le64_to_cpu((*gpt)->signature),
-			(unsigned long long)GPT_HEADER_SIGNATURE);
+		pr_debug("GUID Partition Table Header signature is wrong:"
+			 "%lld != %lld\n",
+			 (unsigned long long)le64_to_cpu((*gpt)->signature),
+			 (unsigned long long)GPT_HEADER_SIGNATURE);
 		goto fail;
 	}
 
@@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
 	crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
 
 	if (crc != origcrc) {
-		Dprintk
-		    ("GUID Partition Table Header CRC is wrong: %x != %x\n",
-		     crc, origcrc);
+		pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
+			 crc, origcrc);
 		goto fail;
 	}
 	(*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
 	/* Check that the my_lba entry points to the LBA that contains
 	 * the GUID Partition Table */
 	if (le64_to_cpu((*gpt)->my_lba) != lba) {
-		Dprintk("GPT my_lba incorrect: %lld != %lld\n",
-			(unsigned long long)le64_to_cpu((*gpt)->my_lba),
-			(unsigned long long)lba);
+		pr_debug("GPT my_lba incorrect: %lld != %lld\n",
+			 (unsigned long long)le64_to_cpu((*gpt)->my_lba),
+			 (unsigned long long)lba);
 		goto fail;
 	}
 
@@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
 	 */
 	lastlba = last_lba(bdev);
 	if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
-		Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n",
-			(unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
-			(unsigned long long)lastlba);
+		pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
+			 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
+			 (unsigned long long)lastlba);
 		goto fail;
 	}
 	if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
-		Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n",
-			(unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
-			(unsigned long long)lastlba);
+		pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
+			 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
+			 (unsigned long long)lastlba);
 		goto fail;
 	}
 
@@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
 			le32_to_cpu((*gpt)->sizeof_partition_entry));
 
 	if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
-		Dprintk("GUID Partitition Entry Array CRC check failed.\n");
+		pr_debug("GUID Partitition Entry Array CRC check failed.\n");
 		goto fail_ptes;
 	}
 
@@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
 		return 0;
 	}
 
-	Dprintk("GUID Partition Table is valid!  Yea!\n");
+	pr_debug("GUID Partition Table is valid!  Yea!\n");
 
 	for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
 		if (!is_pte_valid(&ptes[i], last_lba(bdev)))
-- 
GitLab


From 25377479de7539fdc871a0f0ecaa39da42353bbc Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 25 Jul 2008 01:48:27 -0700
Subject: [PATCH 784/853] dell_rbu: use memory_read_from_buffer()

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Abhay Salunke <Abhay_Salunke@dell.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/dell_rbu.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 7430e218cda..13946ebd77d 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -507,11 +507,6 @@ static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count)
 
 static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
 {
-	unsigned char *ptemp = NULL;
-	size_t bytes_left = 0;
-	size_t data_length = 0;
-	ssize_t ret_count = 0;
-
 	/* check to see if we have something to return */
 	if ((rbu_data.image_update_buffer == NULL) ||
 		(rbu_data.bios_image_size == 0)) {
@@ -519,28 +514,11 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
 			"bios_image_size %lu\n",
 			rbu_data.image_update_buffer,
 			rbu_data.bios_image_size);
-		ret_count = -ENOMEM;
-		goto read_rbu_data_exit;
-	}
-
-	if (pos > rbu_data.bios_image_size) {
-		ret_count = 0;
-		goto read_rbu_data_exit;
+		return -ENOMEM;
 	}
 
-	bytes_left = rbu_data.bios_image_size - pos;
-	data_length = min(bytes_left, count);
-
-	ptemp = rbu_data.image_update_buffer;
-	memcpy(buffer, (ptemp + pos), data_length);
-
-	if ((pos + count) > rbu_data.bios_image_size)
-		/* this was the last copy */
-		ret_count = bytes_left;
-	else
-		ret_count = count;
-      read_rbu_data_exit:
-	return ret_count;
+	return memory_read_from_buffer(buffer, count, &pos,
+			rbu_data.image_update_buffer, rbu_data.bios_image_size);
 }
 
 static ssize_t read_rbu_data(struct kobject *kobj,
-- 
GitLab


From cd9a6f1078ed07fe919667b73e829f3bac485573 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:28 -0700
Subject: [PATCH 785/853] unexport proc_clear_tty

With the removal of the Solaris binary emulation the export of
proc_clear_tty became unused.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 6f4d856df98..e1b46bc7e43 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3580,7 +3580,6 @@ void proc_clear_tty(struct task_struct *p)
 	p->signal->tty = NULL;
 	spin_unlock_irq(&p->sighand->siglock);
 }
-EXPORT_SYMBOL(proc_clear_tty);
 
 /* Called under the sighand lock */
 
-- 
GitLab


From 6e644c3126149b65460610fe5a00d8a162092abe Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:28 -0700
Subject: [PATCH 786/853] move proc_kmsg_operations to fs/proc/internal.h

This patch moves the extern of struct proc_kmsg_operations to
fs/proc/internal.h and adds an #include "internal.h" to fs/proc/kmsg.c
so that the latter sees the former.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h      | 1 +
 fs/proc/kmsg.c          | 2 ++
 include/linux/proc_fs.h | 1 -
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca80590..8d67616e7bb 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
 extern const struct file_operations proc_clear_refs_operations;
 extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
+extern const struct file_operations proc_kmsg_operations;
 extern const struct inode_operations proc_net_inode_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9..9fd5df3f40c 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
+#include "internal.h"
+
 extern wait_queue_head_t log_wait;
 
 extern int do_syslog(int type, char __user *bug, int count);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 15a9eaf4a80..cdabc2fc02f 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -138,7 +138,6 @@ extern int proc_readdir(struct file *, void *, filldir_t);
 extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
 
 extern const struct file_operations proc_kcore_operations;
-extern const struct file_operations proc_kmsg_operations;
 extern const struct file_operations ppc_htab_operations;
 
 extern int pid_ns_prepare_proc(struct pid_namespace *ns);
-- 
GitLab


From 881adb85358309ea9c6f707394002719982ec607 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Jul 2008 01:48:29 -0700
Subject: [PATCH 787/853] proc: always do ->release

Current two-stage scheme of removing PDE emphasizes one bug in proc:

		open
				rmmod
				remove_proc_entry
		close

->release won't be called because ->proc_fops were cleared.  In simple
cases it's small memory leak.

For every ->open, ->release has to be done.  List of openers is introduced
which is traversed at remove_proc_entry() if needed.

Discussions with Al long ago (sigh).

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c       | 14 ++++++++
 fs/proc/inode.c         | 74 ++++++++++++++++++++++++++++++++++++++---
 fs/proc/internal.h      |  7 ++++
 include/linux/proc_fs.h |  1 +
 4 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cef..bc0a0dd2d84 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 	ent->pde_users = 0;
 	spin_lock_init(&ent->pde_unload_lock);
 	ent->pde_unload_completion = NULL;
+	INIT_LIST_HEAD(&ent->pde_openers);
  out:
 	return ent;
 }
@@ -789,6 +790,19 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	spin_unlock(&de->pde_unload_lock);
 
 continue_removing:
+	spin_lock(&de->pde_unload_lock);
+	while (!list_empty(&de->pde_openers)) {
+		struct pde_opener *pdeo;
+
+		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+		list_del(&pdeo->lh);
+		spin_unlock(&de->pde_unload_lock);
+		pdeo->release(pdeo->inode, pdeo->file);
+		kfree(pdeo);
+		spin_lock(&de->pde_unload_lock);
+	}
+	spin_unlock(&de->pde_unload_lock);
+
 	if (S_ISDIR(de->mode))
 		parent->nlink--;
 	de->nlink = 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d1001791..354c0848582 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -126,12 +126,17 @@ static const struct super_operations proc_sops = {
 	.remount_fs	= proc_remount,
 };
 
-static void pde_users_dec(struct proc_dir_entry *pde)
+static void __pde_users_dec(struct proc_dir_entry *pde)
 {
-	spin_lock(&pde->pde_unload_lock);
 	pde->pde_users--;
 	if (pde->pde_unload_completion && pde->pde_users == 0)
 		complete(pde->pde_unload_completion);
+}
+
+static void pde_users_dec(struct proc_dir_entry *pde)
+{
+	spin_lock(&pde->pde_unload_lock);
+	__pde_users_dec(pde);
 	spin_unlock(&pde->pde_unload_lock);
 }
 
@@ -318,36 +323,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	struct proc_dir_entry *pde = PDE(inode);
 	int rv = 0;
 	int (*open)(struct inode *, struct file *);
+	int (*release)(struct inode *, struct file *);
+	struct pde_opener *pdeo;
+
+	/*
+	 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
+	 * sequence. ->release won't be called because ->proc_fops will be
+	 * cleared. Depending on complexity of ->release, consequences vary.
+	 *
+	 * We can't wait for mercy when close will be done for real, it's
+	 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
+	 * by hand in remove_proc_entry(). For this, save opener's credentials
+	 * for later.
+	 */
+	pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+	if (!pdeo)
+		return -ENOMEM;
 
 	spin_lock(&pde->pde_unload_lock);
 	if (!pde->proc_fops) {
 		spin_unlock(&pde->pde_unload_lock);
+		kfree(pdeo);
 		return rv;
 	}
 	pde->pde_users++;
 	open = pde->proc_fops->open;
+	release = pde->proc_fops->release;
 	spin_unlock(&pde->pde_unload_lock);
 
 	if (open)
 		rv = open(inode, file);
 
-	pde_users_dec(pde);
+	spin_lock(&pde->pde_unload_lock);
+	if (rv == 0 && release) {
+		/* To know what to release. */
+		pdeo->inode = inode;
+		pdeo->file = file;
+		/* Strictly for "too late" ->release in proc_reg_release(). */
+		pdeo->release = release;
+		list_add(&pdeo->lh, &pde->pde_openers);
+	} else
+		kfree(pdeo);
+	__pde_users_dec(pde);
+	spin_unlock(&pde->pde_unload_lock);
 	return rv;
 }
 
+static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
+					struct inode *inode, struct file *file)
+{
+	struct pde_opener *pdeo;
+
+	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+		if (pdeo->inode == inode && pdeo->file == file)
+			return pdeo;
+	}
+	return NULL;
+}
+
 static int proc_reg_release(struct inode *inode, struct file *file)
 {
 	struct proc_dir_entry *pde = PDE(inode);
 	int rv = 0;
 	int (*release)(struct inode *, struct file *);
+	struct pde_opener *pdeo;
 
 	spin_lock(&pde->pde_unload_lock);
+	pdeo = find_pde_opener(pde, inode, file);
 	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
+		/*
+		 * Can't simply exit, __fput() will think that everything is OK,
+		 * and move on to freeing struct file. remove_proc_entry() will
+		 * find slacker in opener's list and will try to do non-trivial
+		 * things with struct file. Therefore, remove opener from list.
+		 *
+		 * But if opener is removed from list, who will ->release it?
+		 */
+		if (pdeo) {
+			list_del(&pdeo->lh);
+			spin_unlock(&pde->pde_unload_lock);
+			rv = pdeo->release(inode, file);
+			kfree(pdeo);
+		} else
+			spin_unlock(&pde->pde_unload_lock);
 		return rv;
 	}
 	pde->pde_users++;
 	release = pde->proc_fops->release;
+	if (pdeo) {
+		list_del(&pdeo->lh);
+		kfree(pdeo);
+	}
 	spin_unlock(&pde->pde_unload_lock);
 
 	if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 8d67616e7bb..442202314d5 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -89,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
 		struct dentry *dentry);
 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
 		filldir_t filldir);
+
+struct pde_opener {
+	struct inode *inode;
+	struct file *file;
+	int (*release)(struct inode *, struct file *);
+	struct list_head lh;
+};
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index cdabc2fc02f..f560d1705af 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -79,6 +79,7 @@ struct proc_dir_entry {
 	int pde_users;	/* number of callers into module in progress */
 	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
 	struct completion *pde_unload_completion;
+	struct list_head pde_openers;	/* who did ->open, but not ->release */
 };
 
 struct kcore_list {
-- 
GitLab


From a9bd4a3e070ba7494f154e1a11687a8a957d88dc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Jul 2008 01:48:30 -0700
Subject: [PATCH 788/853] proc: remove pathetic remount code

MS_RMT_MASK will unmask changes in do_remount_sb() anyway.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/inode.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 354c0848582..02eca2ed9dd 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -111,19 +111,12 @@ int __init proc_init_inodecache(void)
 	return 0;
 }
 
-static int proc_remount(struct super_block *sb, int *flags, char *data)
-{
-	*flags |= MS_NODIRATIME;
-	return 0;
-}
-
 static const struct super_operations proc_sops = {
 	.alloc_inode	= proc_alloc_inode,
 	.destroy_inode	= proc_destroy_inode,
 	.drop_inode	= generic_delete_inode,
 	.delete_inode	= proc_delete_inode,
 	.statfs		= simple_statfs,
-	.remount_fs	= proc_remount,
 };
 
 static void __pde_users_dec(struct proc_dir_entry *pde)
-- 
GitLab


From 6eedf8d30d2b48e86fbcee1a32fb2fa5f42219ee Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Jul 2008 01:48:30 -0700
Subject: [PATCH 789/853] proc: move Kconfig to fs/proc/Kconfig

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig      | 60 +------------------------------------------------
 fs/proc/Kconfig | 59 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 59 deletions(-)
 create mode 100644 fs/proc/Kconfig

diff --git a/fs/Kconfig b/fs/Kconfig
index ed563b9e352..97e3bdedb1e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@ endif # BLOCK
 
 menu "Pseudo filesystems"
 
-config PROC_FS
-	bool "/proc file system support" if EMBEDDED
-	default y
-	help
-	  This is a virtual file system providing information about the status
-	  of the system. "Virtual" means that it doesn't take up any space on
-	  your hard disk: the files are created on the fly by the kernel when
-	  you try to access them. Also, you cannot read the files with older
-	  version of the program less: you need to use more or cat.
-
-	  It's totally cool; for example, "cat /proc/interrupts" gives
-	  information about what the different IRQs are used for at the moment
-	  (there is a small number of Interrupt ReQuest lines in your computer
-	  that are used by the attached devices to gain the CPU's attention --
-	  often a source of trouble if two devices are mistakenly configured
-	  to use the same IRQ). The program procinfo to display some
-	  information about your system gathered from the /proc file system.
-
-	  Before you can use the /proc file system, it has to be mounted,
-	  meaning it has to be given a location in the directory hierarchy.
-	  That location should be /proc. A command such as "mount -t proc proc
-	  /proc" or the equivalent line in /etc/fstab does the job.
-
-	  The /proc file system is explained in the file
-	  <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
-	  ("man 5 proc").
-
-	  This option will enlarge your kernel by about 67 KB. Several
-	  programs depend on this, so everyone should say Y here.
-
-config PROC_KCORE
-	bool "/proc/kcore support" if !ARM
-	depends on PROC_FS && MMU
-
-config PROC_VMCORE
-        bool "/proc/vmcore support (EXPERIMENTAL)"
-        depends on PROC_FS && CRASH_DUMP
-	default y
-        help
-        Exports the dump image of crashed kernel in ELF format.
-
-config PROC_SYSCTL
-	bool "Sysctl support (/proc/sys)" if EMBEDDED
-	depends on PROC_FS
-	select SYSCTL
-	default y
-	---help---
-	  The sysctl interface provides a means of dynamically changing
-	  certain kernel parameters and variables on the fly without requiring
-	  a recompile of the kernel or reboot of the system.  The primary
-	  interface is through /proc/sys.  If you say Y here a tree of
-	  modifiable sysctl entries will be generated beneath the
-          /proc/sys directory. They are explained in the files
-	  in <file:Documentation/sysctl/>.  Note that enabling this
-	  option will enlarge the kernel by at least 8 KB.
-
-	  As it is generally a good thing, you should say Y here unless
-	  building a kernel for install/rescue disks or your system is very
-	  limited in memory.
+source "fs/proc/Kconfig"
 
 config SYSFS
 	bool "sysfs file system support" if EMBEDDED
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 00000000000..73cd7a418f0
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
+config PROC_FS
+	bool "/proc file system support" if EMBEDDED
+	default y
+	help
+	  This is a virtual file system providing information about the status
+	  of the system. "Virtual" means that it doesn't take up any space on
+	  your hard disk: the files are created on the fly by the kernel when
+	  you try to access them. Also, you cannot read the files with older
+	  version of the program less: you need to use more or cat.
+
+	  It's totally cool; for example, "cat /proc/interrupts" gives
+	  information about what the different IRQs are used for at the moment
+	  (there is a small number of Interrupt ReQuest lines in your computer
+	  that are used by the attached devices to gain the CPU's attention --
+	  often a source of trouble if two devices are mistakenly configured
+	  to use the same IRQ). The program procinfo to display some
+	  information about your system gathered from the /proc file system.
+
+	  Before you can use the /proc file system, it has to be mounted,
+	  meaning it has to be given a location in the directory hierarchy.
+	  That location should be /proc. A command such as "mount -t proc proc
+	  /proc" or the equivalent line in /etc/fstab does the job.
+
+	  The /proc file system is explained in the file
+	  <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
+	  ("man 5 proc").
+
+	  This option will enlarge your kernel by about 67 KB. Several
+	  programs depend on this, so everyone should say Y here.
+
+config PROC_KCORE
+	bool "/proc/kcore support" if !ARM
+	depends on PROC_FS && MMU
+
+config PROC_VMCORE
+        bool "/proc/vmcore support (EXPERIMENTAL)"
+        depends on PROC_FS && CRASH_DUMP
+	default y
+        help
+        Exports the dump image of crashed kernel in ELF format.
+
+config PROC_SYSCTL
+	bool "Sysctl support (/proc/sys)" if EMBEDDED
+	depends on PROC_FS
+	select SYSCTL
+	default y
+	---help---
+	  The sysctl interface provides a means of dynamically changing
+	  certain kernel parameters and variables on the fly without requiring
+	  a recompile of the kernel or reboot of the system.  The primary
+	  interface is through /proc/sys.  If you say Y here a tree of
+	  modifiable sysctl entries will be generated beneath the
+          /proc/sys directory. They are explained in the files
+	  in <file:Documentation/sysctl/>.  Note that enabling this
+	  option will enlarge the kernel by at least 8 KB.
+
+	  As it is generally a good thing, you should say Y here unless
+	  building a kernel for install/rescue disks or your system is very
+	  limited in memory.
-- 
GitLab


From 339caf2a224fc9af0f01686bf287dda32c6efca6 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Fri, 25 Jul 2008 01:48:31 -0700
Subject: [PATCH 790/853] proc: misplaced export of find_get_pid

Move EXPORT_SYMBOL right after the func

Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/pid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index 30bd5d4b2ac..753fd90d9ec 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -435,6 +435,7 @@ struct pid *find_get_pid(pid_t nr)
 
 	return pid;
 }
+EXPORT_SYMBOL_GPL(find_get_pid);
 
 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
 {
@@ -497,7 +498,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 
 	return pid;
 }
-EXPORT_SYMBOL_GPL(find_get_pid);
 
 /*
  * The pid hash table is scaled according to the amount of memory in the
-- 
GitLab


From 99541c23cd32bacf1a591ca537a7c0cb9053ad7e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@parallels.com>
Date: Fri, 25 Jul 2008 01:48:31 -0700
Subject: [PATCH 791/853] sysctl: check for bogus modes

Catch, e. g., 644/0644 typo.

Signed-off-by: Alexey Dobriyan <adobriyan@parallels.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl_check.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index c09350d564f..c35da23ab8f 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1532,6 +1532,8 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
 			sysctl_check_leaf(namespaces, table, &fail);
 		}
 		sysctl_check_bin_path(table, &fail);
+		if (table->mode > 0777)
+			set_fail(&fail, table, "bogus .mode");
 		if (fail) {
 			set_fail(&fail, table, NULL);
 			error = -EINVAL;
-- 
GitLab


From 4ecb90090c84210a8bd2a9d7a5906e616735873c Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 25 Jul 2008 01:48:32 -0700
Subject: [PATCH 792/853] sysctl: allow override of /proc/sys/net with
 CAP_NET_ADMIN

Extend the permission check for networking sysctl's to allow modification
when current process has CAP_NET_ADMIN capability and is not root.  This
version uses the until now unused permissions hook to override the mode
value for /proc/sys/net if accessed by a user with capabilities.

Found while working with Quagga.  It is impossible to turn forwarding
on/off through the command interface because Quagga uses secure coding
practice of dropping privileges during initialization and only raising via
capabilities when necessary.  Since the daemon has reset real/effective
uid after initialization, all attempts to access /proc/sys/net variables
will fail.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morgan <morgan@kernel.org>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/sysctl_net.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 007c1a6708e..63ada437fc2 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -35,8 +35,22 @@ net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
 	return &namespaces->net_ns->sysctl_table_headers;
 }
 
+/* Return standard mode bits for table entry. */
+static int net_ctl_permissions(struct ctl_table_root *root,
+			       struct nsproxy *nsproxy,
+			       struct ctl_table *table)
+{
+	/* Allow network administrator to have same access as root. */
+	if (capable(CAP_NET_ADMIN)) {
+		int mode = (table->mode >> 6) & 7;
+		return (mode << 6) | (mode << 3) | mode;
+	}
+	return table->mode;
+}
+
 static struct ctl_table_root net_sysctl_root = {
 	.lookup = net_ctl_header_lookup,
+	.permissions = net_ctl_permissions,
 };
 
 static LIST_HEAD(net_sysctl_ro_tables);
-- 
GitLab


From 3ae4eed34be0177a8e003411a84e4ee212adbced Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:34 -0700
Subject: [PATCH 793/853] proper pid{hash,map}_init() prototypes

This patch adds proper prototypes for pid{hash,map}_init() in
include/linux/pid_namespace.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid_namespace.h | 3 +++
 init/main.c                   | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index caff5283d15..1a49ab5ec7b 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -85,4 +85,7 @@ static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
 	return tsk->nsproxy->pid_ns->child_reaper;
 }
 
+void pidhash_init(void);
+void pidmap_init(void);
+
 #endif /* _LINUX_PID_NS_H */
diff --git a/init/main.c b/init/main.c
index 2769dc031c6..0604cbcaf1e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -87,8 +87,6 @@ extern void init_IRQ(void);
 extern void fork_init(unsigned long);
 extern void mca_init(void);
 extern void sbus_init(void);
-extern void pidhash_init(void);
-extern void pidmap_init(void);
 extern void prio_tree_init(void);
 extern void radix_tree_init(void);
 extern void free_initmem(void);
-- 
GitLab


From 33166b1ffca5e1945246bcaa77d72a22b0d3e531 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Fri, 25 Jul 2008 01:48:35 -0700
Subject: [PATCH 794/853] shrink struct pid by removing padding on 64 bit
 builds

When struct pid is built on a 64 bit platform gcc has to insert padding to
maintain the correct alignment, by simply reordering its members the
memory usage shrinks from 88 bytes to 80.

I've successfully run with this patch on my desktop AMD64 machine.

There are no significant kernel size changes to a default config.X86_64
on the latest git v2.6.26-rc1

   text    data     bss     dec     hex filename
5404828  976760  734280 7115868  6c945c vmlinux
5404811  976760  734280 7115851  6c944b vmlinux.pid-patch

Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index c21c7e8124a..6f084b9e2c4 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -57,10 +57,10 @@ struct upid {
 struct pid
 {
 	atomic_t count;
+	unsigned int level;
 	/* lists of tasks that use this pid */
 	struct hlist_head tasks[PIDTYPE_MAX];
 	struct rcu_head rcu;
-	unsigned int level;
 	struct upid numbers[1];
 };
 
-- 
GitLab


From 19b0cfcca41dd772065671ad0584e1cea0f3fd13 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:35 -0700
Subject: [PATCH 795/853] pidns: remove now unused kill_proc function

This function operated on a pid_t to kill a task, which is no longer valid
in a containerized system.

It has finally lost all its users and we can safely remove it from the
tree.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 -
 kernel/signal.c       | 12 ------------
 2 files changed, 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0560999eb1d..134cb5cb506 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1800,7 +1800,6 @@ extern void force_sig(int, struct task_struct *);
 extern void force_sig_specific(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
 extern void zap_other_threads(struct task_struct *p);
-extern int kill_proc(pid_t, int, int);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
 extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
diff --git a/kernel/signal.c b/kernel/signal.c
index 5c7b7eaa0dc..82c3545596c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1228,17 +1228,6 @@ int kill_pid(struct pid *pid, int sig, int priv)
 }
 EXPORT_SYMBOL(kill_pid);
 
-int
-kill_proc(pid_t pid, int sig, int priv)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
-	rcu_read_unlock();
-	return ret;
-}
-
 /*
  * These functions support sending signals using preallocated sigqueue
  * structures.  This is needed "because realtime applications cannot
@@ -1906,7 +1895,6 @@ EXPORT_SYMBOL(recalc_sigpending);
 EXPORT_SYMBOL_GPL(dequeue_signal);
 EXPORT_SYMBOL(flush_signals);
 EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(kill_proc);
 EXPORT_SYMBOL(ptrace_notify);
 EXPORT_SYMBOL(send_sig);
 EXPORT_SYMBOL(send_sig_info);
-- 
GitLab


From e49859e71e0318b564de1546bdc30fab738f9deb Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:36 -0700
Subject: [PATCH 796/853] pidns: remove now unused find_pid function.

This one had only one user so far - kill_proc, which is now removed, so
drop this (invalid in a namespaced world) call too.

And of course - erase all references to it from comments.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h   | 4 +---
 include/linux/sched.h | 2 +-
 kernel/pid.c          | 8 +-------
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 6f084b9e2c4..ff1b2a5814d 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -48,7 +48,7 @@ enum pid_type
  */
 
 struct upid {
-	/* Try to keep pid_chain in the same cacheline as nr for find_pid */
+	/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
 	int nr;
 	struct pid_namespace *ns;
 	struct hlist_node pid_chain;
@@ -105,14 +105,12 @@ extern struct pid_namespace init_pid_ns;
  * or rcu_read_lock() held.
  *
  * find_pid_ns() finds the pid in the namespace specified
- * find_pid() find the pid by its global id, i.e. in the init namespace
  * find_vpid() finr the pid by its virtual id, i.e. in the current namespace
  *
  * see also find_task_by_pid() set in include/linux/sched.h
  */
 extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
 extern struct pid *find_vpid(int nr);
-extern struct pid *find_pid(int nr);
 
 /*
  * Lookup a PID in the hash table, and return with it's count elevated.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 134cb5cb506..182da1550fa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1718,7 +1718,7 @@ extern struct pid_namespace init_pid_ns;
  * find_task_by_pid():
  *      finds a task by its global pid
  *
- * see also find_pid() etc in include/linux/pid.h
+ * see also find_vpid() etc in include/linux/pid.h
  */
 
 extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
diff --git a/kernel/pid.c b/kernel/pid.c
index 753fd90d9ec..064e76afa50 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -309,12 +309,6 @@ struct pid *find_vpid(int nr)
 }
 EXPORT_SYMBOL_GPL(find_vpid);
 
-struct pid *find_pid(int nr)
-{
-	return find_pid_ns(nr, &init_pid_ns);
-}
-EXPORT_SYMBOL_GPL(find_pid);
-
 /*
  * attach_pid() must be called with the tasklist_lock write-held.
  */
@@ -483,7 +477,7 @@ EXPORT_SYMBOL(task_session_nr_ns);
 /*
  * Used by proc to find the first pid that is greater then or equal to nr.
  *
- * If there is a pid at nr this function is exactly the same as find_pid.
+ * If there is a pid at nr this function is exactly the same as find_pid_ns.
  */
 struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 {
-- 
GitLab


From dbda0de52618d13d1b927c7ba7bb839cfddc4e8c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:37 -0700
Subject: [PATCH 797/853] pidns: remove find_task_by_pid, unused for a long
 time

It seems to me that it was a mistake marking this function as deprecated
and scheduling it for removal, rather than resolutely removing it after
the last caller's death.

Anyway - better late than never.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/feature-removal-schedule.txt | 18 ------------------
 include/linux/pid.h                        |  2 +-
 include/linux/sched.h                      |  6 ------
 3 files changed, 1 insertion(+), 25 deletions(-)

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 09c4a1efb8e..721c71b86e0 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -138,24 +138,6 @@ Who:	Kay Sievers <kay.sievers@suse.de>
 
 ---------------------------
 
-What:	find_task_by_pid
-When:	2.6.26
-Why:	With pid namespaces, calling this funciton will return the
-	wrong task when called from inside a namespace.
-
-	The best way to save a task pid and find a task by this
-	pid later, is to find this task's struct pid pointer (or get
-	it directly from the task) and call pid_task() later.
-
-	If someone really needs to get a task by its pid_t, then
-	he most likely needs the find_task_by_vpid() to get the
-	task from the same namespace as the current task is in, but
-	this may be not so in general.
-
-Who:	Pavel Emelyanov <xemul@openvz.org>
-
----------------------------
-
 What:	ACPI procfs interface
 When:	July 2008
 Why:	ACPI sysfs conversion should be finished by January 2008.
diff --git a/include/linux/pid.h b/include/linux/pid.h
index ff1b2a5814d..22921ac4cfd 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -107,7 +107,7 @@ extern struct pid_namespace init_pid_ns;
  * find_pid_ns() finds the pid in the namespace specified
  * find_vpid() finr the pid by its virtual id, i.e. in the current namespace
  *
- * see also find_task_by_pid() set in include/linux/sched.h
+ * see also find_task_by_vpid() set in include/linux/sched.h
  */
 extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
 extern struct pid *find_vpid(int nr);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 182da1550fa..354ef478a80 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1715,8 +1715,6 @@ extern struct pid_namespace init_pid_ns;
  *      finds a task by its pid in the specified namespace
  * find_task_by_vpid():
  *      finds a task by its virtual pid
- * find_task_by_pid():
- *      finds a task by its global pid
  *
  * see also find_vpid() etc in include/linux/pid.h
  */
@@ -1724,10 +1722,6 @@ extern struct pid_namespace init_pid_ns;
 extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
 		struct pid_namespace *ns);
 
-static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr)
-{
-	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
-}
 extern struct task_struct *find_task_by_vpid(pid_t nr);
 extern struct task_struct *find_task_by_pid_ns(pid_t nr,
 		struct pid_namespace *ns);
-- 
GitLab


From 24879a8e3e68f146d4d85528cc0b5dea712b77c5 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias@kaehlcke.net>
Date: Fri, 25 Jul 2008 01:48:38 -0700
Subject: [PATCH 798/853] aoe: convert emsgs_sema into a completion

ATA over Ethernet: The semaphore emsgs_sema is used for signalling an
event, convert it into a completion.

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Cc: "Ed L. Cashin" <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/aoe/aoechr.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index c04440cd6a3..181ebb85f0b 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -6,6 +6,7 @@
 
 #include <linux/hdreg.h>
 #include <linux/blkdev.h>
+#include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/smp_lock.h>
 #include "aoe.h"
@@ -36,7 +37,7 @@ struct ErrMsg {
 
 static struct ErrMsg emsgs[NMSG];
 static int emsgs_head_idx, emsgs_tail_idx;
-static struct semaphore emsgs_sema;
+static struct completion emsgs_comp;
 static spinlock_t emsgs_lock;
 static int nblocked_emsgs_readers;
 static struct class *aoe_class;
@@ -141,7 +142,7 @@ bail:		spin_unlock_irqrestore(&emsgs_lock, flags);
 	spin_unlock_irqrestore(&emsgs_lock, flags);
 
 	if (nblocked_emsgs_readers)
-		up(&emsgs_sema);
+		complete(&emsgs_comp);
 }
 
 static ssize_t
@@ -221,7 +222,7 @@ aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off)
 
 		spin_unlock_irqrestore(&emsgs_lock, flags);
 
-		n = down_interruptible(&emsgs_sema);
+		n = wait_for_completion_interruptible(&emsgs_comp);
 
 		spin_lock_irqsave(&emsgs_lock, flags);
 
@@ -269,7 +270,7 @@ aoechr_init(void)
 		printk(KERN_ERR "aoe: can't register char device\n");
 		return n;
 	}
-	sema_init(&emsgs_sema, 0);
+	init_completion(&emsgs_comp);
 	spin_lock_init(&emsgs_lock);
 	aoe_class = class_create(THIS_MODULE, "aoe");
 	if (IS_ERR(aoe_class)) {
-- 
GitLab


From 28325df0d9339b7f3aba9c45174d4586223ef46b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 25 Jul 2008 01:48:38 -0700
Subject: [PATCH 799/853] markers: use rcu_barrier_sched() and call_rcu_sched()

rcu_barrier_sched() and call_rcu_sched() were introduced in 2.6.26 for the
Markers.  Change the marker code to use them.

It can be seen as a fix since the marker code was using an ugly,
temporary, #ifdef hack to work around CONFIG_PREEMPT_RCU.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Paul McKenney <paulmck@us.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/marker.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 1abfb923b76..971da531790 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -441,7 +441,7 @@ static int remove_marker(const char *name)
 	hlist_del(&e->hlist);
 	/* Make sure the call_rcu has been executed */
 	if (e->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	kfree(e);
 	return 0;
 }
@@ -476,7 +476,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
 	hlist_del(&(*entry)->hlist);
 	/* Make sure the call_rcu has been executed */
 	if ((*entry)->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	kfree(*entry);
 	*entry = e;
 	trace_mark(core_marker_format, "name %s format %s",
@@ -655,7 +655,7 @@ int marker_probe_register(const char *name, const char *format,
 	 * make sure it's executed now.
 	 */
 	if (entry->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	old = marker_entry_add_probe(entry, probe, probe_private);
 	if (IS_ERR(old)) {
 		ret = PTR_ERR(old);
@@ -670,10 +670,7 @@ int marker_probe_register(const char *name, const char *format,
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
 	smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
-	synchronize_sched();	/* Until we have the call_rcu_sched() */
-#endif
-	call_rcu(&entry->rcu, free_old_closure);
+	call_rcu_sched(&entry->rcu, free_old_closure);
 end:
 	mutex_unlock(&markers_mutex);
 	return ret;
@@ -704,7 +701,7 @@ int marker_probe_unregister(const char *name,
 	if (!entry)
 		goto end;
 	if (entry->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, probe, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
@@ -716,10 +713,7 @@ int marker_probe_unregister(const char *name,
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
 	smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
-	synchronize_sched();	/* Until we have the call_rcu_sched() */
-#endif
-	call_rcu(&entry->rcu, free_old_closure);
+	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(name);	/* Ignore busy error message */
 	ret = 0;
 end:
@@ -786,7 +780,7 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
 		goto end;
 	}
 	if (entry->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, NULL, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
@@ -797,10 +791,7 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
 	smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
-	synchronize_sched();	/* Until we have the call_rcu_sched() */
-#endif
-	call_rcu(&entry->rcu, free_old_closure);
+	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(entry->name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-- 
GitLab


From a89cc1959d0ea5f36bf7421dc97b34f03809637d Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 25 Jul 2008 01:48:39 -0700
Subject: [PATCH 800/853] markers: fix sparse integer as NULL pointer warning

kernel/trace/trace_sysprof.c:164:20: warning: Using plain integer as NULL pointer

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/trace/trace_sysprof.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 63528086337..ce2d723c10e 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -161,7 +161,7 @@ static void timer_notify(struct pt_regs *regs, int cpu)
 		__trace_special(tr, data, 2, regs->ip, 0);
 
 		while (i < sample_max_depth) {
-			frame.next_fp = 0;
+			frame.next_fp = NULL;
 			frame.return_address = 0;
 			if (!copy_stack_frame(fp, &frame))
 				break;
-- 
GitLab


From 7394f0f6c0baab650ea9194cb1be847df646fb57 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:40 -0700
Subject: [PATCH 801/853] unexport uts_sem

With the removal of the Solaris binary emulation the export of
uts_sem became unused.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 6c218804604..0c9d3fa1f5f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1343,8 +1343,6 @@ EXPORT_SYMBOL(in_egroup_p);
 
 DECLARE_RWSEM(uts_sem);
 
-EXPORT_SYMBOL(uts_sem);
-
 asmlinkage long sys_newuname(struct new_utsname __user * name)
 {
 	int errno = 0;
-- 
GitLab


From 49b5cf34727a6c1be1568ab28e89a2d9a6bf51e0 Mon Sep 17 00:00:00 2001
From: Jonathan Lim <jlim@sgi.com>
Date: Fri, 25 Jul 2008 01:48:40 -0700
Subject: [PATCH 802/853] accounting: account for user time when updating
 memory integrals

Adapt acct_update_integrals() to include user time when calculating the time
difference.  The units of acct_rss_mem1 and acct_vm_mem1 are also changed from
pages-jiffies to pages-usecs to avoid calling jiffies_to_usecs() in
xacct_add_tsk() which might overflow.

Signed-off-by: Jonathan Lim <jlim@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  2 +-
 kernel/sched.c        |  2 ++
 kernel/tsacct.c       | 21 ++++++++++++++-------
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 354ef478a80..af780f299c7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1257,7 +1257,7 @@ struct task_struct {
 #if defined(CONFIG_TASK_XACCT)
 	u64 acct_rss_mem1;	/* accumulated rss usage */
 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
-	cputime_t acct_stimexpd;/* stime since last update */
+	cputime_t acct_timexpd;	/* stime + utime since last update */
 #endif
 #ifdef CONFIG_CPUSETS
 	nodemask_t mems_allowed;
diff --git a/kernel/sched.c b/kernel/sched.c
index 6acf749d333..0047bd9b96a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4046,6 +4046,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
+	/* Account for user time used */
+	acct_update_integrals(p);
 }
 
 /*
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 4ab1b584961..1da6990af8e 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -84,9 +84,9 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 {
 	struct mm_struct *mm;
 
-	/* convert pages-jiffies to Mbyte-usec */
-	stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
-	stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
+	/* convert pages-usec to Mbyte-usec */
+	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
+	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
 	mm = get_task_mm(p);
 	if (mm) {
 		/* adjust to KB unit */
@@ -118,12 +118,19 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 void acct_update_integrals(struct task_struct *tsk)
 {
 	if (likely(tsk->mm)) {
-		long delta = cputime_to_jiffies(
-			cputime_sub(tsk->stime, tsk->acct_stimexpd));
+		cputime_t time, dtime;
+		struct timeval value;
+		u64 delta;
+
+		time = tsk->stime + tsk->utime;
+		dtime = cputime_sub(time, tsk->acct_timexpd);
+		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
+		delta = value.tv_sec;
+		delta = delta * USEC_PER_SEC + value.tv_usec;
 
 		if (delta == 0)
 			return;
-		tsk->acct_stimexpd = tsk->stime;
+		tsk->acct_timexpd = time;
 		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
 		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
 	}
@@ -135,7 +142,7 @@ void acct_update_integrals(struct task_struct *tsk)
  */
 void acct_clear_integrals(struct task_struct *tsk)
 {
-	tsk->acct_stimexpd = 0;
+	tsk->acct_timexpd = 0;
 	tsk->acct_rss_mem1 = 0;
 	tsk->acct_vm_mem1 = 0;
 }
-- 
GitLab


From 081e4c8a75692c21f3a119a81ca3270081879d0e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:42 -0700
Subject: [PATCH 803/853] bsdacct: rename acct_glbs to bsd_acct_struct

After I fixed access to task->tgid in kernel/acct.c, Oleg pointed out some
bad side effects with this accounting vs pid namespaces interaction.  I.e.
when some task in a pid namespace sets this accounting up, this blocks all
the others from doing the same.  Restricting this to the init namespace only
could help, but didn't look like a graceful solution.

So here is the approach to make this accounting work with pid namespaces
properly.

The idea is simple - when a task dies it accounts itself in each namespace
it is visible from and which set the accounting up.

For example here are the commands run and the output of lastcomm from init
and sub namespaces:

init_ns# accton pacct
 sub_ns# accton pacct (this is a different file - sub ns is run in
                       a chroot-ed environment)
init_ns# cat /dev/null
 sub_ns# ls /dev/null
init_ns# accton
 sub_ns# accton

 sub_ns#  lastcomm -f pacct
ls                      0        [136,0]    0.00 secs Thu May 15 10:30
accton                  0        [136,0]    0.00 secs Thu May 15 10:30

init_ns# lastcomm -f pacct
accton                  root     pts/0      0.00 secs Thu May 15 14:30 << got from sub
cat                     root     pts/1      0.00 secs Thu May 15 14:30
ls                      root     pts/0      0.00 secs Thu May 15 14:30 << got from sub
accton                  root     pts/1      0.00 secs Thu May 15 14:30

That was the summary, the details are in patches.

This patch:

It will be visible in pid_namespace.h file, so fix its name to look better
outside the acct.c file.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/acct.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index 91e1cfd734d..ee3e605190f 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -82,7 +82,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *);
  * can be placed in the same cache line as the lock.  This primes
  * the cache line to have the data after getting the lock.
  */
-struct acct_glbs {
+struct bsd_acct_struct {
 	spinlock_t		lock;
 	volatile int		active;
 	volatile int		needcheck;
@@ -91,7 +91,7 @@ struct acct_glbs {
 	struct timer_list	timer;
 };
 
-static struct acct_glbs acct_globals __cacheline_aligned =
+static struct bsd_acct_struct acct_globals __cacheline_aligned =
 	{__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
 
 /*
-- 
GitLab


From 84406c153a5bfa5d8b428a0933e9d39db6b59a75 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:42 -0700
Subject: [PATCH 804/853] pidns: use kzalloc when allocating new pid_namespace
 struct

It makes the initialization of many fields implicit, which helps in
auto-setting #ifdef-ed fields (the bsd-acct related pointer will be one).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/pid_namespace.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 98702b4b885..06331cc1c3f 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -71,7 +71,7 @@ static struct pid_namespace *create_pid_namespace(unsigned int level)
 	struct pid_namespace *ns;
 	int i;
 
-	ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
 	if (ns == NULL)
 		goto out;
 
@@ -84,17 +84,13 @@ static struct pid_namespace *create_pid_namespace(unsigned int level)
 		goto out_free_map;
 
 	kref_init(&ns->kref);
-	ns->last_pid = 0;
-	ns->child_reaper = NULL;
 	ns->level = level;
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
 
-	for (i = 1; i < PIDMAP_ENTRIES; i++) {
-		ns->pidmap[i].page = NULL;
+	for (i = 1; i < PIDMAP_ENTRIES; i++)
 		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
-	}
 
 	return ns;
 
-- 
GitLab


From 20fad13ac66ac001c19220d3d08b4de5b6cca6e1 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:43 -0700
Subject: [PATCH 805/853] pidns: add the struct bsd_acct_struct pointer on
 pid_namespace struct

All the bsdacct-related info will be stored in the area pointed to by this
one.

It will be NULL automatically for all new namespaces.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid_namespace.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 1a49ab5ec7b..1af82c4e17d 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -14,6 +14,8 @@ struct pidmap {
 
 #define PIDMAP_ENTRIES         ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
 
+struct bsd_acct_struct;
+
 struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
@@ -25,6 +27,9 @@ struct pid_namespace {
 #ifdef CONFIG_PROC_FS
 	struct vfsmount *proc_mnt;
 #endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+	struct bsd_acct_struct *bacct;
+#endif
 };
 
 extern struct pid_namespace init_pid_ns;
-- 
GitLab


From 1c552858ac2b1732a99d234d46b98098baef41ff Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:44 -0700
Subject: [PATCH 806/853] bsdacct: "truthify" a comment near acct_process

The acct_process does not accept any arguments actually.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/acct.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index ee3e605190f..d9ee1838b4d 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -579,7 +579,6 @@ void acct_collect(long exitcode, int group_dead)
 
 /**
  * acct_process - now just a wrapper around do_acct_process
- * @exitcode: task exit code
  *
  * handles process accounting for an exiting task
  */
-- 
GitLab


From e59a04a7aa5ce2483470aee4f2eb79ba6b9afe8b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:44 -0700
Subject: [PATCH 807/853] bsdacct: make check timer accept a bsd_acct_struct
 argument

We're going to have many bsd_acct_struct instances, not just one, so the
timer (currently working with a global one) has to know which one to work
with.

Use a handy setup_timer macro for it (thanks to Oleg for one).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/acct.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index d9ee1838b4d..05f8bc094a4 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -97,9 +97,10 @@ static struct bsd_acct_struct acct_globals __cacheline_aligned =
 /*
  * Called whenever the timer says to check the free space.
  */
-static void acct_timeout(unsigned long unused)
+static void acct_timeout(unsigned long x)
 {
-	acct_globals.needcheck = 1;
+	struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
+	acct->needcheck = 1;
 }
 
 /*
@@ -193,8 +194,8 @@ static void acct_file_reopen(struct file *file)
 		acct_globals.needcheck = 0;
 		acct_globals.active = 1;
 		/* It's been deleted if it was used before so this is safe */
-		init_timer(&acct_globals.timer);
-		acct_globals.timer.function = acct_timeout;
+		setup_timer(&acct_globals.timer, acct_timeout,
+				(unsigned long)&acct_globals);
 		acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
 		add_timer(&acct_globals.timer);
 	}
-- 
GitLab


From a75d97976517dcda69150fd81d6be86ae63324a1 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:45 -0700
Subject: [PATCH 808/853] bsdacct: turn the acct_lock from on-the-struct to
 global

Don't use per-bsd-acct-struct lock, but work with a global one.

This lock is taken for short periods, so it doesn't seem it'll become a
bottleneck, but it will allow us to easily avoid many locking difficulties
in the future.

So this is mostly a s/acct_globals.lock/acct_lock/ over the file.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/acct.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index 05f8bc094a4..fc71c130497 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -83,7 +83,6 @@ static void do_acct_process(struct pid_namespace *ns, struct file *);
  * the cache line to have the data after getting the lock.
  */
 struct bsd_acct_struct {
-	spinlock_t		lock;
 	volatile int		active;
 	volatile int		needcheck;
 	struct file		*file;
@@ -91,8 +90,9 @@ struct bsd_acct_struct {
 	struct timer_list	timer;
 };
 
-static struct bsd_acct_struct acct_globals __cacheline_aligned =
-	{__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
+static DEFINE_SPINLOCK(acct_lock);
+
+static struct bsd_acct_struct acct_globals __cacheline_aligned;
 
 /*
  * Called whenever the timer says to check the free space.
@@ -114,11 +114,11 @@ static int check_free_space(struct file *file)
 	sector_t resume;
 	sector_t suspend;
 
-	spin_lock(&acct_globals.lock);
+	spin_lock(&acct_lock);
 	res = acct_globals.active;
 	if (!file || !acct_globals.needcheck)
 		goto out;
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 
 	/* May block */
 	if (vfs_statfs(file->f_path.dentry, &sbuf))
@@ -140,7 +140,7 @@ static int check_free_space(struct file *file)
 	 * If some joker switched acct_globals.file under us we'ld better be
 	 * silent and _not_ touch anything.
 	 */
-	spin_lock(&acct_globals.lock);
+	spin_lock(&acct_lock);
 	if (file != acct_globals.file) {
 		if (act)
 			res = act>0;
@@ -165,7 +165,7 @@ static int check_free_space(struct file *file)
 	add_timer(&acct_globals.timer);
 	res = acct_globals.active;
 out:
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 	return res;
 }
 
@@ -173,7 +173,7 @@ out:
  * Close the old accounting file (if currently open) and then replace
  * it with file (if non-NULL).
  *
- * NOTE: acct_globals.lock MUST be held on entry and exit.
+ * NOTE: acct_lock MUST be held on entry and exit.
  */
 static void acct_file_reopen(struct file *file)
 {
@@ -201,11 +201,11 @@ static void acct_file_reopen(struct file *file)
 	}
 	if (old_acct) {
 		mnt_unpin(old_acct->f_path.mnt);
-		spin_unlock(&acct_globals.lock);
+		spin_unlock(&acct_lock);
 		do_acct_process(old_ns, old_acct);
 		filp_close(old_acct, NULL);
 		put_pid_ns(old_ns);
-		spin_lock(&acct_globals.lock);
+		spin_lock(&acct_lock);
 	}
 }
 
@@ -235,10 +235,10 @@ static int acct_on(char *name)
 		return error;
 	}
 
-	spin_lock(&acct_globals.lock);
+	spin_lock(&acct_lock);
 	mnt_pin(file->f_path.mnt);
 	acct_file_reopen(file);
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 
 	mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
 
@@ -272,9 +272,9 @@ asmlinkage long sys_acct(const char __user *name)
 	} else {
 		error = security_acct(NULL);
 		if (!error) {
-			spin_lock(&acct_globals.lock);
+			spin_lock(&acct_lock);
 			acct_file_reopen(NULL);
-			spin_unlock(&acct_globals.lock);
+			spin_unlock(&acct_lock);
 		}
 	}
 	return error;
@@ -289,10 +289,10 @@ asmlinkage long sys_acct(const char __user *name)
  */
 void acct_auto_close_mnt(struct vfsmount *m)
 {
-	spin_lock(&acct_globals.lock);
+	spin_lock(&acct_lock);
 	if (acct_globals.file && acct_globals.file->f_path.mnt == m)
 		acct_file_reopen(NULL);
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 }
 
 /**
@@ -304,12 +304,12 @@ void acct_auto_close_mnt(struct vfsmount *m)
  */
 void acct_auto_close(struct super_block *sb)
 {
-	spin_lock(&acct_globals.lock);
+	spin_lock(&acct_lock);
 	if (acct_globals.file &&
 	    acct_globals.file->f_path.mnt->mnt_sb == sb) {
 		acct_file_reopen(NULL);
 	}
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 }
 
 /*
@@ -594,15 +594,15 @@ void acct_process(void)
 	if (!acct_globals.file)
 		return;
 
-	spin_lock(&acct_globals.lock);
+	spin_lock(&acct_lock);
 	file = acct_globals.file;
 	if (unlikely(!file)) {
-		spin_unlock(&acct_globals.lock);
+		spin_unlock(&acct_lock);
 		return;
 	}
 	get_file(file);
 	ns = get_pid_ns(acct_globals.ns);
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 
 	do_acct_process(ns, file);
 	fput(file);
-- 
GitLab


From 6248b1b342005a428b1247b4e89249da1528d88d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:46 -0700
Subject: [PATCH 809/853] bsdacct: make internal code work with passed
 bsd_acct_struct, not global

This adds the appropriate pointer to all the internal (i.e.  static)
functions that work with global acct instance.  API calls pass a global
instance to them (while we still have such).

Mostly this is a s/acct_globals./acct->/ over the file.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/acct.c | 77 ++++++++++++++++++++++++++-------------------------
 1 file changed, 39 insertions(+), 38 deletions(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index fc71c130497..72d4760c8da 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,7 +75,8 @@ int acct_parm[3] = {4, 2, 30};
 /*
  * External references and all of the globals.
  */
-static void do_acct_process(struct pid_namespace *ns, struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct,
+		struct pid_namespace *ns, struct file *);
 
 /*
  * This structure is used so that all the data protected by lock
@@ -106,7 +107,7 @@ static void acct_timeout(unsigned long x)
 /*
  * Check the amount of free space and suspend/resume accordingly.
  */
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
 {
 	struct kstatfs sbuf;
 	int res;
@@ -115,8 +116,8 @@ static int check_free_space(struct file *file)
 	sector_t suspend;
 
 	spin_lock(&acct_lock);
-	res = acct_globals.active;
-	if (!file || !acct_globals.needcheck)
+	res = acct->active;
+	if (!file || !acct->needcheck)
 		goto out;
 	spin_unlock(&acct_lock);
 
@@ -137,33 +138,33 @@ static int check_free_space(struct file *file)
 		act = 0;
 
 	/*
-	 * If some joker switched acct_globals.file under us we'ld better be
+	 * If some joker switched acct->file under us we'ld better be
 	 * silent and _not_ touch anything.
 	 */
 	spin_lock(&acct_lock);
-	if (file != acct_globals.file) {
+	if (file != acct->file) {
 		if (act)
 			res = act>0;
 		goto out;
 	}
 
-	if (acct_globals.active) {
+	if (acct->active) {
 		if (act < 0) {
-			acct_globals.active = 0;
+			acct->active = 0;
 			printk(KERN_INFO "Process accounting paused\n");
 		}
 	} else {
 		if (act > 0) {
-			acct_globals.active = 1;
+			acct->active = 1;
 			printk(KERN_INFO "Process accounting resumed\n");
 		}
 	}
 
-	del_timer(&acct_globals.timer);
-	acct_globals.needcheck = 0;
-	acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-	add_timer(&acct_globals.timer);
-	res = acct_globals.active;
+	del_timer(&acct->timer);
+	acct->needcheck = 0;
+	acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+	add_timer(&acct->timer);
+	res = acct->active;
 out:
 	spin_unlock(&acct_lock);
 	return res;
@@ -175,34 +176,33 @@ out:
  *
  * NOTE: acct_lock MUST be held on entry and exit.
  */
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
 {
 	struct file *old_acct = NULL;
 	struct pid_namespace *old_ns = NULL;
 
-	if (acct_globals.file) {
-		old_acct = acct_globals.file;
-		old_ns = acct_globals.ns;
-		del_timer(&acct_globals.timer);
-		acct_globals.active = 0;
-		acct_globals.needcheck = 0;
-		acct_globals.file = NULL;
+	if (acct->file) {
+		old_acct = acct->file;
+		old_ns = acct->ns;
+		del_timer(&acct->timer);
+		acct->active = 0;
+		acct->needcheck = 0;
+		acct->file = NULL;
 	}
 	if (file) {
-		acct_globals.file = file;
-		acct_globals.ns = get_pid_ns(task_active_pid_ns(current));
-		acct_globals.needcheck = 0;
-		acct_globals.active = 1;
+		acct->file = file;
+		acct->ns = get_pid_ns(task_active_pid_ns(current));
+		acct->needcheck = 0;
+		acct->active = 1;
 		/* It's been deleted if it was used before so this is safe */
-		setup_timer(&acct_globals.timer, acct_timeout,
-				(unsigned long)&acct_globals);
-		acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-		add_timer(&acct_globals.timer);
+		setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
+		acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+		add_timer(&acct->timer);
 	}
 	if (old_acct) {
 		mnt_unpin(old_acct->f_path.mnt);
 		spin_unlock(&acct_lock);
-		do_acct_process(old_ns, old_acct);
+		do_acct_process(acct, old_ns, old_acct);
 		filp_close(old_acct, NULL);
 		put_pid_ns(old_ns);
 		spin_lock(&acct_lock);
@@ -237,7 +237,7 @@ static int acct_on(char *name)
 
 	spin_lock(&acct_lock);
 	mnt_pin(file->f_path.mnt);
-	acct_file_reopen(file);
+	acct_file_reopen(&acct_globals, file);
 	spin_unlock(&acct_lock);
 
 	mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
@@ -273,7 +273,7 @@ asmlinkage long sys_acct(const char __user *name)
 		error = security_acct(NULL);
 		if (!error) {
 			spin_lock(&acct_lock);
-			acct_file_reopen(NULL);
+			acct_file_reopen(&acct_globals, NULL);
 			spin_unlock(&acct_lock);
 		}
 	}
@@ -291,7 +291,7 @@ void acct_auto_close_mnt(struct vfsmount *m)
 {
 	spin_lock(&acct_lock);
 	if (acct_globals.file && acct_globals.file->f_path.mnt == m)
-		acct_file_reopen(NULL);
+		acct_file_reopen(&acct_globals, NULL);
 	spin_unlock(&acct_lock);
 }
 
@@ -307,7 +307,7 @@ void acct_auto_close(struct super_block *sb)
 	spin_lock(&acct_lock);
 	if (acct_globals.file &&
 	    acct_globals.file->f_path.mnt->mnt_sb == sb) {
-		acct_file_reopen(NULL);
+		acct_file_reopen(&acct_globals, NULL);
 	}
 	spin_unlock(&acct_lock);
 }
@@ -426,7 +426,8 @@ static u32 encode_float(u64 value)
 /*
  *  do_acct_process does all actual work. Caller holds the reference to file.
  */
-static void do_acct_process(struct pid_namespace *ns, struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct,
+		struct pid_namespace *ns, struct file *file)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
 	acct_t ac;
@@ -441,7 +442,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
 	 * First check to see if there is enough free_space to continue
 	 * the process accounting system.
 	 */
-	if (!check_free_space(file))
+	if (!check_free_space(acct, file))
 		return;
 
 	/*
@@ -604,7 +605,7 @@ void acct_process(void)
 	ns = get_pid_ns(acct_globals.ns);
 	spin_unlock(&acct_lock);
 
-	do_acct_process(ns, file);
+	do_acct_process(&acct_globals, ns, file);
 	fput(file);
 	put_pid_ns(ns);
 }
-- 
GitLab


From 0b6b030fc30d169bb406b34b4fc60d99dde4a9c6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:47 -0700
Subject: [PATCH 810/853] bsdacct: switch from global bsd_acct_struct instance
 to per-pidns one

Allocate the structure on the first call to sys_acct().  After this, each
namespace that ordered the accounting will live with this structure till
its own death.

Two notes:
- routines that close the accounting at fs umount time use
  the init_pid_ns's acct for now;
- the accounting routine accounts to the dying task's namespace
  (also for now).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acct.h   |  3 ++
 kernel/acct.c          | 84 +++++++++++++++++++++++++++++++++---------
 kernel/pid_namespace.c |  2 +
 3 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/include/linux/acct.h b/include/linux/acct.h
index e8cae54e8d8..882dc724876 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -120,17 +120,20 @@ struct acct_v3
 struct vfsmount;
 struct super_block;
 struct pacct_struct;
+struct pid_namespace;
 extern void acct_auto_close_mnt(struct vfsmount *m);
 extern void acct_auto_close(struct super_block *sb);
 extern void acct_init_pacct(struct pacct_struct *pacct);
 extern void acct_collect(long exitcode, int group_dead);
 extern void acct_process(void);
+extern void acct_exit_ns(struct pid_namespace *);
 #else
 #define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_auto_close(x)	do { } while (0)
 #define acct_init_pacct(x)	do { } while (0)
 #define acct_collect(x,y)	do { } while (0)
 #define acct_process()		do { } while (0)
+#define acct_exit_ns(ns)	do { } while (0)
 #endif
 
 /*
diff --git a/kernel/acct.c b/kernel/acct.c
index 72d4760c8da..febbbc67157 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -93,8 +93,6 @@ struct bsd_acct_struct {
 
 static DEFINE_SPINLOCK(acct_lock);
 
-static struct bsd_acct_struct acct_globals __cacheline_aligned;
-
 /*
  * Called whenever the timer says to check the free space.
  */
@@ -176,7 +174,8 @@ out:
  *
  * NOTE: acct_lock MUST be held on entry and exit.
  */
-static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+		struct pid_namespace *ns)
 {
 	struct file *old_acct = NULL;
 	struct pid_namespace *old_ns = NULL;
@@ -188,10 +187,11 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
 		acct->active = 0;
 		acct->needcheck = 0;
 		acct->file = NULL;
+		acct->ns = NULL;
 	}
 	if (file) {
 		acct->file = file;
-		acct->ns = get_pid_ns(task_active_pid_ns(current));
+		acct->ns = ns;
 		acct->needcheck = 0;
 		acct->active = 1;
 		/* It's been deleted if it was used before so this is safe */
@@ -204,7 +204,6 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
 		spin_unlock(&acct_lock);
 		do_acct_process(acct, old_ns, old_acct);
 		filp_close(old_acct, NULL);
-		put_pid_ns(old_ns);
 		spin_lock(&acct_lock);
 	}
 }
@@ -213,6 +212,8 @@ static int acct_on(char *name)
 {
 	struct file *file;
 	int error;
+	struct pid_namespace *ns;
+	struct bsd_acct_struct *acct = NULL;
 
 	/* Difference from BSD - they don't do O_APPEND */
 	file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -229,18 +230,34 @@ static int acct_on(char *name)
 		return -EIO;
 	}
 
+	ns = task_active_pid_ns(current);
+	if (ns->bacct == NULL) {
+		acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+		if (acct == NULL) {
+			filp_close(file, NULL);
+			return -ENOMEM;
+		}
+	}
+
 	error = security_acct(file);
 	if (error) {
+		kfree(acct);
 		filp_close(file, NULL);
 		return error;
 	}
 
 	spin_lock(&acct_lock);
+	if (ns->bacct == NULL) {
+		ns->bacct = acct;
+		acct = NULL;
+	}
+
 	mnt_pin(file->f_path.mnt);
-	acct_file_reopen(&acct_globals, file);
+	acct_file_reopen(ns->bacct, file, ns);
 	spin_unlock(&acct_lock);
 
 	mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+	kfree(acct);
 
 	return 0;
 }
@@ -270,10 +287,16 @@ asmlinkage long sys_acct(const char __user *name)
 		error = acct_on(tmp);
 		putname(tmp);
 	} else {
+		struct bsd_acct_struct *acct;
+
+		acct = task_active_pid_ns(current)->bacct;
+		if (acct == NULL)
+			return 0;
+
 		error = security_acct(NULL);
 		if (!error) {
 			spin_lock(&acct_lock);
-			acct_file_reopen(&acct_globals, NULL);
+			acct_file_reopen(acct, NULL, NULL);
 			spin_unlock(&acct_lock);
 		}
 	}
@@ -289,9 +312,15 @@ asmlinkage long sys_acct(const char __user *name)
  */
 void acct_auto_close_mnt(struct vfsmount *m)
 {
+	struct bsd_acct_struct *acct;
+
+	acct = init_pid_ns.bacct;
+	if (acct == NULL)
+		return;
+
 	spin_lock(&acct_lock);
-	if (acct_globals.file && acct_globals.file->f_path.mnt == m)
-		acct_file_reopen(&acct_globals, NULL);
+	if (acct->file && acct->file->f_path.mnt == m)
+		acct_file_reopen(acct, NULL, NULL);
 	spin_unlock(&acct_lock);
 }
 
@@ -304,10 +333,29 @@ void acct_auto_close_mnt(struct vfsmount *m)
  */
 void acct_auto_close(struct super_block *sb)
 {
+	struct bsd_acct_struct *acct;
+
+	acct = init_pid_ns.bacct;
+	if (acct == NULL)
+		return;
+
 	spin_lock(&acct_lock);
-	if (acct_globals.file &&
-	    acct_globals.file->f_path.mnt->mnt_sb == sb) {
-		acct_file_reopen(&acct_globals, NULL);
+	if (acct->file && acct->file->f_path.mnt->mnt_sb == sb)
+		acct_file_reopen(acct, NULL, NULL);
+	spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+	struct bsd_acct_struct *acct;
+
+	spin_lock(&acct_lock);
+	acct = ns->bacct;
+	if (acct != NULL) {
+		if (acct->file != NULL)
+			acct_file_reopen(acct, NULL, NULL);
+
+		kfree(acct);
 	}
 	spin_unlock(&acct_lock);
 }
@@ -587,25 +635,25 @@ void acct_collect(long exitcode, int group_dead)
 void acct_process(void)
 {
 	struct file *file = NULL;
-	struct pid_namespace *ns;
+	struct pid_namespace *ns = task_active_pid_ns(current);
+	struct bsd_acct_struct *acct;
 
+	acct = ns->bacct;
 	/*
 	 * accelerate the common fastpath:
 	 */
-	if (!acct_globals.file)
+	if (!acct || !acct->file)
 		return;
 
 	spin_lock(&acct_lock);
-	file = acct_globals.file;
+	file = acct->file;
 	if (unlikely(!file)) {
 		spin_unlock(&acct_lock);
 		return;
 	}
 	get_file(file);
-	ns = get_pid_ns(acct_globals.ns);
 	spin_unlock(&acct_lock);
 
-	do_acct_process(&acct_globals, ns, file);
+	do_acct_process(acct, ns, file);
 	fput(file);
-	put_pid_ns(ns);
 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 06331cc1c3f..ea567b78d1a 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -12,6 +12,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/syscalls.h>
 #include <linux/err.h>
+#include <linux/acct.h>
 
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 
@@ -181,6 +182,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 
 	/* Child reaper for the pid namespace is going away */
 	pid_ns->child_reaper = NULL;
+	acct_exit_ns(pid_ns);
 	return;
 }
 
-- 
GitLab


From b5a7174875ea570cc675f2c503e800db8efdd6a7 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:47 -0700
Subject: [PATCH 811/853] bsdacct: turn acct off for all pidns-s on umount time

All the bsd_acct_structs with opened accounting are linked into a global
list.  So, the acct_auto_close(_mnt) walks this list and drops the
accounting for each.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/acct.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index febbbc67157..7fc9f9dd1e9 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -89,9 +89,11 @@ struct bsd_acct_struct {
 	struct file		*file;
 	struct pid_namespace	*ns;
 	struct timer_list	timer;
+	struct list_head	list;
 };
 
 static DEFINE_SPINLOCK(acct_lock);
+static LIST_HEAD(acct_list);
 
 /*
  * Called whenever the timer says to check the free space.
@@ -188,12 +190,14 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
 		acct->needcheck = 0;
 		acct->file = NULL;
 		acct->ns = NULL;
+		list_del(&acct->list);
 	}
 	if (file) {
 		acct->file = file;
 		acct->ns = ns;
 		acct->needcheck = 0;
 		acct->active = 1;
+		list_add(&acct->list, &acct_list);
 		/* It's been deleted if it was used before so this is safe */
 		setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
 		acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
@@ -314,13 +318,13 @@ void acct_auto_close_mnt(struct vfsmount *m)
 {
 	struct bsd_acct_struct *acct;
 
-	acct = init_pid_ns.bacct;
-	if (acct == NULL)
-		return;
-
 	spin_lock(&acct_lock);
-	if (acct->file && acct->file->f_path.mnt == m)
-		acct_file_reopen(acct, NULL, NULL);
+restart:
+	list_for_each_entry(acct, &acct_list, list)
+		if (acct->file && acct->file->f_path.mnt == m) {
+			acct_file_reopen(acct, NULL, NULL);
+			goto restart;
+		}
 	spin_unlock(&acct_lock);
 }
 
@@ -335,13 +339,13 @@ void acct_auto_close(struct super_block *sb)
 {
 	struct bsd_acct_struct *acct;
 
-	acct = init_pid_ns.bacct;
-	if (acct == NULL)
-		return;
-
 	spin_lock(&acct_lock);
-	if (acct->file && acct->file->f_path.mnt->mnt_sb == sb)
-		acct_file_reopen(acct, NULL, NULL);
+restart:
+	list_for_each_entry(acct, &acct_list, list)
+		if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
+			acct_file_reopen(acct, NULL, NULL);
+			goto restart;
+		}
 	spin_unlock(&acct_lock);
 }
 
-- 
GitLab


From 7d1e13505be8c2bd2207894f4e0f069e1f9b51c9 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:48 -0700
Subject: [PATCH 812/853] bsdacct: account dying tasks in all relevant
 namespaces

This just makes acct_process() walk the pid namespaces from current up to
the top and account a task in each with the accounting turned on.

ns->parent access is safe lockless, since current is still alive and holds
its namespace, which in turn holds its parent.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/acct.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index 7fc9f9dd1e9..0feba97e114 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -631,15 +631,9 @@ void acct_collect(long exitcode, int group_dead)
 	spin_unlock_irq(&current->sighand->siglock);
 }
 
-/**
- * acct_process - now just a wrapper around do_acct_process
- *
- * handles process accounting for an exiting task
- */
-void acct_process(void)
+static void acct_process_in_ns(struct pid_namespace *ns)
 {
 	struct file *file = NULL;
-	struct pid_namespace *ns = task_active_pid_ns(current);
 	struct bsd_acct_struct *acct;
 
 	acct = ns->bacct;
@@ -661,3 +655,16 @@ void acct_process(void)
 	do_acct_process(acct, ns, file);
 	fput(file);
 }
+
+/**
+ * acct_process - now just a wrapper around do_acct_process
+ *
+ * handles process accounting for an exiting task
+ */
+void acct_process(void)
+{
+	struct pid_namespace *ns;
+
+	for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+		acct_process_in_ns(ns);
+}
-- 
GitLab


From 0c18d7a5df82524e634637c3aec24d4cba096442 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:49 -0700
Subject: [PATCH 813/853] bsdacct: fix and add comments around acct_process()

Fix the one describing what this function is and add one more - about
locking absence around pid namespaces loop.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/acct.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index 0feba97e114..dd68b905941 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -657,7 +657,8 @@ static void acct_process_in_ns(struct pid_namespace *ns)
 }
 
 /**
- * acct_process - now just a wrapper around do_acct_process
+ * acct_process - now just a wrapper around acct_process_in_ns,
+ * which in turn is a wrapper around do_acct_process.
  *
  * handles process accounting for an exiting task
  */
@@ -665,6 +666,11 @@ void acct_process(void)
 {
 	struct pid_namespace *ns;
 
+	/*
+	 * This loop is safe lockless, since current is still
+	 * alive and holds its namespace, which in turn holds
+	 * its parent.
+	 */
 	for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
 		acct_process_in_ns(ns);
 }
-- 
GitLab


From 297c5d92634c809cef23d73e7b2556f2528ff7e2 Mon Sep 17 00:00:00 2001
From: Andrea Righi <righi.andrea@gmail.com>
Date: Fri, 25 Jul 2008 01:48:49 -0700
Subject: [PATCH 814/853] task IO accounting: provide distinct tgid/tid I/O
 statistics

Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate
parent I/O statistics in /proc/pid/io.  This approach follows the same
model used to account per-process and per-thread CPU times.

As a practical application, this allows for example to quickly find the top
I/O consumer when a process spawns many child threads that perform the
actual I/O work, because the aggregated I/O statistics can always be found
in /proc/pid/io.

[ Oleg Nesterov points out that we should check that the task is still
  alive before we iterate over the threads, but also says that we can do
  that fixup on top of this later.  - Linus ]

Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: Matt Heaton <matt@hostmonster.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Acked-by-with-comments: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c        | 86 +++++++++++++++++++++++++++++++++++--------
 include/linux/sched.h |  4 ++
 kernel/exit.c         | 27 ++++++++++++++
 kernel/fork.c         |  6 +++
 4 files changed, 108 insertions(+), 15 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15..a891fe4cb43 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
 }
 
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
-{
+static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+{
+	u64 rchar, wchar, syscr, syscw;
+	struct task_io_accounting ioac;
+
+	if (!whole) {
+		rchar = task->rchar;
+		wchar = task->wchar;
+		syscr = task->syscr;
+		syscw = task->syscw;
+		memcpy(&ioac, &task->ioac, sizeof(ioac));
+	} else {
+		unsigned long flags;
+		struct task_struct *t = task;
+		rchar = wchar = syscr = syscw = 0;
+		memset(&ioac, 0, sizeof(ioac));
+
+		rcu_read_lock();
+		do {
+			rchar += t->rchar;
+			wchar += t->wchar;
+			syscr += t->syscr;
+			syscw += t->syscw;
+
+			ioac.read_bytes += t->ioac.read_bytes;
+			ioac.write_bytes += t->ioac.write_bytes;
+			ioac.cancelled_write_bytes +=
+					t->ioac.cancelled_write_bytes;
+			t = next_thread(t);
+		} while (t != task);
+		rcu_read_unlock();
+
+		if (lock_task_sighand(task, &flags)) {
+			struct signal_struct *sig = task->signal;
+
+			rchar += sig->rchar;
+			wchar += sig->wchar;
+			syscr += sig->syscr;
+			syscw += sig->syscw;
+
+			ioac.read_bytes += sig->ioac.read_bytes;
+			ioac.write_bytes += sig->ioac.write_bytes;
+			ioac.cancelled_write_bytes +=
+					sig->ioac.cancelled_write_bytes;
+
+			unlock_task_sighand(task, &flags);
+		}
+	}
+
 	return sprintf(buffer,
-#ifdef CONFIG_TASK_XACCT
 			"rchar: %llu\n"
 			"wchar: %llu\n"
 			"syscr: %llu\n"
 			"syscw: %llu\n"
-#endif
 			"read_bytes: %llu\n"
 			"write_bytes: %llu\n"
 			"cancelled_write_bytes: %llu\n",
-#ifdef CONFIG_TASK_XACCT
-			(unsigned long long)task->rchar,
-			(unsigned long long)task->wchar,
-			(unsigned long long)task->syscr,
-			(unsigned long long)task->syscw,
-#endif
-			(unsigned long long)task->ioac.read_bytes,
-			(unsigned long long)task->ioac.write_bytes,
-			(unsigned long long)task->ioac.cancelled_write_bytes);
+			(unsigned long long)rchar,
+			(unsigned long long)wchar,
+			(unsigned long long)syscr,
+			(unsigned long long)syscw,
+			(unsigned long long)ioac.read_bytes,
+			(unsigned long long)ioac.write_bytes,
+			(unsigned long long)ioac.cancelled_write_bytes);
+}
+
+static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+{
+	return do_io_accounting(task, buffer, 0);
 }
-#endif
+
+static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+{
+	return do_io_accounting(task, buffer, 1);
+}
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
 
 /*
  * Thread groups
@@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-	INF("io",	S_IRUGO, pid_io_accounting),
+	INF("io",	S_IRUGO, tgid_io_accounting),
 #endif
 };
 
@@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
 #endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	INF("io",	S_IRUGO, tid_io_accounting),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file * filp,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index af780f299c7..d22ffe06d0e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -506,6 +506,10 @@ struct signal_struct {
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 	unsigned long inblock, oublock, cinblock, coublock;
+#ifdef CONFIG_TASK_XACCT
+	u64 rchar, wchar, syscr, syscw;
+#endif
+	struct task_io_accounting ioac;
 
 	/*
 	 * Cumulative ns of scheduled CPU time for dead threads in the
diff --git a/kernel/exit.c b/kernel/exit.c
index 8a4d4d12e29..ad933bb29ec 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -120,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->nivcsw += tsk->nivcsw;
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
+#ifdef CONFIG_TASK_XACCT
+		sig->rchar += tsk->rchar;
+		sig->wchar += tsk->wchar;
+		sig->syscr += tsk->syscr;
+		sig->syscw += tsk->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+		sig->ioac.read_bytes += tsk->ioac.read_bytes;
+		sig->ioac.write_bytes += tsk->ioac.write_bytes;
+		sig->ioac.cancelled_write_bytes +=
+					tsk->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
 		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
@@ -1366,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
+#ifdef CONFIG_TASK_XACCT
+		psig->rchar += p->rchar + sig->rchar;
+		psig->wchar += p->wchar + sig->wchar;
+		psig->syscr += p->syscr + sig->syscr;
+		psig->syscw += p->syscw + sig->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+		psig->ioac.read_bytes +=
+			p->ioac.read_bytes + sig->ioac.read_bytes;
+		psig->ioac.write_bytes +=
+			p->ioac.write_bytes + sig->ioac.write_bytes;
+		psig->ioac.cancelled_write_bytes +=
+				p->ioac.cancelled_write_bytes +
+				sig->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
 		spin_unlock_irq(&p->parent->sighand->siglock);
 	}
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 813d5c89b9d..b99d73e971a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -812,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+#ifdef CONFIG_TASK_XACCT
+	sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
+#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	memset(&sig->ioac, 0, sizeof(sig->ioac));
+#endif
 	sig->sum_sched_runtime = 0;
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-- 
GitLab


From 3e85ba034deec351f02cb55ff225bbd616463841 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 25 Jul 2008 01:48:50 -0700
Subject: [PATCH 815/853] tsacct: fix bacct_add_tsk()'s use of do_div()

Fix bacct_add_tsk()'s use of do_div() on an s64 by making ac_etime a u64
instead and dividing that.

Possibly this should be guarded lest the interval calculation turn up
negative, but the possible negativity of the result of the division is
cast away, and it shouldn't end up negative anyway.

This was introduced by patch f3cef7a99469afc159fec3a61b42dc7ca5b6824f.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/tsacct.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 1da6990af8e..3da47ccdc5e 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -28,14 +28,14 @@
 void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
 {
 	struct timespec uptime, ts;
-	s64 ac_etime;
+	u64 ac_etime;
 
 	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
 
 	/* calculate task elapsed time in timespec */
 	do_posix_clock_monotonic_gettime(&uptime);
 	ts = timespec_sub(uptime, tsk->start_time);
-	/* rebase elapsed time to usec */
+	/* rebase elapsed time to usec (should never be negative) */
 	ac_etime = timespec_to_ns(&ts);
 	do_div(ac_etime, NSEC_PER_USEC);
 	stats->ac_etime = ac_etime;
-- 
GitLab


From 873b47717732c2f33a4b14de02571a4295a02f0c Mon Sep 17 00:00:00 2001
From: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Date: Fri, 25 Jul 2008 01:48:52 -0700
Subject: [PATCH 816/853] per-task-delay-accounting: add memory reclaim delay

Sometimes, application response times become bad under heavy memory load.
Applications take a bit of time to reclaim memory.  Statistics on how long
memory reclaim takes will be useful to measure memory usage.

This patch adds memory reclaim accounting to per-task-delay-accounting, to
account for the time spent in do_try_to_free_pages().

<i.e>

- When System is under low memory load,
  memory reclaim may not occur.

$ free
             total       used       free     shared    buffers     cached
Mem:       8197800    1577300    6620500          0       4808    1516724
-/+ buffers/cache:      55768    8142032
Swap:     16386292          0   16386292

$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0      0 5069748  10612 3014060    0    0     0     0    3   26  0  0 100  0
 0  0      0 5069748  10612 3014060    0    0     0     0    4   22  0  0 100  0
 0  0      0 5069748  10612 3014060    0    0     0     0    3   18  0  0 100  0

Measure the time of tar command.

$ ls -s test.dat
1501472 test.dat

$ time tar cvf test.tar test.dat
real    0m13.388s
user    0m0.116s
sys     0m5.304s

$ ./delayget -d -p <pid>
CPU             count     real total  virtual total    delay total
                  428     5528345500     5477116080       62749891
IO              count    delay total
                  338     8078977189
SWAP            count    delay total
                    0              0
RECLAIM         count    delay total
                    0              0

- When system is under heavy memory load
  memory reclaim may occur.

$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0 7159032  49724   1812   3012    0    0     0     0    3   24  0  0 100  0
 0  0 7159032  49724   1812   3012    0    0     0     0    4   24  0  0 100  0
 0  0 7159032  49848   1812   3012    0    0     0     0    3   22  0  0 100  0

In this case, one process uses more than 8G of memory
by execution of malloc() and memset().

$ time tar cvf test.tar test.dat
real    1m38.563s        <-  increased by 85 sec
user    0m0.140s
sys     0m7.060s

$ ./delayget -d -p <pid>
CPU             count     real total  virtual total    delay total
                 9021     7140446250     7315277975      923201824
IO              count    delay total
                 8965    90466349669
SWAP            count    delay total
                    3       21036367
RECLAIM         count    delay total
                  740    61011951153

In the latter case, the value of RECLAIM is increasing.
So, taskstats can show how much memory reclaim influences TAT.

Signed-off-by: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujistu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h | 19 +++++++++++++++++++
 include/linux/sched.h     |  4 ++++
 kernel/delayacct.c        | 13 +++++++++++++
 mm/vmscan.c               |  5 +++++
 4 files changed, 41 insertions(+)

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index ab94bc08355..f352f06fa06 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -39,6 +39,8 @@ extern void __delayacct_blkio_start(void);
 extern void __delayacct_blkio_end(void);
 extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
+extern void __delayacct_freepages_start(void);
+extern void __delayacct_freepages_end(void);
 
 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
 {
@@ -107,6 +109,18 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 	return 0;
 }
 
+static inline void delayacct_freepages_start(void)
+{
+	if (current->delays)
+		__delayacct_freepages_start();
+}
+
+static inline void delayacct_freepages_end(void)
+{
+	if (current->delays)
+		__delayacct_freepages_end();
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -129,6 +143,11 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 { return 0; }
 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
 { return 0; }
+static inline void delayacct_freepages_start(void)
+{}
+static inline void delayacct_freepages_end(void)
+{}
+
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d22ffe06d0e..42036ffe6b0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -672,6 +672,10 @@ struct task_delay_info {
 				/* io operations performed */
 	u32 swapin_count;	/* total count of the number of swapin block */
 				/* io operations performed */
+
+	struct timespec freepages_start, freepages_end;
+	u64 freepages_delay;	/* wait for memory reclaim */
+	u32 freepages_count;	/* total count of memory reclaim */
 };
 #endif	/* CONFIG_TASK_DELAY_ACCT */
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 10e43fd8b72..84b6782a2ce 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -165,3 +165,16 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
 	return ret;
 }
 
+void __delayacct_freepages_start(void)
+{
+	delayacct_start(&current->delays->freepages_start);
+}
+
+void __delayacct_freepages_end(void)
+{
+	delayacct_end(&current->delays->freepages_start,
+			&current->delays->freepages_end,
+			&current->delays->freepages_delay,
+			&current->delays->freepages_count);
+}
+
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 967d30ccd92..26672c6cd3c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/memcontrol.h>
+#include <linux/delayacct.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1316,6 +1317,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 
+	delayacct_freepages_start();
+
 	if (scan_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
 	/*
@@ -1396,6 +1399,8 @@ out:
 	} else
 		mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
 
+	delayacct_freepages_end();
+
 	return ret;
 }
 
-- 
GitLab


From 016ae219b920c4e606088761d3d6070cdf8ba706 Mon Sep 17 00:00:00 2001
From: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Date: Fri, 25 Jul 2008 01:48:53 -0700
Subject: [PATCH 817/853] per-task-delay-accounting: update taskstats for
 memory reclaim delay

Add members for memory reclaim delay to taskstats, and accumulate them in
__delayacct_add_tsk() .

Signed-off-by: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Cc: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/accounting/taskstats-struct.txt | 7 +++++++
 include/linux/taskstats.h                     | 6 +++++-
 kernel/delayacct.c                            | 3 +++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index cd784f46bf8..b988d110db5 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -26,6 +26,8 @@ There are three different groups of fields in the struct taskstats:
 
 5) Time accounting for SMT machines
 
+6) Extended delay accounting fields for memory reclaim
+
 Future extension should add fields to the end of the taskstats struct, and
 should not change the relative position of each field within the struct.
 
@@ -170,4 +172,9 @@ struct taskstats {
 	__u64	ac_utimescaled;		/* utime scaled on frequency etc */
 	__u64	ac_stimescaled;		/* stime scaled on frequency etc */
 	__u64	cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim
+	/* Delay waiting for memory reclaim */
+	__u64	freepages_count;
+	__u64	freepages_delay_total;
 }
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 5d69c0744ff..18269e956a7 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
  */
 
 
-#define TASKSTATS_VERSION	6
+#define TASKSTATS_VERSION	7
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -157,6 +157,10 @@ struct taskstats {
 	__u64	ac_utimescaled;		/* utime scaled on frequency etc */
 	__u64	ac_stimescaled;		/* stime scaled on frequency etc */
 	__u64	cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+	/* Delay waiting for memory reclaim */
+	__u64	freepages_count;
+	__u64	freepages_delay_total;
 };
 
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 84b6782a2ce..b3179dad71b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -145,8 +145,11 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
 	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
 	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
+	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
 	d->blkio_count += tsk->delays->blkio_count;
 	d->swapin_count += tsk->delays->swapin_count;
+	d->freepages_count += tsk->delays->freepages_count;
 	spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
 done:
-- 
GitLab


From 9b0975a20af1ff2f367e3b6b7c150eb114c6b500 Mon Sep 17 00:00:00 2001
From: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Date: Fri, 25 Jul 2008 01:48:54 -0700
Subject: [PATCH 818/853] per-task-delay-accounting: update document and
 getdelays.c for memory reclaim

Update document and make getdelays.c show delay accounting for memory reclaim.

For making a distinction between "swapping in pages" and "memory reclaim"
in getdelays.c, MEM is changed to SWAP.

Signed-off-by: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/accounting/delay-accounting.txt | 11 ++++++++---
 Documentation/accounting/getdelays.c          |  8 ++++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
index 1443cd71d26..8a12f0730c9 100644
--- a/Documentation/accounting/delay-accounting.txt
+++ b/Documentation/accounting/delay-accounting.txt
@@ -11,6 +11,7 @@ the delays experienced by a task while
 a) waiting for a CPU (while being runnable)
 b) completion of synchronous block I/O initiated by the task
 c) swapping in pages
+d) memory reclaim
 
 and makes these statistics available to userspace through
 the taskstats interface.
@@ -41,7 +42,7 @@ this structure. See
      include/linux/taskstats.h
 for a description of the fields pertaining to delay accounting.
 It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
 
 Taking the difference of two successive readings of a given
 counter (say cpu_delay_total) for a task will give the delay
@@ -94,7 +95,9 @@ CPU	count	real total	virtual total	delay total
 	7876	92005750	100000000	24001500
 IO	count	delay total
 	0	0
-MEM	count	delay total
+SWAP	count	delay total
+	0	0
+RECLAIM	count	delay total
 	0	0
 
 Get delays seen in executing a given simple command
@@ -108,5 +111,7 @@ CPU	count	real total	virtual total	delay total
 	6	4000250		4000000		0
 IO	count	delay total
 	0	0
-MEM	count	delay total
+SWAP	count	delay total
+	0	0
+RECLAIM	count	delay total
 	0	0
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index 40121b5cca1..3f7755f3963 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -196,14 +196,18 @@ void print_delayacct(struct taskstats *t)
 	       "      %15llu%15llu%15llu%15llu\n"
 	       "IO    %15s%15s\n"
 	       "      %15llu%15llu\n"
-	       "MEM   %15s%15s\n"
+	       "SWAP  %15s%15s\n"
+	       "      %15llu%15llu\n"
+	       "RECLAIM  %12s%15s\n"
 	       "      %15llu%15llu\n",
 	       "count", "real total", "virtual total", "delay total",
 	       t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
 	       t->cpu_delay_total,
 	       "count", "delay total",
 	       t->blkio_count, t->blkio_delay_total,
-	       "count", "delay total", t->swapin_count, t->swapin_delay_total);
+	       "count", "delay total", t->swapin_count, t->swapin_delay_total,
+	       "count", "delay total",
+	       t->freepages_count, t->freepages_delay_total);
 }
 
 void task_context_switch_counts(struct taskstats *t)
-- 
GitLab


From b81f3ea92ba1fa676775677679889dc2a7f03c8b Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 25 Jul 2008 01:48:55 -0700
Subject: [PATCH 819/853] taskstats: remove initialization of static per-cpu
 variable

Cc: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/taskstats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 06b17547f4e..bd6be76303c 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -35,7 +35,7 @@
  */
 #define TASKSTATS_CPUMASK_MAXLEN	(100+6*NR_CPUS)
 
-static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
+static DEFINE_PER_CPU(__u32, taskstats_seqnum);
 static int family_registered;
 struct kmem_cache *taskstats_cache;
 
-- 
GitLab


From cc77b1521d06be07c9bb1a4a3e1f775dcaa15093 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:48:55 -0700
Subject: [PATCH 820/853] lockd: dont return EAGAIN for a permanent error

Fix nlm_fopen() to return NLM_FAILED (or NLM_LCK_DENIED_NOLOCKS) instead
of NLM_LCK_DENIED.  The latter means the lock request failed because of a
conflicting lock (i.e.  a temporary error), which is wrong in this case.

Also fix the client to return ENOLCK instead of EAGAIN if a blocking lock
request returns with NLM_LCK_DENIED.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: David Teigland <teigland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/lockd/clntproc.c | 10 +++++++++-
 fs/nfsd/lockd.c     | 13 +++++++++----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 1f6dc518505..31668b690e0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@ again:
 	}
 	if (status < 0)
 		goto out_unlock;
-	status = nlm_stat_to_errno(resp->status);
+	/*
+	 * EAGAIN doesn't make sense for sleeping locks, and in some
+	 * cases NLM_LCK_DENIED is returned for a permanent error.  So
+	 * turn it into an ENOLCK.
+	 */
+	if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
+		status = -ENOLCK;
+	else
+		status = nlm_stat_to_errno(resp->status);
 out_unblock:
 	nlmclnt_finish_block(block);
 out:
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 6b6225ac492..15c6faeec77 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_LOCKD
 
+#ifdef CONFIG_LOCKD_V4
+#define nlm_stale_fh	nlm4_stale_fh
+#define nlm_failed	nlm4_failed
+#else
+#define nlm_stale_fh	nlm_lck_denied_nolocks
+#define nlm_failed	nlm_lck_denied_nolocks
+#endif
 /*
  * Note: we hold the dentry use count while the file is open.
  */
@@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 		return 0;
 	case nfserr_dropit:
 		return nlm_drop_reply;
-#ifdef CONFIG_LOCKD_V4
 	case nfserr_stale:
-		return nlm4_stale_fh;
-#endif
+		return nlm_stale_fh;
 	default:
-		return nlm_lck_denied;
+		return nlm_failed;
 	}
 }
 
-- 
GitLab


From bde74e4bc64415b142e556a34d295a52a1b7da9d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:48:57 -0700
Subject: [PATCH 821/853] locks: add special return value for asynchronous
 locks

Use a special error value FILE_LOCK_DEFERRED to mean that a locking
operation returned asynchronously.  This is returned by

  posix_lock_file() for sleeping locks to mean that the lock has been
  queued on the block list, and will be woken up when it might become
  available and needs to be retried (either fl_lmops->fl_notify() is
  called or fl_wait is woken up).

  f_op->lock() to mean either the above, or that the filesystem will
  call back with fl_lmops->fl_grant() when the result of the locking
  operation is known.  The filesystem can do this for sleeping as well
  as non-sleeping locks.

This ensures that return values of -EAGAIN and -EINPROGRESS from
filesystems are not mistaken for asynchronous locking.

This also makes error handling in fs/locks.c and lockd/svclock.c slightly
cleaner.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: David Teigland <teigland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dlm/plock.c     |  2 +-
 fs/lockd/svclock.c | 13 ++++---------
 fs/locks.c         | 28 ++++++++++++++--------------
 include/linux/fs.h |  6 ++++++
 4 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5781c..eba87ff3177 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
 	if (xop->callback == NULL)
 		wait_event(recv_wq, (op->done != 0));
 	else {
-		rv = -EINPROGRESS;
+		rv = FILE_LOCK_DEFERRED;
 		goto out;
 	}
 
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 821b9acdfb6..cf0d5c2c318 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			goto out;
 		case -EAGAIN:
 			ret = nlm_lck_denied;
-			break;
-		case -EINPROGRESS:
+			goto out;
+		case FILE_LOCK_DEFERRED:
 			if (wait)
 				break;
 			/* Filesystem lock operation is in progress
@@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			goto out;
 	}
 
-	ret = nlm_lck_denied;
-	if (!wait)
-		goto out;
-
 	ret = nlm_lck_blocked;
 
 	/* Append to list of blocked */
@@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	}
 
 	error = vfs_test_lock(file->f_file, &lock->fl);
-	if (error == -EINPROGRESS) {
+	if (error == FILE_LOCK_DEFERRED) {
 		ret = nlmsvc_defer_lock_rqst(rqstp, block);
 		goto out;
 	}
@@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	switch (error) {
 	case 0:
 		break;
-	case -EAGAIN:
-	case -EINPROGRESS:
+	case FILE_LOCK_DEFERRED:
 		dprintk("lockd: lock still blocked error %d\n", error);
 		nlmsvc_insert_block(block, NLM_NEVER);
 		nlmsvc_release_block(block);
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747371..1ce57b4b362 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -779,8 +779,10 @@ find_conflict:
 		if (!flock_locks_conflict(request, fl))
 			continue;
 		error = -EAGAIN;
-		if (request->fl_flags & FL_SLEEP)
-			locks_insert_block(fl, request);
+		if (!(request->fl_flags & FL_SLEEP))
+			goto out;
+		error = FILE_LOCK_DEFERRED;
+		locks_insert_block(fl, request);
 		goto out;
 	}
 	if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 			error = -EDEADLK;
 			if (posix_locks_deadlock(request, fl))
 				goto out;
-			error = -EAGAIN;
+			error = FILE_LOCK_DEFERRED;
 			locks_insert_block(fl, request);
 			goto out;
   		}
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
 	might_sleep ();
 	for (;;) {
 		error = posix_lock_file(filp, fl, NULL);
-		if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
 		if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 
 	for (;;) {
 		error = __posix_lock_file(inode, &fl, NULL);
-		if (error != -EAGAIN)
-			break;
-		if (!(fl.fl_flags & FL_SLEEP))
+		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
 		if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
 	might_sleep();
 	for (;;) {
 		error = flock_lock_file(filp, fl);
-		if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
 		if (!error)
@@ -1716,17 +1716,17 @@ out:
  * fl_grant is set. Callers expecting ->lock() to return asynchronously
  * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
  * the request is for a blocking lock. When ->lock() does return asynchronously,
- * it must return -EINPROGRESS, and call ->fl_grant() when the lock
+ * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
  * request completes.
  * If the request is for non-blocking lock the file system should return
- * -EINPROGRESS then try to get the lock and call the callback routine with
- * the result. If the request timed out the callback routine will return a
+ * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
+ * with the result. If the request timed out the callback routine will return a
  * nonzero return code and the file system should release the lock. The file
  * system is also responsible to keep a corresponding posix lock when it
  * grants a lock so the VFS can find out which locks are locally held and do
  * the correct lock cleanup when required.
  * The underlying filesystem must not drop the kernel lock or call
- * ->fl_grant() before returning to the caller with a -EINPROGRESS
+ * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
  * return code.
  */
 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1804,7 +1804,7 @@ again:
 	else {
 		for (;;) {
 			error = posix_lock_file(filp, file_lock, NULL);
-			if (error != -EAGAIN || cmd == F_SETLK)
+			if (error != FILE_LOCK_DEFERRED)
 				break;
 			error = wait_event_interruptible(file_lock->fl_wait,
 					!file_lock->fl_next);
@@ -1941,7 +1941,7 @@ again:
 	else {
 		for (;;) {
 			error = posix_lock_file(filp, file_lock, NULL);
-			if (error != -EAGAIN || cmd == F_SETLK64)
+			if (error != FILE_LOCK_DEFERRED)
 				break;
 			error = wait_event_interruptible(file_lock->fl_wait,
 					!file_lock->fl_next);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b86f806014..49d8eb7a71b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -885,6 +885,12 @@ static inline int file_check_writeable(struct file *filp)
 #define FL_CLOSE	64	/* unlock on close */
 #define FL_SLEEP	128	/* A blocking lock */
 
+/*
+ * Special return value from posix_lock_file() and vfs_lock_file() for
+ * asynchronous locking.
+ */
+#define FILE_LOCK_DEFERRED 1
+
 /*
  * The POSIX file lock owner is determined by
  * the "struct files_struct" in the thread group
-- 
GitLab


From b648a6de00770cc325c22f43bdd4e935f6a2ee55 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:48:57 -0700
Subject: [PATCH 822/853] locks: cleanup code duplication

Extract common code into a function.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: David Teigland <teigland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/locks.c | 71 ++++++++++++++++++++++++------------------------------
 1 file changed, 31 insertions(+), 40 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 1ce57b4b362..6222e4b580e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1738,6 +1738,35 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str
 }
 EXPORT_SYMBOL_GPL(vfs_lock_file);
 
+static int do_lock_file_wait(struct file *filp, unsigned int cmd,
+			     struct file_lock *fl)
+{
+	int error;
+
+	error = security_file_lock(filp, fl->fl_type);
+	if (error)
+		return error;
+
+	if (filp->f_op && filp->f_op->lock != NULL)
+		error = filp->f_op->lock(filp, cmd, fl);
+	else {
+		for (;;) {
+			error = posix_lock_file(filp, fl, NULL);
+			if (error != FILE_LOCK_DEFERRED)
+				break;
+			error = wait_event_interruptible(fl->fl_wait,
+							 !fl->fl_next);
+			if (!error)
+				continue;
+
+			locks_delete_block(fl);
+			break;
+		}
+	}
+
+	return error;
+}
+
 /* Apply the lock described by l to an open file descriptor.
  * This implements both the F_SETLK and F_SETLKW commands of fcntl().
  */
@@ -1795,26 +1824,7 @@ again:
 		goto out;
 	}
 
-	error = security_file_lock(filp, file_lock->fl_type);
-	if (error)
-		goto out;
-
-	if (filp->f_op && filp->f_op->lock != NULL)
-		error = filp->f_op->lock(filp, cmd, file_lock);
-	else {
-		for (;;) {
-			error = posix_lock_file(filp, file_lock, NULL);
-			if (error != FILE_LOCK_DEFERRED)
-				break;
-			error = wait_event_interruptible(file_lock->fl_wait,
-					!file_lock->fl_next);
-			if (!error)
-				continue;
-
-			locks_delete_block(file_lock);
-			break;
-		}
-	}
+	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
 	 * Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1942,7 @@ again:
 		goto out;
 	}
 
-	error = security_file_lock(filp, file_lock->fl_type);
-	if (error)
-		goto out;
-
-	if (filp->f_op && filp->f_op->lock != NULL)
-		error = filp->f_op->lock(filp, cmd, file_lock);
-	else {
-		for (;;) {
-			error = posix_lock_file(filp, file_lock, NULL);
-			if (error != FILE_LOCK_DEFERRED)
-				break;
-			error = wait_event_interruptible(file_lock->fl_wait,
-					!file_lock->fl_next);
-			if (!error)
-				continue;
-
-			locks_delete_block(file_lock);
-			break;
-		}
-	}
+	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
 	 * Attempt to detect a close/fcntl race and recover by
-- 
GitLab


From 764c76b371722e0cba5c24d91225f0f954b69d44 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:48:58 -0700
Subject: [PATCH 823/853] locks: allow ->lock() to return FILE_LOCK_DEFERRED

Allow a filesystem's ->lock() method to call posix_lock_file() instead of
posix_lock_file_wait(), and return FILE_LOCK_DEFERRED.  This makes it
possible to implement a ->lock() function that works with the lock
manager, which needs the call to be asynchronous.

Now the vfs_lock_file() helper can be used, so this is a cleanup as well.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: David Teigland <teigland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/locks.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 6222e4b580e..01490300f7c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1747,21 +1747,16 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
 	if (error)
 		return error;
 
-	if (filp->f_op && filp->f_op->lock != NULL)
-		error = filp->f_op->lock(filp, cmd, fl);
-	else {
-		for (;;) {
-			error = posix_lock_file(filp, fl, NULL);
-			if (error != FILE_LOCK_DEFERRED)
-				break;
-			error = wait_event_interruptible(fl->fl_wait,
-							 !fl->fl_next);
-			if (!error)
-				continue;
-
-			locks_delete_block(fl);
+	for (;;) {
+		error = vfs_lock_file(filp, cmd, fl, NULL);
+		if (error != FILE_LOCK_DEFERRED)
 			break;
-		}
+		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
+		if (!error)
+			continue;
+
+		locks_delete_block(fl);
+		break;
 	}
 
 	return error;
-- 
GitLab


From 0de6256daafa3a97a269995e9b29f956bd419bbf Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:48:59 -0700
Subject: [PATCH 824/853] fuse: prepare lookup for nfs export

Use d_splice_alias() instead of d_add() in fuse lookup code, to allow NFS
exporting.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dir.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2060bf06b90..e5217b213b4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -239,18 +239,20 @@ int fuse_valid_type(int m)
  * Add a directory inode to a dentry, ensuring that no other dentry
  * refers to this inode.  Called with fc->inst_mutex.
  */
-static int fuse_d_add_directory(struct dentry *entry, struct inode *inode)
+static struct dentry *fuse_d_add_directory(struct dentry *entry,
+					   struct inode *inode)
 {
 	struct dentry *alias = d_find_alias(inode);
-	if (alias) {
+	if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
 		/* This tries to shrink the subtree below alias */
 		fuse_invalidate_entry(alias);
 		dput(alias);
 		if (!list_empty(&inode->i_dentry))
-			return -EBUSY;
+			return ERR_PTR(-EBUSY);
+	} else {
+		dput(alias);
 	}
-	d_add(entry, inode);
-	return 0;
+	return d_splice_alias(inode, entry);
 }
 
 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
@@ -259,6 +261,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	int err;
 	struct fuse_entry_out outarg;
 	struct inode *inode = NULL;
+	struct dentry *newent;
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req;
 	struct fuse_req *forget_req;
@@ -303,21 +306,22 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 
 	if (inode && S_ISDIR(inode->i_mode)) {
 		mutex_lock(&fc->inst_mutex);
-		err = fuse_d_add_directory(entry, inode);
+		newent = fuse_d_add_directory(entry, inode);
 		mutex_unlock(&fc->inst_mutex);
-		if (err) {
+		if (IS_ERR(newent)) {
 			iput(inode);
-			return ERR_PTR(err);
+			return newent;
 		}
 	} else
-		d_add(entry, inode);
+		newent = d_splice_alias(inode, entry);
 
+	entry = newent ? newent : entry;
 	entry->d_op = &fuse_dentry_operations;
 	if (!err)
 		fuse_change_entry_timeout(entry, &outarg);
 	else
 		fuse_invalidate_entry_cache(entry);
-	return NULL;
+	return newent;
 }
 
 /*
-- 
GitLab


From dbd561d236ff16f8143bc727d91758ddd190e8cb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:49:00 -0700
Subject: [PATCH 825/853] fuse: add export operations

Implement export_operations, to allow fuse filesystems to be exported to
NFS.  This feature has been in the out-of-tree fuse module, and is widely
used and tested.

It was not originally merged into mainline because doing the NFS export
in userspace was thought to be a cleaner and more efficient way of doing
it than through the kernel.

While that is true, it would also have involved a lot of duplicated effort
at reimplementing NFS exporting (all the different versions of the
protocol).  This effort was unfortunately not undertaken by anyone, so we
are left with doing it the easy but less efficient way.

If this feature goes in, the out-of-tree fuse module can go away,
which would have several advantages:

  - not having to maintain two versions
  - less confusion for users
  - no bugs due to kernel API changes

Comment from hch:
 - Use the same fh_type values as XFS, since we use the same fh encoding.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dir.c    |   4 +-
 fs/fuse/fuse_i.h |   4 ++
 fs/fuse/inode.c  | 115 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e5217b213b4..be5450dd638 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
  * timeout is unknown (unlink, rmdir, rename and in some cases
  * lookup)
  */
-static void fuse_invalidate_entry_cache(struct dentry *entry)
+void fuse_invalidate_entry_cache(struct dentry *entry)
 {
 	fuse_dentry_settime(entry, 0);
 }
@@ -225,7 +225,7 @@ static int invalid_nodeid(u64 nodeid)
 	return !nodeid || nodeid == FUSE_ROOT_ID;
 }
 
-static struct dentry_operations fuse_dentry_operations = {
+struct dentry_operations fuse_dentry_operations = {
 	.d_revalidate	= fuse_dentry_revalidate,
 };
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bae948657c4..5d3146da64e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -464,6 +464,8 @@ static inline u64 get_node_id(struct inode *inode)
 /** Device operations */
 extern const struct file_operations fuse_dev_operations;
 
+extern struct dentry_operations fuse_dentry_operations;
+
 /**
  * Get a filled in inode
  */
@@ -604,6 +606,8 @@ void fuse_abort_conn(struct fuse_conn *fc);
  */
 void fuse_invalidate_attr(struct inode *inode);
 
+void fuse_invalidate_entry_cache(struct dentry *entry);
+
 /**
  * Acquire reference to fuse_conn
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3141690558c..71fa76a48a3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
 #include <linux/statfs.h>
 #include <linux/random.h>
 #include <linux/sched.h>
+#include <linux/exportfs.h>
 
 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,119 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
 	return fuse_iget(sb, 1, 0, &attr, 0, 0);
 }
 
+struct fuse_inode_handle
+{
+	u64 nodeid;
+	u32 generation;
+};
+
+static struct dentry *fuse_get_dentry(struct super_block *sb,
+				      struct fuse_inode_handle *handle)
+{
+	struct inode *inode;
+	struct dentry *entry;
+	int err = -ESTALE;
+
+	if (handle->nodeid == 0)
+		goto out_err;
+
+	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
+	if (!inode)
+		goto out_err;
+	err = -ESTALE;
+	if (inode->i_generation != handle->generation)
+		goto out_iput;
+
+	entry = d_alloc_anon(inode);
+	err = -ENOMEM;
+	if (!entry)
+		goto out_iput;
+
+	if (get_node_id(inode) != FUSE_ROOT_ID) {
+		entry->d_op = &fuse_dentry_operations;
+		fuse_invalidate_entry_cache(entry);
+	}
+
+	return entry;
+
+ out_iput:
+	iput(inode);
+ out_err:
+	return ERR_PTR(err);
+}
+
+static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+			   int connectable)
+{
+	struct inode *inode = dentry->d_inode;
+	bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
+	int len = encode_parent ? 6 : 3;
+	u64 nodeid;
+	u32 generation;
+
+	if (*max_len < len)
+		return  255;
+
+	nodeid = get_fuse_inode(inode)->nodeid;
+	generation = inode->i_generation;
+
+	fh[0] = (u32)(nodeid >> 32);
+	fh[1] = (u32)(nodeid & 0xffffffff);
+	fh[2] = generation;
+
+	if (encode_parent) {
+		struct inode *parent;
+
+		spin_lock(&dentry->d_lock);
+		parent = dentry->d_parent->d_inode;
+		nodeid = get_fuse_inode(parent)->nodeid;
+		generation = parent->i_generation;
+		spin_unlock(&dentry->d_lock);
+
+		fh[3] = (u32)(nodeid >> 32);
+		fh[4] = (u32)(nodeid & 0xffffffff);
+		fh[5] = generation;
+	}
+
+	*max_len = len;
+	return encode_parent ? 0x82 : 0x81;
+}
+
+static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
+		struct fid *fid, int fh_len, int fh_type)
+{
+	struct fuse_inode_handle handle;
+
+	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
+		return NULL;
+
+	handle.nodeid = (u64) fid->raw[0] << 32;
+	handle.nodeid |= (u64) fid->raw[1];
+	handle.generation = fid->raw[2];
+	return fuse_get_dentry(sb, &handle);
+}
+
+static struct dentry *fuse_fh_to_parent(struct super_block *sb,
+		struct fid *fid, int fh_len, int fh_type)
+{
+	struct fuse_inode_handle parent;
+
+	if (fh_type != 0x82 || fh_len < 6)
+		return NULL;
+
+	parent.nodeid = (u64) fid->raw[3] << 32;
+	parent.nodeid |= (u64) fid->raw[4];
+	parent.generation = fid->raw[5];
+	return fuse_get_dentry(sb, &parent);
+}
+
+
+static const struct export_operations fuse_export_operations = {
+	.fh_to_dentry	= fuse_fh_to_dentry,
+	.fh_to_parent	= fuse_fh_to_parent,
+	.encode_fh	= fuse_encode_fh,
+};
+
 static const struct super_operations fuse_super_operations = {
 	.alloc_inode    = fuse_alloc_inode,
 	.destroy_inode  = fuse_destroy_inode,
@@ -652,6 +766,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_magic = FUSE_SUPER_MAGIC;
 	sb->s_op = &fuse_super_operations;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_export_op = &fuse_export_operations;
 
 	file = fget(d.fd);
 	if (!file)
-- 
GitLab


From c180eebe1390c2076ead6a9bc95a02efb994edb7 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:49:01 -0700
Subject: [PATCH 826/853] fuse: add fuse_lookup_name() helper

Add a new helper function which sends a LOOKUP request with the supplied
name.  This will be used by the next patch to send special LOOKUP requests
with "." and ".." as the name.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dir.c | 117 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 77 insertions(+), 40 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index be5450dd638..51d0035ff07 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry)
 	fuse_invalidate_entry_cache(entry);
 }
 
-static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
-			     struct dentry *entry,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+			     u64 nodeid, struct qstr *name,
 			     struct fuse_entry_out *outarg)
 {
-	struct fuse_conn *fc = get_fuse_conn(dir);
-
 	memset(outarg, 0, sizeof(struct fuse_entry_out));
 	req->in.h.opcode = FUSE_LOOKUP;
-	req->in.h.nodeid = get_node_id(dir);
+	req->in.h.nodeid = nodeid;
 	req->in.numargs = 1;
-	req->in.args[0].size = entry->d_name.len + 1;
-	req->in.args[0].value = entry->d_name.name;
+	req->in.args[0].size = name->len + 1;
+	req->in.args[0].value = name->name;
 	req->out.numargs = 1;
 	if (fc->minor < 9)
 		req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		attr_version = fuse_get_attr_version(fc);
 
 		parent = dget_parent(entry);
-		fuse_lookup_init(req, parent->d_inode, entry, &outarg);
+		fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+				 &entry->d_name, &outarg);
 		request_send(fc, req);
 		dput(parent);
 		err = req->out.h.error;
@@ -255,73 +254,111 @@ static struct dentry *fuse_d_add_directory(struct dentry *entry,
 	return d_splice_alias(inode, entry);
 }
 
-static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
-				  struct nameidata *nd)
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+		     struct fuse_entry_out *outarg, struct inode **inode)
 {
-	int err;
-	struct fuse_entry_out outarg;
-	struct inode *inode = NULL;
-	struct dentry *newent;
-	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
 	struct fuse_req *req;
 	struct fuse_req *forget_req;
 	u64 attr_version;
+	int err;
 
-	if (entry->d_name.len > FUSE_NAME_MAX)
-		return ERR_PTR(-ENAMETOOLONG);
+	*inode = NULL;
+	err = -ENAMETOOLONG;
+	if (name->len > FUSE_NAME_MAX)
+		goto out;
 
 	req = fuse_get_req(fc);
+	err = PTR_ERR(req);
 	if (IS_ERR(req))
-		return ERR_CAST(req);
+		goto out;
 
 	forget_req = fuse_get_req(fc);
+	err = PTR_ERR(forget_req);
 	if (IS_ERR(forget_req)) {
 		fuse_put_request(fc, req);
-		return ERR_CAST(forget_req);
+		goto out;
 	}
 
 	attr_version = fuse_get_attr_version(fc);
 
-	fuse_lookup_init(req, dir, entry, &outarg);
+	fuse_lookup_init(fc, req, nodeid, name, outarg);
 	request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
-	if (!err && outarg.nodeid &&
-	    (invalid_nodeid(outarg.nodeid) ||
-	     !fuse_valid_type(outarg.attr.mode)))
-		err = -EIO;
-	if (!err && outarg.nodeid) {
-		inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
-				  &outarg.attr, entry_attr_timeout(&outarg),
-				  attr_version);
-		if (!inode) {
-			fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
-			return ERR_PTR(-ENOMEM);
-		}
+	if (err || !outarg->nodeid)
+		goto out_put_forget;
+
+	err = -EIO;
+	if (!outarg->nodeid)
+		goto out_put_forget;
+	if (!fuse_valid_type(outarg->attr.mode))
+		goto out_put_forget;
+
+	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
+			   &outarg->attr, entry_attr_timeout(outarg),
+			   attr_version);
+	err = -ENOMEM;
+	if (!*inode) {
+		fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
+		goto out;
 	}
+	err = 0;
+
+ out_put_forget:
 	fuse_put_request(fc, forget_req);
-	if (err && err != -ENOENT)
-		return ERR_PTR(err);
+ out:
+	return err;
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+				  struct nameidata *nd)
+{
+	int err;
+	struct fuse_entry_out outarg;
+	struct inode *inode;
+	struct dentry *newent;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	bool outarg_valid = true;
+
+	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+			       &outarg, &inode);
+	if (err == -ENOENT) {
+		outarg_valid = false;
+		err = 0;
+	}
+	if (err)
+		goto out_err;
+
+	err = -EIO;
+	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
+		goto out_iput;
 
 	if (inode && S_ISDIR(inode->i_mode)) {
 		mutex_lock(&fc->inst_mutex);
 		newent = fuse_d_add_directory(entry, inode);
 		mutex_unlock(&fc->inst_mutex);
-		if (IS_ERR(newent)) {
-			iput(inode);
-			return newent;
-		}
-	} else
+		err = PTR_ERR(newent);
+		if (IS_ERR(newent))
+			goto out_iput;
+	} else {
 		newent = d_splice_alias(inode, entry);
+	}
 
 	entry = newent ? newent : entry;
 	entry->d_op = &fuse_dentry_operations;
-	if (!err)
+	if (outarg_valid)
 		fuse_change_entry_timeout(entry, &outarg);
 	else
 		fuse_invalidate_entry_cache(entry);
+
 	return newent;
+
+ out_iput:
+	iput(inode);
+ out_err:
+	return ERR_PTR(err);
 }
 
 /*
-- 
GitLab


From 33670fa296860283f04a7975b8c790f101e43a6e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:49:02 -0700
Subject: [PATCH 827/853] fuse: nfs export special lookups

Implement the get_parent export operation by sending a LOOKUP request with
".." as the name.

Implement looking up an inode by node ID after it has been evicted from
the cache.  This is done by sending a LOOKUP request with "." as the name
(for all file types, not just directories).

The filesystem can set the FUSE_EXPORT_SUPPORT flag in the INIT reply, to
indicate that it supports these special lookups.

Thanks to John Muir for the original implementation of this feature.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: David Teigland <teigland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/fuse_i.h     |  6 ++++
 fs/fuse/inode.c      | 66 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/fuse.h |  3 ++
 3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5d3146da64e..3a876076bdd 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
 	/** Do not send separate SETATTR request before open(O_TRUNC)  */
 	unsigned atomic_o_trunc : 1;
 
+	/** Filesystem supports NFS exporting.  Only set in INIT */
+	unsigned export_support : 1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
@@ -473,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 			int generation, struct fuse_attr *attr,
 			u64 attr_valid, u64 attr_version);
 
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+		     struct fuse_entry_out *outarg, struct inode **inode);
+
 /**
  * Send FORGET command
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 71fa76a48a3..7d2f7d6e22e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -562,6 +562,7 @@ struct fuse_inode_handle
 static struct dentry *fuse_get_dentry(struct super_block *sb,
 				      struct fuse_inode_handle *handle)
 {
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
 	struct inode *inode;
 	struct dentry *entry;
 	int err = -ESTALE;
@@ -570,8 +571,27 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
 		goto out_err;
 
 	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
-	if (!inode)
-		goto out_err;
+	if (!inode) {
+		struct fuse_entry_out outarg;
+		struct qstr name;
+
+		if (!fc->export_support)
+			goto out_err;
+
+		name.len = 1;
+		name.name = ".";
+		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
+				       &inode);
+		if (err && err != -ENOENT)
+			goto out_err;
+		if (err || !inode) {
+			err = -ESTALE;
+			goto out_err;
+		}
+		err = -EIO;
+		if (get_node_id(inode) != handle->nodeid)
+			goto out_iput;
+	}
 	err = -ESTALE;
 	if (inode->i_generation != handle->generation)
 		goto out_iput;
@@ -659,11 +679,46 @@ static struct dentry *fuse_fh_to_parent(struct super_block *sb,
 	return fuse_get_dentry(sb, &parent);
 }
 
+static struct dentry *fuse_get_parent(struct dentry *child)
+{
+	struct inode *child_inode = child->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(child_inode);
+	struct inode *inode;
+	struct dentry *parent;
+	struct fuse_entry_out outarg;
+	struct qstr name;
+	int err;
+
+	if (!fc->export_support)
+		return ERR_PTR(-ESTALE);
+
+	name.len = 2;
+	name.name = "..";
+	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
+			       &name, &outarg, &inode);
+	if (err && err != -ENOENT)
+		return ERR_PTR(err);
+	if (err || !inode)
+		return ERR_PTR(-ESTALE);
+
+	parent = d_alloc_anon(inode);
+	if (!parent) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	if (get_node_id(inode) != FUSE_ROOT_ID) {
+		parent->d_op = &fuse_dentry_operations;
+		fuse_invalidate_entry_cache(parent);
+	}
+
+	return parent;
+}
 
 static const struct export_operations fuse_export_operations = {
 	.fh_to_dentry	= fuse_fh_to_dentry,
 	.fh_to_parent	= fuse_fh_to_parent,
 	.encode_fh	= fuse_encode_fh,
+	.get_parent	= fuse_get_parent,
 };
 
 static const struct super_operations fuse_super_operations = {
@@ -695,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->no_lock = 1;
 			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
 				fc->atomic_o_trunc = 1;
+			if (arg->minor >= 9) {
+				/* LOOKUP has dependency on proto version */
+				if (arg->flags & FUSE_EXPORT_SUPPORT)
+					fc->export_support = 1;
+			}
 			if (arg->flags & FUSE_BIG_WRITES)
 				fc->big_writes = 1;
 		} else {
@@ -721,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
 	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
 	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-		FUSE_BIG_WRITES;
+		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index d4828219769..265635dc990 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -104,11 +104,14 @@ struct fuse_file_lock {
 
 /**
  * INIT request/reply flags
+ *
+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
 #define FUSE_FILE_OPS		(1 << 2)
 #define FUSE_ATOMIC_O_TRUNC	(1 << 3)
+#define FUSE_EXPORT_SUPPORT	(1 << 4)
 #define FUSE_BIG_WRITES		(1 << 5)
 
 /**
-- 
GitLab


From 48e90761b570ff57f58b726229d229729949c5bb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:49:02 -0700
Subject: [PATCH 828/853] fuse: lockd support

If a fuse filesystem doesn't define its own lock operations, then allow the
lock manager to work with fuse.

Adding lockd support for remote locking is also possible, but more rarely
used, so leave it till later.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: David Teigland <teigland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/file.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8092f0d9fd1..67ff2c6a8f6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 	pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
 	int err;
 
+	if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+		/* NLM needs asynchronous locks, which we don't support yet */
+		return -ENOLCK;
+	}
+
 	/* Unlock on close is handled by the flush method */
 	if (fl->fl_flags & FL_CLOSE)
 		return 0;
@@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	int err;
 
-	if (cmd == F_GETLK) {
+	if (cmd == F_CANCELLK) {
+		err = 0;
+	} else if (cmd == F_GETLK) {
 		if (fc->no_lock) {
 			posix_test_lock(file, fl);
 			err = 0;
@@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
 			err = fuse_getlk(file, fl);
 	} else {
 		if (fc->no_lock)
-			err = posix_lock_file_wait(file, fl);
+			err = posix_lock_file(file, fl, NULL);
 		else
 			err = fuse_setlk(file, fl, 0);
 	}
-- 
GitLab


From 8f421c595a9145959d8aab09172743132abdffdb Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:04 -0700
Subject: [PATCH 829/853] edac: i5100 new intel chipset driver

Preliminary support for the Intel 5100 MCH.  CE and UE errors are reported
along with the current DIMM label information and other memory parameters.

Reasons why this is preliminary:

1) This chip has 2 independent memory controllers which, for best
   performance, use interleaved accesses to the DDR2 memory.  This
   architecture does not map very well to the current edac data structures
   which depend on symmetric channel access to the interleaved data.
   Without core changes, the best I could do for now is to map both memory
   controllers to different csrows (first all ranks of controller 0, then
   all ranks of controller 1).  Someone much more familiar with the edac
   core than I will probably need to come up with a more general data
   structure to handle the interleaving and de-interleaving of the two
   memory controllers.

2) I have not yet tackled the de-interleaving of the rank/controller
   address space into the physical address space of the CPU.  There is
   nothing fundamentally missing, it is just ending up to be a lot of
   code, and I'd rather keep it separate for now, esp since it doesn't
   work yet...

3) The code depends on a particular i5100 chip select to DIMM mainboard
   chip select mapping.  This mapping seems obvious to me in order to
   support dual and single ranked memory, but it is not unique and DIMM
   labels could be wrong on other mainboards.  There is no way to query
   this mapping that I know of.

4) The code requires that the i5100 is in 32GB mode.  Only 4 ranks per
   controller, 2 ranks per DIMM are supported.  I do not have hardware
   (nor do I expect to have hardware anytime soon) for the 48GB (6 ranks
   per controller) mode.

5) The serial presence detect code should be broken out into a "real"
   i2c driver so that decode-dimms.pl can work.

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/Kconfig      |   7 +
 drivers/edac/Makefile     |   1 +
 drivers/edac/i5100_edac.c | 827 ++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h   |   3 +
 4 files changed, 838 insertions(+)
 create mode 100644 drivers/edac/i5100_edac.c

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 6e6c3c4aea6..5a11e3cbcae 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -123,6 +123,13 @@ config EDAC_I5000
 	  Support for error detection and correction the Intel
 	  Greekcreek/Blackford chipsets.
 
+config EDAC_I5100
+	tristate "Intel San Clemente MCH"
+	depends on EDAC_MM_EDAC && X86 && PCI
+	help
+	  Support for error detection and correction the Intel
+	  San Clemente MCH.
+
 config EDAC_MPC85XX
 	tristate "Freescale MPC85xx"
 	depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 83807731d4a..e5e9104b552 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -19,6 +19,7 @@ endif
 
 obj-$(CONFIG_EDAC_AMD76X)		+= amd76x_edac.o
 obj-$(CONFIG_EDAC_I5000)		+= i5000_edac.o
+obj-$(CONFIG_EDAC_I5100)		+= i5100_edac.o
 obj-$(CONFIG_EDAC_E7XXX)		+= e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)		+= e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)		+= i82443bxgx_edac.o
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
new file mode 100644
index 00000000000..43430bf7018
--- /dev/null
+++ b/drivers/edac/i5100_edac.c
@@ -0,0 +1,827 @@
+/*
+ * Intel 5100 Memory Controllers kernel module
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ * This module is based on the following document:
+ *
+ * Intel 5100X Chipset Memory Controller Hub (MCH) - Datasheet
+ *      http://download.intel.com/design/chipsets/datashts/318378.pdf
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/delay.h>
+#include <linux/mmzone.h>
+
+#include "edac_core.h"
+
+/* register addresses and bit field accessors... */
+
+/* device 16, func 1 */
+#define I5100_MS		0x44	/* Memory Status Register */
+#define I5100_SPDDATA		0x48	/* Serial Presence Detect Status Reg */
+#define		I5100_SPDDATA_RDO(a)	((a) >> 15 & 1)
+#define		I5100_SPDDATA_SBE(a)	((a) >> 13 & 1)
+#define		I5100_SPDDATA_BUSY(a)	((a) >> 12 & 1)
+#define		I5100_SPDDATA_DATA(a)	((a)       & ((1 << 8) - 1))
+#define I5100_SPDCMD		0x4c	/* Serial Presence Detect Command Reg */
+#define		I5100_SPDCMD_DTI(a)	(((a) & ((1 << 4) - 1)) << 28)
+#define		I5100_SPDCMD_CKOVRD(a)	(((a) & 1)              << 27)
+#define		I5100_SPDCMD_SA(a)	(((a) & ((1 << 3) - 1)) << 24)
+#define		I5100_SPDCMD_BA(a)	(((a) & ((1 << 8) - 1)) << 16)
+#define		I5100_SPDCMD_DATA(a)	(((a) & ((1 << 8) - 1)) <<  8)
+#define		I5100_SPDCMD_CMD(a)	((a) & 1)
+#define I5100_TOLM		0x6c	/* Top of Low Memory */
+#define		I5100_TOLM_TOLM(a)	((a) >> 12 & ((1 << 4) - 1))
+#define I5100_MIR0		0x80	/* Memory Interleave Range 0 */
+#define I5100_MIR1		0x84	/* Memory Interleave Range 1 */
+#define I5100_AMIR_0		0x8c	/* Adjusted Memory Interleave Range 0 */
+#define I5100_AMIR_1		0x90	/* Adjusted Memory Interleave Range 1 */
+#define		I5100_MIR_LIMIT(a)	((a) >> 4 & ((1 << 12) - 1))
+#define		I5100_MIR_WAY1(a)	((a) >> 1 & 1)
+#define		I5100_MIR_WAY0(a)	((a)      & 1)
+#define I5100_FERR_NF_MEM	0xa0	/* MC First Non Fatal Errors */
+#define		I5100_FERR_NF_MEM_CHAN_INDX(a)	((a) >> 28 & 1)
+#define		I5100_FERR_NF_MEM_SPD_MASK	(1 << 18)
+#define		I5100_FERR_NF_MEM_M16ERR_MASK	(1 << 16)
+#define		I5100_FERR_NF_MEM_M15ERR_MASK	(1 << 15)
+#define		I5100_FERR_NF_MEM_M14ERR_MASK	(1 << 14)
+#define		I5100_FERR_NF_MEM_
+#define		I5100_FERR_NF_MEM_
+#define		I5100_FERR_NF_MEM_ANY_MASK	\
+			(I5100_FERR_NF_MEM_M16ERR_MASK | \
+			I5100_FERR_NF_MEM_M15ERR_MASK | \
+			I5100_FERR_NF_MEM_M14ERR_MASK)
+#define		I5100_FERR_NF_MEM_ANY(a)  ((a) & I5100_FERR_NF_MEM_ANY_MASK)
+#define	I5100_NERR_NF_MEM	0xa4	/* MC Next Non-Fatal Errors */
+#define		I5100_NERR_NF_MEM_ANY(a)  I5100_FERR_NF_MEM_ANY(a)
+
+/* device 21 and 22, func 0 */
+#define I5100_MTR_0	0x154	/* Memory Technology Registers 0-3 */
+#define I5100_DMIR	0x15c	/* DIMM Interleave Range */
+#define		I5100_DMIR_LIMIT(a)	((a) >> 16 & ((1 << 11) - 1))
+#define		I5100_DMIR_RANK(a, i)	((a) >> (4 * i) & ((1 <<  2) - 1))
+#define I5100_MTR_4	0x1b0	/* Memory Technology Registers 4,5 */
+#define		I5100_MTR_PRESENT(a)	((a) >> 10 & 1)
+#define		I5100_MTR_ETHROTTLE(a)	((a) >>  9 & 1)
+#define		I5100_MTR_WIDTH(a)	((a) >>  8 & 1)
+#define		I5100_MTR_NUMBANK(a)	((a) >>  6 & 1)
+#define		I5100_MTR_NUMROW(a)	((a) >>  2 & ((1 << 2) - 1))
+#define		I5100_MTR_NUMCOL(a)	((a)       & ((1 << 2) - 1))
+#define	I5100_VALIDLOG	0x18c	/* Valid Log Markers */
+#define		I5100_VALIDLOG_REDMEMVALID(a)	((a) >> 2 & 1)
+#define		I5100_VALIDLOG_RECMEMVALID(a)	((a) >> 1 & 1)
+#define		I5100_VALIDLOG_NRECMEMVALID(a)	((a)      & 1)
+#define	I5100_NRECMEMA	0x190	/* Non-Recoverable Memory Error Log Reg A */
+#define		I5100_NRECMEMA_MERR(a)		((a) >> 15 & ((1 << 5) - 1))
+#define		I5100_NRECMEMA_BANK(a)		((a) >> 12 & ((1 << 3) - 1))
+#define		I5100_NRECMEMA_RANK(a)		((a) >>  8 & ((1 << 3) - 1))
+#define		I5100_NRECMEMA_DM_BUF_ID(a)	((a)       & ((1 << 8) - 1))
+#define	I5100_NRECMEMB	0x194	/* Non-Recoverable Memory Error Log Reg B */
+#define		I5100_NRECMEMB_CAS(a)		((a) >> 16 & ((1 << 13) - 1))
+#define		I5100_NRECMEMB_RAS(a)		((a)       & ((1 << 16) - 1))
+#define	I5100_REDMEMA	0x198	/* Recoverable Memory Data Error Log Reg A */
+#define		I5100_REDMEMA_SYNDROME(a)	(a)
+#define	I5100_REDMEMB	0x19c	/* Recoverable Memory Data Error Log Reg B */
+#define		I5100_REDMEMB_ECC_LOCATOR(a)	((a) & ((1 << 18) - 1))
+#define	I5100_RECMEMA	0x1a0	/* Recoverable Memory Error Log Reg A */
+#define		I5100_RECMEMA_MERR(a)		I5100_NRECMEMA_MERR(a)
+#define		I5100_RECMEMA_BANK(a)		I5100_NRECMEMA_BANK(a)
+#define		I5100_RECMEMA_RANK(a)		I5100_NRECMEMA_RANK(a)
+#define		I5100_RECMEMA_DM_BUF_ID(a)	I5100_NRECMEMA_DM_BUF_ID(a)
+#define	I5100_RECMEMB	0x1a4	/* Recoverable Memory Error Log Reg B */
+#define		I5100_RECMEMB_CAS(a)		I5100_NRECMEMB_CAS(a)
+#define		I5100_RECMEMB_RAS(a)		I5100_NRECMEMB_RAS(a)
+
+/* some generic limits */
+#define I5100_MAX_RANKS_PER_CTLR	6
+#define I5100_MAX_CTLRS			2
+#define I5100_MAX_RANKS_PER_DIMM	4
+#define I5100_DIMM_ADDR_LINES		(6 - 3)	/* 64 bits / 8 bits per byte */
+#define I5100_MAX_DIMM_SLOTS_PER_CTLR	4
+#define I5100_MAX_RANK_INTERLEAVE	4
+#define I5100_MAX_DMIRS			5
+
+struct i5100_priv {
+	/* ranks on each dimm -- 0 maps to not present -- obtained via SPD */
+	int dimm_numrank[I5100_MAX_CTLRS][I5100_MAX_DIMM_SLOTS_PER_CTLR];
+
+	/*
+	 * mainboard chip select map -- maps i5100 chip selects to
+	 * DIMM slot chip selects.  In the case of only 4 ranks per
+	 * controller, the mapping is fairly obvious but not unique.
+	 * we map -1 -> NC and assume both controllers use the same
+	 * map...
+	 *
+	 */
+	int dimm_csmap[I5100_MAX_DIMM_SLOTS_PER_CTLR][I5100_MAX_RANKS_PER_DIMM];
+
+	/* memory interleave range */
+	struct {
+		u64	 limit;
+		unsigned way[2];
+	} mir[I5100_MAX_CTLRS];
+
+	/* adjusted memory interleave range register */
+	unsigned amir[I5100_MAX_CTLRS];
+
+	/* dimm interleave range */
+	struct {
+		unsigned rank[I5100_MAX_RANK_INTERLEAVE];
+		u64	 limit;
+	} dmir[I5100_MAX_CTLRS][I5100_MAX_DMIRS];
+
+	/* memory technology registers... */
+	struct {
+		unsigned present;	/* 0 or 1 */
+		unsigned ethrottle;	/* 0 or 1 */
+		unsigned width;		/* 4 or 8 bits  */
+		unsigned numbank;	/* 2 or 3 lines */
+		unsigned numrow;	/* 13 .. 16 lines */
+		unsigned numcol;	/* 11 .. 12 lines */
+	} mtr[I5100_MAX_CTLRS][I5100_MAX_RANKS_PER_CTLR];
+
+	u64 tolm;		/* top of low memory in bytes */
+	unsigned ranksperctlr;	/* number of ranks per controller */
+
+	struct pci_dev *mc;	/* device 16 func 1 */
+	struct pci_dev *ch0mm;	/* device 21 func 0 */
+	struct pci_dev *ch1mm;	/* device 22 func 0 */
+};
+
+/* map a rank/ctlr to a slot number on the mainboard */
+static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
+			      int ctlr, int rank)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+	int i;
+
+	for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
+		int j;
+		const int numrank = priv->dimm_numrank[ctlr][i];
+
+		for (j = 0; j < numrank; j++)
+			if (priv->dimm_csmap[i][j] == rank)
+				return i * 2 + ctlr;
+	}
+
+	return -1;
+}
+
+/*
+ * The processor bus memory addresses are broken into three
+ * pieces, whereas the controller addresses are contiguous.
+ *
+ * here we map from the controller address space to the
+ * processor address space:
+ *
+ *    Processor Address Space
+ * +-----------------------------+
+ * |                             |
+ * |  "high" memory addresses    |
+ * |                             |
+ * +-----------------------------+ <- 4GB on the i5100
+ * |                             |
+ * |  other non-memory addresses |
+ * |                             |
+ * +-----------------------------+ <- top of low memory
+ * |                             |
+ * | "low" memory addresses      |
+ * |                             |
+ * +-----------------------------+
+ */
+static unsigned long i5100_ctl_page_to_phys(struct mem_ctl_info *mci,
+					    unsigned long cntlr_addr)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+
+	if (cntlr_addr < priv->tolm)
+		return cntlr_addr;
+
+	return (1ULL << 32) + (cntlr_addr - priv->tolm);
+}
+
+static const char *i5100_err_msg(unsigned err)
+{
+	const char *merrs[] = {
+		"unknown", /* 0 */
+		"uncorrectable data ECC on replay", /* 1 */
+		"unknown", /* 2 */
+		"unknown", /* 3 */
+		"aliased uncorrectable demand data ECC", /* 4 */
+		"aliased uncorrectable spare-copy data ECC", /* 5 */
+		"aliased uncorrectable patrol data ECC", /* 6 */
+		"unknown", /* 7 */
+		"unknown", /* 8 */
+		"unknown", /* 9 */
+		"non-aliased uncorrectable demand data ECC", /* 10 */
+		"non-aliased uncorrectable spare-copy data ECC", /* 11 */
+		"non-aliased uncorrectable patrol data ECC", /* 12 */
+		"unknown", /* 13 */
+		"correctable demand data ECC", /* 14 */
+		"correctable spare-copy data ECC", /* 15 */
+		"correctable patrol data ECC", /* 16 */
+		"unknown", /* 17 */
+		"SPD protocol error", /* 18 */
+		"unknown", /* 19 */
+		"spare copy initiated", /* 20 */
+		"spare copy completed", /* 21 */
+	};
+	unsigned i;
+
+	for (i = 0; i < ARRAY_SIZE(merrs); i++)
+		if (1 << i & err)
+			return merrs[i];
+
+	return "none";
+}
+
+/* convert csrow index into a rank (per controller -- 0..5) */
+static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+
+	return csrow % priv->ranksperctlr;
+}
+
+/* convert csrow index into a controller (0..1) */
+static int i5100_csrow_to_cntlr(const struct mem_ctl_info *mci, int csrow)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+
+	return csrow / priv->ranksperctlr;
+}
+
+static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci,
+				    int ctlr, int rank)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+
+	return ctlr * priv->ranksperctlr + rank;
+}
+
+static void i5100_handle_ce(struct mem_ctl_info *mci,
+			    int ctlr,
+			    unsigned bank,
+			    unsigned rank,
+			    unsigned long syndrome,
+			    unsigned cas,
+			    unsigned ras,
+			    const char *msg)
+{
+	const int csrow = i5100_rank_to_csrow(mci, ctlr, rank);
+
+	printk(KERN_ERR
+		"CE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+		"cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+		ctlr, bank, rank, syndrome, cas, ras,
+		csrow, mci->csrows[csrow].channels[0].label, msg);
+
+	mci->ce_count++;
+	mci->csrows[csrow].ce_count++;
+	mci->csrows[csrow].channels[0].ce_count++;
+}
+
+static void i5100_handle_ue(struct mem_ctl_info *mci,
+			    int ctlr,
+			    unsigned bank,
+			    unsigned rank,
+			    unsigned long syndrome,
+			    unsigned cas,
+			    unsigned ras,
+			    const char *msg)
+{
+	const int csrow = i5100_rank_to_csrow(mci, ctlr, rank);
+
+	printk(KERN_ERR
+		"UE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+		"cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+		ctlr, bank, rank, syndrome, cas, ras,
+		csrow, mci->csrows[csrow].channels[0].label, msg);
+
+	mci->ue_count++;
+	mci->csrows[csrow].ue_count++;
+}
+
+static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
+			   u32 ferr, u32 nerr)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm;
+	u32 dw;
+	u32 dw2;
+	unsigned syndrome = 0;
+	unsigned ecc_loc = 0;
+	unsigned merr;
+	unsigned bank;
+	unsigned rank;
+	unsigned cas;
+	unsigned ras;
+
+	pci_read_config_dword(pdev, I5100_VALIDLOG, &dw);
+
+	if (I5100_VALIDLOG_REDMEMVALID(dw)) {
+		pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
+		syndrome = I5100_REDMEMA_SYNDROME(dw2);
+		pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
+		ecc_loc = I5100_REDMEMB_ECC_LOCATOR(dw2);
+	}
+
+	if (I5100_VALIDLOG_RECMEMVALID(dw)) {
+		const char *msg;
+
+		pci_read_config_dword(pdev, I5100_RECMEMA, &dw2);
+		merr = I5100_RECMEMA_MERR(dw2);
+		bank = I5100_RECMEMA_BANK(dw2);
+		rank = I5100_RECMEMA_RANK(dw2);
+
+		pci_read_config_dword(pdev, I5100_RECMEMB, &dw2);
+		cas = I5100_RECMEMB_CAS(dw2);
+		ras = I5100_RECMEMB_RAS(dw2);
+
+		/* FIXME:  not really sure if this is what merr is...
+		 */
+		if (!merr)
+			msg = i5100_err_msg(ferr);
+		else
+			msg = i5100_err_msg(nerr);
+
+		i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+	}
+
+	if (I5100_VALIDLOG_NRECMEMVALID(dw)) {
+		const char *msg;
+
+		pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2);
+		merr = I5100_NRECMEMA_MERR(dw2);
+		bank = I5100_NRECMEMA_BANK(dw2);
+		rank = I5100_NRECMEMA_RANK(dw2);
+
+		pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2);
+		cas = I5100_NRECMEMB_CAS(dw2);
+		ras = I5100_NRECMEMB_RAS(dw2);
+
+		/* FIXME:  not really sure if this is what merr is...
+		 */
+		if (!merr)
+			msg = i5100_err_msg(ferr);
+		else
+			msg = i5100_err_msg(nerr);
+
+		i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+	}
+
+	pci_write_config_dword(pdev, I5100_VALIDLOG, dw);
+}
+
+static void i5100_check_error(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u32 dw;
+
+
+	pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
+	if (I5100_FERR_NF_MEM_ANY(dw)) {
+		u32 dw2;
+
+		pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
+		if (dw2)
+			pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM,
+					       dw2);
+		pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
+
+		i5100_read_log(mci, I5100_FERR_NF_MEM_CHAN_INDX(dw),
+			       I5100_FERR_NF_MEM_ANY(dw),
+			       I5100_NERR_NF_MEM_ANY(dw2));
+	}
+}
+
+static struct pci_dev *pci_get_device_func(unsigned vendor,
+					   unsigned device,
+					   unsigned func)
+{
+	struct pci_dev *ret = NULL;
+
+	while (1) {
+		ret = pci_get_device(vendor, device, ret);
+
+		if (!ret)
+			break;
+
+		if (PCI_FUNC(ret->devfn) == func)
+			break;
+	}
+
+	return ret;
+}
+
+static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
+					    int csrow)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	const unsigned ctlr_rank = i5100_csrow_to_rank(mci, csrow);
+	const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow);
+	unsigned addr_lines;
+
+	/* dimm present? */
+	if (!priv->mtr[ctlr][ctlr_rank].present)
+		return 0ULL;
+
+	addr_lines =
+		I5100_DIMM_ADDR_LINES +
+		priv->mtr[ctlr][ctlr_rank].numcol +
+		priv->mtr[ctlr][ctlr_rank].numrow +
+		priv->mtr[ctlr][ctlr_rank].numbank;
+
+	return (unsigned long)
+		((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE);
+}
+
+static void __devinit i5100_init_mtr(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+	int i;
+
+	for (i = 0; i < I5100_MAX_CTLRS; i++) {
+		int j;
+		struct pci_dev *pdev = mms[i];
+
+		for (j = 0; j < I5100_MAX_RANKS_PER_CTLR; j++) {
+			const unsigned addr =
+				(j < 4) ? I5100_MTR_0 + j * 2 :
+					  I5100_MTR_4 + (j - 4) * 2;
+			u16 w;
+
+			pci_read_config_word(pdev, addr, &w);
+
+			priv->mtr[i][j].present = I5100_MTR_PRESENT(w);
+			priv->mtr[i][j].ethrottle = I5100_MTR_ETHROTTLE(w);
+			priv->mtr[i][j].width = 4 + 4 * I5100_MTR_WIDTH(w);
+			priv->mtr[i][j].numbank = 2 + I5100_MTR_NUMBANK(w);
+			priv->mtr[i][j].numrow = 13 + I5100_MTR_NUMROW(w);
+			priv->mtr[i][j].numcol = 10 + I5100_MTR_NUMCOL(w);
+		}
+	}
+}
+
+/*
+ * FIXME: make this into a real i2c adapter (so that dimm-decode
+ * will work)?
+ */
+static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
+			       u8 ch, u8 slot, u8 addr, u8 *byte)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u16 w;
+	u32 dw;
+	unsigned long et;
+
+	pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+	if (I5100_SPDDATA_BUSY(w))
+		return -1;
+
+	dw =	I5100_SPDCMD_DTI(0xa) |
+		I5100_SPDCMD_CKOVRD(1) |
+		I5100_SPDCMD_SA(ch * 4 + slot) |
+		I5100_SPDCMD_BA(addr) |
+		I5100_SPDCMD_DATA(0) |
+		I5100_SPDCMD_CMD(0);
+	pci_write_config_dword(priv->mc, I5100_SPDCMD, dw);
+
+	/* wait up to 100ms */
+	et = jiffies + HZ / 10;
+	udelay(100);
+	while (1) {
+		pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+		if (!I5100_SPDDATA_BUSY(w))
+			break;
+		udelay(100);
+	}
+
+	if (!I5100_SPDDATA_RDO(w) || I5100_SPDDATA_SBE(w))
+		return -1;
+
+	*byte = I5100_SPDDATA_DATA(w);
+
+	return 0;
+}
+
+/*
+ * fill dimm chip select map
+ *
+ * FIXME:
+ *   o only valid for 4 ranks per controller
+ *   o not the only way to may chip selects to dimm slots
+ *   o investigate if there is some way to obtain this map from the bios
+ */
+static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	int i;
+
+	WARN_ON(priv->ranksperctlr != 4);
+
+	for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
+		int j;
+
+		for (j = 0; j < I5100_MAX_RANKS_PER_DIMM; j++)
+			priv->dimm_csmap[i][j] = -1; /* default NC */
+	}
+
+	/* only 2 chip selects per slot... */
+	priv->dimm_csmap[0][0] = 0;
+	priv->dimm_csmap[0][1] = 3;
+	priv->dimm_csmap[1][0] = 1;
+	priv->dimm_csmap[1][1] = 2;
+	priv->dimm_csmap[2][0] = 2;
+	priv->dimm_csmap[3][0] = 3;
+}
+
+static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev,
+					     struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	int i;
+
+	for (i = 0; i < I5100_MAX_CTLRS; i++) {
+		int j;
+
+		for (j = 0; j < I5100_MAX_DIMM_SLOTS_PER_CTLR; j++) {
+			u8 rank;
+
+			if (i5100_read_spd_byte(mci, i, j, 5, &rank) < 0)
+				priv->dimm_numrank[i][j] = 0;
+			else
+				priv->dimm_numrank[i][j] = (rank & 3) + 1;
+		}
+	}
+
+	i5100_init_dimm_csmap(mci);
+}
+
+static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
+					      struct mem_ctl_info *mci)
+{
+	u16 w;
+	u32 dw;
+	struct i5100_priv *priv = mci->pvt_info;
+	struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+	int i;
+
+	pci_read_config_word(pdev, I5100_TOLM, &w);
+	priv->tolm = (u64) I5100_TOLM_TOLM(w) * 256 * 1024 * 1024;
+
+	pci_read_config_word(pdev, I5100_MIR0, &w);
+	priv->mir[0].limit = (u64) I5100_MIR_LIMIT(w) << 28;
+	priv->mir[0].way[1] = I5100_MIR_WAY1(w);
+	priv->mir[0].way[0] = I5100_MIR_WAY0(w);
+
+	pci_read_config_word(pdev, I5100_MIR1, &w);
+	priv->mir[1].limit = (u64) I5100_MIR_LIMIT(w) << 28;
+	priv->mir[1].way[1] = I5100_MIR_WAY1(w);
+	priv->mir[1].way[0] = I5100_MIR_WAY0(w);
+
+	pci_read_config_word(pdev, I5100_AMIR_0, &w);
+	priv->amir[0] = w;
+	pci_read_config_word(pdev, I5100_AMIR_1, &w);
+	priv->amir[1] = w;
+
+	for (i = 0; i < I5100_MAX_CTLRS; i++) {
+		int j;
+
+		for (j = 0; j < 5; j++) {
+			int k;
+
+			pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw);
+
+			priv->dmir[i][j].limit =
+				(u64) I5100_DMIR_LIMIT(dw) << 28;
+			for (k = 0; k < I5100_MAX_RANKS_PER_DIMM; k++)
+				priv->dmir[i][j].rank[k] =
+					I5100_DMIR_RANK(dw, k);
+		}
+	}
+
+	i5100_init_mtr(mci);
+}
+
+static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
+{
+	int i;
+	unsigned long total_pages = 0UL;
+	struct i5100_priv *priv = mci->pvt_info;
+
+	for (i = 0; i < mci->nr_csrows; i++) {
+		const unsigned long npages = i5100_npages(mci, i);
+		const unsigned cntlr = i5100_csrow_to_cntlr(mci, i);
+		const unsigned rank = i5100_csrow_to_rank(mci, i);
+
+		if (!npages)
+			continue;
+
+		/*
+		 * FIXME: these two are totally bogus -- I don't see how to
+		 * map them correctly to this structure...
+		 */
+		mci->csrows[i].first_page = total_pages;
+		mci->csrows[i].last_page = total_pages + npages - 1;
+		mci->csrows[i].page_mask = 0UL;
+
+		mci->csrows[i].nr_pages = npages;
+		mci->csrows[i].grain = 32;
+		mci->csrows[i].csrow_idx = i;
+		mci->csrows[i].dtype =
+			(priv->mtr[cntlr][rank].width == 4) ? DEV_X4 : DEV_X8;
+		mci->csrows[i].ue_count = 0;
+		mci->csrows[i].ce_count = 0;
+		mci->csrows[i].mtype = MEM_RDDR2;
+		mci->csrows[i].edac_mode = EDAC_SECDED;
+		mci->csrows[i].mci = mci;
+		mci->csrows[i].nr_channels = 1;
+		mci->csrows[i].channels[0].chan_idx = 0;
+		mci->csrows[i].channels[0].ce_count = 0;
+		mci->csrows[i].channels[0].csrow = mci->csrows + i;
+		snprintf(mci->csrows[i].channels[0].label,
+			 sizeof(mci->csrows[i].channels[0].label),
+			 "DIMM%u", i5100_rank_to_slot(mci, cntlr, rank));
+
+		total_pages += npages;
+	}
+}
+
+static int __devinit i5100_init_one(struct pci_dev *pdev,
+				    const struct pci_device_id *id)
+{
+	int rc;
+	struct mem_ctl_info *mci;
+	struct i5100_priv *priv;
+	struct pci_dev *ch0mm, *ch1mm;
+	int ret = 0;
+	u32 dw;
+	int ranksperch;
+
+	if (PCI_FUNC(pdev->devfn) != 1)
+		return -ENODEV;
+
+	rc = pci_enable_device(pdev);
+	if (rc < 0) {
+		ret = rc;
+		goto bail;
+	}
+
+	/* figure out how many ranks, from strapped state of 48GB_Mode input */
+	pci_read_config_dword(pdev, I5100_MS, &dw);
+	ranksperch = !!(dw & (1 << 8)) * 2 + 4;
+
+	if (ranksperch != 4) {
+		/* FIXME: get 6 ranks / controller to work - need hw... */
+		printk(KERN_INFO "i5100_edac: unsupported configuration.\n");
+		ret = -ENODEV;
+		goto bail;
+	}
+
+	/* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
+	ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+				    PCI_DEVICE_ID_INTEL_5100_21, 0);
+	if (!ch0mm)
+		return -ENODEV;
+
+	rc = pci_enable_device(ch0mm);
+	if (rc < 0) {
+		ret = rc;
+		goto bail_ch0;
+	}
+
+	/* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */
+	ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+				    PCI_DEVICE_ID_INTEL_5100_22, 0);
+	if (!ch1mm) {
+		ret = -ENODEV;
+		goto bail_ch0;
+	}
+
+	rc = pci_enable_device(ch1mm);
+	if (rc < 0) {
+		ret = rc;
+		goto bail_ch1;
+	}
+
+	mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
+	if (!mci) {
+		ret = -ENOMEM;
+		goto bail_ch1;
+	}
+
+	mci->dev = &pdev->dev;
+
+	priv = mci->pvt_info;
+	priv->ranksperctlr = ranksperch;
+	priv->mc = pdev;
+	priv->ch0mm = ch0mm;
+	priv->ch1mm = ch1mm;
+
+	i5100_init_dimm_layout(pdev, mci);
+	i5100_init_interleaving(pdev, mci);
+
+	mci->mtype_cap = MEM_FLAG_FB_DDR2;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = "i5100_edac.c";
+	mci->mod_ver = "not versioned";
+	mci->ctl_name = "i5100";
+	mci->dev_name = pci_name(pdev);
+	mci->ctl_page_to_phys = i5100_ctl_page_to_phys;
+
+	mci->edac_check = i5100_check_error;
+
+	i5100_init_csrows(mci);
+
+	/* this strange construction seems to be in every driver, dunno why */
+	switch (edac_op_state) {
+	case EDAC_OPSTATE_POLL:
+	case EDAC_OPSTATE_NMI:
+		break;
+	default:
+		edac_op_state = EDAC_OPSTATE_POLL;
+		break;
+	}
+
+	if (edac_mc_add_mc(mci)) {
+		ret = -ENODEV;
+		goto bail_mc;
+	}
+
+	goto bail;
+
+bail_mc:
+	edac_mc_free(mci);
+
+bail_ch1:
+	pci_dev_put(ch1mm);
+
+bail_ch0:
+	pci_dev_put(ch0mm);
+
+bail:
+	return ret;
+}
+
+static void __devexit i5100_remove_one(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct i5100_priv *priv;
+
+	mci = edac_mc_del_mc(&pdev->dev);
+
+	if (!mci)
+		return;
+
+	priv = mci->pvt_info;
+	pci_dev_put(priv->ch0mm);
+	pci_dev_put(priv->ch1mm);
+
+	edac_mc_free(mci);
+}
+
+static const struct pci_device_id i5100_pci_tbl[] __devinitdata = {
+	/* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, i5100_pci_tbl);
+
+static struct pci_driver i5100_driver = {
+	.name = KBUILD_BASENAME,
+	.probe = i5100_init_one,
+	.remove = __devexit_p(i5100_remove_one),
+	.id_table = i5100_pci_tbl,
+};
+
+static int __init i5100_init(void)
+{
+	int pci_rc;
+
+	pci_rc = pci_register_driver(&i5100_driver);
+
+	return (pci_rc < 0) ? pci_rc : 0;
+}
+
+static void __exit i5100_exit(void)
+{
+	pci_unregister_driver(&i5100_driver);
+}
+
+module_init(i5100_init);
+module_exit(i5100_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR
+    ("Arthur Jones <ajones@riverbed.com>");
+MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers");
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 119ae7b8f02..c3b1761aba2 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2400,6 +2400,9 @@
 #define PCI_DEVICE_ID_INTEL_ICH10_4	0x3a30
 #define PCI_DEVICE_ID_INTEL_ICH10_5	0x3a60
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
+#define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
+#define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
 #define PCI_DEVICE_ID_INTEL_5400_ERR	0x4030
 #define PCI_DEVICE_ID_INTEL_5400_FBD0	0x4035
 #define PCI_DEVICE_ID_INTEL_5400_FBD1	0x4036
-- 
GitLab


From f7952ffcffa88c9a3fa92c26081f4ec9143c680f Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:05 -0700
Subject: [PATCH 830/853] edac: i5100 fix missing bits

The error mask we use to trigger ECC notifications is missing many bits of
interest.  We add these bits here so that all possible ECC errors can be
reported.

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/i5100_edac.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 43430bf7018..a8767a6c148 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -52,12 +52,24 @@
 #define		I5100_FERR_NF_MEM_M16ERR_MASK	(1 << 16)
 #define		I5100_FERR_NF_MEM_M15ERR_MASK	(1 << 15)
 #define		I5100_FERR_NF_MEM_M14ERR_MASK	(1 << 14)
-#define		I5100_FERR_NF_MEM_
-#define		I5100_FERR_NF_MEM_
+#define		I5100_FERR_NF_MEM_M12ERR_MASK	(1 << 12)
+#define		I5100_FERR_NF_MEM_M11ERR_MASK	(1 << 11)
+#define		I5100_FERR_NF_MEM_M10ERR_MASK	(1 << 10)
+#define		I5100_FERR_NF_MEM_M6ERR_MASK	(1 << 6)
+#define		I5100_FERR_NF_MEM_M5ERR_MASK	(1 << 5)
+#define		I5100_FERR_NF_MEM_M4ERR_MASK	(1 << 4)
+#define		I5100_FERR_NF_MEM_M1ERR_MASK	1
 #define		I5100_FERR_NF_MEM_ANY_MASK	\
 			(I5100_FERR_NF_MEM_M16ERR_MASK | \
 			I5100_FERR_NF_MEM_M15ERR_MASK | \
-			I5100_FERR_NF_MEM_M14ERR_MASK)
+			I5100_FERR_NF_MEM_M14ERR_MASK | \
+			I5100_FERR_NF_MEM_M12ERR_MASK | \
+			I5100_FERR_NF_MEM_M11ERR_MASK | \
+			I5100_FERR_NF_MEM_M10ERR_MASK | \
+			I5100_FERR_NF_MEM_M6ERR_MASK | \
+			I5100_FERR_NF_MEM_M5ERR_MASK | \
+			I5100_FERR_NF_MEM_M4ERR_MASK | \
+			I5100_FERR_NF_MEM_M1ERR_MASK)
 #define		I5100_FERR_NF_MEM_ANY(a)  ((a) & I5100_FERR_NF_MEM_ANY_MASK)
 #define	I5100_NERR_NF_MEM	0xa4	/* MC Next Non-Fatal Errors */
 #define		I5100_NERR_NF_MEM_ANY(a)  I5100_FERR_NF_MEM_ANY(a)
-- 
GitLab


From 43920a598f9358a12eb59eeddc4cd950f03aea8c Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:06 -0700
Subject: [PATCH 831/853] edac: i5100 fix enable ecc hardware

It is possible that the BIOS did not enable ECC at boot time.  We check
for that case and fail to load if it is true.

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/i5100_edac.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index a8767a6c148..509eec860c3 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -24,6 +24,8 @@
 /* register addresses and bit field accessors... */
 
 /* device 16, func 1 */
+#define I5100_MC		0x40	/* Memory Control Register */
+#define		I5100_MC_ERRDETEN(a)	((a) >> 5 & 1)
 #define I5100_MS		0x44	/* Memory Status Register */
 #define I5100_SPDDATA		0x48	/* Serial Presence Detect Status Reg */
 #define		I5100_SPDDATA_RDO(a)	((a) >> 15 & 1)
@@ -688,6 +690,14 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 		goto bail;
 	}
 
+	/* ECC enabled? */
+	pci_read_config_dword(pdev, I5100_MC, &dw);
+	if (!I5100_MC_ERRDETEN(dw)) {
+		printk(KERN_INFO "i5100_edac: ECC not enabled.\n");
+		ret = -ENODEV;
+		goto bail;
+	}
+
 	/* figure out how many ranks, from strapped state of 48GB_Mode input */
 	pci_read_config_dword(pdev, I5100_MS, &dw);
 	ranksperch = !!(dw & (1 << 8)) * 2 + 4;
-- 
GitLab


From 178d5a742291976d13bff55fa2b130879d4510de Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:06 -0700
Subject: [PATCH 832/853] edac: i5100 fix unmask ecc bits

Explicitly unmask ECC errors we are interested in reporting.

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/i5100_edac.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 509eec860c3..d85e7992eb6 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -75,6 +75,7 @@
 #define		I5100_FERR_NF_MEM_ANY(a)  ((a) & I5100_FERR_NF_MEM_ANY_MASK)
 #define	I5100_NERR_NF_MEM	0xa4	/* MC Next Non-Fatal Errors */
 #define		I5100_NERR_NF_MEM_ANY(a)  I5100_FERR_NF_MEM_ANY(a)
+#define I5100_EMASK_MEM		0xa8	/* MC Error Mask Register */
 
 /* device 21 and 22, func 0 */
 #define I5100_MTR_0	0x154	/* Memory Technology Registers 0-3 */
@@ -709,6 +710,11 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 		goto bail;
 	}
 
+	/* enable error reporting... */
+	pci_read_config_dword(pdev, I5100_EMASK_MEM, &dw);
+	dw &= ~I5100_FERR_NF_MEM_ANY_MASK;
+	pci_write_config_dword(pdev, I5100_EMASK_MEM, dw);
+
 	/* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
 	ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
 				    PCI_DEVICE_ID_INTEL_5100_21, 0);
-- 
GitLab


From b238e57723a6fb2c365fc35de5d7c48ccf9300cd Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:08 -0700
Subject: [PATCH 833/853] edac: i5100: cleanup

Some code cleanliness issues found by Andrew Morton (thanks!) which should
not affect functionality, but which should help make the code more
maintainable.

In particular, we now:

* convert all #define's w/ a parameter to static inlines
* use 1UL rather than 1ULL when calculating an unsigned long
* use pci_disable_device

The resulting code is tested and seems to work fine...

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Cc: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/i5100_edac.c | 396 +++++++++++++++++++++++++-------------
 1 file changed, 261 insertions(+), 135 deletions(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index d85e7992eb6..22db05a67bf 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -21,36 +21,19 @@
 
 #include "edac_core.h"
 
-/* register addresses and bit field accessors... */
+/* register addresses */
 
 /* device 16, func 1 */
 #define I5100_MC		0x40	/* Memory Control Register */
-#define		I5100_MC_ERRDETEN(a)	((a) >> 5 & 1)
 #define I5100_MS		0x44	/* Memory Status Register */
 #define I5100_SPDDATA		0x48	/* Serial Presence Detect Status Reg */
-#define		I5100_SPDDATA_RDO(a)	((a) >> 15 & 1)
-#define		I5100_SPDDATA_SBE(a)	((a) >> 13 & 1)
-#define		I5100_SPDDATA_BUSY(a)	((a) >> 12 & 1)
-#define		I5100_SPDDATA_DATA(a)	((a)       & ((1 << 8) - 1))
 #define I5100_SPDCMD		0x4c	/* Serial Presence Detect Command Reg */
-#define		I5100_SPDCMD_DTI(a)	(((a) & ((1 << 4) - 1)) << 28)
-#define		I5100_SPDCMD_CKOVRD(a)	(((a) & 1)              << 27)
-#define		I5100_SPDCMD_SA(a)	(((a) & ((1 << 3) - 1)) << 24)
-#define		I5100_SPDCMD_BA(a)	(((a) & ((1 << 8) - 1)) << 16)
-#define		I5100_SPDCMD_DATA(a)	(((a) & ((1 << 8) - 1)) <<  8)
-#define		I5100_SPDCMD_CMD(a)	((a) & 1)
 #define I5100_TOLM		0x6c	/* Top of Low Memory */
-#define		I5100_TOLM_TOLM(a)	((a) >> 12 & ((1 << 4) - 1))
 #define I5100_MIR0		0x80	/* Memory Interleave Range 0 */
 #define I5100_MIR1		0x84	/* Memory Interleave Range 1 */
 #define I5100_AMIR_0		0x8c	/* Adjusted Memory Interleave Range 0 */
 #define I5100_AMIR_1		0x90	/* Adjusted Memory Interleave Range 1 */
-#define		I5100_MIR_LIMIT(a)	((a) >> 4 & ((1 << 12) - 1))
-#define		I5100_MIR_WAY1(a)	((a) >> 1 & 1)
-#define		I5100_MIR_WAY0(a)	((a)      & 1)
 #define I5100_FERR_NF_MEM	0xa0	/* MC First Non Fatal Errors */
-#define		I5100_FERR_NF_MEM_CHAN_INDX(a)	((a) >> 28 & 1)
-#define		I5100_FERR_NF_MEM_SPD_MASK	(1 << 18)
 #define		I5100_FERR_NF_MEM_M16ERR_MASK	(1 << 16)
 #define		I5100_FERR_NF_MEM_M15ERR_MASK	(1 << 15)
 #define		I5100_FERR_NF_MEM_M14ERR_MASK	(1 << 14)
@@ -72,47 +55,214 @@
 			I5100_FERR_NF_MEM_M5ERR_MASK | \
 			I5100_FERR_NF_MEM_M4ERR_MASK | \
 			I5100_FERR_NF_MEM_M1ERR_MASK)
-#define		I5100_FERR_NF_MEM_ANY(a)  ((a) & I5100_FERR_NF_MEM_ANY_MASK)
 #define	I5100_NERR_NF_MEM	0xa4	/* MC Next Non-Fatal Errors */
-#define		I5100_NERR_NF_MEM_ANY(a)  I5100_FERR_NF_MEM_ANY(a)
 #define I5100_EMASK_MEM		0xa8	/* MC Error Mask Register */
 
 /* device 21 and 22, func 0 */
 #define I5100_MTR_0	0x154	/* Memory Technology Registers 0-3 */
 #define I5100_DMIR	0x15c	/* DIMM Interleave Range */
-#define		I5100_DMIR_LIMIT(a)	((a) >> 16 & ((1 << 11) - 1))
-#define		I5100_DMIR_RANK(a, i)	((a) >> (4 * i) & ((1 <<  2) - 1))
-#define I5100_MTR_4	0x1b0	/* Memory Technology Registers 4,5 */
-#define		I5100_MTR_PRESENT(a)	((a) >> 10 & 1)
-#define		I5100_MTR_ETHROTTLE(a)	((a) >>  9 & 1)
-#define		I5100_MTR_WIDTH(a)	((a) >>  8 & 1)
-#define		I5100_MTR_NUMBANK(a)	((a) >>  6 & 1)
-#define		I5100_MTR_NUMROW(a)	((a) >>  2 & ((1 << 2) - 1))
-#define		I5100_MTR_NUMCOL(a)	((a)       & ((1 << 2) - 1))
 #define	I5100_VALIDLOG	0x18c	/* Valid Log Markers */
-#define		I5100_VALIDLOG_REDMEMVALID(a)	((a) >> 2 & 1)
-#define		I5100_VALIDLOG_RECMEMVALID(a)	((a) >> 1 & 1)
-#define		I5100_VALIDLOG_NRECMEMVALID(a)	((a)      & 1)
 #define	I5100_NRECMEMA	0x190	/* Non-Recoverable Memory Error Log Reg A */
-#define		I5100_NRECMEMA_MERR(a)		((a) >> 15 & ((1 << 5) - 1))
-#define		I5100_NRECMEMA_BANK(a)		((a) >> 12 & ((1 << 3) - 1))
-#define		I5100_NRECMEMA_RANK(a)		((a) >>  8 & ((1 << 3) - 1))
-#define		I5100_NRECMEMA_DM_BUF_ID(a)	((a)       & ((1 << 8) - 1))
 #define	I5100_NRECMEMB	0x194	/* Non-Recoverable Memory Error Log Reg B */
-#define		I5100_NRECMEMB_CAS(a)		((a) >> 16 & ((1 << 13) - 1))
-#define		I5100_NRECMEMB_RAS(a)		((a)       & ((1 << 16) - 1))
 #define	I5100_REDMEMA	0x198	/* Recoverable Memory Data Error Log Reg A */
-#define		I5100_REDMEMA_SYNDROME(a)	(a)
 #define	I5100_REDMEMB	0x19c	/* Recoverable Memory Data Error Log Reg B */
-#define		I5100_REDMEMB_ECC_LOCATOR(a)	((a) & ((1 << 18) - 1))
 #define	I5100_RECMEMA	0x1a0	/* Recoverable Memory Error Log Reg A */
-#define		I5100_RECMEMA_MERR(a)		I5100_NRECMEMA_MERR(a)
-#define		I5100_RECMEMA_BANK(a)		I5100_NRECMEMA_BANK(a)
-#define		I5100_RECMEMA_RANK(a)		I5100_NRECMEMA_RANK(a)
-#define		I5100_RECMEMA_DM_BUF_ID(a)	I5100_NRECMEMA_DM_BUF_ID(a)
 #define	I5100_RECMEMB	0x1a4	/* Recoverable Memory Error Log Reg B */
-#define		I5100_RECMEMB_CAS(a)		I5100_NRECMEMB_CAS(a)
-#define		I5100_RECMEMB_RAS(a)		I5100_NRECMEMB_RAS(a)
+#define I5100_MTR_4	0x1b0	/* Memory Technology Registers 4,5 */
+
+/* bit field accessors */
+
+static inline u32 i5100_mc_errdeten(u32 mc)
+{
+	return mc >> 5 & 1;
+}
+
+static inline u16 i5100_spddata_rdo(u16 a)
+{
+	return a >> 15 & 1;
+}
+
+static inline u16 i5100_spddata_sbe(u16 a)
+{
+	return a >> 13 & 1;
+}
+
+static inline u16 i5100_spddata_busy(u16 a)
+{
+	return a >> 12 & 1;
+}
+
+static inline u16 i5100_spddata_data(u16 a)
+{
+	return a & ((1 << 8) - 1);
+}
+
+static inline u32 i5100_spdcmd_create(u32 dti, u32 ckovrd, u32 sa, u32 ba,
+				      u32 data, u32 cmd)
+{
+	return	((dti & ((1 << 4) - 1))  << 28) |
+		((ckovrd & 1)            << 27) |
+		((sa & ((1 << 3) - 1))   << 24) |
+		((ba & ((1 << 8) - 1))   << 16) |
+		((data & ((1 << 8) - 1)) <<  8) |
+		(cmd & 1);
+}
+
+static inline u16 i5100_tolm_tolm(u16 a)
+{
+	return a >> 12 & ((1 << 4) - 1);
+}
+
+static inline u16 i5100_mir_limit(u16 a)
+{
+	return a >> 4 & ((1 << 12) - 1);
+}
+
+static inline u16 i5100_mir_way1(u16 a)
+{
+	return a >> 1 & 1;
+}
+
+static inline u16 i5100_mir_way0(u16 a)
+{
+	return a & 1;
+}
+
+static inline u32 i5100_ferr_nf_mem_chan_indx(u32 a)
+{
+	return a >> 28 & 1;
+}
+
+static inline u32 i5100_ferr_nf_mem_any(u32 a)
+{
+	return a & I5100_FERR_NF_MEM_ANY_MASK;
+}
+
+static inline u32 i5100_nerr_nf_mem_any(u32 a)
+{
+	return i5100_ferr_nf_mem_any(a);
+}
+
+static inline u32 i5100_dmir_limit(u32 a)
+{
+	return a >> 16 & ((1 << 11) - 1);
+}
+
+static inline u32 i5100_dmir_rank(u32 a, u32 i)
+{
+	return a >> (4 * i) & ((1 << 2) - 1);
+}
+
+static inline u16 i5100_mtr_present(u16 a)
+{
+	return a >> 10 & 1;
+}
+
+static inline u16 i5100_mtr_ethrottle(u16 a)
+{
+	return a >> 9 & 1;
+}
+
+static inline u16 i5100_mtr_width(u16 a)
+{
+	return a >> 8 & 1;
+}
+
+static inline u16 i5100_mtr_numbank(u16 a)
+{
+	return a >> 6 & 1;
+}
+
+static inline u16 i5100_mtr_numrow(u16 a)
+{
+	return a >> 2 & ((1 << 2) - 1);
+}
+
+static inline u16 i5100_mtr_numcol(u16 a)
+{
+	return a & ((1 << 2) - 1);
+}
+
+
+static inline u32 i5100_validlog_redmemvalid(u32 a)
+{
+	return a >> 2 & 1;
+}
+
+static inline u32 i5100_validlog_recmemvalid(u32 a)
+{
+	return a >> 1 & 1;
+}
+
+static inline u32 i5100_validlog_nrecmemvalid(u32 a)
+{
+	return a & 1;
+}
+
+static inline u32 i5100_nrecmema_merr(u32 a)
+{
+	return a >> 15 & ((1 << 5) - 1);
+}
+
+static inline u32 i5100_nrecmema_bank(u32 a)
+{
+	return a >> 12 & ((1 << 3) - 1);
+}
+
+static inline u32 i5100_nrecmema_rank(u32 a)
+{
+	return a >>  8 & ((1 << 3) - 1);
+}
+
+static inline u32 i5100_nrecmema_dm_buf_id(u32 a)
+{
+	return a & ((1 << 8) - 1);
+}
+
+static inline u32 i5100_nrecmemb_cas(u32 a)
+{
+	return a >> 16 & ((1 << 13) - 1);
+}
+
+static inline u32 i5100_nrecmemb_ras(u32 a)
+{
+	return a & ((1 << 16) - 1);
+}
+
+static inline u32 i5100_redmemb_ecc_locator(u32 a)
+{
+	return a & ((1 << 18) - 1);
+}
+
+static inline u32 i5100_recmema_merr(u32 a)
+{
+	return i5100_nrecmema_merr(a);
+}
+
+static inline u32 i5100_recmema_bank(u32 a)
+{
+	return i5100_nrecmema_bank(a);
+}
+
+static inline u32 i5100_recmema_rank(u32 a)
+{
+	return i5100_nrecmema_rank(a);
+}
+
+static inline u32 i5100_recmema_dm_buf_id(u32 a)
+{
+	return i5100_nrecmema_dm_buf_id(a);
+}
+
+static inline u32 i5100_recmemb_cas(u32 a)
+{
+	return i5100_nrecmemb_cas(a);
+}
+
+static inline u32 i5100_recmemb_ras(u32 a)
+{
+	return i5100_nrecmemb_ras(a);
+}
 
 /* some generic limits */
 #define I5100_MAX_RANKS_PER_CTLR	6
@@ -189,42 +339,9 @@ static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
 	return -1;
 }
 
-/*
- * The processor bus memory addresses are broken into three
- * pieces, whereas the controller addresses are contiguous.
- *
- * here we map from the controller address space to the
- * processor address space:
- *
- *    Processor Address Space
- * +-----------------------------+
- * |                             |
- * |  "high" memory addresses    |
- * |                             |
- * +-----------------------------+ <- 4GB on the i5100
- * |                             |
- * |  other non-memory addresses |
- * |                             |
- * +-----------------------------+ <- top of low memory
- * |                             |
- * | "low" memory addresses      |
- * |                             |
- * +-----------------------------+
- */
-static unsigned long i5100_ctl_page_to_phys(struct mem_ctl_info *mci,
-					    unsigned long cntlr_addr)
-{
-	const struct i5100_priv *priv = mci->pvt_info;
-
-	if (cntlr_addr < priv->tolm)
-		return cntlr_addr;
-
-	return (1ULL << 32) + (cntlr_addr - priv->tolm);
-}
-
 static const char *i5100_err_msg(unsigned err)
 {
-	const char *merrs[] = {
+	static const char *merrs[] = {
 		"unknown", /* 0 */
 		"uncorrectable data ECC on replay", /* 1 */
 		"unknown", /* 2 */
@@ -341,24 +458,24 @@ static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
 
 	pci_read_config_dword(pdev, I5100_VALIDLOG, &dw);
 
-	if (I5100_VALIDLOG_REDMEMVALID(dw)) {
+	if (i5100_validlog_redmemvalid(dw)) {
 		pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
-		syndrome = I5100_REDMEMA_SYNDROME(dw2);
+		syndrome = dw2;
 		pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
-		ecc_loc = I5100_REDMEMB_ECC_LOCATOR(dw2);
+		ecc_loc = i5100_redmemb_ecc_locator(dw2);
 	}
 
-	if (I5100_VALIDLOG_RECMEMVALID(dw)) {
+	if (i5100_validlog_recmemvalid(dw)) {
 		const char *msg;
 
 		pci_read_config_dword(pdev, I5100_RECMEMA, &dw2);
-		merr = I5100_RECMEMA_MERR(dw2);
-		bank = I5100_RECMEMA_BANK(dw2);
-		rank = I5100_RECMEMA_RANK(dw2);
+		merr = i5100_recmema_merr(dw2);
+		bank = i5100_recmema_bank(dw2);
+		rank = i5100_recmema_rank(dw2);
 
 		pci_read_config_dword(pdev, I5100_RECMEMB, &dw2);
-		cas = I5100_RECMEMB_CAS(dw2);
-		ras = I5100_RECMEMB_RAS(dw2);
+		cas = i5100_recmemb_cas(dw2);
+		ras = i5100_recmemb_ras(dw2);
 
 		/* FIXME:  not really sure if this is what merr is...
 		 */
@@ -370,17 +487,17 @@ static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
 		i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
 	}
 
-	if (I5100_VALIDLOG_NRECMEMVALID(dw)) {
+	if (i5100_validlog_nrecmemvalid(dw)) {
 		const char *msg;
 
 		pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2);
-		merr = I5100_NRECMEMA_MERR(dw2);
-		bank = I5100_NRECMEMA_BANK(dw2);
-		rank = I5100_NRECMEMA_RANK(dw2);
+		merr = i5100_nrecmema_merr(dw2);
+		bank = i5100_nrecmema_bank(dw2);
+		rank = i5100_nrecmema_rank(dw2);
 
 		pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2);
-		cas = I5100_NRECMEMB_CAS(dw2);
-		ras = I5100_NRECMEMB_RAS(dw2);
+		cas = i5100_nrecmemb_cas(dw2);
+		ras = i5100_nrecmemb_ras(dw2);
 
 		/* FIXME:  not really sure if this is what merr is...
 		 */
@@ -402,7 +519,7 @@ static void i5100_check_error(struct mem_ctl_info *mci)
 
 
 	pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
-	if (I5100_FERR_NF_MEM_ANY(dw)) {
+	if (i5100_ferr_nf_mem_any(dw)) {
 		u32 dw2;
 
 		pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
@@ -411,9 +528,9 @@ static void i5100_check_error(struct mem_ctl_info *mci)
 					       dw2);
 		pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
 
-		i5100_read_log(mci, I5100_FERR_NF_MEM_CHAN_INDX(dw),
-			       I5100_FERR_NF_MEM_ANY(dw),
-			       I5100_NERR_NF_MEM_ANY(dw2));
+		i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw),
+			       i5100_ferr_nf_mem_any(dw),
+			       i5100_nerr_nf_mem_any(dw2));
 	}
 }
 
@@ -476,12 +593,12 @@ static void __devinit i5100_init_mtr(struct mem_ctl_info *mci)
 
 			pci_read_config_word(pdev, addr, &w);
 
-			priv->mtr[i][j].present = I5100_MTR_PRESENT(w);
-			priv->mtr[i][j].ethrottle = I5100_MTR_ETHROTTLE(w);
-			priv->mtr[i][j].width = 4 + 4 * I5100_MTR_WIDTH(w);
-			priv->mtr[i][j].numbank = 2 + I5100_MTR_NUMBANK(w);
-			priv->mtr[i][j].numrow = 13 + I5100_MTR_NUMROW(w);
-			priv->mtr[i][j].numcol = 10 + I5100_MTR_NUMCOL(w);
+			priv->mtr[i][j].present = i5100_mtr_present(w);
+			priv->mtr[i][j].ethrottle = i5100_mtr_ethrottle(w);
+			priv->mtr[i][j].width = 4 + 4 * i5100_mtr_width(w);
+			priv->mtr[i][j].numbank = 2 + i5100_mtr_numbank(w);
+			priv->mtr[i][j].numrow = 13 + i5100_mtr_numrow(w);
+			priv->mtr[i][j].numcol = 10 + i5100_mtr_numcol(w);
 		}
 	}
 }
@@ -495,35 +612,30 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
 {
 	struct i5100_priv *priv = mci->pvt_info;
 	u16 w;
-	u32 dw;
 	unsigned long et;
 
 	pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
-	if (I5100_SPDDATA_BUSY(w))
+	if (i5100_spddata_busy(w))
 		return -1;
 
-	dw =	I5100_SPDCMD_DTI(0xa) |
-		I5100_SPDCMD_CKOVRD(1) |
-		I5100_SPDCMD_SA(ch * 4 + slot) |
-		I5100_SPDCMD_BA(addr) |
-		I5100_SPDCMD_DATA(0) |
-		I5100_SPDCMD_CMD(0);
-	pci_write_config_dword(priv->mc, I5100_SPDCMD, dw);
+	pci_write_config_dword(priv->mc, I5100_SPDCMD,
+			       i5100_spdcmd_create(0xa, 1, ch * 4 + slot, addr,
+						   0, 0));
 
 	/* wait up to 100ms */
 	et = jiffies + HZ / 10;
 	udelay(100);
 	while (1) {
 		pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
-		if (!I5100_SPDDATA_BUSY(w))
+		if (!i5100_spddata_busy(w))
 			break;
 		udelay(100);
 	}
 
-	if (!I5100_SPDDATA_RDO(w) || I5100_SPDDATA_SBE(w))
+	if (!i5100_spddata_rdo(w) || i5100_spddata_sbe(w))
 		return -1;
 
-	*byte = I5100_SPDDATA_DATA(w);
+	*byte = i5100_spddata_data(w);
 
 	return 0;
 }
@@ -591,17 +703,17 @@ static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
 	int i;
 
 	pci_read_config_word(pdev, I5100_TOLM, &w);
-	priv->tolm = (u64) I5100_TOLM_TOLM(w) * 256 * 1024 * 1024;
+	priv->tolm = (u64) i5100_tolm_tolm(w) * 256 * 1024 * 1024;
 
 	pci_read_config_word(pdev, I5100_MIR0, &w);
-	priv->mir[0].limit = (u64) I5100_MIR_LIMIT(w) << 28;
-	priv->mir[0].way[1] = I5100_MIR_WAY1(w);
-	priv->mir[0].way[0] = I5100_MIR_WAY0(w);
+	priv->mir[0].limit = (u64) i5100_mir_limit(w) << 28;
+	priv->mir[0].way[1] = i5100_mir_way1(w);
+	priv->mir[0].way[0] = i5100_mir_way0(w);
 
 	pci_read_config_word(pdev, I5100_MIR1, &w);
-	priv->mir[1].limit = (u64) I5100_MIR_LIMIT(w) << 28;
-	priv->mir[1].way[1] = I5100_MIR_WAY1(w);
-	priv->mir[1].way[0] = I5100_MIR_WAY0(w);
+	priv->mir[1].limit = (u64) i5100_mir_limit(w) << 28;
+	priv->mir[1].way[1] = i5100_mir_way1(w);
+	priv->mir[1].way[0] = i5100_mir_way0(w);
 
 	pci_read_config_word(pdev, I5100_AMIR_0, &w);
 	priv->amir[0] = w;
@@ -617,10 +729,10 @@ static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
 			pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw);
 
 			priv->dmir[i][j].limit =
-				(u64) I5100_DMIR_LIMIT(dw) << 28;
+				(u64) i5100_dmir_limit(dw) << 28;
 			for (k = 0; k < I5100_MAX_RANKS_PER_DIMM; k++)
 				priv->dmir[i][j].rank[k] =
-					I5100_DMIR_RANK(dw, k);
+					i5100_dmir_rank(dw, k);
 		}
 	}
 
@@ -693,10 +805,10 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 
 	/* ECC enabled? */
 	pci_read_config_dword(pdev, I5100_MC, &dw);
-	if (!I5100_MC_ERRDETEN(dw)) {
+	if (!i5100_mc_errdeten(dw)) {
 		printk(KERN_INFO "i5100_edac: ECC not enabled.\n");
 		ret = -ENODEV;
-		goto bail;
+		goto bail_pdev;
 	}
 
 	/* figure out how many ranks, from strapped state of 48GB_Mode input */
@@ -707,7 +819,7 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 		/* FIXME: get 6 ranks / controller to work - need hw... */
 		printk(KERN_INFO "i5100_edac: unsupported configuration.\n");
 		ret = -ENODEV;
-		goto bail;
+		goto bail_pdev;
 	}
 
 	/* enable error reporting... */
@@ -718,8 +830,10 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 	/* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
 	ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
 				    PCI_DEVICE_ID_INTEL_5100_21, 0);
-	if (!ch0mm)
-		return -ENODEV;
+	if (!ch0mm) {
+		ret = -ENODEV;
+		goto bail_pdev;
+	}
 
 	rc = pci_enable_device(ch0mm);
 	if (rc < 0) {
@@ -732,7 +846,7 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 				    PCI_DEVICE_ID_INTEL_5100_22, 0);
 	if (!ch1mm) {
 		ret = -ENODEV;
-		goto bail_ch0;
+		goto bail_disable_ch0;
 	}
 
 	rc = pci_enable_device(ch1mm);
@@ -744,7 +858,7 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 	mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
 	if (!mci) {
 		ret = -ENOMEM;
-		goto bail_ch1;
+		goto bail_disable_ch1;
 	}
 
 	mci->dev = &pdev->dev;
@@ -765,7 +879,7 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 	mci->mod_ver = "not versioned";
 	mci->ctl_name = "i5100";
 	mci->dev_name = pci_name(pdev);
-	mci->ctl_page_to_phys = i5100_ctl_page_to_phys;
+	mci->ctl_page_to_phys = NULL;
 
 	mci->edac_check = i5100_check_error;
 
@@ -786,17 +900,26 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
 		goto bail_mc;
 	}
 
-	goto bail;
+	return ret;
 
 bail_mc:
 	edac_mc_free(mci);
 
+bail_disable_ch1:
+	pci_disable_device(ch1mm);
+
 bail_ch1:
 	pci_dev_put(ch1mm);
 
+bail_disable_ch0:
+	pci_disable_device(ch0mm);
+
 bail_ch0:
 	pci_dev_put(ch0mm);
 
+bail_pdev:
+	pci_disable_device(pdev);
+
 bail:
 	return ret;
 }
@@ -812,6 +935,9 @@ static void __devexit i5100_remove_one(struct pci_dev *pdev)
 		return;
 
 	priv = mci->pvt_info;
+	pci_disable_device(pdev);
+	pci_disable_device(priv->ch0mm);
+	pci_disable_device(priv->ch1mm);
 	pci_dev_put(priv->ch0mm);
 	pci_dev_put(priv->ch1mm);
 
-- 
GitLab


From 14cc571bb1d072d3f4be2875ea520ab03e093471 Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:08 -0700
Subject: [PATCH 834/853] edac: core fix to use dynamic kobject

Static kobjects are not supported in linux kernel.  Convert the
edac_pci_top_main_kobj from static to dynamic.  This avoids the double
free of the edac_pci_top_main_kobj.name that we see on module reload of
the e752x edac driver (and probably others as well).

In addition, Greg KH <greg@kroah.com> has pointed out that this code may
be cleaned up significantly.  I will look at that as a follow-on patch;
for now, I just want the minimum fix to get this double-free oops bug
squashed...

Many thanks to Greg KH for his patience in showing me what the
Documentation/kobject.txt already said (oops)...

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/edac_pci_sysfs.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 2c1fa1bb6df..5c153dccc95 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -28,7 +28,7 @@ static int edac_pci_poll_msec = 1000;	/* one second workq period */
 static atomic_t pci_parity_count = ATOMIC_INIT(0);
 static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
 
-static struct kobject edac_pci_top_main_kobj;
+static struct kobject *edac_pci_top_main_kobj;
 static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
 
 /* getter functions for the data variables */
@@ -83,7 +83,7 @@ static void edac_pci_instance_release(struct kobject *kobj)
 	pci = to_instance(kobj);
 
 	/* decrement reference count on top main kobj */
-	kobject_put(&edac_pci_top_main_kobj);
+	kobject_put(edac_pci_top_main_kobj);
 
 	kfree(pci);	/* Free the control struct */
 }
@@ -166,7 +166,7 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
 	 * track the number of PCI instances we have, and thus nest
 	 * properly on keeping the module loaded
 	 */
-	main_kobj = kobject_get(&edac_pci_top_main_kobj);
+	main_kobj = kobject_get(edac_pci_top_main_kobj);
 	if (!main_kobj) {
 		err = -ENODEV;
 		goto error_out;
@@ -174,11 +174,11 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
 
 	/* And now register this new kobject under the main kobj */
 	err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance,
-				   &edac_pci_top_main_kobj, "pci%d", idx);
+				   edac_pci_top_main_kobj, "pci%d", idx);
 	if (err != 0) {
 		debugf2("%s() failed to register instance pci%d\n",
 			__func__, idx);
-		kobject_put(&edac_pci_top_main_kobj);
+		kobject_put(edac_pci_top_main_kobj);
 		goto error_out;
 	}
 
@@ -316,9 +316,10 @@ static struct edac_pci_dev_attribute *edac_pci_attr[] = {
  */
 static void edac_pci_release_main_kobj(struct kobject *kobj)
 {
-
 	debugf0("%s() here to module_put(THIS_MODULE)\n", __func__);
 
+	kfree(kobj);
+
 	/* last reference to top EDAC PCI kobject has been removed,
 	 * NOW release our ref count on the core module
 	 */
@@ -369,8 +370,16 @@ static int edac_pci_main_kobj_setup(void)
 		goto decrement_count_fail;
 	}
 
+	edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+	if (!edac_pci_top_main_kobj) {
+		debugf1("Failed to allocate\n");
+		err = -ENOMEM;
+		goto kzalloc_fail;
+	}
+
 	/* Instanstiate the pci object */
-	err = kobject_init_and_add(&edac_pci_top_main_kobj, &ktype_edac_pci_main_kobj,
+	err = kobject_init_and_add(edac_pci_top_main_kobj,
+				   &ktype_edac_pci_main_kobj,
 				   &edac_class->kset.kobj, "pci");
 	if (err) {
 		debugf1("Failed to register '.../edac/pci'\n");
@@ -381,13 +390,16 @@ static int edac_pci_main_kobj_setup(void)
 	 * for EDAC PCI, then edac_pci_main_kobj_teardown()
 	 * must be used, for resources to be cleaned up properly
 	 */
-	kobject_uevent(&edac_pci_top_main_kobj, KOBJ_ADD);
+	kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD);
 	debugf1("Registered '.../edac/pci' kobject\n");
 
 	return 0;
 
 	/* Error unwind statck */
 kobject_init_and_add_fail:
+	kfree(edac_pci_top_main_kobj);
+
+kzalloc_fail:
 	module_put(THIS_MODULE);
 
 decrement_count_fail:
@@ -414,7 +426,7 @@ static void edac_pci_main_kobj_teardown(void)
 	if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
 		debugf0("%s() called kobject_put on main kobj\n",
 			__func__);
-		kobject_put(&edac_pci_top_main_kobj);
+		kobject_put(edac_pci_top_main_kobj);
 	}
 }
 
-- 
GitLab


From 096846e2b0ef39cb7c348f837f06984ef6ba8aa7 Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:09 -0700
Subject: [PATCH 835/853] edac: core fix workq timer

When updating the edac_mc_poll_msec module parameter from the sysfs
/sys/module/edac_core/parameters/edac_mc_poll_msec file, we don't update
the workq timers.  As a result, if we move from a big poll time to a
small one, the small one won't take effect until the big one has timed
out.

Here we provide a new module parameter set method to call out to the
update routine.  This brings the /sys/module/edac_core/parameters
functionality up to that provided by the /sys/devices/system/edac/mc sysfs
module parameter files so that we can remove them or at least link to the
/sys/module files...

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/edac_mc_sysfs.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 021d1879514..7bb9c1532b9 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -44,6 +44,25 @@ int edac_mc_get_poll_msec(void)
 	return edac_mc_poll_msec;
 }
 
+static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
+{
+	long l;
+	int ret;
+
+	if (!val)
+		return -EINVAL;
+
+	ret = strict_strtol(val, 0, &l);
+	if (ret == -EINVAL || ((int)l != l))
+		return -EINVAL;
+	*((int *)kp->arg) = l;
+
+	/* notify edac_mc engine to reset the poll period */
+	edac_mc_reset_delay_period(l);
+
+	return 0;
+}
+
 /* Parameter declarations for above */
 module_param(edac_mc_panic_on_ue, int, 0644);
 MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
@@ -53,7 +72,8 @@ MODULE_PARM_DESC(edac_mc_log_ue,
 module_param(edac_mc_log_ce, int, 0644);
 MODULE_PARM_DESC(edac_mc_log_ce,
 		 "Log correctable error to console: 0=off 1=on");
-module_param(edac_mc_poll_msec, int, 0644);
+module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int,
+		  &edac_mc_poll_msec, 0644);
 MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds");
 
 /*
-- 
GitLab


From 327dafb1c61c9da7b95ac6cc7634a2340cc9509c Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:10 -0700
Subject: [PATCH 836/853] edac: core fix redundant sysfs controls to parameters

/sys/devices/system/edac/mc has a few files which are duplicated in
/sys/module/edac_core/parameters.  Now that all the functionality is
duplicated between these two locations, we remove the former kobject
attributes and update the documentation.

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/edac.txt       | 151 +++++++++++++++++------------------
 drivers/edac/edac_mc_sysfs.c | 117 +--------------------------
 2 files changed, 73 insertions(+), 195 deletions(-)

diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index a5c36842ece..ced52738800 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -222,74 +222,9 @@ both csrow2 and csrow3 are populated, this indicates a dual ranked
 set of DIMMs for channels 0 and 1.
 
 
-Within each of the 'mc','mcX' and 'csrowX' directories are several
+Within each of the 'mcX' and 'csrowX' directories are several
 EDAC control and attribute files.
 
-
-============================================================================
-DIRECTORY 'mc'
-
-In directory 'mc' are EDAC system overall control and attribute files:
-
-
-Panic on UE control file:
-
-	'edac_mc_panic_on_ue'
-
-	An uncorrectable error will cause a machine panic.  This is usually
-	desirable.  It is a bad idea to continue when an uncorrectable error
-	occurs - it is indeterminate what was uncorrected and the operating
-	system context might be so mangled that continuing will lead to further
-	corruption. If the kernel has MCE configured, then EDAC will never
-	notice the UE.
-
-	LOAD TIME: module/kernel parameter: panic_on_ue=[0|1]
-
-	RUN TIME:  echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue
-
-
-Log UE control file:
-
-	'edac_mc_log_ue'
-
-	Generate kernel messages describing uncorrectable errors.  These errors
-	are reported through the system message log system.  UE statistics
-	will be accumulated even when UE logging is disabled.
-
-	LOAD TIME: module/kernel parameter: log_ue=[0|1]
-
-	RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue
-
-
-Log CE control file:
-
-	'edac_mc_log_ce'
-
-	Generate kernel messages describing correctable errors.  These
-	errors are reported through the system message log system.
-	CE statistics will be accumulated even when CE logging is disabled.
-
-	LOAD TIME: module/kernel parameter: log_ce=[0|1]
-
-	RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce
-
-
-Polling period control file:
-
-	'edac_mc_poll_msec'
-
-	The time period, in milliseconds, for polling for error information.
-	Too small a value wastes resources.  Too large a value might delay
-	necessary handling of errors and might loose valuable information for
-	locating the error.  1000 milliseconds (once each second) is the current
-	default. Systems which require all the bandwidth they can get, may
-	increase this.
-
-	LOAD TIME: module/kernel parameter: poll_msec=[0|1]
-
-	RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec
-
-
 ============================================================================
 'mcX' DIRECTORIES
 
@@ -537,7 +472,6 @@ Channel 1 DIMM Label control file:
 	motherboard specific and determination of this information
 	must occur in userland at this time.
 
-
 ============================================================================
 SYSTEM LOGGING
 
@@ -570,7 +504,6 @@ error type, a notice of "no info" and then an optional,
 driver-specific error message.
 
 
-
 ============================================================================
 PCI Bus Parity Detection
 
@@ -604,6 +537,74 @@ Enable/Disable PCI Parity checking control file:
 	echo "0" >/sys/devices/system/edac/pci/check_pci_parity
 
 
+Parity Count:
+
+	'pci_parity_count'
+
+	This attribute file will display the number of parity errors that
+	have been detected.
+
+
+============================================================================
+MODULE PARAMETERS
+
+Panic on UE control file:
+
+	'edac_mc_panic_on_ue'
+
+	An uncorrectable error will cause a machine panic.  This is usually
+	desirable.  It is a bad idea to continue when an uncorrectable error
+	occurs - it is indeterminate what was uncorrected and the operating
+	system context might be so mangled that continuing will lead to further
+	corruption. If the kernel has MCE configured, then EDAC will never
+	notice the UE.
+
+	LOAD TIME: module/kernel parameter: edac_mc_panic_on_ue=[0|1]
+
+	RUN TIME:  echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
+
+
+Log UE control file:
+
+	'edac_mc_log_ue'
+
+	Generate kernel messages describing uncorrectable errors.  These errors
+	are reported through the system message log system.  UE statistics
+	will be accumulated even when UE logging is disabled.
+
+	LOAD TIME: module/kernel parameter: edac_mc_log_ue=[0|1]
+
+	RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
+
+
+Log CE control file:
+
+	'edac_mc_log_ce'
+
+	Generate kernel messages describing correctable errors.  These
+	errors are reported through the system message log system.
+	CE statistics will be accumulated even when CE logging is disabled.
+
+	LOAD TIME: module/kernel parameter: edac_mc_log_ce=[0|1]
+
+	RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
+
+
+Polling period control file:
+
+	'edac_mc_poll_msec'
+
+	The time period, in milliseconds, for polling for error information.
+	Too small a value wastes resources.  Too large a value might delay
+	necessary handling of errors and might lose valuable information for
+	locating the error.  1000 milliseconds (once each second) is the current
+	default. Systems which require all the bandwidth they can get, may
+	increase this.
+
+	LOAD TIME: module/kernel parameter: edac_mc_poll_msec=<milliseconds>
+
+	RUN TIME: echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
+
 
 Panic on PCI PARITY Error:
 
@@ -614,21 +615,13 @@ Panic on PCI PARITY Error:
 	error has been detected.
 
 
-	module/kernel parameter: panic_on_pci_parity=[0|1]
+	module/kernel parameter: edac_panic_on_pci_pe=[0|1]
 
 	Enable:
-	echo "1" >/sys/devices/system/edac/pci/panic_on_pci_parity
+	echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
 
 	Disable:
-	echo "0" >/sys/devices/system/edac/pci/panic_on_pci_parity
-
-
-Parity Count:
-
-	'pci_parity_count'
-
-	This attribute file will display the number of parity errors that
-	have been detected.
+	echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
 
 
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 7bb9c1532b9..cbe1a17e42f 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -123,16 +123,6 @@ static const char *edac_caps[] = {
 
 
-/*
- * /sys/devices/system/edac/mc;
- *	data structures and methods
- */
-static ssize_t memctrl_int_show(void *ptr, char *buffer)
-{
-	int *value = (int *)ptr;
-	return sprintf(buffer, "%u\n", *value);
-}
-
 static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
 {
 	int *value = (int *)ptr;
@@ -143,23 +133,6 @@ static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
 	return count;
 }
 
-/*
- * mc poll_msec time value
- */
-static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count)
-{
-	int *value = (int *)ptr;
-
-	if (isdigit(*buffer)) {
-		*value = simple_strtoul(buffer, NULL, 0);
-
-		/* notify edac_mc engine to reset the poll period */
-		edac_mc_reset_delay_period(*value);
-	}
-
-	return count;
-}
-
 
 /* EDAC sysfs CSROW data structures and methods
  */
@@ -669,98 +642,10 @@ static struct kobj_type ktype_mci = {
 	.default_attrs = (struct attribute **)mci_attr,
 };
 
-/* show/store, tables, etc for the MC kset */
-
-
-struct memctrl_dev_attribute {
-	struct attribute attr;
-	void *value;
-	 ssize_t(*show) (void *, char *);
-	 ssize_t(*store) (void *, const char *, size_t);
-};
-
-/* Set of show/store abstract level functions for memory control object */
-static ssize_t memctrl_dev_show(struct kobject *kobj,
-				struct attribute *attr, char *buffer)
-{
-	struct memctrl_dev_attribute *memctrl_dev;
-	memctrl_dev = (struct memctrl_dev_attribute *)attr;
-
-	if (memctrl_dev->show)
-		return memctrl_dev->show(memctrl_dev->value, buffer);
-
-	return -EIO;
-}
-
-static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
-				 const char *buffer, size_t count)
-{
-	struct memctrl_dev_attribute *memctrl_dev;
-	memctrl_dev = (struct memctrl_dev_attribute *)attr;
-
-	if (memctrl_dev->store)
-		return memctrl_dev->store(memctrl_dev->value, buffer, count);
-
-	return -EIO;
-}
-
-static struct sysfs_ops memctrlfs_ops = {
-	.show = memctrl_dev_show,
-	.store = memctrl_dev_store
-};
-
-#define MEMCTRL_ATTR(_name, _mode, _show, _store)			\
-static struct memctrl_dev_attribute attr_##_name = {			\
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.value  = &_name,					\
-	.show   = _show,					\
-	.store  = _store,					\
-};
-
-#define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store)	\
-static struct memctrl_dev_attribute attr_##_name = {			\
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.value  = _data,					\
-	.show   = _show,					\
-	.store  = _store,					\
-};
-
-/* csrow<id> control files */
-MEMCTRL_ATTR(edac_mc_panic_on_ue,
-	S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_log_ue,
-	S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_log_ce,
-	S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_poll_msec,
-	S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store);
-
-/* Base Attributes of the memory ECC object */
-static struct memctrl_dev_attribute *memctrl_attr[] = {
-	&attr_edac_mc_panic_on_ue,
-	&attr_edac_mc_log_ue,
-	&attr_edac_mc_log_ce,
-	&attr_edac_mc_poll_msec,
-	NULL,
-};
-
-
-/* the ktype for the mc_kset internal kobj */
-static struct kobj_type ktype_mc_set_attribs = {
-	.sysfs_ops = &memctrlfs_ops,
-	.default_attrs = (struct attribute **)memctrl_attr,
-};
-
 /* EDAC memory controller sysfs kset:
  *	/sys/devices/system/edac/mc
  */
-static struct kset mc_kset = {
-	.kobj = {.ktype = &ktype_mc_set_attribs },
-};
-
+static struct kset mc_kset;
 
 /*
  * edac_mc_register_sysfs_main_kobj
-- 
GitLab


From f9fc82adca43d38a1b79128d80750bd361e15abe Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:11 -0700
Subject: [PATCH 837/853] edac: core fix static to dynamic kset

Static kobjects and ksets are not supported in Linux kernel.  Convert the
mc_kset from static to dynamic.  This patch depends on my previous patch
to remove the module parameter attributes from mc...

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/edac_mc_sysfs.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index cbe1a17e42f..479492819db 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -645,7 +645,7 @@ static struct kobj_type ktype_mci = {
 /* EDAC memory controller sysfs kset:
  *	/sys/devices/system/edac/mc
  */
-static struct kset mc_kset;
+static struct kset *mc_kset;
 
 /*
  * edac_mc_register_sysfs_main_kobj
@@ -676,7 +676,7 @@ int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci)
 	}
 
 	/* this instance become part of the mc_kset */
-	kobj_mci->kset = &mc_kset;
+	kobj_mci->kset = mc_kset;
 
 	/* register the mc<id> kobject to the mc_kset */
 	err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL,
@@ -906,12 +906,9 @@ int edac_sysfs_setup_mc_kset(void)
 	}
 
 	/* Init the MC's kobject */
-	kobject_set_name(&mc_kset.kobj, "mc");
-	mc_kset.kobj.parent = &edac_class->kset.kobj;
-
-	/* register the mc_kset */
-	err = kset_register(&mc_kset);
-	if (err) {
+	mc_kset = kset_create_and_add("mc", NULL, &edac_class->kset.kobj);
+	if (!mc_kset) {
+		err = -ENOMEM;
 		debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
 		goto fail_out;
 	}
@@ -933,6 +930,6 @@ fail_out:
  */
 void edac_sysfs_teardown_mc_kset(void)
 {
-	kset_unregister(&mc_kset);
+	kset_unregister(mc_kset);
 }
 
-- 
GitLab


From 124682c78563e10ba8b2ecd21b0f1098903b7808 Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:12 -0700
Subject: [PATCH 838/853] edac: core fix added newline to sysfs dimm labels

The channel DIMM label does not seem to be used much in the edac code.
However, where it is used (in the core code), it is assumed to not have a
newline embedded.  This leaves the sysfs file newline free which looks
funny when cat'ing it.  Here we just add the trailing newline to the sysfs
chX_dimm_label output...

[Doug Thompson note: the DIMM label is one of the primary uses of EDAC.
User space daemon scripts, edac-utils@sourceforge, populate the DIMM label
fields, via /sys/devices/system/edac attributes, with the silk screen
labels of the motherboard in use.  dmidecode accesses BIOS tables, but BIOS
tables are well known to be incorrect and useless in these respects.
edac-utils will strip off any newlines before its use of the output, when
displaying DIMM slot silk screen labels.]

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/edac_mc_sysfs.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 479492819db..ad218fe4942 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -178,7 +178,11 @@ static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data,
 static ssize_t channel_dimm_label_show(struct csrow_info *csrow,
 				char *data, int channel)
 {
-	return snprintf(data, EDAC_MC_LABEL_LEN, "%s",
+	/* if field has not been initialized, there is nothing to send */
+	if (!csrow->channels[channel].label[0])
+		return 0;
+	/* + 1: the bound must also cover the '\n' appended to the label */
+	return snprintf(data, EDAC_MC_LABEL_LEN + 1, "%s\n",
 			csrow->channels[channel].label);
 }
 
-- 
GitLab


From 10d33e9c36827e5371479e55ef4089e000af2638 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Fri, 25 Jul 2008 01:49:12 -0700
Subject: [PATCH 839/853] edac: e752x fix too loud on nonmemory errors

This module harvests more than just memory errors, it also harvests
various bus and dma errors that the Chipset detects.  Previously, it would
report all such errors, which would cause output to be TOO loud.

This patch therefore adds a parameter which is used to turn off
NON-MEMORY error reports by default.  The reporting can be re-enabled via
the report_non_memory_errors module parameter.

Also did code style cleanup: less than 80 characters per line rule

Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/e752x_edac.c | 59 ++++++++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 17 deletions(-)

diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index c94a0eb492c..facfdb1fa71 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -28,6 +28,7 @@
 #define E752X_REVISION	" Ver: 2.0.2 " __DATE__
 #define EDAC_MOD_STR	"e752x_edac"
 
+static int report_non_memory_errors;
 static int force_function_unhide;
 static int sysbus_parity = -1;
 
@@ -117,7 +118,7 @@ static struct edac_pci_ctl_info *e752x_pci;
 #define E752X_BUF_FERR		0x70	/* Memory buffer first error reg (8b) */
 #define E752X_BUF_NERR		0x72	/* Memory buffer next error reg (8b) */
 #define E752X_BUF_ERRMASK	0x74	/* Memory buffer error mask reg (8b) */
-#define E752X_BUF_SMICMD	0x7A	/* Memory buffer SMI command reg (8b) */
+#define E752X_BUF_SMICMD	0x7A	/* Memory buffer SMI cmd reg (8b) */
 #define E752X_DRAM_FERR		0x80	/* DRAM first error register (16b) */
 #define E752X_DRAM_NERR		0x82	/* DRAM next error register (16b) */
 #define E752X_DRAM_ERRMASK	0x84	/* DRAM error mask register (8b) */
@@ -127,7 +128,7 @@ static struct edac_pci_ctl_info *e752x_pci;
 					/*     error address register (32b) */
 					/*
 					 * 31    Reserved
-					 * 30:2  CE address (64 byte block 34:6)
+					 * 30:2  CE addr (64 byte block 34:6)
 					 * 1     Reserved
 					 * 0     HiLoCS
 					 */
@@ -147,11 +148,11 @@ static struct edac_pci_ctl_info *e752x_pci;
 					 * 1     Reserved
 					 * 0     HiLoCS
 					 */
-#define E752X_DRAM_SCRB_ADD	0xA8	/* DRAM first uncorrectable scrub memory */
+#define E752X_DRAM_SCRB_ADD	0xA8	/* DRAM 1st uncorrectable scrub mem */
 					/*     error address register (32b) */
 					/*
 					 * 31    Reserved
-					 * 30:2  CE address (64 byte block 34:6)
+					 * 30:2  CE addr (64 byte block 34:6)
 					 * 1     Reserved
 					 * 0     HiLoCS
 					 */
@@ -394,9 +395,12 @@ static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error,
 	struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info;
 
 	error_1b = retry_add;
-	page = error_1b >> (PAGE_SHIFT - 4);	/* convert the addr to 4k page */
-	row = pvt->mc_symmetric ? ((page >> 1) & 3) :	/* chip select are bits 14 & 13 */
+	page = error_1b >> (PAGE_SHIFT - 4);  /* convert the addr to 4k page */
+
+	/* chip select are bits 14 & 13 */
+	row = pvt->mc_symmetric ? ((page >> 1) & 3) :
 		edac_mc_find_csrow_by_page(mci, page);
+
 	e752x_mc_printk(mci, KERN_WARNING,
 			"CE page 0x%lx, row %d : Memory read retry\n",
 			(long unsigned int)page, row);
@@ -422,12 +426,21 @@ static inline void process_threshold_ce(struct mem_ctl_info *mci, u16 error,
 }
 
 static char *global_message[11] = {
-	"PCI Express C1", "PCI Express C", "PCI Express B1",
-	"PCI Express B", "PCI Express A1", "PCI Express A",
-	"DMA Controler", "HUB or NS Interface", "System Bus",
-	"DRAM Controler", "Internal Buffer"
+	"PCI Express C1",
+	"PCI Express C",
+	"PCI Express B1",
+	"PCI Express B",
+	"PCI Express A1",
+	"PCI Express A",
+	"DMA Controller",
+	"HUB or NS Interface",
+	"System Bus",
+	"DRAM Controller",  /* index 9 */
+	"Internal Buffer"
 };
 
+#define DRAM_ENTRY	9
+
 static char *fatal_message[2] = { "Non-Fatal ", "Fatal " };
 
 static void do_global_error(int fatal, u32 errors)
@@ -435,9 +448,16 @@ static void do_global_error(int fatal, u32 errors)
 	int i;
 
 	for (i = 0; i < 11; i++) {
-		if (errors & (1 << i))
-			e752x_printk(KERN_WARNING, "%sError %s\n",
-				fatal_message[fatal], global_message[i]);
+		if (errors & (1 << i)) {
+			/* If the error is from DRAM Controller OR
+			 * we are to report ALL errors, then
+			 * report the error
+			 */
+			if ((i == DRAM_ENTRY) || report_non_memory_errors)
+				e752x_printk(KERN_WARNING, "%sError %s\n",
+					fatal_message[fatal],
+					global_message[i]);
+		}
 	}
 }
 
@@ -1021,7 +1041,7 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx,
 	struct pci_dev *dev;
 
 	pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
-					pvt->dev_info->err_dev, pvt->bridge_ck);
+				pvt->dev_info->err_dev, pvt->bridge_ck);
 
 	if (pvt->bridge_ck == NULL)
 		pvt->bridge_ck = pci_scan_single_device(pdev->bus,
@@ -1034,8 +1054,9 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx,
 		return 1;
 	}
 
-	dev = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev,
-			NULL);
+	dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+				e752x_devs[dev_idx].ctl_dev,
+				NULL);
 
 	if (dev == NULL)
 		goto fail;
@@ -1316,7 +1337,8 @@ MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers");
 
 module_param(force_function_unhide, int, 0444);
 MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:"
-		 " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access");
+		 " 1=force unhide and hope BIOS doesn't fight driver for "
+		"Dev0:Fun1 access");
 
 module_param(edac_op_state, int, 0444);
 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
@@ -1324,3 +1346,6 @@ MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
 module_param(sysbus_parity, int, 0444);
 MODULE_PARM_DESC(sysbus_parity, "0=disable system bus parity checking,"
 		" 1=enable system bus parity checking, default=auto-detect");
+module_param(report_non_memory_errors, int, 0644);
+MODULE_PARM_DESC(report_non_memory_errors, "0=disable non-memory error "
+		"reporting, 1=enable non-memory error reporting");
-- 
GitLab


From 596d3941035d4d4b484c820f10f57fd4816c6615 Mon Sep 17 00:00:00 2001
From: Dave Jiang <djiang@mvista.com>
Date: Fri, 25 Jul 2008 01:49:13 -0700
Subject: [PATCH 840/853] edac: mv64x60 fix get_property

Update get_property() call to use of_get_property() in order to fix compile

Signed-off-by: Dave Jiang <djiang@mvista.com>
Signed-off-by: Doug Thompson <dougthompson.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/mv64x60_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index bf071f140a0..de69163ff5b 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -612,7 +612,7 @@ static void get_total_mem(struct mv64x60_mc_pdata *pdata)
 	if (!np)
 		return;
 
-	reg = get_property(np, "reg", NULL);
+	reg = of_get_property(np, "reg", NULL);
 
 	pdata->total_mem = reg[1];
 }
-- 
GitLab


From fcb19171d196172a4f57e056f7a60e6d1e2e8c85 Mon Sep 17 00:00:00 2001
From: Dave Jiang <djiang@mvista.com>
Date: Fri, 25 Jul 2008 01:49:14 -0700
Subject: [PATCH 841/853] edac: mv64x60 add pci fixup

Fixup of missing bit 0 on 64360 PCIx_ERR_MASK and errata FEr-#11 and
FEr-#16 for the 64460.  Bit 0 must remain 0.

Signed-off-by: Dave Jiang <djiang@mvista.com>
Signed-off-by: Doug Thompson <dougthompson.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/mv64x60_edac.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index de69163ff5b..083ce8d0c63 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -71,6 +71,35 @@ static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+/*
+ * Erratum FEr PCI-#16: clear bit 0 of the PCI SERRn Mask register.
+ *
+ * Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and, because of
+ * errata FEr-#11 and FEr-#16 for the 64460, it should be 0 on that chip as
+ * well.  IOW, don't set bit 0.  __devinit, not __init: the probe calls this.
+ */
+static int __devinit mv64x60_pci_fixup(struct platform_device *pdev)
+{
+	struct resource *r;
+	void __iomem *pci_serr;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!r) {
+		printk(KERN_ERR "%s: Unable to get resource for "
+		       "PCI err regs\n", __func__);
+		return -ENOENT;
+	}
+
+	pci_serr = ioremap(r->start, r->end - r->start + 1);
+	if (!pci_serr)
+		return -ENOMEM;
+
+	out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
+	iounmap(pci_serr);
+
+	return 0;
+}
+
 static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
 {
 	struct edac_pci_ctl_info *pci;
@@ -128,6 +157,12 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
 		goto err;
 	}
 
+	res = mv64x60_pci_fixup(pdev);
+	if (res < 0) {
+		printk(KERN_ERR "%s: PCI fixup failed\n", __func__);
+		goto err;
+	}
+
 	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
 	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
 	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
-- 
GitLab


From f87bd330edf06fd49b3fbc368d90fb180375f2a2 Mon Sep 17 00:00:00 2001
From: Dave Jiang <djiang@mvista.com>
Date: Fri, 25 Jul 2008 01:49:14 -0700
Subject: [PATCH 842/853] edac: mpc85xx fix pci ofdev 2nd pass

Convert PCI err device from platform to open firmware of_dev to comply
with powerpc schemes.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Dave Jiang <djiang@mvista.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/mpc85xx_edac.c | 67 ++++++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 24 deletions(-)

diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index d49361bfe67..2265d9ca153 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -195,14 +195,15 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
+static int __devinit mpc85xx_pci_err_probe(struct of_device *op,
+					   const struct of_device_id *match)
 {
 	struct edac_pci_ctl_info *pci;
 	struct mpc85xx_pci_pdata *pdata;
-	struct resource *r;
+	struct resource r;
 	int res = 0;
 
-	if (!devres_open_group(&pdev->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
+	if (!devres_open_group(&op->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
 		return -ENOMEM;
 
 	pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mpc85xx_pci_err");
@@ -212,34 +213,37 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
 	pdata = pci->pvt_info;
 	pdata->name = "mpc85xx_pci_err";
 	pdata->irq = NO_IRQ;
-	platform_set_drvdata(pdev, pci);
-	pci->dev = &pdev->dev;
+	dev_set_drvdata(&op->dev, pci);
+	pci->dev = &op->dev;
 	pci->mod_name = EDAC_MOD_STR;
 	pci->ctl_name = pdata->name;
-	pci->dev_name = pdev->dev.bus_id;
+	pci->dev_name = op->dev.bus_id;
 
 	if (edac_op_state == EDAC_OPSTATE_POLL)
 		pci->edac_check = mpc85xx_pci_check;
 
 	pdata->edac_idx = edac_pci_idx++;
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!r) {
+	res = of_address_to_resource(op->node, 0, &r);
+	if (res) {
 		printk(KERN_ERR "%s: Unable to get resource for "
 		       "PCI err regs\n", __func__);
 		goto err;
 	}
 
-	if (!devm_request_mem_region(&pdev->dev, r->start,
-				     r->end - r->start + 1, pdata->name)) {
+	/* we only need the error registers */
+	r.start += 0xe00;
+
+	if (!devm_request_mem_region(&op->dev, r.start,
+					r.end - r.start + 1, pdata->name)) {
 		printk(KERN_ERR "%s: Error while requesting mem region\n",
 		       __func__);
 		res = -EBUSY;
 		goto err;
 	}
 
-	pdata->pci_vbase = devm_ioremap(&pdev->dev, r->start,
-					r->end - r->start + 1);
+	pdata->pci_vbase = devm_ioremap(&op->dev, r.start,
+					r.end - r.start + 1);
 	if (!pdata->pci_vbase) {
 		printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
 		res = -ENOMEM;
@@ -266,14 +270,15 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
 	}
 
 	if (edac_op_state == EDAC_OPSTATE_INT) {
-		pdata->irq = platform_get_irq(pdev, 0);
-		res = devm_request_irq(&pdev->dev, pdata->irq,
+		pdata->irq = irq_of_parse_and_map(op->node, 0);
+		res = devm_request_irq(&op->dev, pdata->irq,
 				       mpc85xx_pci_isr, IRQF_DISABLED,
 				       "[EDAC] PCI err", pci);
 		if (res < 0) {
 			printk(KERN_ERR
 			       "%s: Unable to requiest irq %d for "
 			       "MPC85xx PCI err\n", __func__, pdata->irq);
+			irq_dispose_mapping(pdata->irq);
 			res = -ENODEV;
 			goto err2;
 		}
@@ -282,23 +287,23 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
 		       pdata->irq);
 	}
 
-	devres_remove_group(&pdev->dev, mpc85xx_pci_err_probe);
+	devres_remove_group(&op->dev, mpc85xx_pci_err_probe);
 	debugf3("%s(): success\n", __func__);
 	printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n");
 
 	return 0;
 
 err2:
-	edac_pci_del_device(&pdev->dev);
+	edac_pci_del_device(&op->dev);
 err:
 	edac_pci_free_ctl_info(pci);
-	devres_release_group(&pdev->dev, mpc85xx_pci_err_probe);
+	devres_release_group(&op->dev, mpc85xx_pci_err_probe);
 	return res;
 }
 
-static int mpc85xx_pci_err_remove(struct platform_device *pdev)
+static int mpc85xx_pci_err_remove(struct of_device *op)
 {
-	struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
+	struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev);
 	struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
 
 	debugf0("%s()\n", __func__);
@@ -318,12 +323,26 @@ static int mpc85xx_pci_err_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct platform_driver mpc85xx_pci_err_driver = {
+static struct of_device_id mpc85xx_pci_err_of_match[] = {
+	{
+	 .compatible = "fsl,mpc8540-pcix",
+	 },
+	{
+	 .compatible = "fsl,mpc8540-pci",
+	},
+	{},
+};
+
+static struct of_platform_driver mpc85xx_pci_err_driver = {
+	.owner = THIS_MODULE,
+	.name = "mpc85xx_pci_err",
+	.match_table = mpc85xx_pci_err_of_match,
 	.probe = mpc85xx_pci_err_probe,
 	.remove = __devexit_p(mpc85xx_pci_err_remove),
 	.driver = {
-		.name = "mpc85xx_pci_err",
-	}
+		   .name = "mpc85xx_pci_err",
+		   .owner = THIS_MODULE,
+		   },
 };
 
 #endif				/* CONFIG_PCI */
@@ -1002,7 +1021,7 @@ static int __init mpc85xx_mc_init(void)
 		printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n");
 
 #ifdef CONFIG_PCI
-	res = platform_driver_register(&mpc85xx_pci_err_driver);
+	res = of_register_platform_driver(&mpc85xx_pci_err_driver);
 	if (res)
 		printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
 #endif
@@ -1025,7 +1044,7 @@ static void __exit mpc85xx_mc_exit(void)
 {
 	mtspr(SPRN_HID1, orig_hid1);
 #ifdef CONFIG_PCI
-	platform_driver_unregister(&mpc85xx_pci_err_driver);
+	of_unregister_platform_driver(&mpc85xx_pci_err_driver);
 #endif
 	of_unregister_platform_driver(&mpc85xx_l2_err_driver);
 	of_unregister_platform_driver(&mpc85xx_mc_err_driver);
-- 
GitLab


From 93082f0b15841b8926c38ef224d0e6f720000635 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 25 Jul 2008 10:56:36 -0700
Subject: [PATCH 843/853] Fix ahci driver 'flags' type

The new type checking of the flags arguments to irqsave and friends
(commit 3f307891ce0e7b0438c432af1aacd656a092ff45) pointed out this thing
with a big nice warning.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/ata/ahci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index dc7596f028b..ef3e5522e1a 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1273,7 +1273,7 @@ static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
 	void __iomem *mmio = ap->host->iomap[AHCI_PCI_BAR];
 	u32 em_ctl;
 	u32 message[] = {0, 0};
-	unsigned int flags;
+	unsigned long flags;
 	int pmp;
 	struct ahci_em_priv *emp;
 
-- 
GitLab


From 44463f7dd6c8039904333e4374e5c6e9ad83006f Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Fri, 25 Jul 2008 14:17:19 -0400
Subject: [PATCH 844/853] firmware: create firmware binaries during 'make
 modules'.

This means that we no longer need write access to the source tree while
doing 'make modules_install'.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 Makefile                |  1 +
 scripts/Makefile.fwinst | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 4bcd1cf90cb..3cad7db5eba 100644
--- a/Makefile
+++ b/Makefile
@@ -1061,6 +1061,7 @@ modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux)
 	$(Q)$(AWK) '!x[$$0]++' $(vmlinux-dirs:%=$(objtree)/%/modules.order) > $(objtree)/modules.order
 	@echo '  Building modules, stage 2.';
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_modbuild
 
 
 # Target to prepare building external modules
diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst
index c972c0f54ce..f63a663de15 100644
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst
@@ -17,14 +17,15 @@ include $(srctree)/$(obj)/Makefile
 
 include scripts/Makefile.host
 
-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-m))
-
+mod-fw := $(fw-shipped-m)
 # If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the 
 # firmware for in-kernel drivers too.
 ifndef CONFIG_FIRMWARE_IN_KERNEL
-mod-fw += $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-y))
+mod-fw += $(fw-shipped-y)
 endif
 
+installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw))
+
 installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
 installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/.
 
@@ -49,7 +50,8 @@ PHONY +=  __fw_install __fw_modinst FORCE
 .PHONY: $(PHONY)
 
 __fw_install: $(installed-fw)
-__fw_modinst: $(mod-fw)
+__fw_modinst: $(installed-mod-fw)
+__fw_modbuild: $(addprefix $(obj)/,$(mod-fw))
 
 FORCE:
 
-- 
GitLab


From e44d1b2998d62a1f2f4d7eb17b56ba396535509f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 25 Jul 2008 12:57:41 +0200
Subject: [PATCH 845/853] mm/hugetlb.c: fix build failure with !CONFIG_SYSCTL

With !CONFIG_SYSCTL on x86 and the latest -git, I get:

     mm/hugetlb.c: In function 'decrement_hugepage_resv_vma':
     mm/hugetlb.c:83: error: 'reserve' undeclared (first use in this function)
     mm/hugetlb.c:83: error: (Each undeclared identifier is reported only once
     mm/hugetlb.c:83: error: for each function it appears in.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 41341c41419..a8bf4ab01f8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1026,6 +1026,17 @@ static void __init report_hugepages(void)
 	}
 }
 
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+	int node;
+	unsigned int nr = 0;
+
+	for_each_node_mask(node, cpuset_current_mems_allowed)
+		nr += array[node];
+
+	return nr;
+}
+
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(struct hstate *h, unsigned long count)
@@ -1375,17 +1386,6 @@ static int __init hugetlb_default_setup(char *s)
 }
 __setup("default_hugepagesz=", hugetlb_default_setup);
 
-static unsigned int cpuset_mems_nr(unsigned int *array)
-{
-	int node;
-	unsigned int nr = 0;
-
-	for_each_node_mask(node, cpuset_current_mems_allowed)
-		nr += array[node];
-
-	return nr;
-}
-
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 			   struct file *file, void __user *buffer,
 			   size_t *length, loff_t *ppos)
-- 
GitLab


From 9b81361631bbb1d85c99ddec677d42afe516737b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 25 Jul 2008 13:02:37 +0200
Subject: [PATCH 846/853] signalfd: fix undefined reference to
 `compat_sys_signalfd4' when !CONFIG_SIGNALFD

fix:

arch/x86/ia32/built-in.o: In function `ia32_sys_call_table':
(.rodata+0xa38): undefined reference to `compat_sys_signalfd4'

on !CONFIG_SIGNALFD.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys_ni.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 55eca1594da..08d6e1bb99a 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -160,6 +160,7 @@ cond_syscall(sys_ioprio_get);
 cond_syscall(sys_signalfd);
 cond_syscall(sys_signalfd4);
 cond_syscall(compat_sys_signalfd);
+cond_syscall(compat_sys_signalfd4);
 cond_syscall(sys_timerfd_create);
 cond_syscall(sys_timerfd_settime);
 cond_syscall(sys_timerfd_gettime);
-- 
GitLab


From 7dcf2a9fced59e58e4694cdcf15850c01fdba89b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 1 Jul 2008 19:27:16 +0300
Subject: [PATCH 847/853] remove dummy asm/kvm.h files

This patch removes the dummy asm/kvm.h files on architectures not (yet)
supporting KVM and uses the same conditional headers installation as
already used for a.out.h.

Also removed are superfluous install rules in the s390 and x86 Kbuild
files (they are already in Kbuild.asm).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/asm-alpha/kvm.h        | 6 ------
 include/asm-arm/kvm.h          | 6 ------
 include/asm-avr32/kvm.h        | 6 ------
 include/asm-blackfin/kvm.h     | 6 ------
 include/asm-cris/kvm.h         | 6 ------
 include/asm-frv/kvm.h          | 6 ------
 include/asm-generic/Kbuild.asm | 2 ++
 include/asm-h8300/kvm.h        | 6 ------
 include/asm-m32r/kvm.h         | 6 ------
 include/asm-m68k/kvm.h         | 6 ------
 include/asm-m68knommu/kvm.h    | 6 ------
 include/asm-mips/kvm.h         | 6 ------
 include/asm-mn10300/kvm.h      | 6 ------
 include/asm-parisc/kvm.h       | 6 ------
 include/asm-s390/Kbuild        | 1 -
 include/asm-sh/kvm.h           | 6 ------
 include/asm-sparc/kvm.h        | 6 ------
 include/asm-sparc64/kvm.h      | 1 -
 include/asm-um/kvm.h           | 6 ------
 include/asm-x86/Kbuild         | 1 -
 include/asm-xtensa/kvm.h       | 6 ------
 include/linux/Kbuild           | 2 ++
 22 files changed, 4 insertions(+), 105 deletions(-)
 delete mode 100644 include/asm-alpha/kvm.h
 delete mode 100644 include/asm-arm/kvm.h
 delete mode 100644 include/asm-avr32/kvm.h
 delete mode 100644 include/asm-blackfin/kvm.h
 delete mode 100644 include/asm-cris/kvm.h
 delete mode 100644 include/asm-frv/kvm.h
 delete mode 100644 include/asm-h8300/kvm.h
 delete mode 100644 include/asm-m32r/kvm.h
 delete mode 100644 include/asm-m68k/kvm.h
 delete mode 100644 include/asm-m68knommu/kvm.h
 delete mode 100644 include/asm-mips/kvm.h
 delete mode 100644 include/asm-mn10300/kvm.h
 delete mode 100644 include/asm-parisc/kvm.h
 delete mode 100644 include/asm-sh/kvm.h
 delete mode 100644 include/asm-sparc/kvm.h
 delete mode 100644 include/asm-sparc64/kvm.h
 delete mode 100644 include/asm-um/kvm.h
 delete mode 100644 include/asm-xtensa/kvm.h

diff --git a/include/asm-alpha/kvm.h b/include/asm-alpha/kvm.h
deleted file mode 100644
index b9daec42968..00000000000
--- a/include/asm-alpha/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_ALPHA_H
-#define __LINUX_KVM_ALPHA_H
-
-/* alpha does not support KVM */
-
-#endif
diff --git a/include/asm-arm/kvm.h b/include/asm-arm/kvm.h
deleted file mode 100644
index cb3c08cbcb9..00000000000
--- a/include/asm-arm/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_ARM_H
-#define __LINUX_KVM_ARM_H
-
-/* arm does not support KVM */
-
-#endif
diff --git a/include/asm-avr32/kvm.h b/include/asm-avr32/kvm.h
deleted file mode 100644
index 8c5777020e2..00000000000
--- a/include/asm-avr32/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_AVR32_H
-#define __LINUX_KVM_AVR32_H
-
-/* avr32 does not support KVM */
-
-#endif
diff --git a/include/asm-blackfin/kvm.h b/include/asm-blackfin/kvm.h
deleted file mode 100644
index e3477d77c01..00000000000
--- a/include/asm-blackfin/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_BLACKFIN_H
-#define __LINUX_KVM_BLACKFIN_H
-
-/* blackfin does not support KVM */
-
-#endif
diff --git a/include/asm-cris/kvm.h b/include/asm-cris/kvm.h
deleted file mode 100644
index c860f51149f..00000000000
--- a/include/asm-cris/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_CRIS_H
-#define __LINUX_KVM_CRIS_H
-
-/* cris does not support KVM */
-
-#endif
diff --git a/include/asm-frv/kvm.h b/include/asm-frv/kvm.h
deleted file mode 100644
index 9c8a4f08d0a..00000000000
--- a/include/asm-frv/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_FRV_H
-#define __LINUX_KVM_FRV_H
-
-/* frv does not support KVM */
-
-#endif
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index 7cd25b8e7c9..1170dc60e63 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -1,4 +1,6 @@
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
 header-y  += kvm.h
+endif
 
 ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
 unifdef-y += a.out.h
diff --git a/include/asm-h8300/kvm.h b/include/asm-h8300/kvm.h
deleted file mode 100644
index bdbed7b987e..00000000000
--- a/include/asm-h8300/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_H8300_H
-#define __LINUX_KVM_H8300_H
-
-/* h8300 does not support KVM */
-
-#endif
diff --git a/include/asm-m32r/kvm.h b/include/asm-m32r/kvm.h
deleted file mode 100644
index 99a40515b77..00000000000
--- a/include/asm-m32r/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M32R_H
-#define __LINUX_KVM_M32R_H
-
-/* m32r does not support KVM */
-
-#endif
diff --git a/include/asm-m68k/kvm.h b/include/asm-m68k/kvm.h
deleted file mode 100644
index 7ed27fce524..00000000000
--- a/include/asm-m68k/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M68K_H
-#define __LINUX_KVM_M68K_H
-
-/* m68k does not support KVM */
-
-#endif
diff --git a/include/asm-m68knommu/kvm.h b/include/asm-m68knommu/kvm.h
deleted file mode 100644
index b49d4258dab..00000000000
--- a/include/asm-m68knommu/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M68KNOMMU_H
-#define __LINUX_KVM_M68KNOMMU_H
-
-/* m68knommu does not support KVM */
-
-#endif
diff --git a/include/asm-mips/kvm.h b/include/asm-mips/kvm.h
deleted file mode 100644
index 093a5b7f796..00000000000
--- a/include/asm-mips/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_MIPS_H
-#define __LINUX_KVM_MIPS_H
-
-/* mips does not support KVM */
-
-#endif
diff --git a/include/asm-mn10300/kvm.h b/include/asm-mn10300/kvm.h
deleted file mode 100644
index f6b609ff4a5..00000000000
--- a/include/asm-mn10300/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_MN10300_H
-#define __LINUX_KVM_MN10300_H
-
-/* mn10300 does not support KVM */
-
-#endif
diff --git a/include/asm-parisc/kvm.h b/include/asm-parisc/kvm.h
deleted file mode 100644
index 00cc4581254..00000000000
--- a/include/asm-parisc/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_PARISC_H
-#define __LINUX_KVM_PARISC_H
-
-/* parisc does not support KVM */
-
-#endif
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild
index bb5e9edb982..63a23415fba 100644
--- a/include/asm-s390/Kbuild
+++ b/include/asm-s390/Kbuild
@@ -7,7 +7,6 @@ header-y += tape390.h
 header-y += ucontext.h
 header-y += vtoc.h
 header-y += zcrypt.h
-header-y += kvm.h
 header-y += chsc.h
 
 unifdef-y += cmb.h
diff --git a/include/asm-sh/kvm.h b/include/asm-sh/kvm.h
deleted file mode 100644
index 6af51dbab2d..00000000000
--- a/include/asm-sh/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_SH_H
-#define __LINUX_KVM_SH_H
-
-/* sh does not support KVM */
-
-#endif
diff --git a/include/asm-sparc/kvm.h b/include/asm-sparc/kvm.h
deleted file mode 100644
index 2e5478da381..00000000000
--- a/include/asm-sparc/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_SPARC_H
-#define __LINUX_KVM_SPARC_H
-
-/* sparc does not support KVM */
-
-#endif
diff --git a/include/asm-sparc64/kvm.h b/include/asm-sparc64/kvm.h
deleted file mode 100644
index 53564ad86b1..00000000000
--- a/include/asm-sparc64/kvm.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-sparc/kvm.h>
diff --git a/include/asm-um/kvm.h b/include/asm-um/kvm.h
deleted file mode 100644
index 66aa7709455..00000000000
--- a/include/asm-um/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_UM_H
-#define __LINUX_KVM_UM_H
-
-/* um does not support KVM */
-
-#endif
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 1e3554596f7..811e9828ccb 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -3,7 +3,6 @@ include include/asm-generic/Kbuild.asm
 header-y += boot.h
 header-y += bootparam.h
 header-y += debugreg.h
-header-y += kvm.h
 header-y += ldt.h
 header-y += msr-index.h
 header-y += prctl.h
diff --git a/include/asm-xtensa/kvm.h b/include/asm-xtensa/kvm.h
deleted file mode 100644
index bda4e331e98..00000000000
--- a/include/asm-xtensa/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_XTENSA_H
-#define __LINUX_KVM_XTENSA_H
-
-/* xtensa does not support KVM */
-
-#endif
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 71d70d1fbce..402c8f55d71 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -256,7 +256,9 @@ unifdef-y += kd.h
 unifdef-y += kernelcapi.h
 unifdef-y += kernel.h
 unifdef-y += keyboard.h
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
 unifdef-y += kvm.h
+endif
 unifdef-y += llc.h
 unifdef-y += loop.h
 unifdef-y += lp.h
-- 
GitLab


From c6af5e9f8a57467df2e55e428316a43480174521 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Fri, 25 Jul 2008 15:48:04 +0200
Subject: [PATCH 848/853] bootmem: Move node allocation macros back to
 !HAVE_ARCH_BOOTMEM_NODE

These got unintentionally moved; put them back, as x86 provides its own
versions.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 4ddf2922fc8..652470b687c 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -103,17 +103,16 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 	__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-
-extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
-				   int flags);
-
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
+#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
+
+extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
+				   int flags);
 
 extern void *alloc_bootmem_section(unsigned long size,
 				   unsigned long section_nr);
-- 
GitLab


From f1373da87be917e5b2356af44764620487376a07 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 25 Jul 2008 15:18:31 -0700
Subject: [PATCH 849/853] sparc: Wire up new system calls.

This wires up the recently added signalfd4, eventfd2, epoll_create1,
dup3, pipe2, and inotify_init1 system calls.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/systbls.S   | 3 ++-
 arch/sparc64/kernel/systbls.S | 6 ++++--
 include/asm-sparc/unistd_32.h | 8 +++++++-
 include/asm-sparc/unistd_64.h | 8 +++++++-
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
index 5a7c4c8345c..e1b9233b90a 100644
--- a/arch/sparc/kernel/systbls.S
+++ b/arch/sparc/kernel/systbls.S
@@ -80,4 +80,5 @@ sys_call_table:
 /*300*/	.long sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy
 /*305*/	.long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
 /*310*/	.long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
-/*315*/	.long sys_timerfd_settime, sys_timerfd_gettime
+/*315*/	.long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
+/*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index 8b5282d433c..1095bf4c510 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -81,7 +81,8 @@ sys_call_table32:
 /*300*/	.word compat_sys_set_robust_list, compat_sys_get_robust_list, compat_sys_migrate_pages, compat_sys_mbind, compat_sys_get_mempolicy
 	.word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
 /*310*/	.word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
-	.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime
+	.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
+/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1
 
 #endif /* CONFIG_COMPAT */
 
@@ -154,4 +155,5 @@ sys_call_table:
 /*300*/	.word sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy
 	.word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
 /*310*/	.word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
-	.word sys_timerfd_settime, sys_timerfd_gettime
+	.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
+/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1
diff --git a/include/asm-sparc/unistd_32.h b/include/asm-sparc/unistd_32.h
index 2338a027637..648643a9f13 100644
--- a/include/asm-sparc/unistd_32.h
+++ b/include/asm-sparc/unistd_32.h
@@ -332,8 +332,14 @@
 #define __NR_fallocate		314
 #define __NR_timerfd_settime	315
 #define __NR_timerfd_gettime	316
+#define __NR_signalfd4		317
+#define __NR_eventfd2		318
+#define __NR_epoll_create1	319
+#define __NR_dup3		320
+#define __NR_pipe2		321
+#define __NR_inotify_init1	322
 
-#define NR_SYSCALLS		317
+#define NR_SYSCALLS		323
 
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
  * it never had the plain ones and there is no value to adding those
diff --git a/include/asm-sparc/unistd_64.h b/include/asm-sparc/unistd_64.h
index 13be4453a1f..c5cc0e05232 100644
--- a/include/asm-sparc/unistd_64.h
+++ b/include/asm-sparc/unistd_64.h
@@ -334,8 +334,14 @@
 #define __NR_fallocate		314
 #define __NR_timerfd_settime	315
 #define __NR_timerfd_gettime	316
+#define __NR_signalfd4		317
+#define __NR_eventfd2		318
+#define __NR_epoll_create1	319
+#define __NR_dup3		320
+#define __NR_pipe2		321
+#define __NR_inotify_init1	322
 
-#define NR_SYSCALLS		317
+#define NR_SYSCALLS		323
 
 #ifdef __KERNEL__
 #define __ARCH_WANT_IPC_PARSE_VERSION
-- 
GitLab


From fc532f810832beb3306b42526a78f411972281c7 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Fri, 25 Jul 2008 17:50:30 -0500
Subject: [PATCH 850/853] powerpc: Fix boot problem due to AT_BASE_PLATFORM
 change

Commit 9115d13453dee22473a1e8cacc90a8d64a9c4bc9 ("powerpc: Enable
AT_BASE_PLATFORM aux vector") broke boot on 32-bit powerpc systems; we
have to use PTRRELOC to initialize powerpc_base_platform this early in
boot.

Bug reported by Jon Smirl.

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/cputable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 25a052c1675..25c273c761d 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1660,8 +1660,8 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
 			 * Set the base platform string once; assumes
 			 * we're called with real pvr first.
 			 */
-			if (powerpc_base_platform == NULL)
-				powerpc_base_platform = t->platform;
+			if (*PTRRELOC(&powerpc_base_platform) == NULL)
+				*PTRRELOC(&powerpc_base_platform) = t->platform;
 
 #if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
 			/* ppc64 and booke expect identify_cpu to also call
-- 
GitLab


From b4615e69b6c6353878b734a8202b65efbc554df4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 25 Jul 2008 13:19:22 -0700
Subject: [PATCH 851/853] sys_paccept definition missing __user annotation

Introduced by commit aaca0bdca573f3f51ea03139f9c7289541e7bca3 ("flag
parameters: paccept"):

  net/socket.c:1515:17: error: symbol 'sys_paccept' redeclared with different type (originally declared at include/linux/syscalls.h:413) - incompatible argument 4 (different address spaces)

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 06f2bf76c03..d6ff145919c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -411,7 +411,7 @@ asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
-			    const sigset_t *, size_t, int);
+			    const __user sigset_t *, size_t, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
-- 
GitLab


From 8d25b36b77fe32c296ece83e94ca6ae4d17f3e25 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 26 Jul 2008 02:38:00 +0300
Subject: [PATCH 852/853] MFD_TC6393XB is ARM-only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compile error on other architectures:

    CC      drivers/mfd/tc6393xb.o
  /home/bunk/linux/kernel-2.6/git/linux-2.6/drivers/mfd/tc6393xb.c: In function ‘tc6393xb_attach_irq’:
  /home/bunk/linux/kernel-2.6/git/linux-2.6/drivers/mfd/tc6393xb.c:324: error: implicit declaration of function ‘set_irq_flags’
  ...

Reported-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 1f57a99fd96..883e7ea31de 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -52,7 +52,7 @@ config HTC_PASIC3
 
 config MFD_TC6393XB
 	bool "Support Toshiba TC6393XB"
-	depends on GPIOLIB
+	depends on GPIOLIB && ARM
 	select MFD_CORE
 	help
 	  Support for Toshiba Mobile IO Controller TC6393XB
-- 
GitLab


From 024e8ac04453b3525448c31ef39848cf675ba6db Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 25 Jul 2008 20:00:10 -0700
Subject: [PATCH 853/853] x86_64: fix ia32 AMD syscall audit fast-path

The new code in commit 5cbf1565f29eb57a86a305b08836613508e294d7
has a bug in the version supporting the AMD 'syscall' instruction.
It clobbers the user's %ecx register value (with the %ebp value).

This change fixes it.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/ia32/ia32entry.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e4bd1793a5e..ffc1bb4fed7 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -201,7 +201,7 @@ sysexit_from_sys_call:
 	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
 	.endm
 
-	.macro auditsys_exit exit
+	.macro auditsys_exit exit,ebpsave=RBP
 	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
 	jnz int_ret_from_sys_call
 	TRACE_IRQS_ON
@@ -214,7 +214,7 @@ sysexit_from_sys_call:
 	call audit_syscall_exit
 	GET_THREAD_INFO(%r10)
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
-	movl RBP-ARGOFFSET(%rsp),%ebp	/* reload user register value */
+	movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	cli
 	TRACE_IRQS_OFF
@@ -347,7 +347,7 @@ cstar_auditsys:
 	jmp cstar_dispatch
 
 sysretl_audit:
-	auditsys_exit sysretl_from_sys_call
+	auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
 #endif
 
 cstar_tracesys:
-- 
GitLab