Discussion:
[PATCH 03/22] HWPOISON: Add support for poison swap entries v2
Wu Fengguang
2009-06-15 02:45:23 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

Memory migration uses special swap entry types to trigger special actions on
page faults. Extend this mechanism to also support poisoned swap entries, to
trigger poison handling on page faults. This allows follow-on patches to
prevent processes from faulting in poisoned pages again.

v2: Fix overflow in MAX_SWAPFILES (Fengguang Wu)
v3: Better overflow fix (Hidehiro Kawai)

Reviewed-by: Wu Fengguang <***@intel.com>
Reviewed-by: Hidehiro Kawai <***@hitachi.com>
Signed-off-by: Andi Kleen <***@linux.intel.com>

---
include/linux/swap.h | 34 ++++++++++++++++++++++++++++------
include/linux/swapops.h | 38 ++++++++++++++++++++++++++++++++++++++
mm/swapfile.c | 4 ++--
3 files changed, 68 insertions(+), 8 deletions(-)

--- sound-2.6.orig/include/linux/swap.h
+++ sound-2.6/include/linux/swap.h
@@ -34,16 +34,38 @@ static inline int current_is_kswapd(void
* the type/offset into the pte as 5/27 as well.
*/
#define MAX_SWAPFILES_SHIFT 5
-#ifndef CONFIG_MIGRATION
-#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT)
+
+/*
+ * Use some of the swap files numbers for other purposes. This
+ * is a convenient way to hook into the VM to trigger special
+ * actions on faults.
+ */
+
+/*
+ * NUMA node memory migration support
+ */
+#ifdef CONFIG_MIGRATION
+#define SWP_MIGRATION_NUM 2
+#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM)
+#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
#else
-/* Use last two entries for page migration swap entries */
-#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2)
-#define SWP_MIGRATION_READ MAX_SWAPFILES
-#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1)
+#define SWP_MIGRATION_NUM 0
#endif

/*
+ * Handling of hardware poisoned pages with memory corruption.
+ */
+#ifdef CONFIG_MEMORY_FAILURE
+#define SWP_HWPOISON_NUM 1
+#define SWP_HWPOISON MAX_SWAPFILES
+#else
+#define SWP_HWPOISON_NUM 0
+#endif
+
+#define MAX_SWAPFILES \
+ ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+
+/*
* Magic header for a swap area. The first part of the union is
* what the swap magic looks like for the old (limited to 128MB)
* swap area format, the second part of the union adds - in the
--- sound-2.6.orig/include/linux/swapops.h
+++ sound-2.6/include/linux/swapops.h
@@ -131,3 +131,41 @@ static inline int is_write_migration_ent

#endif

+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_HWPOISON;
+}
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
+static inline int non_swap_entry(swp_entry_t entry)
+{
+ return swp_type(entry) >= MAX_SWAPFILES;
+}
+#else
+static inline int non_swap_entry(swp_entry_t entry)
+{
+ return 0;
+}
+#endif
--- sound-2.6.orig/mm/swapfile.c
+++ sound-2.6/mm/swapfile.c
@@ -697,7 +697,7 @@ int free_swap_and_cache(swp_entry_t entr
struct swap_info_struct *p;
struct page *page = NULL;

- if (is_migration_entry(entry))
+ if (non_swap_entry(entry))
return 1;

p = swap_info_get(entry);
@@ -2083,7 +2083,7 @@ static int __swap_duplicate(swp_entry_t
int count;
bool has_cache;

- if (is_migration_entry(entry))
+ if (non_swap_entry(entry))
return -EINVAL;

type = swp_type(entry);
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:34 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

Useful for some testing scenarios, although specific testing is often
better done through MADV_HWPOISON.

This can be done with the x86-level MCE injector too, but this interface
allows it to be done independently of low-level x86 changes.

Open issues:
Should be disabled for cgroups.

Signed-off-by: Andi Kleen <***@linux.intel.com>

---
mm/Kconfig | 4 ++++
mm/Makefile | 1 +
mm/hwpoison-inject.c | 41 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 46 insertions(+)

--- /dev/null
+++ sound-2.6/mm/hwpoison-inject.c
@@ -0,0 +1,41 @@
+/* Inject a hwpoison memory failure on a arbitary pfn */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+static struct dentry *hwpoison_dir, *corrupt_pfn;
+
+static int hwpoison_inject(void *data, u64 val)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val);
+ memory_failure(val, 18);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
+
+static void pfn_inject_exit(void)
+{
+ if (hwpoison_dir)
+ debugfs_remove_recursive(hwpoison_dir);
+}
+
+static int pfn_inject_init(void)
+{
+ hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
+ if (hwpoison_dir == NULL)
+ return -ENOMEM;
+ corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
+ NULL, &hwpoison_fops);
+ if (corrupt_pfn == NULL) {
+ pfn_inject_exit();
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+module_init(pfn_inject_init);
+module_exit(pfn_inject_exit);
--- sound-2.6.orig/mm/Kconfig
+++ sound-2.6/mm/Kconfig
@@ -242,6 +242,10 @@ config KSM
config MEMORY_FAILURE
bool

+config HWPOISON_INJECT
+ tristate "Poison pages injector"
+ depends on MEMORY_FAILURE && DEBUG_KERNEL
+
config NOMMU_INITIAL_TRIM_EXCESS
int "Turn on mmap() excess space trimming before booting"
depends on !MMU
--- sound-2.6.orig/mm/Makefile
+++ sound-2.6/mm/Makefile
@@ -43,5 +43,6 @@ endif
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
+obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:22 UTC
Permalink
Needed for later patch that walks rmap entries on its own.

This used to be very frowned upon, but memory-failure.c does
some rather specialized rmap walking and rmap has been stable
for quite some time, so I think it's ok now to export it.

Signed-off-by: Andi Kleen <***@linux.intel.com>

---
include/linux/rmap.h | 6 ++++++
mm/rmap.c | 4 ++--
2 files changed, 8 insertions(+), 2 deletions(-)

--- sound-2.6.orig/include/linux/rmap.h
+++ sound-2.6/include/linux/rmap.h
@@ -116,6 +116,12 @@ int try_to_munlock(struct page *);
int page_wrprotect(struct page *page, int *odirect_sync, int count_offset);
#endif

+/*
+ * Called by memory-failure.c to kill processes.
+ */
+struct anon_vma *page_lock_anon_vma(struct page *page);
+void page_unlock_anon_vma(struct anon_vma *anon_vma);
+
#else /* !CONFIG_MMU */

#define anon_vma_init() do {} while (0)
--- sound-2.6.orig/mm/rmap.c
+++ sound-2.6/mm/rmap.c
@@ -191,7 +191,7 @@ void __init anon_vma_init(void)
* Getting a lock on a stable anon_vma from a page off the LRU is
* tricky: page_lock_anon_vma rely on RCU to guard against the races.
*/
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
{
struct anon_vma *anon_vma;
unsigned long anon_mapping;
@@ -211,7 +211,7 @@ out:
return NULL;
}

-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
spin_unlock(&anon_vma->lock);
rcu_read_unlock();
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:27 UTC
Permalink
From: Wu Fengguang <***@intel.com>

So as to eliminate one #ifdef in the c source.

Proposed by Nick Piggin.

Acked-by: Nick Piggin <***@suse.de>
Signed-off-by: Wu Fengguang <***@intel.com>
---
arch/x86/mm/fault.c | 3 +--
include/linux/mm.h | 7 ++++++-
2 files changed, 7 insertions(+), 3 deletions(-)

--- sound-2.6.orig/arch/x86/mm/fault.c
+++ sound-2.6/arch/x86/mm/fault.c
@@ -820,14 +820,13 @@ do_sigbus(struct pt_regs *regs, unsigned
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;

-#ifdef CONFIG_MEMORY_FAILURE
if (fault & VM_FAULT_HWPOISON) {
printk(KERN_ERR
"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
tsk->comm, tsk->pid, address);
code = BUS_MCEERR_AR;
}
-#endif
+
force_sig_info_fault(SIGBUS, code, address, tsk);
}

--- sound-2.6.orig/include/linux/mm.h
+++ sound-2.6/include/linux/mm.h
@@ -700,11 +700,16 @@ static inline int page_mapped(struct pag
#define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_MAJOR 0x0004
#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */
-#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */

#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */
#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */

+#ifdef CONFIG_MEMORY_FAILURE
+#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */
+#else
+#define VM_FAULT_HWPOISON 0
+#endif
+
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)

/*
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:21 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

Hardware poisoned pages need special handling in the VM and shouldn't be
touched again. This requires a new page flag. Define it here.

The page flags wars seem to be over, so it shouldn't be a problem
to get a new one.

v2: Add TestSetHWPoison (suggested by Johannes Weiner)
v3: Define TestSetHWPoison on !CONFIG_MEMORY_FAILURE (Fengguang)

Acked-by: Christoph Lameter <***@linux.com>
Reviewed-by: Wu Fengguang <***@intel.com>
Signed-off-by: Andi Kleen <***@linux.intel.com>

---
include/linux/page-flags.h | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)

--- sound-2.6.orig/include/linux/page-flags.h
+++ sound-2.6/include/linux/page-flags.h
@@ -51,6 +51,9 @@
* PG_buddy is set to indicate that the page is free and in the buddy system
* (see mm/page_alloc.c).
*
+ * PG_hwpoison indicates that a page got corrupted in hardware and contains
+ * data with incorrect ECC bits that triggered a machine check. Accessing is
+ * not safe since it may cause another machine check. Don't touch!
*/

/*
@@ -102,6 +105,9 @@ enum pageflags {
#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
PG_uncached, /* Page has been mapped as uncached */
#endif
+#ifdef CONFIG_MEMORY_FAILURE
+ PG_hwpoison, /* hardware poisoned page. Don't touch */
+#endif
__NR_PAGEFLAGS,

/* Filesystems */
@@ -182,6 +188,9 @@ static inline void ClearPage##uname(stru
#define __CLEARPAGEFLAG_NOOP(uname) \
static inline void __ClearPage##uname(struct page *page) { }

+#define TESTSETFLAG_FALSE(uname) \
+static inline int TestSetPage##uname(struct page *page) { return 0; }
+
#define TESTCLEARFLAG_FALSE(uname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }

@@ -265,6 +274,16 @@ PAGEFLAG(Uncached, uncached)
PAGEFLAG_FALSE(Uncached)
#endif

+#ifdef CONFIG_MEMORY_FAILURE
+PAGEFLAG(HWPoison, hwpoison)
+TESTSETFLAG(HWPoison, hwpoison)
+#define __PG_HWPOISON (1UL << PG_hwpoison)
+#else
+PAGEFLAG_FALSE(HWPoison)
+TESTSETFLAG_FALSE(HWPoison)
+#define __PG_HWPOISON 0
+#endif
+
static inline int PageUptodate(struct page *page)
{
int ret = test_bit(PG_uptodate, &(page)->flags);
@@ -389,7 +408,7 @@ static inline void __ClearPageTail(struc
1 << PG_private | 1 << PG_private_2 | \
1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- 1 << PG_unevictable | __PG_MLOCKED)
+ 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)

/*
* Flags checked when a page is prepped for return by the page allocator.
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:40 UTC
Permalink
When a page is corrupted, users may care about:
- does it hit some important areas?
- can its data be recovered?
- can it be isolated to avoid a deadly future reference?
so that they can take proper actions, such as an emergency sync/shutdown or
scheduling a reboot at some convenient time.

Signed-off-by: Wu Fengguang <***@intel.com>
---
mm/memory-failure.c | 78 +++++++++++++++++++++++++++++++++++-------
1 file changed, 66 insertions(+), 12 deletions(-)

--- sound-2.6.orig/mm/memory-failure.c
+++ sound-2.6/mm/memory-failure.c
@@ -312,11 +312,32 @@ static const char *hwpoison_outcome_name
[RECOVERED] = "Recovered",
};

+enum hwpoison_page_type {
+ PAGE_IS_KERNEL,
+ PAGE_IS_FS_METADATA,
+ PAGE_IS_FILE_DATA,
+ PAGE_IS_ANON_DATA,
+ PAGE_IS_SWAP_CACHE,
+ PAGE_IS_FREE,
+};
+
+static const char *hwpoison_page_type_name[] = {
+ [ PAGE_IS_KERNEL ] = "kernel",
+ [ PAGE_IS_FS_METADATA ] = "fs_metadata",
+ [ PAGE_IS_FILE_DATA ] = "file_data",
+ [ PAGE_IS_ANON_DATA ] = "anon_data",
+ [ PAGE_IS_SWAP_CACHE ] = "swap_cache",
+ [ PAGE_IS_FREE ] = "free",
+};
+
struct hwpoison_control {
unsigned long pfn;
struct page *p; /* corrupted page */
struct page *page; /* compound page head */
int outcome;
+ int page_type;
+ unsigned data_recoverable:1;
+ unsigned page_isolated:1;
};

/*
@@ -358,8 +379,14 @@ static int me_pagecache_clean(struct hwp
page_cache_release(p);

mapping = page_mapping(p);
- if (mapping == NULL)
+ if (mapping == NULL) {
+ hpc->page_isolated = 1;
return RECOVERED;
+ }
+
+ /* clean file backed page is recoverable */
+ if (!PageDirty(p) && !PageSwapBacked(p))
+ hpc->data_recoverable = 1;

/*
* Now truncate the page in the page cache. This is really
@@ -368,12 +395,14 @@ static int me_pagecache_clean(struct hwp
* has a reference, because it could be file system metadata
* and that's not safe to truncate.
*/
- if (!S_ISREG(mapping->host->i_mode) &&
- !invalidate_complete_page(mapping, p)) {
- printk(KERN_ERR
- "MCE %#lx: failed to invalidate metadata page\n",
- hpc->pfn);
- return FAILED;
+ if (!S_ISREG(mapping->host->i_mode)) {
+ hpc->page_type = PAGE_IS_FS_METADATA;
+ if (!invalidate_complete_page(mapping, p)) {
+ printk(KERN_ERR
+ "MCE %#lx: failed to invalidate metadata page\n",
+ hpc->pfn);
+ return FAILED;
+ }
}

truncate_inode_page(mapping, p);
@@ -382,6 +411,8 @@ static int me_pagecache_clean(struct hwp
hpc->pfn);
return FAILED;
}
+
+ hpc->page_isolated = 1;
return RECOVERED;
}

@@ -467,6 +498,7 @@ static int me_swapcache_dirty(struct hwp
if (!isolate_lru_page(p))
page_cache_release(p);

+ hpc->page_isolated = 1;
return DELAYED;
}

@@ -478,6 +510,8 @@ static int me_swapcache_clean(struct hwp
page_cache_release(p);

delete_from_swap_cache(p);
+ hpc->data_recoverable = 1;
+ hpc->page_isolated = 1;

return RECOVERED;
}
@@ -587,6 +621,10 @@ static void page_action(struct page_stat
"MCE %#lx: %s page still referenced by %d users\n",
hpc->pfn, ps->msg, page_count(hpc->page) - 1);

+ if (page_count(hpc->page) > 1 ||
+ page_mapcount(hpc->page) > 0)
+ hpc->page_isolated = 0;
+
/* Could do more checks here if page looks ok */
atomic_long_add(1, &mce_bad_pages);

@@ -735,6 +773,10 @@ void memory_failure(unsigned long pfn, i
hpc.p = p;
hpc.page = p = compound_head(p);

+ hpc.page_type = PAGE_IS_KERNEL;
+ hpc.data_recoverable = 0;
+ hpc.page_isolated = 0;
+
/*
* We need/can do nothing about count=0 pages.
* 1) it's a free page, and therefore in safe hand:
@@ -747,9 +789,12 @@ void memory_failure(unsigned long pfn, i
* that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
*/
if (!get_page_unless_zero(p)) {
- if (is_free_buddy_page(p))
+ if (is_free_buddy_page(p)) {
+ hpc.page_type = PAGE_IS_FREE;
+ hpc.data_recoverable = 1;
+ hpc.page_isolated = 1;
action_result(&hpc, "free buddy", DELAYED);
- else
+ } else
action_result(&hpc, "high order kernel", IGNORED);
return;
}
@@ -770,9 +815,18 @@ void memory_failure(unsigned long pfn, i
/*
* Torn down by someone else?
*/
- if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
- action_result(&hpc, "already truncated LRU", IGNORED);
- goto out;
+ if (PageLRU(p)) {
+ if (PageSwapCache(p))
+ hpc.page_type = PAGE_IS_SWAP_CACHE;
+ else if (PageAnon(p))
+ hpc.page_type = PAGE_IS_ANON_DATA;
+ else
+ hpc.page_type = PAGE_IS_FILE_DATA;
+ if (!PageSwapCache(p) && p->mapping == NULL) {
+ action_result(&hpc, "already truncated LRU", IGNORED);
+ hpc.page_type = PAGE_IS_FREE;
+ goto out;
+ }
}

for (ps = error_states;; ps++) {
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:26 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

Add VM_FAULT_HWPOISON handling to the x86 page fault handler. This is
very similar to VM_FAULT_OOM, the only difference is that a different
si_code is passed to user space and the new addr_lsb field is initialized.

v2: Make the printk more verbose/unique

Signed-off-by: Andi Kleen <***@linux.intel.com>

---
arch/x86/mm/fault.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)

--- sound-2.6.orig/arch/x86/mm/fault.c
+++ sound-2.6/arch/x86/mm/fault.c
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int s
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void __user *)address;
+ info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;

force_sig_info(si_signo, &info, tsk);
}
@@ -798,10 +799,12 @@ out_of_memory(struct pt_regs *regs, unsi
}

static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+ unsigned int fault)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ int code = BUS_ADRERR;

up_read(&mm->mmap_sem);

@@ -817,7 +820,15 @@ do_sigbus(struct pt_regs *regs, unsigned
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;

- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+ if (fault & VM_FAULT_HWPOISON) {
+ printk(KERN_ERR
+ "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+ tsk->comm, tsk->pid, address);
+ code = BUS_MCEERR_AR;
+ }
+#endif
+ force_sig_info_fault(SIGBUS, code, address, tsk);
}

static noinline void
@@ -827,8 +838,8 @@ mm_fault_error(struct pt_regs *regs, uns
if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
} else {
- if (fault & VM_FAULT_SIGBUS)
- do_sigbus(regs, error_code, address);
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+ do_sigbus(regs, error_code, address, fault);
else
BUG();
}

--
Wu Fengguang
2009-06-15 02:45:24 UTC
Permalink
Add new SIGBUS codes for reporting machine checks as signals. When
the hardware detects an uncorrected ECC error it can trigger these
signals.

This is needed for telling KVM's qemu about machine checks that happen to
guests, so that it can inject them, but it might also be useful for other programs.
I find it useful in my test programs.

This patch merely defines the new types.

- Define two new si_codes for SIGBUS. BUS_MCEERR_AO and BUS_MCEERR_AR
* BUS_MCEERR_AO is for "Action Optional" machine checks, which means that some
corruption has been detected in the background, but nothing has been consumed
so far. The program can ignore those if it wants (but most programs would
already get killed)
* BUS_MCEERR_AR is for "Action Required" machine checks. This happens
when corrupted data is consumed or the application ran into an area
that was already known to be corrupted. These require immediate
action, and the interrupted code cannot just be returned to. Most programs would kill themselves.
- They report the address of the corruption in the user address space
in si_addr.
- Define a new si_addr_lsb field that reports the extent of the corruption
to user space. That's currently always a (small) page. The user application
cannot tell where in this page the corruption happened.

AK: I plan to write a man page update before anyone asks.

Signed-off-by: Andi Kleen <***@linux.intel.com>

---
include/asm-generic/siginfo.h | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)

--- sound-2.6.orig/include/asm-generic/siginfo.h
+++ sound-2.6/include/asm-generic/siginfo.h
@@ -82,6 +82,7 @@ typedef struct siginfo {
#ifdef __ARCH_SI_TRAPNO
int _trapno; /* TRAP # which caused the signal */
#endif
+ short _addr_lsb; /* LSB of the reported address */
} _sigfault;

/* SIGPOLL */
@@ -112,6 +113,7 @@ typedef struct siginfo {
#ifdef __ARCH_SI_TRAPNO
#define si_trapno _sifields._sigfault._trapno
#endif
+#define si_addr_lsb _sifields._sigfault._addr_lsb
#define si_band _sifields._sigpoll._band
#define si_fd _sifields._sigpoll._fd

@@ -192,7 +194,11 @@ typedef struct siginfo {
#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */
#define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */
#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */
-#define NSIGBUS 3
+/* hardware memory error consumed on a machine check: action required */
+#define BUS_MCEERR_AR (__SI_FAULT|4)
+/* hardware memory error detected in process but not consumed: action optional*/
+#define BUS_MCEERR_AO (__SI_FAULT|5)
+#define NSIGBUS 5

/*
* SIGTRAP si_codes
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:36 UTC
Permalink
We'll be exporting them in other places than /proc/kpageflags.
For example, in hwpoison uevents for describing the poisoned page.

Signed-off-by: Wu Fengguang <***@intel.com>
---
fs/proc/page.c | 40 +--------------------------------
include/linux/page-flags.h | 42 +++++++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+), 38 deletions(-)

--- sound-2.6.orig/fs/proc/page.c
+++ sound-2.6/fs/proc/page.c
@@ -72,48 +72,12 @@ static const struct file_operations proc

/* These macros are used to decouple internal flags from exported ones */

-#define KPF_LOCKED 0
-#define KPF_ERROR 1
-#define KPF_REFERENCED 2
-#define KPF_UPTODATE 3
-#define KPF_DIRTY 4
-#define KPF_LRU 5
-#define KPF_ACTIVE 6
-#define KPF_SLAB 7
-#define KPF_WRITEBACK 8
-#define KPF_RECLAIM 9
-#define KPF_BUDDY 10
-
-/* 11-20: new additions in 2.6.31 */
-#define KPF_MMAP 11
-#define KPF_ANON 12
-#define KPF_SWAPCACHE 13
-#define KPF_SWAPBACKED 14
-#define KPF_COMPOUND_HEAD 15
-#define KPF_COMPOUND_TAIL 16
-#define KPF_HUGE 17
-#define KPF_UNEVICTABLE 18
-#define KPF_HWPOISON 19
-#define KPF_NOPAGE 20
-
-/* kernel hacking assistances
- * WARNING: subject to change, never rely on them!
- */
-#define KPF_RESERVED 32
-#define KPF_MLOCKED 33
-#define KPF_MAPPEDTODISK 34
-#define KPF_PRIVATE 35
-#define KPF_PRIVATE_2 36
-#define KPF_OWNER_PRIVATE 37
-#define KPF_ARCH 38
-#define KPF_UNCACHED 39
-
static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
return ((kflags >> kbit) & 1) << ubit;
}

-static u64 get_uflags(struct page *page)
+u64 page_uflags(struct page *page)
{
u64 k;
u64 u;
@@ -214,7 +178,7 @@ static ssize_t kpageflags_read(struct fi
else
ppage = NULL;

- if (put_user(get_uflags(ppage), out)) {
+ if (put_user(page_uflags(ppage), out)) {
ret = -EFAULT;
break;
}
--- sound-2.6.orig/include/linux/page-flags.h
+++ sound-2.6/include/linux/page-flags.h
@@ -132,6 +132,46 @@ enum pageflags {
PG_slub_debug = PG_error,
};

+/*
+ * stable flag numbers exported to user space
+ */
+
+#define KPF_LOCKED 0
+#define KPF_ERROR 1
+#define KPF_REFERENCED 2
+#define KPF_UPTODATE 3
+#define KPF_DIRTY 4
+#define KPF_LRU 5
+#define KPF_ACTIVE 6
+#define KPF_SLAB 7
+#define KPF_WRITEBACK 8
+#define KPF_RECLAIM 9
+#define KPF_BUDDY 10
+
+/* 11-20: new additions in 2.6.31 */
+#define KPF_MMAP 11
+#define KPF_ANON 12
+#define KPF_SWAPCACHE 13
+#define KPF_SWAPBACKED 14
+#define KPF_COMPOUND_HEAD 15
+#define KPF_COMPOUND_TAIL 16
+#define KPF_HUGE 17
+#define KPF_UNEVICTABLE 18
+#define KPF_HWPOISON 19
+#define KPF_NOPAGE 20
+
+/* kernel hacking assistances
+ * WARNING: subject to change, never rely on them!
+ */
+#define KPF_RESERVED 32
+#define KPF_MLOCKED 33
+#define KPF_MAPPEDTODISK 34
+#define KPF_PRIVATE 35
+#define KPF_PRIVATE_2 36
+#define KPF_OWNER_PRIVATE 37
+#define KPF_ARCH 38
+#define KPF_UNCACHED 39
+
#ifndef __GENERATING_BOUNDS_H

/*
@@ -284,6 +324,8 @@ TESTSETFLAG_FALSE(HWPoison)
#define __PG_HWPOISON 0
#endif

+u64 page_uflags(struct page *page);
+
static inline int PageUptodate(struct page *page)
{
int ret = test_bit(PG_uptodate, &(page)->flags);
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:33 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

Impact: optional, useful for debugging

Add a new madvise subcommand to inject poison for some
pages in a process' address space. This is useful for
testing the poison page handling.

Open issues:

- This patch allows root to tie up arbitrary amounts of memory.
Should this be disabled inside containers?
- There's a small race window between getting the page and injecting.
The patch drops the ref count because otherwise memory_failure
complains about dangling references. In theory, a multi-threaded
injector could inject poison into a page belonging to another process this way.
Not a serious issue right now.

v2: Use write flag for get_user_pages to make sure to always get
a fresh page
v3: Don't request write mapping (Fengguang Wu)

Reviewed-by: Wu Fengguang <***@intel.com>
Signed-off-by: Andi Kleen <***@linux.intel.com>

---
include/asm-generic/mman-common.h | 1
mm/madvise.c | 36 ++++++++++++++++++++++++++++
2 files changed, 37 insertions(+)

--- sound-2.6.orig/mm/madvise.c
+++ sound-2.6/mm/madvise.c
@@ -207,6 +207,38 @@ static long madvise_remove(struct vm_are
return error;
}

+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Error injection support for memory error handling.
+ */
+static int madvise_hwpoison(unsigned long start, unsigned long end)
+{
+ /*
+ * RED-PEN
+ * This allows to tie up arbitary amounts of memory.
+ * Might be a good idea to disable it inside containers even for root.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ for (; start < end; start += PAGE_SIZE) {
+ struct page *p;
+ int ret = get_user_pages(current, current->mm, start, 1,
+ 0, 0, &p, NULL);
+ if (ret != 1)
+ return ret;
+ put_page(p);
+ /*
+ * RED-PEN page can be reused in a short window, but otherwise
+ * we'll have to fight with the reference count.
+ */
+ printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
+ page_to_pfn(p), start);
+ memory_failure(page_to_pfn(p), 0);
+ }
+ return 0;
+}
+#endif
+
static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior)
@@ -307,6 +339,10 @@ SYSCALL_DEFINE3(madvise, unsigned long,
int write;
size_t len;

+#ifdef CONFIG_MEMORY_FAILURE
+ if (behavior == MADV_HWPOISON)
+ return madvise_hwpoison(start, start+len_in);
+#endif
if (!madvise_behavior_valid(behavior))
return error;

--- sound-2.6.orig/include/asm-generic/mman-common.h
+++ sound-2.6/include/asm-generic/mman-common.h
@@ -34,6 +34,7 @@
#define MADV_REMOVE 9 /* remove these pages & resources */
#define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */
+#define MADV_HWPOISON 12 /* poison a page for testing */

/* compatibility flags */
#define MAP_FILE 0
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:35 UTC
Permalink
- check for page_mapped_in_vma() on anon pages
- test and use page->mapping instead of page_mapping()
- cleanup some comments

If no objections, this patch will be folded into the big high-level patch.

Signed-off-by: Wu Fengguang <***@intel.com>
---
include/linux/rmap.h | 1 +
mm/memory-failure.c | 20 +++++++++++---------
mm/rmap.c | 2 +-
3 files changed, 13 insertions(+), 10 deletions(-)

--- sound-2.6.orig/mm/memory-failure.c
+++ sound-2.6/mm/memory-failure.c
@@ -122,8 +122,6 @@ struct to_kill {

/*
* Schedule a process for later kill.
- * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
- * TBD would GFP_NOIO be enough?
*/
static void add_to_kill(struct task_struct *tsk, struct page *p,
struct vm_area_struct *vma,
@@ -227,6 +225,9 @@ static void collect_procs_anon(struct pa
if (!tsk->mm)
continue;
list_for_each_entry (vma, &av->head, anon_vma_node) {
+ if (!page_mapped_in_vma(page, vma))
+ continue;
+
if (vma->vm_mm == tsk->mm)
add_to_kill(tsk, page, vma, to_kill, tkc);
}
@@ -245,7 +246,7 @@ static void collect_procs_file(struct pa
struct vm_area_struct *vma;
struct task_struct *tsk;
struct prio_tree_iter iter;
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = page->mapping;

/*
* A note on the locking order between the two locks.
@@ -275,16 +276,17 @@ static void collect_procs_file(struct pa

/*
* Collect the processes who have the corrupted page mapped to kill.
- * This is done in two steps for locking reasons.
- * First preallocate one tokill structure outside the spin locks,
- * so that we can kill at least one process reasonably reliable.
*/
static void collect_procs(struct page *page, struct list_head *tokill)
{
struct to_kill *tk;

- tk = kmalloc(sizeof(struct to_kill), GFP_KERNEL);
- /* memory allocation failure is implicitly handled */
+ /*
+ * First preallocate one to_kill structure outside the spin locks,
+ * so that we can kill at least one process reasonably reliable.
+ */
+ tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
+
if (PageAnon(page))
collect_procs_anon(page, tokill, &tk);
else
@@ -657,7 +659,7 @@ static void hwpoison_user_mappings(struc
* Error handling: We ignore errors here because
* there's nothing that can be done.
*/
- if (kill)
+ if (kill && p->mapping)
collect_procs(p, &tokill);

/*
--- sound-2.6.orig/include/linux/rmap.h
+++ sound-2.6/include/linux/rmap.h
@@ -134,6 +134,7 @@ int page_wrprotect(struct page *page, in
*/
struct anon_vma *page_lock_anon_vma(struct page *page);
void page_unlock_anon_vma(struct anon_vma *anon_vma);
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);

#else /* !CONFIG_MMU */

--- sound-2.6.orig/mm/rmap.c
+++ sound-2.6/mm/rmap.c
@@ -315,7 +315,7 @@ pte_t *page_check_address(struct page *p
* if the page is not mapped into the page tables of this VMA. Only
* valid for normal file or anonymous VMAs.
*/
-static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
unsigned long address;
pte_t *pte;
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 02:45:41 UTC
Permalink
This allows user space to implement flexible policies.
For example, it may either do an emergency sync/shutdown
or schedule a reboot at some convenient time, depending
on the severity of the corruption.

Signed-off-by: Wu Fengguang <***@intel.com>
---
Documentation/vm/memory-failure | 68 ++++++++++++++++++
mm/memory-failure.c | 110 +++++++++++++++++++++++++++++-
2 files changed, 175 insertions(+), 3 deletions(-)

--- /dev/null
+++ sound-2.6/Documentation/vm/memory-failure
@@ -0,0 +1,68 @@
+Memory failure and hardware poison events
+
+Memory may have soft errors and the more memory you have the more errors.
+Normally hardware hides that from you by correcting it, but in some cases you
+can get multi-bit errors which lead to uncorrected errors the hardware cannot
+hide.
+
+This does not necessarily mean that the hardware is broken; for example it can
+be caused by cosmic particles hitting an unlucky transistor. So it can really
+happen in normal operation.
+
+Some hardware (e.g. Nehalem-EX) supports background memory scrubbing in order to
+report memory corruption before it is consumed. The kernel will then try
+to isolate the corrupted memory page, restore data, and finally send a uevent
+to user space.
+
+A memory poison uevent looks like this:
+
+ # udevadm monitor --environment --kernel
+ KERNEL[1245030313.702625] change /kernel/mm/hwpoison/hwpoison (hwpoison)
+ UDEV_LOG=3
+ ACTION=change
+ DEVPATH=/kernel/mm/hwpoison/hwpoison
+ SUBSYSTEM=hwpoison
+ EVENT=poison
+ PHYS_ADDR=0x19e1c000
+ PAGE_FLAGS=0x80008083c
+ PAGE_COUNT=3
+ PAGE_MAPCOUNT=1
+ PAGE_DEV=8:2
+ PAGE_INODE=56169
+ PAGE_INDEX=9
+ PAGE_TYPE=file_data
+ PAGE_ISOLATED=1
+ DATA_RECOVERABLE=0
+ SEQNUM=2109
+
+where
+
+ PHYS_ADDR the physical page address
+ PAGE_FLAGS the kpageflags bits defined at Documentation/vm/pagemap.txt
+ PAGE_COUNT the original page reference count
+ PAGE_MAPCOUNT the original page map count
+
+ PAGE_TYPE where the error lands, can be one of
+ "kernel" - a kernel page that may contain some critical data structure
+ "fs_metadata" - a filesystem metadata page
+ "file_data" - a file data page
+ "anon_data" - a page belonging to some process(es)
+ "swap_cache" - it's in the swap cache; the kernel cannot tell if it was an
+ anon_data page or a tmpfs' file_data page
+ "free" - a free page; not used by anyone
+
+For "file_data" pages, the following three vars are available:
+
+ PAGE_DEV the file's MAJOR:MINOR device numbers in decimal
+ PAGE_INODE the file's inode number in decimal
+ PAGE_INDEX the file offset in units of the page size
+
+ PAGE_ISOLATED if 1, we are sure that the page won't be consumed in the future.
+ if 0, the error page is still referenced by someone, and may be
+ consumed at any time, which will be detected/stopped by hardware,
+ and trigger an instant machine reboot.
+
+ DATA_RECOVERABLE if 1, no data are lost. For example, it's a free page, or a
+ clean page whose data can be reloaded from disk. In these
+ cases, the user space will not see the error at all.
+
--- sound-2.6.orig/mm/memory-failure.c
+++ sound-2.6/mm/memory-failure.c
@@ -330,7 +330,11 @@ static const char *hwpoison_page_type_na
[ PAGE_IS_FREE ] = "free",
};

+static struct kset *hwpoison_kset;
+static struct kobject hwpoison_kobj;
+
struct hwpoison_control {
+ struct kobj_uevent_env *env;
unsigned long pfn;
struct page *p; /* corrupted page */
struct page *page; /* compound page head */
@@ -340,6 +344,51 @@ struct hwpoison_control {
unsigned page_isolated:1;
};

+static void hwpoison_uevent_page(struct hwpoison_control *hpc)
+{
+ struct page *p = hpc->page;
+
+ if (hpc->env == NULL)
+ return;
+
+ add_uevent_var(hpc->env, "EVENT=poison");
+ add_uevent_var(hpc->env, "PHYS_ADDR=%#lx", hpc->pfn << PAGE_SHIFT);
+ add_uevent_var(hpc->env, "PAGE_FLAGS=%#Lx", page_uflags(p));
+ add_uevent_var(hpc->env, "PAGE_COUNT=%d", page_count(p));
+ add_uevent_var(hpc->env, "PAGE_MAPCOUNT=%d", page_mapcount(p));
+}
+
+static void hwpoison_uevent_file(struct hwpoison_control *hpc)
+{
+ struct address_space *mapping = page_mapping(hpc->page);
+
+ if (hpc->env == NULL)
+ return;
+
+ if (!mapping || !mapping->host)
+ return;
+
+ add_uevent_var(hpc->env, "PAGE_DEV=%d:%d",
+ MAJOR(mapping->host->i_sb->s_dev),
+ MINOR(mapping->host->i_sb->s_dev));
+ add_uevent_var(hpc->env, "PAGE_INODE=%lu", mapping->host->i_ino);
+ add_uevent_var(hpc->env, "PAGE_INDEX=%lu", hpc->page->index);
+}
+
+static void hwpoison_uevent_send(struct hwpoison_control *hpc)
+{
+ if (hpc->env == NULL)
+ return;
+
+ add_uevent_var(hpc->env, "PAGE_TYPE=%s",
+ hwpoison_page_type_name[hpc->page_type]);
+ add_uevent_var(hpc->env, "PAGE_ISOLATED=%d",
+ hpc->page_isolated);
+ add_uevent_var(hpc->env, "DATA_RECOVERABLE=%d",
+ hpc->data_recoverable);
+ kobject_uevent_env(&hwpoison_kobj, KOBJ_CHANGE, hpc->env->envp);
+}
+
/*
* Error hit kernel page.
* Do nothing, try to be lucky and not touch this instead. For a few cases we
@@ -769,10 +818,19 @@ void memory_failure(unsigned long pfn, i
return;
}

+ hpc.env = kzalloc(sizeof(struct kobj_uevent_env), GFP_NOIO);
+ if (!hpc.env) {
+ printk(KERN_ERR
+ "MCE %#lx: cannot allocate memory for uevent\n",
+ pfn);
+ }
+
hpc.pfn = pfn;
hpc.p = p;
hpc.page = p = compound_head(p);

+ hwpoison_uevent_page(&hpc);
+
hpc.page_type = PAGE_IS_KERNEL;
hpc.data_recoverable = 0;
hpc.page_isolated = 0;
@@ -796,7 +854,7 @@ void memory_failure(unsigned long pfn, i
action_result(&hpc, "free buddy", DELAYED);
} else
action_result(&hpc, "high order kernel", IGNORED);
- return;
+ goto out;
}

/*
@@ -825,16 +883,62 @@ void memory_failure(unsigned long pfn, i
if (!PageSwapCache(p) && p->mapping == NULL) {
action_result(&hpc, "already truncated LRU", IGNORED);
hpc.page_type = PAGE_IS_FREE;
- goto out;
+ goto out_unlock;
}
}

+ hwpoison_uevent_file(&hpc);
+
for (ps = error_states;; ps++) {
if ((p->flags & ps->mask) == ps->res) {
page_action(ps, &hpc);
break;
}
}
-out:
+out_unlock:
unlock_page(p);
+out:
+ hwpoison_uevent_send(&hpc);
+}
+
+static void hwpoison_release(struct kobject *kobj)
+{
+}
+
+static struct kobj_type hwpoison_ktype = {
+ .release = hwpoison_release,
+};
+
+static int hwpoison_kobj_init(void)
+{
+ int err;
+
+ hwpoison_kset = kset_create_and_add("hwpoison", NULL, mm_kobj);
+ if (!hwpoison_kset)
+ return -ENOMEM;
+
+ hwpoison_kobj.kset = hwpoison_kset;
+
+ err = kobject_init_and_add(&hwpoison_kobj, &hwpoison_ktype, NULL,
+ "hwpoison");
+ if (err)
+ return -ENOMEM;
+
+ kobject_uevent(&hwpoison_kobj, KOBJ_ADD);
+
+ return 0;
}
+
+
+static int __init hwpoison_init(void)
+{
+ return hwpoison_kobj_init();
+}
+
+static void __exit hwpoison_exit(void)
+{
+ kset_unregister(hwpoison_kset);
+}
+
+module_init(hwpoison_init);
+module_exit(hwpoison_exit);
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Andi Kleen
2009-06-15 06:29:34 UTC
Permalink
Post by Wu Fengguang
This allows the user space to do some flexible policies.
For example, it may either do emergency sync/shutdown
or to schedule reboot at some convenient time, depending
on the severeness of the corruption.
I don't think it's a good idea to export that much detailed information.
That would become a stable ABI, but it might not be possible to keep
all these details stable. E.g. map count or reference count are
internal implementation details that shouldn't be exposed.
And what is a user space application to do with the inode? Run
find -inum?

Also we already report the event using a low level logging mechanism,
in a relatively stable form.

It's also unclear to me what an application would do with that much
detail.

I would suggest to drop this part and the earlier flags move.

Please only bug fixes at this stage.

-Andi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to ***@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"***@kvack.org"> ***@kvack.org </a>
Wu Fengguang
2009-06-15 09:56:39 UTC
Permalink