Discussion:
[PATCH] fuse: make fuse daemon frozen along with kernel threads
Li Fei
2013-02-06 01:11:52 UTC
Permalink
There is well known issue that freezing will fail in case that fuse
daemon is frozen firstly with some requests not handled, as the fuse
usage task is waiting for the response from fuse daemon and can't be
frozen.

To solve the issue above, make fuse daemon frozen after all all user
space processes frozen and during the kernel threads frozen phase.
PF_FREEZE_DAEMON flag is added to indicate that the current thread is
the fuse daemon, set on connection, and clear on disconnection.
It works as all fuse requests are handled during user space processes
frozen, there will no further fuse request, and it's safe to continue
to freeze fuse daemon together with kernel freezable tasks.

Signed-off-by: Wang Biao <***@intel.com>
Signed-off-by: Liu Chuansheng <***@intel.com>
Signed-off-by: Li Fei <***@intel.com>
---
fs/fuse/inode.c | 9 +++++++++
include/linux/freezer.h | 4 ++++
include/linux/sched.h | 2 ++
kernel/freezer.c | 33 ++++++++++++++++++++++++++++++++-
kernel/power/process.c | 2 +-
5 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73ca6b7..fe476e8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -20,6 +20,7 @@
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
+#include <linux/freezer.h>

MODULE_AUTHOR("Miklos Szeredi <***@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -367,6 +368,10 @@ void fuse_conn_kill(struct fuse_conn *fc)
wake_up_all(&fc->waitq);
wake_up_all(&fc->blocked_waitq);
wake_up_all(&fc->reserved_req_waitq);
+
+ /* Clear daemon flag after disconnection */
+ clear_freeze_daemon_flag();
+
}
EXPORT_SYMBOL_GPL(fuse_conn_kill);

@@ -1054,6 +1059,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err_unlock;

list_add_tail(&fc->entry, &fuse_conn_list);
+
+ /* Set daemon flag just before connection */
+ set_freeze_daemon_flag();
+
sb->s_root = root_dentry;
fc->connected = 1;
file->private_data = fuse_conn_get(fc);
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index e4238ce..9f0d0cf 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -51,6 +51,8 @@ static inline bool try_to_freeze(void)

extern bool freeze_task(struct task_struct *p);
extern bool set_freezable(void);
+extern bool set_freeze_daemon_flag(void);
+extern bool clear_freeze_daemon_flag(void);

#ifdef CONFIG_CGROUP_FREEZER
extern bool cgroup_freezing(struct task_struct *task);
@@ -217,6 +219,8 @@ static inline void freezer_do_not_count(void) {}
static inline void freezer_count(void) {}
static inline int freezer_should_skip(struct task_struct *p) { return 0; }
static inline void set_freezable(void) {}
+static inline void set_freeze_daemon_flag(void) {}
+static inline void clear_freeze_daemon_flag(void) {}

#define freezable_schedule() schedule()

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d211247..6cdc969 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1826,6 +1826,8 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
+/* Daemon threads to be frozen along with kernel threads */
+#define PF_FREEZER_DAEMON 0x80000000

/*
* Only the _current_ task can read/write to tsk->flags, but other
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c38893b..eff9440 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -39,7 +39,8 @@ bool freezing_slow_path(struct task_struct *p)
if (pm_nosig_freezing || cgroup_freezing(p))
return true;

- if (pm_freezing && !(p->flags & PF_KTHREAD))
+ if (pm_freezing && !(p->flags & PF_KTHREAD) &&
+ !(p->flags & PF_FREEZE_DAEMON))
return true;

return false;
@@ -162,3 +163,33 @@ bool set_freezable(void)
return try_to_freeze();
}
EXPORT_SYMBOL(set_freezable);
+
+/**
+ * set_freeze_daemon_flag - make %current as daemon
+ *
+ * Make %current being frozen by freezer along with kernel threads
+ */
+bool set_freeze_daemon_flag(void)
+{
+ spin_lock_irq(&freezer_lock);
+ current->flags |= PF_FREEZE_DAEMON;
+ spin_unlock_irq(&freezer_lock);
+
+ return try_to_freeze();
+}
+EXPORT_SYMBOL(set_freeze_daemon_flag);
+
+/**
+ * clear_freeze_daemon_flag - make %current as usual user space process
+ *
+ * Make %current being frozen by freezer along with user space processes
+ */
+bool clear_freeze_daemon_flag(void)
+{
+ spin_lock_irq(&freezer_lock);
+ current->flags &= ~PF_FREEZE_DAEMON;
+ spin_unlock_irq(&freezer_lock);
+
+ return try_to_freeze();
+}
+EXPORT_SYMBOL(clear_freeze_daemon_flag);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d5a258b..a4489ae 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -199,7 +199,7 @@ void thaw_kernel_threads(void)

read_lock(&tasklist_lock);
do_each_thread(g, p) {
- if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
+ if (p->flags & (PF_KTHREAD | PF_WQ_WORKER | PF_FREEZE_DAEMON))
__thaw_task(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
--
1.7.4.1




--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Miklos Szeredi
2013-02-06 09:27:40 UTC
Permalink
Post by Li Fei
There is well known issue that freezing will fail in case that fuse
daemon is frozen firstly with some requests not handled, as the fuse
usage task is waiting for the response from fuse daemon and can't be
frozen.
To solve the issue above, make fuse daemon frozen after all all user
space processes frozen and during the kernel threads frozen phase.
PF_FREEZE_DAEMON flag is added to indicate that the current thread is
the fuse daemon,
Okay and how do you know which thread, process or processes belong to
the "fuse daemon"?

The only entity which may have a chance of defining the correct answer
to the above question is the fuse daemon itself.

What I mean is that automatically setting PF_FREEZE_DAEMON by the
kernel is the wrong approach. On the other hand an ioctl to set this
flag on a defined task or task group (*) would actually make some
sense. Add some, possibly optional, inheritence behavior (i.e. a
cloned process would inherit the PF_FREEZE_DAMON flag) and it might
actually take care of most of the cases.

But I think it definitely needs more thought and testing. Your patch
looks like it won't work on most of the unprivileged fuse filesystems
which use the fusermount(1) helper to perform the actual mounting.

Thanks,
Miklos

(*) setting this flag for other than the current process should, I
think, be a privileged operation, otherwise it might allow misuse.
Post by Li Fei
set on connection, and clear on disconnection.
It works as all fuse requests are handled during user space processes
frozen, there will no further fuse request, and it's safe to continue
to freeze fuse daemon together with kernel freezable tasks.
---
fs/fuse/inode.c | 9 +++++++++
include/linux/freezer.h | 4 ++++
include/linux/sched.h | 2 ++
kernel/freezer.c | 33 ++++++++++++++++++++++++++++++++-
kernel/power/process.c | 2 +-
5 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73ca6b7..fe476e8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -20,6 +20,7 @@
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
+#include <linux/freezer.h>
MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -367,6 +368,10 @@ void fuse_conn_kill(struct fuse_conn *fc)
wake_up_all(&fc->waitq);
wake_up_all(&fc->blocked_waitq);
wake_up_all(&fc->reserved_req_waitq);
+
+ /* Clear daemon flag after disconnection */
+ clear_freeze_daemon_flag();
+
}
EXPORT_SYMBOL_GPL(fuse_conn_kill);
@@ -1054,6 +1059,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err_unlock;
list_add_tail(&fc->entry, &fuse_conn_list);
+
+ /* Set daemon flag just before connection */
+ set_freeze_daemon_flag();
+
sb->s_root = root_dentry;
fc->connected = 1;
file->private_data = fuse_conn_get(fc);
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index e4238ce..9f0d0cf 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -51,6 +51,8 @@ static inline bool try_to_freeze(void)
extern bool freeze_task(struct task_struct *p);
extern bool set_freezable(void);
+extern bool set_freeze_daemon_flag(void);
+extern bool clear_freeze_daemon_flag(void);
#ifdef CONFIG_CGROUP_FREEZER
extern bool cgroup_freezing(struct task_struct *task);
@@ -217,6 +219,8 @@ static inline void freezer_do_not_count(void) {}
static inline void freezer_count(void) {}
static inline int freezer_should_skip(struct task_struct *p) { return 0; }
static inline void set_freezable(void) {}
+static inline void set_freeze_daemon_flag(void) {}
+static inline void clear_freeze_daemon_flag(void) {}
#define freezable_schedule() schedule()
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d211247..6cdc969 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1826,6 +1826,8 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
+/* Daemon threads to be frozen along with kernel threads */
+#define PF_FREEZER_DAEMON 0x80000000
/*
* Only the _current_ task can read/write to tsk->flags, but other
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c38893b..eff9440 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -39,7 +39,8 @@ bool freezing_slow_path(struct task_struct *p)
if (pm_nosig_freezing || cgroup_freezing(p))
return true;
- if (pm_freezing && !(p->flags & PF_KTHREAD))
+ if (pm_freezing && !(p->flags & PF_KTHREAD) &&
+ !(p->flags & PF_FREEZE_DAEMON))
return true;
return false;
@@ -162,3 +163,33 @@ bool set_freezable(void)
return try_to_freeze();
}
EXPORT_SYMBOL(set_freezable);
+
+/**
+ * set_freeze_daemon_flag - make %current as daemon
+ *
+ * Make %current being frozen by freezer along with kernel threads
+ */
+bool set_freeze_daemon_flag(void)
+{
+ spin_lock_irq(&freezer_lock);
+ current->flags |= PF_FREEZE_DAEMON;
+ spin_unlock_irq(&freezer_lock);
+
+ return try_to_freeze();
+}
+EXPORT_SYMBOL(set_freeze_daemon_flag);
+
+/**
+ * clear_freeze_daemon_flag - make %current as usual user space process
+ *
+ * Make %current being frozen by freezer along with user space processes
+ */
+bool clear_freeze_daemon_flag(void)
+{
+ spin_lock_irq(&freezer_lock);
+ current->flags &= ~PF_FREEZE_DAEMON;
+ spin_unlock_irq(&freezer_lock);
+
+ return try_to_freeze();
+}
+EXPORT_SYMBOL(clear_freeze_daemon_flag);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d5a258b..a4489ae 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -199,7 +199,7 @@ void thaw_kernel_threads(void)
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
+ if (p->flags & (PF_KTHREAD | PF_WQ_WORKER | PF_FREEZE_DAEMON))
__thaw_task(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
--
1.7.4.1
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Miklos Szeredi
2013-02-07 09:59:19 UTC
Permalink
[CC list restored]
Post by Miklos Szeredi
Post by Li Fei
There is well known issue that freezing will fail in case that fuse
daemon is frozen firstly with some requests not handled, as the fuse
usage task is waiting for the response from fuse daemon and can't be
frozen.
To solve the issue above, make fuse daemon frozen after all all user
space processes frozen and during the kernel threads frozen phase.
PF_FREEZE_DAEMON flag is added to indicate that the current thread is
the fuse daemon,
Okay and how do you know which thread, process or processes belong to
the "fuse daemon"?
Maybe I'm talking about the wrong thing but isn't any process having
/dev/fuse open "the fuse daemon"? And that doesn't even cover cases
where one thread reads requests from the kernel and hands them to
worker threads (that do not have /dev/fuse themself). Or the fuse
request might need mysql to finish a request.
I believe figuring out what processes handle fuse requests is a lost
proposition.
Pretty much.
Secondly how does freezing the daemon second garanty that it has
replied to all pending requests? Or how is leaving it thawed the right
decision?
Instead the kernel side of fuse should be half frozen and stop sending
out new requests. Then it should wait for all pending requests to
complete. Then and only then can userspace processes be frozen safely.
The problem with that is one fuse filesystem might be calling into
another. Say two fuse filesystems are mounted at /mnt/A and /mnt/B,
Process X starts a read request on /mnt/A. This is handled by process
A, which in turn starts a read request on /mnt/B, which is handled by
B. If we freeze the system at the moment when A starts handling the
request but hasn't yet sent the request to B then things wil be stuck
and the freezing will fail.

So the order should be: Freeze the "topmost" fuse filesystems (A in
the example) first and if all pending requests are completed then
freeze the next ones in the hierarchy, etc... This would work if this
dependency between filesystems were known. But it's not and again it
looks like an impossible task.

The only way to *reliably* freeze fuse filesystems is to let it freeze
even if there are outstanding requests. But that's the hardest to
implement, because then it needs to allow freezing of tasks waiting on
i_mutex, for example, which is currently not possible. But this is
the only way I see that would not have unsolvable corner cases that
prop up at the worst moment.

And yes, it would be prudent to wait some time for pending requests
to finish before freezing. But it's not a requirement, things
*should* work without that: suspending a machine is just like a very
long pause by the CPU, as long as no hardware is involved. And with
fuse filesystems there is no hardware involved directly by definition.

But I'm open to ideas and at this stage I think even patches that
improve the situation for the majority of the cases would be
acceptable, since this is turning out to be a major problem for a lot
of people.

Thanks,
Miklos
Goswin von Brederlow
2013-02-08 14:11:23 UTC
Permalink
Post by Miklos Szeredi
[CC list restored]
Post by Miklos Szeredi
Post by Li Fei
There is well known issue that freezing will fail in case that fuse
daemon is frozen firstly with some requests not handled, as the fuse
usage task is waiting for the response from fuse daemon and can't be
frozen.
To solve the issue above, make fuse daemon frozen after all all user
space processes frozen and during the kernel threads frozen phase.
PF_FREEZE_DAEMON flag is added to indicate that the current thread is
the fuse daemon,
Okay and how do you know which thread, process or processes belong to
the "fuse daemon"?
Maybe I'm talking about the wrong thing but isn't any process having
/dev/fuse open "the fuse daemon"? And that doesn't even cover cases
where one thread reads requests from the kernel and hands them to
worker threads (that do not have /dev/fuse themself). Or the fuse
request might need mysql to finish a request.
I believe figuring out what processes handle fuse requests is a lost
proposition.
Pretty much.
Secondly how does freezing the daemon second garanty that it has
replied to all pending requests? Or how is leaving it thawed the right
decision?
Instead the kernel side of fuse should be half frozen and stop sending
out new requests. Then it should wait for all pending requests to
complete. Then and only then can userspace processes be frozen safely.
The problem with that is one fuse filesystem might be calling into
another. Say two fuse filesystems are mounted at /mnt/A and /mnt/B,
Process X starts a read request on /mnt/A. This is handled by process
A, which in turn starts a read request on /mnt/B, which is handled by
B. If we freeze the system at the moment when A starts handling the
request but hasn't yet sent the request to B then things wil be stuck
and the freezing will fail.
So the order should be: Freeze the "topmost" fuse filesystems (A in
the example) first and if all pending requests are completed then
freeze the next ones in the hierarchy, etc... This would work if this
dependency between filesystems were known. But it's not and again it
looks like an impossible task.
What is topmost? The kernel can't know that for sure.
Post by Miklos Szeredi
The only way to *reliably* freeze fuse filesystems is to let it freeze
even if there are outstanding requests. But that's the hardest to
implement, because then it needs to allow freezing of tasks waiting on
i_mutex, for example, which is currently not possible. But this is
the only way I see that would not have unsolvable corner cases that
prop up at the worst moment.
And yes, it would be prudent to wait some time for pending requests
to finish before freezing. But it's not a requirement, things
*should* work without that: suspending a machine is just like a very
long pause by the CPU, as long as no hardware is involved. And with
fuse filesystems there is no hardware involved directly by definition.
But I'm open to ideas and at this stage I think even patches that
improve the situation for the majority of the cases would be
acceptable, since this is turning out to be a major problem for a lot
of people.
Thanks,
Miklos
For shutdown in userspace there is the sendsigs.omit.d/ to avoid the
problem of halting/killing processes of the fuse filesystems (or other
services) prematurely. I guess something similar needs to be done for
freeze. The fuse filesystem has to tell the kernel what is up.

MfG
Goswin
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Pavel Machek
2013-02-09 17:49:10 UTC
Permalink
Hi!
Post by Goswin von Brederlow
Post by Miklos Szeredi
The only way to *reliably* freeze fuse filesystems is to let it freeze
even if there are outstanding requests. But that's the hardest to
implement, because then it needs to allow freezing of tasks waiting on
i_mutex, for example, which is currently not possible. But this is
the only way I see that would not have unsolvable corner cases that
prop up at the worst moment.
And yes, it would be prudent to wait some time for pending requests
to finish before freezing. But it's not a requirement, things
*should* work without that: suspending a machine is just like a very
long pause by the CPU, as long as no hardware is involved. And with
fuse filesystems there is no hardware involved directly by definition.
But I'm open to ideas and at this stage I think even patches that
improve the situation for the majority of the cases would be
acceptable, since this is turning out to be a major problem for a lot
of people.
For shutdown in userspace there is the sendsigs.omit.d/ to avoid the
problem of halting/killing processes of the fuse filesystems (or other
services) prematurely. I guess something similar needs to be done for
freeze. The fuse filesystem has to tell the kernel what is up.
Would it be feasible to create some kind of fuse-stop-script.sh, and
run it before suspend (from userspace)? It should be pretty similar to
sendsigs.omit.d/ mechanism AFAICT.

I'm sorry, freezer is not too suitable for fuse.

(BTW: for suspend, we may be able to improve it so that it is possible
to remove freezer from it. For hibernation, it would be very hard.)
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki
2013-02-09 20:31:10 UTC
Permalink
Hi,
Post by Pavel Machek
Hi!
Post by Goswin von Brederlow
Post by Miklos Szeredi
The only way to *reliably* freeze fuse filesystems is to let it freeze
even if there are outstanding requests. But that's the hardest to
implement, because then it needs to allow freezing of tasks waiting on
i_mutex, for example, which is currently not possible. But this is
the only way I see that would not have unsolvable corner cases that
prop up at the worst moment.
And yes, it would be prudent to wait some time for pending requests
to finish before freezing. But it's not a requirement, things
*should* work without that: suspending a machine is just like a very
long pause by the CPU, as long as no hardware is involved. And with
fuse filesystems there is no hardware involved directly by definition.
But I'm open to ideas and at this stage I think even patches that
improve the situation for the majority of the cases would be
acceptable, since this is turning out to be a major problem for a lot
of people.
For shutdown in userspace there is the sendsigs.omit.d/ to avoid the
problem of halting/killing processes of the fuse filesystems (or other
services) prematurely. I guess something similar needs to be done for
freeze. The fuse filesystem has to tell the kernel what is up.
Would it be feasible to create some kind of fuse-stop-script.sh, and
run it before suspend (from userspace)? It should be pretty similar to
sendsigs.omit.d/ mechanism AFAICT.
I'm sorry, freezer is not too suitable for fuse.
(BTW: for suspend, we may be able to improve it so that it is possible
to remove freezer from it. For hibernation, it would be very hard.)
Well, this is so incorrect that it's not even funny. :-(

The whole memory shrinking we do for hibernation is now done by allocating
memory, so the freezer is not necessary for *that* and there's *zero*
difference between suspend and hibernation with respect to why the freezer is
used.

And *the* reason why it's used is that intercepting all of the possible ways
user space could interfere with the suspend process without the freezer is
simply totally impractical. System calls (including ioctls and fctls),
sysfs accesses, /proc accesses, debugfs, to name a few, and mmap (I wonder how
you would handle *that* alone). And I'm sure there's more I didn't even
think about. Moreover, all of the drivers we have assume that user space will
be frozen before their suspend callbacks are run by now and changing that is
totally unrealistic.

So please don't tell people that something is doable while it practically
realisticly isn't.

Thanks,
Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Pavel Machek
2013-02-10 10:33:45 UTC
Permalink
Hi!
Post by Rafael J. Wysocki
Post by Pavel Machek
Post by Goswin von Brederlow
For shutdown in userspace there is the sendsigs.omit.d/ to avoid the
problem of halting/killing processes of the fuse filesystems (or other
services) prematurely. I guess something similar needs to be done for
freeze. The fuse filesystem has to tell the kernel what is up.
Would it be feasible to create some kind of fuse-stop-script.sh, and
run it before suspend (from userspace)? It should be pretty similar to
sendsigs.omit.d/ mechanism AFAICT.
I'm sorry, freezer is not too suitable for fuse.
(BTW: for suspend, we may be able to improve it so that it is possible
to remove freezer from it. For hibernation, it would be very hard.)
Well, this is so incorrect that it's not even funny. :-(
The whole memory shrinking we do for hibernation is now done by allocating
memory, so the freezer is not necessary for *that* and there's *zero*
difference between suspend and hibernation with respect to why the freezer is
used.
Funny. Freezer was put there so that hibernation image was safe to
write out. You need disk subsystems in workable state for hibernation.
Post by Rafael J. Wysocki
And *the* reason why it's used is that intercepting all of the possible ways
user space could interfere with the suspend process without the freezer is
simply totally impractical. System calls (including ioctls and fctls),
It is a lot of work, agreed. But runtime power management is already
moving in that direction. And I could imagine introducing "big suspend
lock" to be grabbed pretty much everywhere.

(And IIRC n900 pretty much suspends when idle.)
Post by Rafael J. Wysocki
sysfs accesses, /proc accesses, debugfs, to name a few, and mmap (I wonder how
you would handle *that* alone). And I'm sure there's more I didn't even
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Post by Rafael J. Wysocki
So please don't tell people that something is doable while it practically
realisticly isn't.
I can imagine doing that for suspend, and believe we are moving in
that direction with runtime suspend. For hibernation, we need to write
image the image to the disk (which is the important difference, not
memory shrinking), and "big suspend lock" would interfere with
that. I can not imagine reasonable solution for that.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki
2013-02-10 13:51:22 UTC
Permalink
Post by Pavel Machek
Hi!
Post by Rafael J. Wysocki
Post by Pavel Machek
Post by Goswin von Brederlow
For shutdown in userspace there is the sendsigs.omit.d/ to avoid the
problem of halting/killing processes of the fuse filesystems (or other
services) prematurely. I guess something similar needs to be done for
freeze. The fuse filesystem has to tell the kernel what is up.
Would it be feasible to create some kind of fuse-stop-script.sh, and
run it before suspend (from userspace)? It should be pretty similar to
sendsigs.omit.d/ mechanism AFAICT.
I'm sorry, freezer is not too suitable for fuse.
(BTW: for suspend, we may be able to improve it so that it is possible
to remove freezer from it. For hibernation, it would be very hard.)
Well, this is so incorrect that it's not even funny. :-(
The whole memory shrinking we do for hibernation is now done by allocating
memory, so the freezer is not necessary for *that* and there's *zero*
difference between suspend and hibernation with respect to why the freezer is
used.
Funny. Freezer was put there so that hibernation image was safe to
write out. You need disk subsystems in workable state for hibernation.
I'm not really sure what you're talking about. Why do you think the freezer is
necessary for that?
Post by Pavel Machek
Post by Rafael J. Wysocki
And *the* reason why it's used is that intercepting all of the possible ways
user space could interfere with the suspend process without the freezer is
simply totally impractical. System calls (including ioctls and fctls),
It is a lot of work, agreed. But runtime power management is already
moving in that direction.
Runtime PM is a different thing. It works on the "this device is not in use
right now, so it can be suspended" basis, while the system suspend's principle
is "I want everything to quiesce now and go low-power". So system suspend can
trigger while devices are still in use and there are good reasons for that to
happen sometimes.
Post by Pavel Machek
And I could imagine introducing "big suspend lock" to be grabbed pretty much
everywhere.
Sorry, I can't imagine *that*.
Post by Pavel Machek
(And IIRC n900 pretty much suspends when idle.)
Post by Rafael J. Wysocki
sysfs accesses, /proc accesses, debugfs, to name a few, and mmap (I wonder how
you would handle *that* alone). And I'm sure there's more I didn't even
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Suppose mmap is used to make the registers of some device available to user
space. Yes, that can happen.
Post by Pavel Machek
Post by Rafael J. Wysocki
So please don't tell people that something is doable while it practically
realisticly isn't.
I can imagine doing that for suspend, and believe we are moving in
that direction with runtime suspend. For hibernation, we need to write
image the image to the disk (which is the important difference, not
memory shrinking), and "big suspend lock" would interfere with
that. I can not imagine reasonable solution for that.
Again, I'm not sure what you're talking about. Once the image has been
created, it can be saved while user space is running just fine.

And that "big suspend lock" would be pretty much the same as the BKL that
people spent quite some time an effort to remove, so I don't think your idea
to reintroduce something similar just for suspend purposes would be popular.

And since the freezer is now used for other things beyond suspend and
hibernation, introducing such a lock wouldn't even address the Fuse problem.

Thanks,
Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki
2013-02-10 14:22:07 UTC
Permalink
Post by Rafael J. Wysocki
Post by Pavel Machek
Hi!
Post by Rafael J. Wysocki
Post by Pavel Machek
Post by Goswin von Brederlow
For shutdown in userspace there is the sendsigs.omit.d/ to avoid the
problem of halting/killing processes of the fuse filesystems (or other
services) prematurely. I guess something similar needs to be done for
freeze. The fuse filesystem has to tell the kernel what is up.
Would it be feasible to create some kind of fuse-stop-script.sh, and
run it before suspend (from userspace)? It should be pretty similar to
sendsigs.omit.d/ mechanism AFAICT.
I'm sorry, freezer is not too suitable for fuse.
(BTW: for suspend, we may be able to improve it so that it is possible
to remove freezer from it. For hibernation, it would be very hard.)
Well, this is so incorrect that it's not even funny. :-(
The whole memory shrinking we do for hibernation is now done by allocating
memory, so the freezer is not necessary for *that* and there's *zero*
difference between suspend and hibernation with respect to why the freezer is
used.
Funny. Freezer was put there so that hibernation image was safe to
write out. You need disk subsystems in workable state for hibernation.
I'm not really sure what you're talking about. Why do you think the freezer is
necessary for that?
Post by Pavel Machek
Post by Rafael J. Wysocki
And *the* reason why it's used is that intercepting all of the possible ways
user space could interfere with the suspend process without the freezer is
simply totally impractical. System calls (including ioctls and fctls),
It is a lot of work, agreed. But runtime power management is already
moving in that direction.
Runtime PM is a different thing. It works on the "this device is not in use
right now, so it can be suspended" basis, while the system suspend's principle
is "I want everything to quiesce now and go low-power". So system suspend can
trigger while devices are still in use and there are good reasons for that to
happen sometimes.
Post by Pavel Machek
And I could imagine introducing "big suspend lock" to be grabbed pretty much
everywhere.
Sorry, I can't imagine *that*.
Post by Pavel Machek
(And IIRC n900 pretty much suspends when idle.)
Post by Rafael J. Wysocki
sysfs accesses, /proc accesses, debugfs, to name a few, and mmap (I wonder how
you would handle *that* alone). And I'm sure there's more I didn't even
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Suppose mmap is used to make the registers of some device available to user
space. Yes, that can happen.
Post by Pavel Machek
Post by Rafael J. Wysocki
So please don't tell people that something is doable while it practically
realisticly isn't.
I can imagine doing that for suspend, and believe we are moving in
that direction with runtime suspend. For hibernation, we need to write
image the image to the disk (which is the important difference, not
memory shrinking), and "big suspend lock" would interfere with
that. I can not imagine reasonable solution for that.
Again, I'm not sure what you're talking about. Once the image has been
created, it can be saved while user space is running just fine.
Of course, we don't want random changes to be made to persistent storage after
the image has been created, because those changes might not be consistent with
the memory contents stored in the image, and that's why user space is still
frozen when we're saving the image.
Post by Rafael J. Wysocki
And that "big suspend lock" would be pretty much the same as the BKL that
people spent quite some time an effort to remove, so I don't think your idea
to reintroduce something similar just for suspend purposes would be popular.
And since the freezer is now used for other things beyond suspend and
hibernation, introducing such a lock wouldn't even address the Fuse problem.
BTW, Miklos, wouldn't that help if the kernel notified user space somehow
before starting to freeze processes? Perhaps the Fuse's user space part could
subscribe to such notifications and do something before the freezing kicks in?

Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Pavel Machek
2013-02-10 18:55:05 UTC
Permalink
Hi!
Post by Rafael J. Wysocki
Post by Rafael J. Wysocki
Post by Pavel Machek
Post by Rafael J. Wysocki
The whole memory shrinking we do for hibernation is now done by allocating
memory, so the freezer is not necessary for *that* and there's *zero*
difference between suspend and hibernation with respect to why the freezer is
used.
Funny. Freezer was put there so that hibernation image was safe to
write out. You need disk subsystems in workable state for hibernation.
I'm not really sure what you're talking about. Why do you think the freezer is
necessary for that?
Well, from freezer you need:

1) user process frozen.

2) essential locks _not_ held so that block devices are still functional.
Post by Rafael J. Wysocki
Post by Rafael J. Wysocki
Post by Pavel Machek
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Suppose mmap is used to make the registers of some device available to user
space. Yes, that can happen.
"Don't do it, then". Yes, can happen, but hopefully is not too common
these days. [And... freezer doing 1) but not 2) would be enough to
handle that. Freezer doing 1) but not 2) would also be simpler...]
Post by Rafael J. Wysocki
Post by Rafael J. Wysocki
Again, I'm not sure what you're talking about. Once the image has been
created, it can be saved while user space is running just fine.
Of course, we don't want random changes to be made to persistent storage after
the image has been created, because those changes might not be consistent with
the memory contents stored in the image, and that's why user space is still
frozen when we're saving the image.
Yes. That's the reason 1) for freezer.

Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki
2013-02-10 23:31:03 UTC
Permalink
Post by Pavel Machek
Hi!
Post by Rafael J. Wysocki
Post by Pavel Machek
Post by Rafael J. Wysocki
The whole memory shrinking we do for hibernation is now done by allocating
memory, so the freezer is not necessary for *that* and there's *zero*
difference between suspend and hibernation with respect to why the freezer is
used.
Funny. Freezer was put there so that hibernation image was safe to
write out. You need disk subsystems in workable state for hibernation.
I'm not really sure what you're talking about. Why do you think the freezer is
necessary for that?
1) user process frozen.
2) essential locks _not_ held so that block devices are still functional.
Post by Rafael J. Wysocki
Post by Pavel Machek
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Suppose mmap is used to make the registers of some device available to user
space. Yes, that can happen.
"Don't do it, then". Yes, can happen, but hopefully is not too common
these days. [And... freezer doing 1) but not 2) would be enough to
handle that. Freezer doing 1) but not 2) would also be simpler...]
Again, I'm not sure what you mean.

Are you trying to say that it would be OK to freeze user space tasks in
the D state?

Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Miklos Szeredi
2013-02-11 10:11:40 UTC
Permalink
Post by Rafael J. Wysocki
Post by Pavel Machek
1) user process frozen.
2) essential locks _not_ held so that block devices are still functional.
Post by Rafael J. Wysocki
Post by Pavel Machek
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Suppose mmap is used to make the registers of some device available to user
space. Yes, that can happen.
"Don't do it, then". Yes, can happen, but hopefully is not too common
these days. [And... freezer doing 1) but not 2) would be enough to
handle that. Freezer doing 1) but not 2) would also be simpler...]
Again, I'm not sure what you mean.
Are you trying to say that it would be OK to freeze user space tasks in
the D state?
I think that's what Pavel is saying. Processes in D state sleeping
on non-device mutexes _are_ actually OK to freeze. And that would
nicely solve the fuse freeze problem. The only little detail is how
do we implement that...

Thanks,
Miklos
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki
2013-02-11 12:08:56 UTC
Permalink
This post might be inappropriate. Click to display it.
Miklos Szeredi
2013-02-11 13:59:56 UTC
Permalink
Post by Rafael J. Wysocki
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Post by Pavel Machek
1) user process frozen.
2) essential locks _not_ held so that block devices are still functional.
Post by Rafael J. Wysocki
Post by Pavel Machek
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Suppose mmap is used to make the registers of some device available to user
space. Yes, that can happen.
"Don't do it, then". Yes, can happen, but hopefully is not too common
these days. [And... freezer doing 1) but not 2) would be enough to
handle that. Freezer doing 1) but not 2) would also be simpler...]
Again, I'm not sure what you mean.
Are you trying to say that it would be OK to freeze user space tasks in
the D state?
I think that's what Pavel is saying. Processes in D state sleeping
on non-device mutexes _are_ actually OK to freeze. And that would
nicely solve the fuse freeze problem.
That's potentially deeadlock-prone, because a task waiting for mutex X may
very well be holding mutex Y, so if there's another task waiting for mutex Y,
it needs to be frozen at the same time.
Post by Miklos Szeredi
The only little detail is how do we implement that...
This means the only way I can see would be to hack the mutex code so that the
try_to_freeze() was called for user space tasks after the
sched_preempt_enable_no_resched() before schedule().
That shouldn't be a big deal performance-wise, because we are in the slow
path anyway then. I'm not sure if Peter Z will like it, though.
Moreover, a task waiting for a mutex may be holding a semaphore or be
participating in some other mutual-exclusion mechanism, so we'd need to
address them all. Plus, as noted by Pavel, freezing those things would make
it difficult to save hibernation images to us.
Well, as a first step I could cook up a patch that adds a flag to the
mutex indicating that it's freezable. Fuse would mark its mutexes
(and the mutexes that VFS uses on its behalf) as freezable. That way
we don't interfere with hibernation, except if hibernation uses fuse
but that's a very special case.
Post by Rafael J. Wysocki
What about having a "freeze me after all of my children" flag that will be
inherited from parents? Would that help the fuse case?
With kernel filesystems there's a clear distinction between tasks that
may originate filesystem requests (userspace processes) and those that
serve these requests (kernel threads). So its possible to freeze the
originators first and the servers afterwards.

With fuse there's no such difference. Even if it were known which
processes are servers and which are originators (it is not known in
the general case) then there would be a problem that some processes
are servers AND originators at the same time.

Thanks,
Miklos
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki
2013-02-11 19:28:46 UTC
Permalink
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Post by Pavel Machek
1) user process frozen.
2) essential locks _not_ held so that block devices are still functional.
Post by Rafael J. Wysocki
Post by Pavel Machek
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Suppose mmap is used to make the registers of some device available to user
space. Yes, that can happen.
"Don't do it, then". Yes, can happen, but hopefully is not too common
these days. [And... freezer doing 1) but not 2) would be enough to
handle that. Freezer doing 1) but not 2) would also be simpler...]
Again, I'm not sure what you mean.
Are you trying to say that it would be OK to freeze user space tasks in
the D state?
I think that's what Pavel is saying. Processes in D state sleeping
on non-device mutexes _are_ actually OK to freeze. And that would
nicely solve the fuse freeze problem.
That's potentially deeadlock-prone, because a task waiting for mutex X may
very well be holding mutex Y, so if there's another task waiting for mutex Y,
it needs to be frozen at the same time.
Post by Miklos Szeredi
The only little detail is how do we implement that...
This means the only way I can see would be to hack the mutex code so that the
try_to_freeze() was called for user space tasks after the
sched_preempt_enable_no_resched() before schedule().
That shouldn't be a big deal performance-wise, because we are in the slow
path anyway then. I'm not sure if Peter Z will like it, though.
Moreover, a task waiting for a mutex may be holding a semaphore or be
participating in some other mutual-exclusion mechanism, so we'd need to
address them all. Plus, as noted by Pavel, freezing those things would make
it difficult to save hibernation images to us.
Well, as a first step I could cook up a patch that adds a flag to the
mutex indicating that it's freezable. Fuse would mark its mutexes
(and the mutexes that VFS uses on its behalf) as freezable. That way
we don't interfere with hibernation, except if hibernation uses fuse
but that's a very special case.
Yes, that may be worth a shot. :-)

Thanks,
Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Miklos Szeredi
2013-02-13 17:34:16 UTC
Permalink
(After all, with FUSE, filesystem clients are just doing IPC. In ideal
world, that would be freezeable and killable with -9).
Well, I thought this can be constrained to locks directly related to
the fuse filesystem itself, but it can't... The reason is page
faults. Pretty much everyone and their brother uses get_user_pages*,
holding various locks (mmap_sem for sure but others as well). A fuse
filesystem frozen during a page read will block those.

Separating those parts of the kernel that can be frozen from those
that can't looks increasingly difficult.

So I think the PF_FREEZE_DAEMON idea (the patch from Li Fei that
started this thread) may still be our best bet at handling this
situation. The idea being that pure "originator" processes (ones that
take no part in serving filesystem syscalls) can be frozen up-front.
Then the "fuse daemon" (or "server") processes are hopefully in a
quiescent state and can be frozen without difficulty.

Unfortunately it needs help from userspace: the kernel can't easily
guess which processes are part of a "fuse daemon" and which aren't.
Fortunately we have a standard library (libfuse) that can tell it to
the kernel for the vast majority of cases.

Thanks,
Miklos
Pavel Machek
2013-02-13 20:16:09 UTC
Permalink
Post by Miklos Szeredi
(After all, with FUSE, filesystem clients are just doing IPC. In ideal
world, that would be freezeable and killable with -9).
Well, I thought this can be constrained to locks directly related to
the fuse filesystem itself, but it can't... The reason is page
And it looked so easy and nice...
Post by Miklos Szeredi
faults. Pretty much everyone and their brother uses get_user_pages*,
holding various locks (mmap_sem for sure but others as well). A fuse
filesystem frozen during a page read will block those.
Is it even valid for get_user_pages to be used on FUSE?

What happens if fused tries to do some operation (it is userspace, it
can do lot of operations) that needs one of those locks?

It seems to me that fused will want to do operations hibernation would
want to do, at the very least (reading/writing block devices). Thus,
if it does not deadlock with fused, it should not deadlock with
hibernation, either...

[Or am i mistaken and FUSE's get_user_pages* needs FUSE locks but does
not wait for fused to finish the operation?]
Post by Miklos Szeredi
Separating those parts of the kernel that can be frozen from those
that can't looks increasingly difficult.
:-(.

Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Miklos Szeredi
2013-02-14 10:31:53 UTC
Permalink
Post by Pavel Machek
Post by Miklos Szeredi
(After all, with FUSE, filesystem clients are just doing IPC. In ideal
world, that would be freezeable and killable with -9).
Well, I thought this can be constrained to locks directly related to
the fuse filesystem itself, but it can't... The reason is page
And it looked so easy and nice...
Post by Miklos Szeredi
faults. Pretty much everyone and their brother uses get_user_pages*,
holding various locks (mmap_sem for sure but others as well). A fuse
filesystem frozen during a page read will block those.
Is it even valid for get_user_pages to be used on FUSE?
Disabling all mmap (which is what you question) would make fuse a
useless piece of toy. So yes, definitely it's valid.
Post by Pavel Machek
What happens if fused tries to do some operation (it is userspace, it
can do lot of operations) that needs one of those locks?
E.g. mmap_sem is taken for the process _accessing_ a vma for a fuse
filesystem. The server process better not do this (i.e. access the
filesystem it's serving) or deadlocking (*) is basically guaranteed.
That's nothing new.

The thing that makes this hard for freeze is that those locks
(mmap_sem, etc.) are only indirectly related to the fuse filesystem by
the _actions_ of a process (accessing mmaped fuse area).

Thanks,
Miklos

(*) userspace induced deadlocks in fuse are not hard deadlocks, they
can be forced open by "umount -f" or "echo >
/sys/fs/fuse/connections/$DEV/abort".
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki
2013-02-13 21:21:11 UTC
Permalink
This post might be inappropriate. Click to display it.
Miklos Szeredi
2013-02-14 10:41:16 UTC
Permalink
Post by Rafael J. Wysocki
Post by Miklos Szeredi
So I think the PF_FREEZE_DAEMON idea (the patch from Li Fei that
started this thread) may still be our best bet at handling this
situation. The idea being that pure "originator" processes (ones that
take no part in serving filesystem syscalls) can be frozen up-front.
Then the "fuse daemon" (or "server") processes are hopefully in a
quiescent state and can be frozen without difficulty.
Unfortunately it needs help from userspace: the kernel can't easily
guess which processes are part of a "fuse daemon" and which aren't.
Fortunately we have a standard library (libfuse) that can tell it to
the kernel for the vast majority of cases.
So basically the idea would be to introduce something like PF_FREEZE_LATE
for user space processes that need to be frozen after all of the other
(non-PF_FREEZE_LATE) user space processes have been frozen and hack fuse
to use that flag?
Yes.

It is essentially the same mechanism that is used to delay the
freezing of kernel threads after userspace tasks have been frozen.
Except it's a lot more difficult to determine which userspace tasks
need to be suspended late and which aren't.

Thanks,
Miklos
Rafael J. Wysocki
2013-02-14 12:11:01 UTC
Permalink
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Post by Miklos Szeredi
So I think the PF_FREEZE_DAEMON idea (the patch from Li Fei that
started this thread) may still be our best bet at handling this
situation. The idea being that pure "originator" processes (ones that
take no part in serving filesystem syscalls) can be frozen up-front.
Then the "fuse daemon" (or "server") processes are hopefully in a
quiescent state and can be frozen without difficulty.
Unfortunately it needs help from userspace: the kernel can't easily
guess which processes are part of a "fuse daemon" and which aren't.
Fortunately we have a standard library (libfuse) that can tell it to
the kernel for the vast majority of cases.
So basically the idea would be to introduce something like PF_FREEZE_LATE
for user space processes that need to be frozen after all of the other
(non-PF_FREEZE_LATE) user space processes have been frozen and hack fuse
to use that flag?
Yes.
It is essentially the same mechanism that is used to delay the
freezing of kernel threads after userspace tasks have been frozen.
Except it's a lot more difficult to determine which userspace tasks
need to be suspended late and which aren't.
Well, I suppose that information is available to user space.

Do we need an interface for a process to mark itself as PF_FREEZE_LATE or
do we need an interface for one process to mark another process as
PF_FREEZE_LATE, or both?

Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Miklos Szeredi
2013-02-14 13:09:50 UTC
Permalink
Post by Rafael J. Wysocki
Post by Miklos Szeredi
It is essentially the same mechanism that is used to delay the
freezing of kernel threads after userspace tasks have been frozen.
Except it's a lot more difficult to determine which userspace tasks
need to be suspended late and which aren't.
Well, I suppose that information is available to user space.
Do we need an interface for a process to mark itself as PF_FREEZE_LATE or
do we need an interface for one process to mark another process as
PF_FREEZE_LATE, or both?
As a first step marking self with PF_FREEZE_LATE and inheriting this
flag across fork/clone would work for most cases, I think.

Marking an unrelated process would have all sorts of issues: Who has
permission to do this? Won't it be misused to "fix" random freezer
issues.

Thanks,
Miklos
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki
2013-02-14 17:38:02 UTC
Permalink
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Post by Miklos Szeredi
It is essentially the same mechanism that is used to delay the
freezing of kernel threads after userspace tasks have been frozen.
Except it's a lot more difficult to determine which userspace tasks
need to be suspended late and which aren't.
Well, I suppose that information is available to user space.
Do we need an interface for a process to mark itself as PF_FREEZE_LATE or
do we need an interface for one process to mark another process as
PF_FREEZE_LATE, or both?
As a first step marking self with PF_FREEZE_LATE and inheriting this
flag across fork/clone would work for most cases, I think.
OK, so we can just have a switch for that in /proc I suppose.
Post by Miklos Szeredi
Marking an unrelated process would have all sorts of issues: Who has
permission to do this? Won't it be misused to "fix" random freezer
issues.
Yes, that's why I was asking.

Thanks,
Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Li, Fei
2013-02-18 06:26:34 UTC
Permalink
-----Original Message-----
Sent: Friday, February 15, 2013 1:38 AM
To: Miklos Szeredi
Cc: Pavel Machek; Goswin von Brederlow; Li, Fei; Brown, Len;
make fuse daemon frozen along with kernel threads]
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Post by Miklos Szeredi
It is essentially the same mechanism that is used to delay the
freezing of kernel threads after userspace tasks have been frozen.
Except it's a lot more difficult to determine which userspace tasks
need to be suspended late and which aren't.
Well, I suppose that information is available to user space.
Do we need an interface for a process to mark itself as PF_FREEZE_LATE or
do we need an interface for one process to mark another process as
PF_FREEZE_LATE, or both?
As a first step marking self with PF_FREEZE_LATE and inheriting this
flag across fork/clone would work for most cases, I think.
OK, so we can just have a switch for that in /proc I suppose.
Thanks for feedback and suggestion.

We have ever tried similar idea, expose interface /sys/power/pm_freeze_daemon,
userspace tasks write 1 to this attribute to make itself to be frozen at the same time
with kernel tasks, and it worked in our experiment.

Do you think it's suitable and enough to use such attribute /sys/power/pm_freeze_late,
or other more suitable place under /proc suggested?
If needed, I can prepare the patch.

Best Regards,
Li Fei
Post by Miklos Szeredi
Marking an unrelated process would have all sorts of issues: Who has
permission to do this? Won't it be misused to "fix" random freezer
issues.
Yes, that's why I was asking.
Thanks,
Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
��{.n�+�������+%��lzwm��b�맲��r��zX��f�{ay�ʇڙ�,j��f���h���z��w���
Rafael J. Wysocki
2013-02-18 12:26:09 UTC
Permalink
Post by Li, Fei
-----Original Message-----
Sent: Friday, February 15, 2013 1:38 AM
To: Miklos Szeredi
Cc: Pavel Machek; Goswin von Brederlow; Li, Fei; Brown, Len;
make fuse daemon frozen along with kernel threads]
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Post by Miklos Szeredi
It is essentially the same mechanism that is used to delay the
freezing of kernel threads after userspace tasks have been frozen.
Except it's a lot more difficult to determine which userspace tasks
need to be suspended late and which aren't.
Well, I suppose that information is available to user space.
Do we need an interface for a process to mark itself as PF_FREEZE_LATE or
do we need an interface for one process to mark another process as
PF_FREEZE_LATE, or both?
As a first step marking self with PF_FREEZE_LATE and inheriting this
flag across fork/clone would work for most cases, I think.
OK, so we can just have a switch for that in /proc I suppose.
Thanks for feedback and suggestion.
We have ever tried similar idea, expose interface /sys/power/pm_freeze_daemon,
userspace tasks write 1 to this attribute to make itself to be frozen at the same time
with kernel tasks, and it worked in our experiment.
Do you think it's suitable and enough to use such attribute /sys/power/pm_freeze_late,
or other more suitable place under /proc suggested?
I think it should be inder /proc, because that's where controls related to
process behavior are located. E.g. /proc/PID/freeze_late or something like
that.

Thanks,
Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Pavel Machek
2013-02-19 10:46:33 UTC
Permalink
Hi!
Post by Rafael J. Wysocki
Post by Li, Fei
Post by Rafael J. Wysocki
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Well, I suppose that information is available to user space.
Do we need an interface for a process to mark itself as PF_FREEZE_LATE or
do we need an interface for one process to mark another process as
PF_FREEZE_LATE, or both?
As a first step marking self with PF_FREEZE_LATE and inheriting this
flag across fork/clone would work for most cases, I think.
OK, so we can just have a switch for that in /proc I suppose.
Thanks for feedback and suggestion.
We have ever tried similar idea, expose interface /sys/power/pm_freeze_daemon,
userspace tasks write 1 to this attribute to make itself to be frozen at the same time
with kernel tasks, and it worked in our experiment.
Do you think it's suitable and enough to use such attribute /sys/power/pm_freeze_late,
or other more suitable place under /proc suggested?
I think it should be inder /proc, because that's where controls related to
process behavior are located. E.g. /proc/PID/freeze_late or something like
that.
freeze_priority?

I _hope_ we will not need more than three priorities, (user, fused, kernel), but
I hoped not no need more than two before, so...
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Li, Fei
2013-02-20 02:54:20 UTC
Permalink
-----Original Message-----
Sent: Tuesday, February 19, 2013 6:47 PM
To: Rafael J. Wysocki
Cc: Li, Fei; Miklos Szeredi; Goswin von Brederlow; Brown, Len;
make fuse daemon frozen along with kernel threads]
Hi!
Post by Rafael J. Wysocki
Post by Li, Fei
Post by Rafael J. Wysocki
Post by Miklos Szeredi
Post by Rafael J. Wysocki
Well, I suppose that information is available to user space.
Do we need an interface for a process to mark itself as PF_FREEZE_LATE
or
Post by Rafael J. Wysocki
Post by Li, Fei
Post by Rafael J. Wysocki
Post by Miklos Szeredi
Post by Rafael J. Wysocki
do we need an interface for one process to mark another process as
PF_FREEZE_LATE, or both?
As a first step marking self with PF_FREEZE_LATE and inheriting this
flag across fork/clone would work for most cases, I think.
OK, so we can just have a switch for that in /proc I suppose.
Thanks for feedback and suggestion.
We have ever tried similar idea, expose interface
/sys/power/pm_freeze_daemon,
Post by Rafael J. Wysocki
Post by Li, Fei
userspace tasks write 1 to this attribute to make itself to be frozen at the
same time
Post by Rafael J. Wysocki
Post by Li, Fei
with kernel tasks, and it worked in our experiment.
Do you think it's suitable and enough to use such attribute
/sys/power/pm_freeze_late,
Post by Rafael J. Wysocki
Post by Li, Fei
or other more suitable place under /proc suggested?
I think it should be inder /proc, because that's where controls related to
process behavior are located. E.g. /proc/PID/freeze_late or something like
that.
freeze_priority?
I _hope_ we will not need more than three priorities, (user, fused, kernel), but
I hoped not no need more than two before, so...
Pavel
--
IMHO, we still use two priorities, with the exception that user space process such
as fuse daemon can be configured to be frozen in kernel threads frozen phase
instead of user space processes frozen phase.

I submit a patch on https://lkml.org/lkml/2013/2/19/705 for such implementation.
Could you please help to check whether it's suitable?

Thanks and Regards,
Li Fei
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Pavel Machek
2013-02-11 10:53:14 UTC
Permalink
Hi!
Post by Rafael J. Wysocki
Post by Pavel Machek
Post by Rafael J. Wysocki
Post by Pavel Machek
Post by Rafael J. Wysocki
The whole memory shrinking we do for hibernation is now done by allocating
memory, so the freezer is not necessary for *that* and there's *zero*
difference between suspend and hibernation with respect to why the freezer is
used.
Funny. Freezer was put there so that hibernation image was safe to
write out. You need disk subsystems in workable state for hibernation.
I'm not really sure what you're talking about. Why do you think the freezer is
necessary for that?
1) user process frozen.
2) essential locks _not_ held so that block devices are still functional.
Post by Rafael J. Wysocki
Post by Pavel Machek
mmap... what is problem with mmap? For suspend, memory is powered, so
you can permit people changing it.
Suppose mmap is used to make the registers of some device available to user
space. Yes, that can happen.
"Don't do it, then". Yes, can happen, but hopefully is not too common
these days. [And... freezer doing 1) but not 2) would be enough to
handle that. Freezer doing 1) but not 2) would also be simpler...]
Again, I'm not sure what you mean.
Are you trying to say that it would be OK to freeze user space tasks in
the D state?
Yes. For suspend, that should pretty much work. (With caveats, as
Miklos noticed).

Unfortunately, that is not going to be too easy for hibernation.

We could mae the situation easier by specifying we only support
hibernation to local SATA or SCSI disk, not over NBD, iSCSI,
etc... That would reduce number of locks that need to be available for
hibernation.

Unfortunately, uswsusp did not make that easy as userland may want to
do arbitrary stuff with the image.

Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Han-Wen Nienhuys
2013-02-06 09:56:11 UTC
Permalink
Post by Li Fei
There is well known issue that freezing will fail in case that fuse
daemon is frozen firstly with some requests not handled, as the fuse
usage task is waiting for the response from fuse daemon and can't be
frozen.
To solve the issue above, make fuse daemon frozen after all all user
space processes frozen and during the kernel threads frozen phase.
PF_FREEZE_DAEMON flag is added to indicate that the current thread is
the fuse daemon, set on connection, and clear on disconnection.
It works as all fuse requests are handled during user space processes
frozen, there will no further fuse request, and it's safe to continue
to freeze fuse daemon together with kernel freezable tasks.
Will this work correctly if one FUSE daemon is opening files in from
another FUSE filesystem?
--
Han-Wen Nienhuys - ***@xs4all.nl - http://www.xs4all.nl/~hanwen
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Miklos Szeredi
2013-02-06 14:59:08 UTC
Permalink
Post by Han-Wen Nienhuys
Post by Li Fei
There is well known issue that freezing will fail in case that fuse
daemon is frozen firstly with some requests not handled, as the fuse
usage task is waiting for the response from fuse daemon and can't be
frozen.
To solve the issue above, make fuse daemon frozen after all all user
space processes frozen and during the kernel threads frozen phase.
PF_FREEZE_DAEMON flag is added to indicate that the current thread is
the fuse daemon, set on connection, and clear on disconnection.
It works as all fuse requests are handled during user space processes
frozen, there will no further fuse request, and it's safe to continue
to freeze fuse daemon together with kernel freezable tasks.
Will this work correctly if one FUSE daemon is opening files in from
another FUSE filesystem?
As long as only non-fuse-daemon processes are generating the requests
(i.e. fuse daemons don't spontaneously generate new requests) it
should work.

I think more problematic is that userspace processes tend to
communicate with each other, sometimes in a non-trivial way. For
example gethostbyname(3) will turn to nscd(8) for name service cache
results. So a fuse daemon that might call gethostbyname() should mark
nscd with PF_FREEZE_DAEMON but that requires careful audit (or nscd
might mark itself PF_FREEZE_DAEMON for that reason).

And that's just an example of a most basic C library function. There
are many more such hidden interactions that can trip up this scheme.

Thanks,
Miklos
Loading...