1/* SPDX-License-Identifier: GPL-2.0-only */
2#ifndef _KERNEL_TIME_MIGRATION_H
3#define _KERNEL_TIME_MIGRATION_H
4
/*
 * Per group capacity. Must be a power of 2! It also must not exceed 8,
 * as each child is tracked by one bit in the u8 group masks (see
 * union tmigr_state and the 'groupmask' members below).
 */
#define TMIGR_CHILDREN_PER_GROUP 8
7
/**
 * struct tmigr_event - a timer event associated to a CPU
 * @nextevt:	The node to enqueue an event in the parent group queue
 * @cpu:	The CPU to which this event belongs
 * @ignore:	Hint whether the event could be ignored; it is set when
 *		CPU or group is active
 */
struct tmigr_event {
	struct timerqueue_node	nextevt;
	unsigned int		cpu;
	bool			ignore;
};
20
/**
 * struct tmigr_group - timer migration hierarchy group
 * @lock:		Lock protecting the event information and group hierarchy
 *			information during setup
 * @parent:		Pointer to the parent group. Pointer is updated when a
 *			new hierarchy level is added because of a CPU coming
 *			online the first time. Once it is set, the pointer will
 *			not be removed or updated. When accessing parent pointer
 *			lock less to decide whether to abort a propagation or
 *			not, it is not a problem. The worst outcome is an
 *			unnecessary/early CPU wake up. But do not access parent
 *			pointer several times in the same 'action' (like
 *			activation, deactivation, check for remote expiry,...)
 *			without holding the lock as it is not ensured that value
 *			will not change.
 * @groupevt:		Next event of the group which is only used when the
 *			group is !active. The group event is then queued into
 *			the parent timer queue.
 *			Ignore bit of @groupevt is set when the group is active.
 * @next_expiry:	Base monotonic expiry time of the next event of the
 *			group; It is used for the racy lockless check whether a
 *			remote expiry is required; it is always reliable
 * @events:		Timer queue for child events queued in the group
 * @migr_state:		State of the group (see union tmigr_state)
 * @level:		Hierarchy level of the group; Required during setup
 * @numa_node:		Required for setup only to make sure CPU and low level
 *			group information is NUMA local. It is set to NUMA node
 *			as long as the group level is per NUMA node (level <
 *			tmigr_crossnode_level); otherwise it is set to
 *			NUMA_NO_NODE
 * @num_children:	Counter of group children to make sure the group is only
 *			filled with TMIGR_CHILDREN_PER_GROUP; Required for setup
 *			only
 * @groupmask:		Mask of the group in the parent group; is set during
 *			setup and will never change; can be read lockless
 * @list:		List head that is added to the per level
 *			tmigr_level_list; is required during setup when a
 *			new group needs to be connected to the existing
 *			hierarchy groups
 */
struct tmigr_group {
	raw_spinlock_t		lock;
	struct tmigr_group	*parent;
	struct tmigr_event	groupevt;
	u64			next_expiry;
	struct timerqueue_head	events;
	atomic_t		migr_state;
	unsigned int		level;
	int			numa_node;
	unsigned int		num_children;
	u8			groupmask;
	struct list_head	list;
};
74
/**
 * struct tmigr_cpu - timer migration per CPU group
 * @lock:	Lock protecting the tmigr_cpu group information
 * @online:	Indicates whether the CPU is online; In deactivate path
 *		it is required to know whether the migrator in the top
 *		level group is to be set offline, while a timer is
 *		pending. Then another online CPU needs to be notified to
 *		take over the migrator role. Furthermore the information
 *		is required in CPU hotplug path as the CPU is able to go
 *		idle before the timer migration hierarchy hotplug AP is
 *		reached. During this phase, the CPU has to handle the
 *		global timers on its own and must not act as a migrator.
 * @idle:	Indicates whether the CPU is idle in the timer migration
 *		hierarchy
 * @remote:	Is set when timers of the CPU are expired remotely
 * @tmgroup:	Pointer to the parent group
 * @groupmask:	Mask of tmigr_cpu in the parent group; is set during
 *		setup (see @groupmask of struct tmigr_group)
 * @wakeup:	Stores the first timer when the timer migration
 *		hierarchy is completely idle and remote expiry was done;
 *		is returned to timer code in the idle path and is only
 *		used in idle path.
 * @cpuevt:	CPU event which could be enqueued into the parent group
 */
struct tmigr_cpu {
	raw_spinlock_t		lock;
	bool			online;
	bool			idle;
	bool			remote;
	struct tmigr_group	*tmgroup;
	u8			groupmask;
	u64			wakeup;
	struct tmigr_event	cpuevt;
};
108
/**
 * union tmigr_state - state of tmigr_group
 * @state:	Combined version of the state - only used for atomic
 *		read/cmpxchg function
 * @struct:	Split version of the state - only use the struct members to
 *		update information to stay independent of endianness
 *
 * The split members (8 + 8 + 16 bits, __packed) exactly overlay the
 * 32bit @state word, which is what makes the atomic cmpxchg on @state
 * equivalent to an update of the individual fields.
 */
union tmigr_state {
	u32 state;
	/**
	 * struct - split state of tmigr_group
	 * @active:	Contains each mask bit of the active children
	 * @migrator:	Contains mask of the child which is migrator
	 * @seq:	Sequence counter needs to be increased when an update
	 *		to the tmigr_state is done. It prevents a race when
	 *		updates in the child groups are propagated in changed
	 *		order. Detailed information about the scenario is
	 *		given in the documentation at the begin of
	 *		timer_migration.c.
	 */
	struct {
		u8	active;
		u8	migrator;
		u16	seq;
	} __packed;
};
135
/* Public interface; only available with SMP + NO_HZ_COMMON. */
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void tmigr_handle_remote(void);
extern bool tmigr_requires_handle_remote(void);
extern void tmigr_cpu_activate(void);
extern u64 tmigr_cpu_deactivate(u64 nextevt);
extern u64 tmigr_cpu_new_timer(u64 nextevt);
extern u64 tmigr_quick_check(u64 nextevt);
#else
/*
 * NOTE(review): only three of the six interfaces have stubs here;
 * presumably tmigr_cpu_deactivate(), tmigr_cpu_new_timer() and
 * tmigr_quick_check() are only called from code that is itself compiled
 * out without SMP/NO_HZ_COMMON -- confirm against the callers before
 * relying on this header in a new configuration.
 */
static inline void tmigr_handle_remote(void) { }
static inline bool tmigr_requires_handle_remote(void) { return false; }
static inline void tmigr_cpu_activate(void) { }
#endif
148
149#endif
/*
 * NOTE(review): a second, stale copy of this entire header (still using
 * the old 'childmask' naming instead of 'groupmask') had been appended
 * below this point, apparently by a copy/paste or tooling artifact.
 * It was dead text in any case: _KERNEL_TIME_MIGRATION_H is already
 * defined by the copy above, so the preprocessor skipped everything
 * between the duplicate's #ifndef/#endif pair. The duplicate has been
 * removed; the copy above is the authoritative one.
 */