Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Regression1
4 * Description:
5 * Salman Qazi describes the following radix-tree bug:
6 *
 * In the following case, we can get a deadlock:
8 *
9 * 0. The radix tree contains two items, one has the index 0.
10 * 1. The reader (in this case find_get_pages) takes the rcu_read_lock.
11 * 2. The reader acquires slot(s) for item(s) including the index 0 item.
12 * 3. The non-zero index item is deleted, and as a consequence the other item
13 * is moved to the root of the tree. The place where it used to be is queued
14 * for deletion after the readers finish.
15 * 3b. The zero item is deleted, removing it from the direct slot, it remains in
16 * the rcu-delayed indirect node.
17 * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref
18 * count
19 * 5. The reader looks at it again, hoping that the item will either be freed
20 * or the ref count will increase. This never happens, as the slot it is
21 * looking at will never be updated. Also, this slot can never be reclaimed
22 * because the reader is holding rcu_read_lock and is in an infinite loop.
23 *
24 * The fix is to re-use the same "indirect" pointer case that requires a slot
25 * lookup retry into a general "retry the lookup" bit.
26 *
27 * Running:
28 * This test should run to completion in a few seconds. The above bug would
29 * cause it to hang indefinitely.
30 *
31 * Upstream commit:
32 * Not yet
33 */
34#include <linux/kernel.h>
35#include <linux/gfp.h>
36#include <linux/slab.h>
37#include <linux/radix-tree.h>
38#include <linux/rcupdate.h>
39#include <stdlib.h>
40#include <pthread.h>
41#include <stdio.h>
42#include <assert.h>
43
44#include "regression.h"
45
46static RADIX_TREE(mt_tree, GFP_KERNEL);
47static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
48
/*
 * Minimal stand-in for the kernel's struct page: just enough state to
 * reproduce the lookup/delete race described in the header comment.
 */
struct page {
	pthread_mutex_t lock;	/* protects @count */
	struct rcu_head rcu;	/* RCU-deferred free via page_free() */
	int count;		/* refcount; page_rcu_free() asserts it is 0 */
	unsigned long index;	/* set to 1 by page_alloc(); never read here */
};
55
56static struct page *page_alloc(void)
57{
58 struct page *p;
59 p = malloc(sizeof(struct page));
60 p->count = 1;
61 p->index = 1;
62 pthread_mutex_init(&p->lock, NULL);
63
64 return p;
65}
66
67static void page_rcu_free(struct rcu_head *rcu)
68{
69 struct page *p = container_of(rcu, struct page, rcu);
70 assert(!p->count);
71 pthread_mutex_destroy(&p->lock);
72 free(p);
73}
74
75static void page_free(struct page *p)
76{
77 call_rcu(&p->rcu, page_rcu_free);
78}
79
/*
 * Lockless lookup of up to @nr_pages pages starting at index @start,
 * mimicking the kernel's find_get_pages().  Runs entirely under
 * rcu_read_lock(), so every slot dereference can race with a concurrent
 * insert/delete and must be revalidated before the page is accepted.
 *
 * Returns the number of page pointers stored into @pages.
 */
static unsigned find_get_pages(unsigned long start,
			    unsigned int nr_pages, struct page **pages)
{
	unsigned int i;
	unsigned int ret;
	unsigned int nr_found;

	rcu_read_lock();
restart:
	/* Gather slot pointers (not entries); each slot is re-read below. */
	nr_found = radix_tree_gang_lookup_slot(&mt_tree,
				(void ***)pages, NULL, start, nr_pages);
	ret = 0;
	for (i = 0; i < nr_found; i++) {
		struct page *page;
repeat:
		page = radix_tree_deref_slot((void **)pages[i]);
		if (unlikely(!page))
			continue;	/* slot emptied by a racing delete */

		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				/*
				 * Transient condition which can only trigger
				 * when entry at index 0 moves out of or back
				 * to root: none yet gotten, safe to restart.
				 */
				assert((start | i) == 0);
				goto restart;
			}
			/*
			 * No exceptional entries are inserted in this test.
			 */
			assert(0);
		}

		pthread_mutex_lock(&page->lock);
		if (!page->count) {
			/* Page is mid-free; re-read the slot for a retry. */
			pthread_mutex_unlock(&page->lock);
			goto repeat;
		}
		/* don't actually update page refcount */
		pthread_mutex_unlock(&page->lock);

		/* Has the page moved? */
		if (unlikely(page != *((void **)pages[i]))) {
			goto repeat;
		}

		pages[ret] = page;
		ret++;
	}
	rcu_read_unlock();
	return ret;
}
134
135static pthread_barrier_t worker_barrier;
136
/*
 * Worker body shared by both threads.  The thread that wins the barrier
 * (PTHREAD_BARRIER_SERIAL_THREAD) becomes the writer, repeatedly
 * inserting pages at indices 0 and 1 and deleting them again; the other
 * thread hammers lockless lookups.  Per the header comment, deleting
 * index 1 moves the index-0 entry back to the tree root, which is the
 * transition the buggy radix tree mishandled.
 */
static void *regression1_fn(void *arg)
{
	rcu_register_thread();

	if (pthread_barrier_wait(&worker_barrier) ==
			PTHREAD_BARRIER_SERIAL_THREAD) {
		/* Writer: one million insert/insert/delete/delete cycles. */
		int j;

		for (j = 0; j < 1000000; j++) {
			struct page *p;

			p = page_alloc();
			pthread_mutex_lock(&mt_lock);
			radix_tree_insert(&mt_tree, 0, p);
			pthread_mutex_unlock(&mt_lock);

			p = page_alloc();
			pthread_mutex_lock(&mt_lock);
			radix_tree_insert(&mt_tree, 1, p);
			pthread_mutex_unlock(&mt_lock);

			/* Delete index 1 first: the index-0 entry moves
			 * back into the root (the racy transition). */
			pthread_mutex_lock(&mt_lock);
			p = radix_tree_delete(&mt_tree, 1);
			pthread_mutex_lock(&p->lock);
			p->count--;	/* drop ref under p->lock */
			pthread_mutex_unlock(&p->lock);
			pthread_mutex_unlock(&mt_lock);
			page_free(p);	/* actual free deferred past readers */

			pthread_mutex_lock(&mt_lock);
			p = radix_tree_delete(&mt_tree, 0);
			pthread_mutex_lock(&p->lock);
			p->count--;
			pthread_mutex_unlock(&p->lock);
			pthread_mutex_unlock(&mt_lock);
			page_free(p);
		}
	} else {
		/* Reader: lockless lookups racing against the writer. */
		int j;

		for (j = 0; j < 100000000; j++) {
			struct page *pages[10];

			find_get_pages(0, 10, pages);
		}
	}

	rcu_unregister_thread();

	return NULL;
}
188
189static pthread_t *threads;
190void regression1_test(void)
191{
192 int nr_threads;
193 int i;
194 long arg;
195
196 /* Regression #1 */
197 printv(1, "running regression test 1, should finish in under a minute\n");
198 nr_threads = 2;
199 pthread_barrier_init(&worker_barrier, NULL, nr_threads);
200
201 threads = malloc(nr_threads * sizeof(pthread_t *));
202
203 for (i = 0; i < nr_threads; i++) {
204 arg = i;
205 if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
206 perror("pthread_create");
207 exit(1);
208 }
209 }
210
211 for (i = 0; i < nr_threads; i++) {
212 if (pthread_join(threads[i], NULL)) {
213 perror("pthread_join");
214 exit(1);
215 }
216 }
217
218 free(threads);
219
220 printv(1, "regression test 1, done\n");
221}
1/*
2 * Regression1
3 * Description:
4 * Salman Qazi describes the following radix-tree bug:
5 *
 * In the following case, we can get a deadlock:
7 *
8 * 0. The radix tree contains two items, one has the index 0.
9 * 1. The reader (in this case find_get_pages) takes the rcu_read_lock.
10 * 2. The reader acquires slot(s) for item(s) including the index 0 item.
11 * 3. The non-zero index item is deleted, and as a consequence the other item
12 * is moved to the root of the tree. The place where it used to be is queued
13 * for deletion after the readers finish.
14 * 3b. The zero item is deleted, removing it from the direct slot, it remains in
15 * the rcu-delayed indirect node.
16 * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref
17 * count
18 * 5. The reader looks at it again, hoping that the item will either be freed
19 * or the ref count will increase. This never happens, as the slot it is
20 * looking at will never be updated. Also, this slot can never be reclaimed
21 * because the reader is holding rcu_read_lock and is in an infinite loop.
22 *
23 * The fix is to re-use the same "indirect" pointer case that requires a slot
24 * lookup retry into a general "retry the lookup" bit.
25 *
26 * Running:
27 * This test should run to completion in a few seconds. The above bug would
28 * cause it to hang indefinitely.
29 *
30 * Upstream commit:
31 * Not yet
32 */
33#include <linux/kernel.h>
34#include <linux/gfp.h>
35#include <linux/slab.h>
36#include <linux/radix-tree.h>
37#include <linux/rcupdate.h>
38#include <stdlib.h>
39#include <pthread.h>
40#include <stdio.h>
41#include <assert.h>
42
43#include "regression.h"
44
45static RADIX_TREE(mt_tree, GFP_KERNEL);
46static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
47
/*
 * Minimal stand-in for the kernel's struct page: just enough state to
 * reproduce the lookup/delete race described in the header comment.
 */
struct page {
	pthread_mutex_t lock;	/* protects @count */
	struct rcu_head rcu;	/* RCU-deferred free via page_free() */
	int count;		/* refcount; page_rcu_free() asserts it is 0 */
	unsigned long index;	/* set to 1 by page_alloc(); never read here */
};
54
55static struct page *page_alloc(void)
56{
57 struct page *p;
58 p = malloc(sizeof(struct page));
59 p->count = 1;
60 p->index = 1;
61 pthread_mutex_init(&p->lock, NULL);
62
63 return p;
64}
65
66static void page_rcu_free(struct rcu_head *rcu)
67{
68 struct page *p = container_of(rcu, struct page, rcu);
69 assert(!p->count);
70 pthread_mutex_destroy(&p->lock);
71 free(p);
72}
73
74static void page_free(struct page *p)
75{
76 call_rcu(&p->rcu, page_rcu_free);
77}
78
/*
 * Lockless lookup of up to @nr_pages pages starting at index @start,
 * mimicking the kernel's find_get_pages().  Runs entirely under
 * rcu_read_lock(), so every slot dereference can race with a concurrent
 * insert/delete and must be revalidated before the page is accepted.
 *
 * Returns the number of page pointers stored into @pages.
 */
static unsigned find_get_pages(unsigned long start,
			    unsigned int nr_pages, struct page **pages)
{
	unsigned int i;
	unsigned int ret;
	unsigned int nr_found;

	rcu_read_lock();
restart:
	/* Gather slot pointers (not entries); each slot is re-read below. */
	nr_found = radix_tree_gang_lookup_slot(&mt_tree,
				(void ***)pages, NULL, start, nr_pages);
	ret = 0;
	for (i = 0; i < nr_found; i++) {
		struct page *page;
repeat:
		page = radix_tree_deref_slot((void **)pages[i]);
		if (unlikely(!page))
			continue;	/* slot emptied by a racing delete */

		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				/*
				 * Transient condition which can only trigger
				 * when entry at index 0 moves out of or back
				 * to root: none yet gotten, safe to restart.
				 */
				assert((start | i) == 0);
				goto restart;
			}
			/*
			 * No exceptional entries are inserted in this test.
			 */
			assert(0);
		}

		pthread_mutex_lock(&page->lock);
		if (!page->count) {
			/* Page is mid-free; re-read the slot for a retry. */
			pthread_mutex_unlock(&page->lock);
			goto repeat;
		}
		/* don't actually update page refcount */
		pthread_mutex_unlock(&page->lock);

		/* Has the page moved? */
		if (unlikely(page != *((void **)pages[i]))) {
			goto repeat;
		}

		pages[ret] = page;
		ret++;
	}
	rcu_read_unlock();
	return ret;
}
133
134static pthread_barrier_t worker_barrier;
135
/*
 * Worker body shared by both threads.  The thread that wins the barrier
 * (PTHREAD_BARRIER_SERIAL_THREAD) becomes the writer, repeatedly
 * inserting pages at indices 0 and 1 and deleting them again; the other
 * thread hammers lockless lookups.  Per the header comment, deleting
 * index 1 moves the index-0 entry back to the tree root, which is the
 * transition the buggy radix tree mishandled.
 */
static void *regression1_fn(void *arg)
{
	rcu_register_thread();

	if (pthread_barrier_wait(&worker_barrier) ==
			PTHREAD_BARRIER_SERIAL_THREAD) {
		/* Writer: one million insert/insert/delete/delete cycles. */
		int j;

		for (j = 0; j < 1000000; j++) {
			struct page *p;

			p = page_alloc();
			pthread_mutex_lock(&mt_lock);
			radix_tree_insert(&mt_tree, 0, p);
			pthread_mutex_unlock(&mt_lock);

			p = page_alloc();
			pthread_mutex_lock(&mt_lock);
			radix_tree_insert(&mt_tree, 1, p);
			pthread_mutex_unlock(&mt_lock);

			/* Delete index 1 first: the index-0 entry moves
			 * back into the root (the racy transition). */
			pthread_mutex_lock(&mt_lock);
			p = radix_tree_delete(&mt_tree, 1);
			pthread_mutex_lock(&p->lock);
			p->count--;	/* drop ref under p->lock */
			pthread_mutex_unlock(&p->lock);
			pthread_mutex_unlock(&mt_lock);
			page_free(p);	/* actual free deferred past readers */

			pthread_mutex_lock(&mt_lock);
			p = radix_tree_delete(&mt_tree, 0);
			pthread_mutex_lock(&p->lock);
			p->count--;
			pthread_mutex_unlock(&p->lock);
			pthread_mutex_unlock(&mt_lock);
			page_free(p);
		}
	} else {
		/* Reader: lockless lookups racing against the writer. */
		int j;

		for (j = 0; j < 100000000; j++) {
			struct page *pages[10];

			find_get_pages(0, 10, pages);
		}
	}

	rcu_unregister_thread();

	return NULL;
}
187
188static pthread_t *threads;
189void regression1_test(void)
190{
191 int nr_threads;
192 int i;
193 long arg;
194
195 /* Regression #1 */
196 printf("running regression test 1, should finish in under a minute\n");
197 nr_threads = 2;
198 pthread_barrier_init(&worker_barrier, NULL, nr_threads);
199
200 threads = malloc(nr_threads * sizeof(pthread_t *));
201
202 for (i = 0; i < nr_threads; i++) {
203 arg = i;
204 if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
205 perror("pthread_create");
206 exit(1);
207 }
208 }
209
210 for (i = 0; i < nr_threads; i++) {
211 if (pthread_join(threads[i], NULL)) {
212 perror("pthread_join");
213 exit(1);
214 }
215 }
216
217 free(threads);
218
219 printf("regression test 1, done\n");
220}