// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "logger.h"
#include "memory-alloc.h"
#include "permassert.h"

/*
 * UDS and VDO keep track of which threads are allowed to allocate memory freely, and which
 * threads must be careful not to perform a memory allocation that issues an I/O request. The
 * 'allocating_threads' thread_registry and its associated methods implement this tracking.
 */
static struct thread_registry allocating_threads;

static inline bool allocations_allowed(void)
{
	return vdo_lookup_thread(&allocating_threads) != NULL;
}

/*
 * Register the current thread as an allocating thread.
 *
 * An optional flag location can be supplied indicating whether, at any given point in time, the
 * threads associated with that flag should be allocating storage. If the flag is false, a message
 * will be logged.
 *
 * If no flag is supplied, the thread is always allowed to allocate storage without complaint.
 *
 * @new_thread: registered_thread structure to use for the current thread
 * @flag_ptr: Location of the allocation-allowed flag
 */
void vdo_register_allocating_thread(struct registered_thread *new_thread,
				    const bool *flag_ptr)
{
	if (flag_ptr == NULL) {
		static const bool allocation_always_allowed = true;

		flag_ptr = &allocation_always_allowed;
	}

	vdo_register_thread(&allocating_threads, new_thread, flag_ptr);
}

/* Unregister the current thread as an allocating thread. */
void vdo_unregister_allocating_thread(void)
{
	vdo_unregister_thread(&allocating_threads);
}
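/*
 * Example usage (a sketch, not part of this file; the thread function and its name are
 * hypothetical):
 *
 *	static int example_thread(void *arg)
 *	{
 *		struct registered_thread thread;
 *
 *		// A NULL flag_ptr means allocation is always allowed on this thread.
 *		vdo_register_allocating_thread(&thread, NULL);
 *		// ... work that may allocate memory freely ...
 *		vdo_unregister_allocating_thread();
 *		return 0;
 *	}
 */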
/*
 * We track how much memory has been allocated and freed. When we unload the module, we log an
 * error if we have not freed all the memory that we allocated. Nearly all memory allocation and
 * freeing is done using this module.
 *
 * We do not use kernel functions such as kvasprintf(), which allocate memory indirectly using
 * kmalloc.
 *
 * These data structures and methods are used to track the amount of memory used.
 */

/*
 * We allocate very few large objects, and allocation/deallocation isn't done in a
 * performance-critical stage for us, so a linked list should be fine.
 */
struct vmalloc_block_info {
	void *ptr;
	size_t size;
	struct vmalloc_block_info *next;
};

static struct {
	spinlock_t lock;
	size_t kmalloc_blocks;
	size_t kmalloc_bytes;
	size_t vmalloc_blocks;
	size_t vmalloc_bytes;
	size_t peak_bytes;
	struct vmalloc_block_info *vmalloc_list;
} memory_stats __cacheline_aligned;

static void update_peak_usage(void)
{
	size_t total_bytes = memory_stats.kmalloc_bytes + memory_stats.vmalloc_bytes;

	if (total_bytes > memory_stats.peak_bytes)
		memory_stats.peak_bytes = total_bytes;
}

static void add_kmalloc_block(size_t size)
{
	unsigned long flags;

	spin_lock_irqsave(&memory_stats.lock, flags);
	memory_stats.kmalloc_blocks++;
	memory_stats.kmalloc_bytes += size;
	update_peak_usage();
	spin_unlock_irqrestore(&memory_stats.lock, flags);
}

static void remove_kmalloc_block(size_t size)
{
	unsigned long flags;

	spin_lock_irqsave(&memory_stats.lock, flags);
	memory_stats.kmalloc_blocks--;
	memory_stats.kmalloc_bytes -= size;
	spin_unlock_irqrestore(&memory_stats.lock, flags);
}

static void add_vmalloc_block(struct vmalloc_block_info *block)
{
	unsigned long flags;

	spin_lock_irqsave(&memory_stats.lock, flags);
	block->next = memory_stats.vmalloc_list;
	memory_stats.vmalloc_list = block;
	memory_stats.vmalloc_blocks++;
	memory_stats.vmalloc_bytes += block->size;
	update_peak_usage();
	spin_unlock_irqrestore(&memory_stats.lock, flags);
}

static void remove_vmalloc_block(void *ptr)
{
	struct vmalloc_block_info *block;
	struct vmalloc_block_info **block_ptr;
	unsigned long flags;

	spin_lock_irqsave(&memory_stats.lock, flags);
	for (block_ptr = &memory_stats.vmalloc_list;
	     (block = *block_ptr) != NULL;
	     block_ptr = &block->next) {
		if (block->ptr == ptr) {
			*block_ptr = block->next;
			memory_stats.vmalloc_blocks--;
			memory_stats.vmalloc_bytes -= block->size;
			break;
		}
	}

	spin_unlock_irqrestore(&memory_stats.lock, flags);
	if (block != NULL)
		vdo_free(block);
	else
		vdo_log_info("attempting to remove ptr %px not found in vmalloc list", ptr);
}
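/*
 * Accounting example (a sketch with made-up numbers, assuming kmalloc returns an exact
 * power-of-two slab): after one 8KB kmalloc and one 16KB __vmalloc, memory_stats holds
 * kmalloc_bytes = 8192 in one block and vmalloc_bytes = 16384 in one block, so peak_bytes
 * reaches 24576. Because update_peak_usage() only ratchets upward, peak_bytes stays at 24576
 * even after either block is freed.
 */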
/*
 * Determine whether allocating a memory block should use kmalloc or __vmalloc.
 *
 * vmalloc can allocate any integral number of pages.
 *
 * kmalloc can allocate any number of bytes up to a configured limit, which defaults to 8
 * megabytes on some systems. kmalloc is especially good when memory is being both allocated and
 * freed, and it does this efficiently in a multi-CPU environment.
 *
 * kmalloc usually rounds the size of the block up to the next power of two, so when the requested
 * block is bigger than PAGE_SIZE / 2 bytes, kmalloc will never give you less space than the
 * corresponding vmalloc allocation. Sometimes vmalloc will use less overhead than kmalloc.
 *
 * The advantages of kmalloc do not help UDS or VDO, because we allocate all our memory up front
 * and do not free and reallocate it. Sometimes we have problems using kmalloc, because the Linux
 * memory page map can become so fragmented that kmalloc will not give us a 32KB chunk. We have
 * used vmalloc as a backup to kmalloc in the past, and in such cases a follow-up vmalloc of 32KB
 * will succeed. But there is no strong case to be made for using kmalloc over vmalloc for chunks
 * of this size.
 *
 * The kmalloc/vmalloc boundary is set at 4KB, and kmalloc gets the 4KB requests. There is no
 * strong reason for favoring either kmalloc or vmalloc for 4KB requests, except that tracking
 * vmalloc statistics uses a linked list implementation. In a simple test, this choice of boundary
 * resulted in 132 vmalloc calls; using vmalloc for requests of exactly 4KB resulted in an
 * additional 6374 vmalloc calls, which makes the tracking much less efficient.
 *
 * @size: How many bytes to allocate
 */
static inline bool use_kmalloc(size_t size)
{
	return size <= PAGE_SIZE;
}

/*
 * Allocate storage based on memory size and alignment, logging an error if the allocation fails.
 * The memory will be zeroed.
 *
 * @size: The size of an object
 * @align: The required alignment
 * @what: What is being allocated (for error logging)
 * @ptr: A pointer to hold the allocated memory
 *
 * Return: VDO_SUCCESS or an error code
 */
int vdo_allocate_memory(size_t size, size_t align, const char *what, void *ptr)
{
	/*
	 * The __GFP_RETRY_MAYFAIL flag means the VM implementation will retry memory reclaim
	 * procedures that have previously failed if there is some indication that progress has
	 * been made elsewhere. It can wait for other tasks to attempt high-level approaches to
	 * freeing memory such as compaction (which removes fragmentation) and page-out. There is
	 * still a definite limit to the number of retries, but it is a larger limit than with
	 * __GFP_NORETRY. Allocations with this flag may fail, but only when there is genuinely
	 * little unused memory. While these allocations do not directly trigger the OOM killer,
	 * their failure indicates that the system is likely to need to use the OOM killer soon.
	 * The caller must handle failure, but can reasonably do so by failing a higher-level
	 * request, or completing it only in a much less efficient manner.
	 */
	const gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL;
	unsigned int noio_flags;
	bool allocations_restricted = !allocations_allowed();
	unsigned long start_time;
	void *p = NULL;

	if (unlikely(ptr == NULL))
		return -EINVAL;

	if (size == 0) {
		*((void **) ptr) = NULL;
		return VDO_SUCCESS;
	}

	if (allocations_restricted)
		noio_flags = memalloc_noio_save();

	start_time = jiffies;
	if (use_kmalloc(size) && (align < PAGE_SIZE)) {
		p = kmalloc(size, gfp_flags | __GFP_NOWARN);
		if (p == NULL) {
			/*
			 * It is possible for kmalloc to fail to allocate memory because there is
			 * no page available. A short sleep may allow the page reclaimer to free a
			 * page.
			 */
			fsleep(1000);
			p = kmalloc(size, gfp_flags);
		}

		if (p != NULL)
			add_kmalloc_block(ksize(p));
	} else {
		struct vmalloc_block_info *block;

		if (vdo_allocate(1, struct vmalloc_block_info, __func__, &block) == VDO_SUCCESS) {
			/*
			 * It is possible for __vmalloc to fail to allocate memory because there
			 * are no pages available. A short sleep may allow the page reclaimer to
			 * free enough pages for a small allocation.
			 *
			 * For larger allocations, the page_alloc code is racing against the page
			 * reclaimer. If the page reclaimer can stay ahead of page_alloc, the
			 * __vmalloc will succeed. But if page_alloc overtakes the page reclaimer,
			 * the allocation fails. It is possible that more retries will succeed.
			 */
			for (;;) {
				p = __vmalloc(size, gfp_flags | __GFP_NOWARN);
				if (p != NULL)
					break;

				if (jiffies_to_msecs(jiffies - start_time) > 1000) {
					/* Try one more time, logging a failure for this call. */
					p = __vmalloc(size, gfp_flags);
					break;
				}

				fsleep(1000);
			}

			if (p == NULL) {
				vdo_free(block);
			} else {
				block->ptr = p;
				block->size = PAGE_ALIGN(size);
				add_vmalloc_block(block);
			}
		}
	}

	if (allocations_restricted)
		memalloc_noio_restore(noio_flags);

	if (unlikely(p == NULL)) {
		vdo_log_error("Could not allocate %zu bytes for %s in %u msecs",
			      size, what, jiffies_to_msecs(jiffies - start_time));
		return -ENOMEM;
	}

	*((void **) ptr) = p;
	return VDO_SUCCESS;
}
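/*
 * Typical call site (a sketch; "struct example" and "object" are hypothetical). Callers normally
 * go through the vdo_allocate() macro used above, which derives the size and alignment for a
 * counted array of a given type and forwards to vdo_allocate_memory():
 *
 *	struct example *object;
 *	int result = vdo_allocate(1, struct example, __func__, &object);
 *
 *	if (result != VDO_SUCCESS)
 *		return result;
 *	// object now points to zeroed storage; release it with vdo_free(object).
 */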
/*
 * Allocate storage based on memory size, failing immediately if the required memory is not
 * available. The memory will be zeroed.
 *
 * @size: The size of an object
 * @what: What is being allocated (for error logging)
 *
 * Return: pointer to the allocated memory, or NULL if the required space is not available.
 */
void *vdo_allocate_memory_nowait(size_t size, const char *what __maybe_unused)
{
	void *p = kmalloc(size, GFP_NOWAIT | __GFP_ZERO);

	if (p != NULL)
		add_kmalloc_block(ksize(p));

	return p;
}

void vdo_free(void *ptr)
{
	if (ptr != NULL) {
		if (is_vmalloc_addr(ptr)) {
			remove_vmalloc_block(ptr);
			vfree(ptr);
		} else {
			remove_kmalloc_block(ksize(ptr));
			kfree(ptr);
		}
	}
}

/*
 * Reallocate dynamically allocated memory. There are no alignment guarantees for the reallocated
 * memory. If the new memory is larger than the old memory, the new space will be zeroed.
 *
 * @ptr: The memory to reallocate
 * @old_size: The old size of the memory
 * @size: The new size to allocate
 * @what: What is being allocated (for error logging)
 * @new_ptr: A pointer to hold the reallocated pointer
 *
 * Return: VDO_SUCCESS or an error code
 */
int vdo_reallocate_memory(void *ptr, size_t old_size, size_t size, const char *what,
			  void *new_ptr)
{
	int result;

	if (size == 0) {
		vdo_free(ptr);
		*(void **) new_ptr = NULL;
		return VDO_SUCCESS;
	}

	result = vdo_allocate(size, char, what, new_ptr);
	if (result != VDO_SUCCESS)
		return result;

	if (ptr != NULL) {
		if (old_size < size)
			size = old_size;

		memcpy(*((void **) new_ptr), ptr, size);
		vdo_free(ptr);
	}

	return VDO_SUCCESS;
}
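/*
 * Reallocation example (a sketch; the buffer and sizes are hypothetical):
 *
 *	char *buffer;   // currently holds a 128-byte allocation
 *	int result = vdo_reallocate_memory(buffer, 128, 256, __func__, &buffer);
 *
 *	// On VDO_SUCCESS, the first 128 bytes were copied into the new block, the
 *	// remaining 128 bytes are zeroed, and the old block has been freed.
 */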
int vdo_duplicate_string(const char *string, const char *what, char **new_string)
{
	int result;
	u8 *dup;

	result = vdo_allocate(strlen(string) + 1, u8, what, &dup);
	if (result != VDO_SUCCESS)
		return result;

	memcpy(dup, string, strlen(string) + 1);
	*new_string = dup;
	return VDO_SUCCESS;
}

void vdo_memory_init(void)
{
	spin_lock_init(&memory_stats.lock);
	vdo_initialize_thread_registry(&allocating_threads);
}

void vdo_memory_exit(void)
{
	VDO_ASSERT_LOG_ONLY(memory_stats.kmalloc_bytes == 0,
			    "kmalloc memory used (%zd bytes in %zd blocks) is returned to the kernel",
			    memory_stats.kmalloc_bytes, memory_stats.kmalloc_blocks);
	VDO_ASSERT_LOG_ONLY(memory_stats.vmalloc_bytes == 0,
			    "vmalloc memory used (%zd bytes in %zd blocks) is returned to the kernel",
			    memory_stats.vmalloc_bytes, memory_stats.vmalloc_blocks);
	vdo_log_debug("peak usage %zd bytes", memory_stats.peak_bytes);
}

void vdo_get_memory_stats(u64 *bytes_used, u64 *peak_bytes_used)
{
	unsigned long flags;

	spin_lock_irqsave(&memory_stats.lock, flags);
	*bytes_used = memory_stats.kmalloc_bytes + memory_stats.vmalloc_bytes;
	*peak_bytes_used = memory_stats.peak_bytes;
	spin_unlock_irqrestore(&memory_stats.lock, flags);
}

/*
 * Report stats on any allocated memory that we're tracking. Not all allocation types are
 * guaranteed to be tracked in bytes (e.g., bios).
 */
void vdo_report_memory_usage(void)
{
	unsigned long flags;
	u64 kmalloc_blocks;
	u64 kmalloc_bytes;
	u64 vmalloc_blocks;
	u64 vmalloc_bytes;
	u64 peak_usage;
	u64 total_bytes;

	spin_lock_irqsave(&memory_stats.lock, flags);
	kmalloc_blocks = memory_stats.kmalloc_blocks;
	kmalloc_bytes = memory_stats.kmalloc_bytes;
	vmalloc_blocks = memory_stats.vmalloc_blocks;
	vmalloc_bytes = memory_stats.vmalloc_bytes;
	peak_usage = memory_stats.peak_bytes;
	spin_unlock_irqrestore(&memory_stats.lock, flags);

	total_bytes = kmalloc_bytes + vmalloc_bytes;
	vdo_log_info("current module memory tracking (actual allocation sizes, not requested):");
	vdo_log_info("  %llu bytes in %llu kmalloc blocks",
		     (unsigned long long) kmalloc_bytes,
		     (unsigned long long) kmalloc_blocks);
	vdo_log_info("  %llu bytes in %llu vmalloc blocks",
		     (unsigned long long) vmalloc_bytes,
		     (unsigned long long) vmalloc_blocks);
	vdo_log_info("  total %llu bytes, peak usage %llu bytes",
		     (unsigned long long) total_bytes, (unsigned long long) peak_usage);
}