Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | // SPDX-License-Identifier: GPL-2.0 /* * fill_buf benchmark * * Copyright (C) 2018 Intel Corporation * * Authors: * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, * Fenghua Yu <fenghua.yu@intel.com> */ #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <sys/types.h> #include <sys/wait.h> #include <inttypes.h> #include <string.h> #include "resctrl.h" #define CL_SIZE (64) #define PAGE_SIZE (4 * 1024) #define MB (1024 * 1024) static void sb(void) { #if defined(__i386) || defined(__x86_64) asm volatile("sfence\n\t" : : : "memory"); #endif } static void cl_flush(void *p) { #if defined(__i386) || defined(__x86_64) asm volatile("clflush (%0)\n\t" : : "r"(p) : "memory"); #endif } void mem_flush(unsigned char *buf, size_t buf_size) { unsigned char *cp = buf; size_t i = 0; buf_size = buf_size / CL_SIZE; /* mem size in cache lines */ for (i = 0; i < buf_size; i++) cl_flush(&cp[i * CL_SIZE]); sb(); } /* * Buffer index step advance to workaround HW prefetching interfering with * the measurements. * * Must be a prime to step through all indexes of the buffer. * * Some primes work better than others on some architectures (from MBA/MBM * result stability point of view). */ #define FILL_IDX_MULT 23 static int fill_one_span_read(unsigned char *buf, size_t buf_size) { unsigned int size = buf_size / (CL_SIZE / 2); unsigned int i, idx = 0; unsigned char sum = 0; /* * Read the buffer in an order that is unexpected by HW prefetching * optimizations to prevent them interfering with the caching pattern. * * The read order is (in terms of halves of cachelines): * i * FILL_IDX_MULT % size * The formula is open-coded below to avoiding modulo inside the loop * as it improves MBA/MBM result stability on some architectures. */ for (i = 0; i < size; i++) { sum += buf[idx * (CL_SIZE / 2)]; idx += FILL_IDX_MULT; while (idx >= size) idx -= size; } return sum; } static void fill_one_span_write(unsigned char *buf, size_t buf_size) { unsigned char *end_ptr = buf + buf_size; unsigned char *p; p = buf; while (p < end_ptr) { *p = '1'; p += (CL_SIZE / 2); } } void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) { int ret = 0; while (1) { ret = fill_one_span_read(buf, buf_size); if (once) break; } /* Consume read result so that reading memory is not optimized out. */ *value_sink = ret; } static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once) { while (1) { fill_one_span_write(buf, buf_size); if (once) break; } } unsigned char *alloc_buffer(size_t buf_size, int memflush) { void *buf = NULL; uint64_t *p64; size_t s64; int ret; ret = posix_memalign(&buf, PAGE_SIZE, buf_size); if (ret < 0) return NULL; /* Initialize the buffer */ p64 = buf; s64 = buf_size / sizeof(uint64_t); while (s64 > 0) { *p64 = (uint64_t)rand(); p64 += (CL_SIZE / sizeof(uint64_t)); s64 -= (CL_SIZE / sizeof(uint64_t)); } /* Flush the memory before using to avoid "cache hot pages" effect */ if (memflush) mem_flush(buf, buf_size); return buf; } int run_fill_buf(size_t buf_size, int memflush, int op, bool once) { unsigned char *buf; buf = alloc_buffer(buf_size, memflush); if (!buf) return -1; if (op == 0) fill_cache_read(buf, buf_size, once); else fill_cache_write(buf, buf_size, once); free(buf); return 0; } |