Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0
  2/* Copyright (c) 2019 Facebook
  3 *
  4 * This program is free software; you can redistribute it and/or
  5 * modify it under the terms of version 2 of the GNU General Public
  6 * License as published by the Free Software Foundation.
  7 *
  8 * Example program for Host Bandwidth Managment
  9 *
 10 * This program loads a cgroup skb BPF program to enforce cgroup output
 11 * (egress) or input (ingress) bandwidth limits.
 12 *
 13 * USAGE: hbm [-d] [-l] [-n <id>] [-r <rate>] [-s] [-t <secs>] [-w] [-h] [prog]
 14 *   Where:
 15 *    -d	Print BPF trace debug buffer
 16 *    -l	Also limit flows doing loopback
 17 *    -n <#>	To create cgroup \"/hbm#\" and attach prog
 18 *		Default is /hbm1
 19 *    --no_cn   Do not return cn notifications
 20 *    -r <rate>	Rate limit in Mbps
 21 *    -s	Get HBM stats (marked, dropped, etc.)
 22 *    -t <time>	Exit after specified seconds (default is 0)
 23 *    -w	Work conserving flag. cgroup can increase its bandwidth
 24 *		beyond the rate limit specified while there is available
 25 *		bandwidth. Current implementation assumes there is only
 26 *		NIC (eth0), but can be extended to support multiple NICs.
 27 *		Currrently only supported for egress.
 28 *    -h	Print this info
 29 *    prog	BPF program file name. Name defaults to hbm_out_kern.o
 30 */
 31
 32#define _GNU_SOURCE
 33
 34#include <stdio.h>
 35#include <stdlib.h>
 36#include <assert.h>
 37#include <sys/resource.h>
 38#include <sys/time.h>
 39#include <unistd.h>
 40#include <errno.h>
 41#include <fcntl.h>
 42#include <linux/unistd.h>
 43#include <linux/compiler.h>
 44
 45#include <linux/bpf.h>
 46#include <bpf/bpf.h>
 47#include <getopt.h>
 48
 49#include "bpf_rlimit.h"
 50#include "cgroup_helpers.h"
 51#include "hbm.h"
 52#include "bpf_util.h"
 53#include <bpf/libbpf.h>
 54
 55bool outFlag = true;
 56int minRate = 1000;		/* cgroup rate limit in Mbps */
 57int rate = 1000;		/* can grow if rate conserving is enabled */
 58int dur = 1;
 59bool stats_flag;
 60bool loopback_flag;
 61bool debugFlag;
 62bool work_conserving_flag;
 63bool no_cn_flag;
 64bool edt_flag;
 65
 66static void Usage(void);
 67static void read_trace_pipe2(void);
 68static void do_error(char *msg, bool errno_flag);
 69
 70#define DEBUGFS "/sys/kernel/debug/tracing/"
 71
 72static struct bpf_program *bpf_prog;
 73static struct bpf_object *obj;
 74static int queue_stats_fd;
 75
 76static void read_trace_pipe2(void)
 77{
 78	int trace_fd;
 79	FILE *outf;
 80	char *outFname = "hbm_out.log";
 81
 82	trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
 83	if (trace_fd < 0) {
 84		printf("Error opening trace_pipe\n");
 85		return;
 86	}
 87
 88//	Future support of ingress
 89//	if (!outFlag)
 90//		outFname = "hbm_in.log";
 91	outf = fopen(outFname, "w");
 92
 93	if (outf == NULL)
 94		printf("Error creating %s\n", outFname);
 95
 96	while (1) {
 97		static char buf[4097];
 98		ssize_t sz;
 99
100		sz = read(trace_fd, buf, sizeof(buf) - 1);
101		if (sz > 0) {
102			buf[sz] = 0;
103			puts(buf);
104			if (outf != NULL) {
105				fprintf(outf, "%s\n", buf);
106				fflush(outf);
107			}
108		}
109	}
110}
111
112static void do_error(char *msg, bool errno_flag)
113{
114	if (errno_flag)
115		printf("ERROR: %s, errno: %d\n", msg, errno);
116	else
117		printf("ERROR: %s\n", msg);
118	exit(1);
119}
120
121static int prog_load(char *prog)
122{
123	obj = bpf_object__open_file(prog, NULL);
124	if (libbpf_get_error(obj)) {
125		printf("ERROR: opening BPF object file failed\n");
126		return 1;
127	}
128
129	/* load BPF program */
130	if (bpf_object__load(obj)) {
131		printf("ERROR: loading BPF object file failed\n");
132		goto err;
133	}
134
135	bpf_prog = bpf_object__find_program_by_title(obj, "cgroup_skb/egress");
136	if (!bpf_prog) {
137		printf("ERROR: finding a prog in obj file failed\n");
138		goto err;
139	}
140
141	queue_stats_fd = bpf_object__find_map_fd_by_name(obj, "queue_stats");
142	if (queue_stats_fd < 0) {
143		printf("ERROR: finding a map in obj file failed\n");
144		goto err;
145	}
146
147	return 0;
148
149err:
150	bpf_object__close(obj);
151	return 1;
152}
153
154static int run_bpf_prog(char *prog, int cg_id)
155{
156	struct hbm_queue_stats qstats = {0};
157	char cg_dir[100], cg_pin_path[100];
158	struct bpf_link *link = NULL;
159	int key = 0;
160	int cg1 = 0;
161	int rc = 0;
162
163	sprintf(cg_dir, "/hbm%d", cg_id);
164	rc = prog_load(prog);
165	if (rc != 0)
166		return rc;
167
168	if (setup_cgroup_environment()) {
169		printf("ERROR: setting cgroup environment\n");
170		goto err;
171	}
172	cg1 = create_and_get_cgroup(cg_dir);
173	if (!cg1) {
174		printf("ERROR: create_and_get_cgroup\n");
175		goto err;
176	}
177	if (join_cgroup(cg_dir)) {
178		printf("ERROR: join_cgroup\n");
179		goto err;
180	}
181
182	qstats.rate = rate;
183	qstats.stats = stats_flag ? 1 : 0;
184	qstats.loopback = loopback_flag ? 1 : 0;
185	qstats.no_cn = no_cn_flag ? 1 : 0;
186	if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY)) {
187		printf("ERROR: Could not update map element\n");
188		goto err;
189	}
190
191	if (!outFlag)
192		bpf_program__set_expected_attach_type(bpf_prog, BPF_CGROUP_INET_INGRESS);
193
194	link = bpf_program__attach_cgroup(bpf_prog, cg1);
195	if (libbpf_get_error(link)) {
196		fprintf(stderr, "ERROR: bpf_program__attach_cgroup failed\n");
197		goto err;
198	}
199
200	sprintf(cg_pin_path, "/sys/fs/bpf/hbm%d", cg_id);
201	rc = bpf_link__pin(link, cg_pin_path);
202	if (rc < 0) {
203		printf("ERROR: bpf_link__pin failed: %d\n", rc);
204		goto err;
205	}
206
207	if (work_conserving_flag) {
208		struct timeval t0, t_last, t_new;
209		FILE *fin;
210		unsigned long long last_eth_tx_bytes, new_eth_tx_bytes;
211		signed long long last_cg_tx_bytes, new_cg_tx_bytes;
212		signed long long delta_time, delta_bytes, delta_rate;
213		int delta_ms;
214#define DELTA_RATE_CHECK 10000		/* in us */
215#define RATE_THRESHOLD 9500000000	/* 9.5 Gbps */
216
217		bpf_map_lookup_elem(queue_stats_fd, &key, &qstats);
218		if (gettimeofday(&t0, NULL) < 0)
219			do_error("gettimeofday failed", true);
220		t_last = t0;
221		fin = fopen("/sys/class/net/eth0/statistics/tx_bytes", "r");
222		if (fscanf(fin, "%llu", &last_eth_tx_bytes) != 1)
223			do_error("fscanf fails", false);
224		fclose(fin);
225		last_cg_tx_bytes = qstats.bytes_total;
226		while (true) {
227			usleep(DELTA_RATE_CHECK);
228			if (gettimeofday(&t_new, NULL) < 0)
229				do_error("gettimeofday failed", true);
230			delta_ms = (t_new.tv_sec - t0.tv_sec) * 1000 +
231				(t_new.tv_usec - t0.tv_usec)/1000;
232			if (delta_ms > dur * 1000)
233				break;
234			delta_time = (t_new.tv_sec - t_last.tv_sec) * 1000000 +
235				(t_new.tv_usec - t_last.tv_usec);
236			if (delta_time == 0)
237				continue;
238			t_last = t_new;
239			fin = fopen("/sys/class/net/eth0/statistics/tx_bytes",
240				    "r");
241			if (fscanf(fin, "%llu", &new_eth_tx_bytes) != 1)
242				do_error("fscanf fails", false);
243			fclose(fin);
244			printf("  new_eth_tx_bytes:%llu\n",
245			       new_eth_tx_bytes);
246			bpf_map_lookup_elem(queue_stats_fd, &key, &qstats);
247			new_cg_tx_bytes = qstats.bytes_total;
248			delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes;
249			last_eth_tx_bytes = new_eth_tx_bytes;
250			delta_rate = (delta_bytes * 8000000) / delta_time;
251			printf("%5d - eth_rate:%.1fGbps cg_rate:%.3fGbps",
252			       delta_ms, delta_rate/1000000000.0,
253			       rate/1000.0);
254			if (delta_rate < RATE_THRESHOLD) {
255				/* can increase cgroup rate limit, but first
256				 * check if we are using the current limit.
257				 * Currently increasing by 6.25%, unknown
258				 * if that is the optimal rate.
259				 */
260				int rate_diff100;
261
262				delta_bytes = new_cg_tx_bytes -
263					last_cg_tx_bytes;
264				last_cg_tx_bytes = new_cg_tx_bytes;
265				delta_rate = (delta_bytes * 8000000) /
266					delta_time;
267				printf(" rate:%.3fGbps",
268				       delta_rate/1000000000.0);
269				rate_diff100 = (((long long)rate)*1000000 -
270						     delta_rate) * 100 /
271					(((long long) rate) * 1000000);
272				printf("  rdiff:%d", rate_diff100);
273				if (rate_diff100  <= 3) {
274					rate += (rate >> 4);
275					if (rate > RATE_THRESHOLD / 1000000)
276						rate = RATE_THRESHOLD / 1000000;
277					qstats.rate = rate;
278					printf(" INC\n");
279				} else {
280					printf("\n");
281				}
282			} else {
283				/* Need to decrease cgroup rate limit.
284				 * Currently decreasing by 12.5%, unknown
285				 * if that is optimal
286				 */
287				printf(" DEC\n");
288				rate -= (rate >> 3);
289				if (rate < minRate)
290					rate = minRate;
291				qstats.rate = rate;
292			}
293			if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY))
294				do_error("update map element fails", false);
295		}
296	} else {
297		sleep(dur);
298	}
299	// Get stats!
300	if (stats_flag && bpf_map_lookup_elem(queue_stats_fd, &key, &qstats)) {
301		char fname[100];
302		FILE *fout;
303
304		if (!outFlag)
305			sprintf(fname, "hbm.%d.in", cg_id);
306		else
307			sprintf(fname, "hbm.%d.out", cg_id);
308		fout = fopen(fname, "w");
309		fprintf(fout, "id:%d\n", cg_id);
310		fprintf(fout, "ERROR: Could not lookup queue_stats\n");
311	} else if (stats_flag && qstats.lastPacketTime >
312		   qstats.firstPacketTime) {
313		long long delta_us = (qstats.lastPacketTime -
314				      qstats.firstPacketTime)/1000;
315		unsigned int rate_mbps = ((qstats.bytes_total -
316					   qstats.bytes_dropped) * 8 /
317					  delta_us);
318		double percent_pkts, percent_bytes;
319		char fname[100];
320		FILE *fout;
321		int k;
322		static const char *returnValNames[] = {
323			"DROP_PKT",
324			"ALLOW_PKT",
325			"DROP_PKT_CWR",
326			"ALLOW_PKT_CWR"
327		};
328#define RET_VAL_COUNT 4
329
330// Future support of ingress
331//		if (!outFlag)
332//			sprintf(fname, "hbm.%d.in", cg_id);
333//		else
334		sprintf(fname, "hbm.%d.out", cg_id);
335		fout = fopen(fname, "w");
336		fprintf(fout, "id:%d\n", cg_id);
337		fprintf(fout, "rate_mbps:%d\n", rate_mbps);
338		fprintf(fout, "duration:%.1f secs\n",
339			(qstats.lastPacketTime - qstats.firstPacketTime) /
340			1000000000.0);
341		fprintf(fout, "packets:%d\n", (int)qstats.pkts_total);
342		fprintf(fout, "bytes_MB:%d\n", (int)(qstats.bytes_total /
343						     1000000));
344		fprintf(fout, "pkts_dropped:%d\n", (int)qstats.pkts_dropped);
345		fprintf(fout, "bytes_dropped_MB:%d\n",
346			(int)(qstats.bytes_dropped /
347						       1000000));
348		// Marked Pkts and Bytes
349		percent_pkts = (qstats.pkts_marked * 100.0) /
350			(qstats.pkts_total + 1);
351		percent_bytes = (qstats.bytes_marked * 100.0) /
352			(qstats.bytes_total + 1);
353		fprintf(fout, "pkts_marked_percent:%6.2f\n", percent_pkts);
354		fprintf(fout, "bytes_marked_percent:%6.2f\n", percent_bytes);
355
356		// Dropped Pkts and Bytes
357		percent_pkts = (qstats.pkts_dropped * 100.0) /
358			(qstats.pkts_total + 1);
359		percent_bytes = (qstats.bytes_dropped * 100.0) /
360			(qstats.bytes_total + 1);
361		fprintf(fout, "pkts_dropped_percent:%6.2f\n", percent_pkts);
362		fprintf(fout, "bytes_dropped_percent:%6.2f\n", percent_bytes);
363
364		// ECN CE markings
365		percent_pkts = (qstats.pkts_ecn_ce * 100.0) /
366			(qstats.pkts_total + 1);
367		fprintf(fout, "pkts_ecn_ce:%6.2f (%d)\n", percent_pkts,
368			(int)qstats.pkts_ecn_ce);
369
370		// Average cwnd
371		fprintf(fout, "avg cwnd:%d\n",
372			(int)(qstats.sum_cwnd / (qstats.sum_cwnd_cnt + 1)));
373		// Average rtt
374		fprintf(fout, "avg rtt:%d\n",
375			(int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
376		// Average credit
377		if (edt_flag)
378			fprintf(fout, "avg credit_ms:%.03f\n",
379				(qstats.sum_credit /
380				 (qstats.pkts_total + 1.0)) / 1000000.0);
381		else
382			fprintf(fout, "avg credit:%d\n",
383				(int)(qstats.sum_credit /
384				      (1500 * ((int)qstats.pkts_total ) + 1)));
385
386		// Return values stats
387		for (k = 0; k < RET_VAL_COUNT; k++) {
388			percent_pkts = (qstats.returnValCount[k] * 100.0) /
389				(qstats.pkts_total + 1);
390			fprintf(fout, "%s:%6.2f (%d)\n", returnValNames[k],
391				percent_pkts, (int)qstats.returnValCount[k]);
392		}
393		fclose(fout);
394	}
395
396	if (debugFlag)
397		read_trace_pipe2();
398	goto cleanup;
399
400err:
401	rc = 1;
402
403cleanup:
404	bpf_link__destroy(link);
405	bpf_object__close(obj);
406
407	if (cg1 != -1)
408		close(cg1);
409
410	if (rc != 0)
411		cleanup_cgroup_environment();
412	return rc;
413}
414
415static void Usage(void)
416{
417	printf("This program loads a cgroup skb BPF program to enforce\n"
418	       "cgroup output (egress) bandwidth limits.\n\n"
419	       "USAGE: hbm [-o] [-d]  [-l] [-n <id>] [--no_cn] [-r <rate>]\n"
420	       "           [-s] [-t <secs>] [-w] [-h] [prog]\n"
421	       "  Where:\n"
422	       "    -o         indicates egress direction (default)\n"
423	       "    -d         print BPF trace debug buffer\n"
424	       "    --edt      use fq's Earliest Departure Time\n"
425	       "    -l         also limit flows using loopback\n"
426	       "    -n <#>     to create cgroup \"/hbm#\" and attach prog\n"
427	       "               Default is /hbm1\n"
428	       "    --no_cn    disable CN notifications\n"
429	       "    -r <rate>  Rate in Mbps\n"
430	       "    -s         Update HBM stats\n"
431	       "    -t <time>  Exit after specified seconds (default is 0)\n"
432	       "    -w	       Work conserving flag. cgroup can increase\n"
433	       "               bandwidth beyond the rate limit specified\n"
434	       "               while there is available bandwidth. Current\n"
435	       "               implementation assumes there is only eth0\n"
436	       "               but can be extended to support multiple NICs\n"
437	       "    -h         print this info\n"
438	       "    prog       BPF program file name. Name defaults to\n"
439	       "                 hbm_out_kern.o\n");
440}
441
442int main(int argc, char **argv)
443{
444	char *prog = "hbm_out_kern.o";
445	int  k;
446	int cg_id = 1;
447	char *optstring = "iodln:r:st:wh";
448	struct option loptions[] = {
449		{"no_cn", 0, NULL, 1},
450		{"edt", 0, NULL, 2},
451		{NULL, 0, NULL, 0}
452	};
453
454	while ((k = getopt_long(argc, argv, optstring, loptions, NULL)) != -1) {
455		switch (k) {
456		case 1:
457			no_cn_flag = true;
458			break;
459		case 2:
460			prog = "hbm_edt_kern.o";
461			edt_flag = true;
462			break;
463		case'o':
464			break;
465		case 'd':
466			debugFlag = true;
467			break;
468		case 'l':
469			loopback_flag = true;
470			break;
471		case 'n':
472			cg_id = atoi(optarg);
473			break;
474		case 'r':
475			minRate = atoi(optarg) * 1.024;
476			rate = minRate;
477			break;
478		case 's':
479			stats_flag = true;
480			break;
481		case 't':
482			dur = atoi(optarg);
483			break;
484		case 'w':
485			work_conserving_flag = true;
486			break;
487		case '?':
488			if (optopt == 'n' || optopt == 'r' || optopt == 't')
489				fprintf(stderr,
490					"Option -%c requires an argument.\n\n",
491					optopt);
492		case 'h':
493			__fallthrough;
494		default:
495			Usage();
496			return 0;
497		}
498	}
499
500	if (optind < argc)
501		prog = argv[optind];
502	printf("HBM prog: %s\n", prog != NULL ? prog : "NULL");
503
504	return run_bpf_prog(prog, cg_id);
505}