1#! /usr/bin/perl
2#
3# Copyright © 2017 Intel Corporation
4#
5# Permission is hereby granted, free of charge, to any person obtaining a
6# copy of this software and associated documentation files (the "Software"),
7# to deal in the Software without restriction, including without limitation
8# the rights to use, copy, modify, merge, publish, distribute, sublicense,
9# and/or sell copies of the Software, and to permit persons to whom the
10# Software is furnished to do so, subject to the following conditions:
11#
12# The above copyright notice and this permission notice (including the next
13# paragraph) shall be included in all copies or substantial portions of the
14# Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22# IN THE SOFTWARE.
23#
24
25use strict;
26use warnings;
27use 5.010;
28
29use Getopt::Std;
30
# Tool and data locations. Everything is looked up below the physical
# current working directory as reported by `pwd -P`.
my $igt_root = `pwd -P`;
chomp $igt_root;
my $wsim = "$igt_root/benchmarks/gem_wsim";
my $wrk_root = "$igt_root/benchmarks/wsim";
my $tracepl = "$igt_root/scripts/trace.pl";

# Defaults; most of these are overridden from the command line later on.
my $tolerance = 0.01;		# calibration tolerance (fraction)
my $client_target_s = 10;	# target workload duration in seconds
my $idle_tolerance_pct = 2.0;	# engine idleness tolerance in percent
my $verbose = 0;
my $gt2 = 0;
my $show_cmds = 0;
my $realtime_target = 0;
my $wps_target = 0;
my $wps_target_param = 0;
my $multi_mode = 0;
my @multi_workloads;
my $w_direct;
my $balancer;
my $nop;
my %opts;

# Balancers tried by default, in evaluation order.
my @balancers = qw(
	rr rand qd qdr qdavg rt rtr rtavg
	context busy busy-avg i915
);

# Balancers for which the -H variants, the -R flag and the -G variants
# (respectively) are not used.
my %bal_skip_H = map { $_ => 1 } qw(rr rand context busy busy-avg i915);
my %bal_skip_R = map { $_ => 1 } qw(i915);
my %bal_skip_G = map { $_ => 1 } qw(i915);

# Workload descriptions (relative to $wrk_root) evaluated by default.
my @workloads = qw(
	media_load_balance_17i7.wsim
	media_load_balance_19.wsim
	media_load_balance_4k12u7.wsim
	media_load_balance_fhd26u7.wsim
	media_load_balance_hd01.wsim
	media_load_balance_hd06mp2.wsim
	media_load_balance_hd12.wsim
	media_load_balance_hd17i4.wsim
	media_1n2_480p.wsim
	media_1n3_480p.wsim
	media_1n4_480p.wsim
	media_1n5_480p.wsim
	media_1n2_asy.wsim
	media_1n3_asy.wsim
	media_1n4_asy.wsim
	media_1n5_asy.wsim
	media_mfe2_480p.wsim
	media_mfe3_480p.wsim
	media_mfe4_480p.wsim
	media_nn_1080p.wsim
	media_nn_480p.wsim
);
81
# Echo an external command line ("+++ <command>", preceded by an empty
# line) when the file-level $show_cmds flag is set; otherwise do nothing.
sub show_cmd
{
	my ($cmdline) = @_;

	if ($show_cmds) {
		say "\n+++ $cmdline";
	}
}
88
# Run gem_wsim without arguments and pick the nop calibration out of its
# output.
#
# Every line matching "Nop calibration for <N>us delay is <M>." is
# considered; the <M> of the last matching line is returned.
#
# Dies if the command cannot be started or if no non-zero calibration
# value was found.
sub calibrate_nop
{
	my $nop;
	my $cmd = "$wsim";

	show_cmd($cmd);

	# Lexical handle: the read end of the pipe is private to this call.
	open(my $fh, '-|', $cmd) or die "Failed to run '$cmd': $!";
	while (my $line = <$fh>) {
		chomp $line;
		if ($line =~ /Nop calibration for (\d+)us delay is (\d+)./) {
			$nop = $2;
		}
	}
	close $fh;

	die "No nop calibration found in the output of '$cmd'" unless $nop;

	return $nop;
}
110
# Tell whether a workload description can be load balanced.
#
# $wrk is a file name relative to $wrk_root. Returns 1 if any line of the
# file contains ".VCS.", 0 otherwise. Dies if the file cannot be opened.
sub can_balance_workload
{
	my ($wrk) = @_;
	my $path = "$wrk_root/$wrk";
	my $res = 0;

	# Three-argument open so that characters in the (user supplied)
	# workload name are never interpreted as an open mode.
	open(my $fh, '<', $path) or die "Cannot open workload '$path': $!";
	while (my $line = <$fh>) {
		if ($line =~ /\.VCS\./) {
			$res = 1;
			last;
		}
	}
	close $fh;

	return $res;
}
128
# Append the real-time pacing arguments to a gem_wsim argument list.
#
# When $realtime_target (workloads per second) is positive, '-a' and
# 'p.<period>' are appended, where <period> is int(1000000 /
# $realtime_target). Otherwise the list is returned unchanged.
#
# Returns the (possibly extended) argument list.
sub add_wps_arg
{
	my (@args) = @_;

	return @args if $realtime_target <= 0;

	my $period = int(1000000 / $realtime_target);

	# Double quotes are required here: with single quotes the literal
	# text 'p.$period' was passed on instead of the computed period.
	push @args, '-a', "p.$period";

	return @args;
}
142
# Run gem_wsim with the given arguments and collect its results.
#
# The argument list is extended by add_wps_arg() and, when $gt2 is set,
# by '-2'. The resulting command line is joined with spaces and run
# through a pipe.
#
# Returns ($time, $wps, \@per_client) where $time and $wps come from the
# last "<t>s elapsed (<wps> workloads/s)" summary line and @per_client is
# indexed by the number in front of the
# "<id>: <t>s elapsed (<n> cycles, <wps> workloads/s)" lines. $time and
# $wps are undefined when no summary line was seen.
#
# Dies if the command cannot be started.
sub run_workload
{
	my (@args) = @_;
	my ($time, $wps);
	my @ret;

	@args = add_wps_arg(@args);
	push @args, '-2' if $gt2;

	my $cmd = join ' ', $wsim, @args;
	show_cmd($cmd);

	# Lexical handle instead of a bareword one shared across the file.
	open(my $fh, '-|', $cmd) or die "Failed to run '$cmd': $!";
	while (my $line = <$fh>) {
		chomp $line;
		if ($line =~ /^(\d+\.\d+)s elapsed \((\d+\.?\d+) workloads\/s\)$/) {
			$time = $1;
			$wps = $2;
		} elsif ($line =~ /(\d+)\: \d+\.\d+s elapsed \(\d+ cycles, (\d+\.?\d+) workloads\/s\)/) {
			$ret[$1] = $2;
		}
	}
	close $fh;

	return ($time, $wps, \@ret);
}
170
# Run a command and save everything it prints on stdout into a file.
#
#   $cmd  - command line to run through a pipe
#   $file - path of the file to (over)write
#
# Dies if the file cannot be opened, the command cannot be started or the
# output cannot be written completely.
sub dump_cmd
{
	my ($cmd, $file) = @_;

	show_cmd("$cmd > $file");

	open(my $out, '>', $file) or die "Cannot open '$file' for writing: $!";
	open(my $in, '-|', $cmd) or die "Failed to run '$cmd': $!";
	while (my $line = <$in>) {
		print {$out} $line;
	}
	close $in;

	# Buffered write errors only show up when the file is closed.
	close $out or die "Error writing '$file': $!";
}
185
# Trace one workload run and extract per-engine idleness.
#
#   $wrk - workload file name relative to $wrk_root, or, when $w_direct is
#          defined, a string of gem_wsim arguments
#   $b   - balancer arguments as one string, or '<none>'
#   $r   - value passed to gem_wsim as -r
#   $c   - value passed to gem_wsim as -c
#
# Runs "$tracepl --trace $wsim ..." (removing any existing perf.data
# first), then parses the output of "perf script | $tracepl". Engines
# reporting fewer than 16 + $r * $c / 2 batches are left out of the result
# and ignored (-i) when the HTML report is generated.
#
# Two files are written to the current directory, named after the
# workload, balancer, $r and $c: "<name>.trace" with the raw perf script
# output and "<name>.html" with the $tracepl HTML output.
#
# Returns a hash reference mapping the engine id (the text after "Ring")
# to its idle percentage, plus the key 'gpu' when a "GPU: ...% idle" line
# is present. Dies if any of the external steps cannot be run.
sub trace_workload
{
	# The balancer string is not called $b here so that it cannot be
	# confused with the sort comparison variable.
	my ($wrk, $bal, $r, $c) = @_;
	my @args = ($tracepl, '--trace', $wsim, '-q', '-n', $nop, '-r', $r, '-c', $c);
	my $min_batches = 16 + $r * $c / 2;
	my @skip_engine;
	my %engines;
	my ($cmd, $file);

	push @args, '-2' if $gt2;

	unless ($bal eq '<none>') {
		push @args, '-R';
		push @args, split /\s+/, $bal;
	}

	if (defined $w_direct) {
		push @args, split /\s+/, $wrk;
	} else {
		push @args, '-w';
		push @args, $wrk_root . '/' . $wrk;
	}

	show_cmd(join ' ', @args);
	if (-e 'perf.data') {
		unlink 'perf.data' or die "Cannot remove old perf.data: $!";
	}
	system(@args) == 0 or die "Tracing failed (status $?): @args";

	$cmd = "perf script | $tracepl";
	show_cmd($cmd);
	open(my $fh, '-|', $cmd) or die "Failed to run '$cmd': $!";
	while (my $line = <$fh>) {
		chomp $line;
		if ($line =~ /Ring(\S+): (\d+) batches.*?(\d+\.?\d+)% idle,/) {
			if ($2 >= $min_batches) {
				$engines{$1} = $3;
			} else {
				push @skip_engine, $1;
			}
		} elsif ($line =~ /GPU: (\d+\.?\d+)% idle/) {
			$engines{'gpu'} = $1;
		}
	}
	close $fh;

	# Build a file name out of the run parameters. The workload root is
	# a filesystem path, so it has to be matched literally (\Q...\E)
	# rather than as a regular expression.
	$wrk =~ s/\Q$wrk_root\E//g;
	$wrk =~ s/\.wsim//g;
	$wrk =~ s/-w/W/g;
	$wrk =~ s/[ -]/_/g;
	$wrk =~ s/\//-/g;
	$bal =~ s/[ <>]/_/g;
	$file = "${wrk}_${bal}_-r${r}_-c${c}";

	dump_cmd('perf script', "${file}.trace");

	$cmd = "perf script | $tracepl --html -x ctxsave -s -c ";
	$cmd .= join ' ', map("-i $_", @skip_engine);

	dump_cmd($cmd, "${file}.html");

	return \%engines;
}
249
# Find the repeat count (-r) for which one run of the workload takes about
# $client_target_s seconds.
#
# Starts from $realtime_target * $client_target_s in real-time mode and
# from 23 otherwise, and rescales the count from the measured workloads
# per second after every run. The accepted relative error starts at
# $tolerance and is relaxed after every third unsuccessful run; the search
# is abandoned once it would exceed 0.2.
#
# Returns ($count, $error): the last computed repeat count and the
# relative duration error of the last run.
sub calibrate_workload
{
	my ($wrk) = @_;
	my $limit = $tolerance;
	my $attempts = 0;
	my ($count, $error);

	if ($realtime_target > 0) {
		$count = $realtime_target * $client_target_s;
	} else {
		$count = 23;
	}

	while (1) {
		my @args = ('-n', $nop, '-r', $count);

		if (defined $w_direct) {
			push @args, split(/\s+/, $wrk);
		} else {
			push @args, '-w', $wrk_root . '/' . $wrk;
		}

		my ($time, $wps) = run_workload(@args);

		# With a pass-through command line the rate is derived from
		# the repeat count and the measured duration.
		$wps = $count / $time if $w_direct;

		$error = abs($time - $client_target_s) / $client_target_s;
		last if $error <= $limit;

		$count = int($wps * $client_target_s);

		$attempts++;
		if ($attempts >= 3) {
			$limit *= 1.2 + $limit;
			$attempts = 0;
		}
		last if $limit > 0.2;
	}

	return ($count, $error);
}
288
# Increase the number of clients until throughput stops scaling.
#
#   $wrk     - workload file name, or gem_wsim arguments when $w_direct
#              is defined
#   $rr      - repeat count used for the first run
#   $verbose - print the result of every step when true
#   @args    - extra gem_wsim arguments
#
# The target is $realtime_target when positive, otherwise $wps_target.
# Without a target (<= 0) the search stops when adding a client improves
# the total by less than $tolerance, or when the total drops by at least
# $tolerance below the best value seen with more than one client. With a
# target it stops once the per-client rate falls short of the target by at
# least $tolerance.
#
# The search returns right after the single client run when the workload
# string holds more than one -w/-W, in multi-workload mode, or when a
# negative -T was given and $wps_target is still zero.
#
# Returns ($clients, $wps, $single_client_wps, $per_workload_wps_ref).
sub find_saturation_point
{
	my ($wrk, $rr, $verbose, @args) = @_;
	my ($last_wps, $swps, $wwps, $maxc, $wcnt);
	my $max = 0;
	my $r = $rr;
	my $c = 1;

	my $target = $wps_target;
	$target = $realtime_target if $realtime_target > 0;

	push @args, '-v' if $multi_mode and $w_direct;

	if (defined $w_direct) {
		my @markers = $wrk =~ /-[wW]/gi;

		push @args, split(/\s+/, $wrk);
		$wcnt = scalar @markers;
	} else {
		push @args, '-w', $wrk_root . '/' . $wrk;
		$wcnt = 1;
	}

	while (1) {
		my ($time, $wps);

		($time, $wps, $wwps) = run_workload(@args, '-r', $r, '-c', $c);

		say "        $c clients is $wps wps." if $verbose;

		if ($c == 1) {
			$swps = $wps;
			if ($wcnt > 1 or $multi_mode or
			    ($wps_target_param < 0 and $wps_target == 0)) {
				return ($c, $wps, $swps, $wwps);
			}
		} else {
			my $delta;

			if ($target <= 0) {
				($max, $maxc) = ($wps, $c) if $wps > $max;

				$delta = ($wps - $last_wps) / $last_wps;
				if ($delta > 0) {
					last if $delta < $tolerance;
				} else {
					$delta = ($wps - $max) / $max;
					last if abs($delta) >= $tolerance;
				}
			} else {
				$delta = ($wps / $c - $target) / $target;
				last if $delta < 0 and abs($delta) >= $tolerance;
			}

			# Rescale the repeat count using the last duration.
			$r = int($rr * ($client_target_s / $time));
		}

		$last_wps = $wps;
		$c++;
	}

	return ($maxc, $max, $swps, $wwps) if $target <= 0;

	# The last run missed the target, report the one before it.
	return ($c - 1, $last_wps, $swps, $wwps);
}
356
# Command line handling: parse the options, print the help text on -h,
# apply the overrides to the file-level settings and print a summary of
# the configuration. The nop calibration is taken from -n or measured with
# calibrate_nop(). With -b the balancer search is skipped and execution
# continues at the VERIFY label.
getopts('hv2xmn:b:W:B:r:t:i:R:T:w:', \%opts);

if (defined $opts{'h'}) {
	print <<ENDHELP;
Supported options:

  -h          Help text.
  -v          Be verbose.
  -x          Show external commands.
  -2          Run gem_wsim in GT2 mode.
  -n num      Nop calibration.
  -b str      Balancer to pre-select.
              Skips balancer auto-selection.
              Passed straight to gem_wsim so use like -b "-b qd -R"
  -W a,b,c    Override the default list of workloads.
  -B a,b,c    Override the default list of balancers.
  -r sec      Target workload duration.
  -t pct      Calibration tolerance.
  -i pct      Engine idleness tolerance.
  -R wps      Run workloads in the real-time mode at wps rate.
  -T wps      Calibrate up to wps/client target instead of GPU saturation.
              Negative values set the target based on the single client
              performance where target = single-client-wps / -N.
  -w str      Pass-through to gem_wsim. Overrides normal workload selection.
  -m          Multi-workload mode. All selected workloads will be run in
              parallel and overall score will be relative to when run
              individually.
ENDHELP
	exit 0;
}

$verbose = 1 if defined $opts{'v'};
$gt2 = 1 if defined $opts{'2'};
$show_cmds = 1 if defined $opts{'x'};
$multi_mode = 1 if defined $opts{'m'};
if (defined $opts{'b'}) {
	# The value is handed to gem_wsim verbatim, so it has to carry its
	# own -b switch.
	die "Option -b expects gem_wsim arguments starting with '-b', e.g. -b \"-b qd -R\""
		unless substr($opts{'b'}, 0, 2) eq '-b';
	$balancer = $opts{'b'};
}
if (defined $opts{'B'}) {
	@balancers = split /,/, $opts{'B'};
} else {
	# The empty entry stands for running without any balancer.
	unshift @balancers, '';
}
@workloads = split /,/, $opts{'W'} if defined $opts{'W'};
$client_target_s = $opts{'r'} if defined $opts{'r'};
$tolerance = $opts{'t'} / 100.0 if defined $opts{'t'};
$idle_tolerance_pct = $opts{'i'} if defined $opts{'i'};
$realtime_target = $opts{'R'} if defined $opts{'R'};
$wps_target = $opts{'T'} if defined $opts{'T'};
$wps_target_param = $wps_target;
$w_direct = $opts{'w'} if defined $opts{'w'};

if ($multi_mode) {
	die "Options -m and -w cannot be combined" if $w_direct; # Not supported
	@multi_workloads = @workloads;
}

@workloads = ($w_direct) if defined $w_direct;

say "Workloads:";
print map { "  $_\n" } @workloads;
print "Balancers: ";
say map { "$_," } @balancers;
say "Target workload duration is ${client_target_s}s.";
say "Calibration tolerance is $tolerance.";
say "Real-time mode at ${realtime_target} wps." if $realtime_target > 0;
say "Wps target is ${wps_target} wps." if $wps_target > 0;
say "Multi-workload mode." if $multi_mode;
$nop = $opts{'n'};
$nop = calibrate_nop() unless $nop;
say "Nop calibration is $nop.";

goto VERIFY if defined $balancer;
431
# Tables filled in by the saturation search below.
#
# Keyed by workload: the best combined score (%best_bal), the balancer id
# that achieved it (%best_bid) and all combined scores by balancer id
# (%results).
my (%best_bal, %best_bid, %results);

# Keyed by balancer id, accumulated over all workloads by add_points():
# plain and spread-weighted points for the total rate (%scores, %wscores),
# the single client rate (%cscores, %cwscores) and the combined score
# (%mscores, %mwscores).
my (%scores, %wscores);
my (%cscores, %cwscores);
my (%mscores, %mwscores);
440
# Fold the results of one workload into the running point tables.
#
#   $wps     - hash ref: balancer id => result for this workload
#   $scores  - hash ref: balancer id => accumulated result / best result
#   $wscores - hash ref: balancer id => accumulated result / best result,
#              weighted by (best - worst) / best of this workload
#
# Balancer ids not yet present in $scores are started at zero in both
# accumulators.
sub add_points
{
	my ($wps, $scores, $wscores) = @_;
	my @ascending = sort { $a <=> $b } values %{$wps};
	my ($min, $max) = @ascending[0, -1];
	my $spread = $max - $min;

	die if $spread < 0;

	for my $id (keys %{$wps}) {
		my $ratio = $wps->{$id} / $max;

		unless (exists $scores->{$id}) {
			$scores->{$id} = 0;
			$wscores->{$id} = 0;
		}

		$scores->{$id} += $ratio;
		$wscores->{$id} += $ratio * $spread / $max;
	}
}
467
# Balancer search: every workload is calibrated and then run with each
# balancer / flag combination. In multi-workload mode the individual
# workloads are evaluated first and one extra entry, the command line
# running all of them in parallel, is evaluated last.
my @saturation_workloads;
my %allwps;
my $widx = 0;

if ($multi_mode) {
	@saturation_workloads = @multi_workloads;
	push @saturation_workloads,
	     '-w ' . join(' -w ', map { "$wrk_root/$_" } @workloads);
} else {
	@saturation_workloads = @workloads;
}

for my $wrk (@saturation_workloads) {
	my @args = ("-n $nop");
	my (%wps, %cwps, %mwps);

	# The last multi-mode entry is a ready made gem_wsim command line.
	if ($multi_mode and $widx == $#saturation_workloads) {
		$w_direct = $wrk;
	}

	my $should_b = defined $w_direct ? 1 : can_balance_workload($wrk);

	print "\nEvaluating '$wrk'...";

	my ($r, $error) = calibrate_workload($wrk);
	say " ${client_target_s}s is $r workloads. (error=$error)";

	say "  Finding saturation points for '$wrk'...";

	BALANCER: for my $bal (@balancers) {
		VARIANT: for my $G ('', '-G', '-d', '-G -d') {
			for my $H ('', '-H') {
				my (@xargs, $bid);

				if ($bal eq '') {
					$bid = '<none>';
					print "    No balancing: ";
				} else {
					next VARIANT
						if $G =~ /-G/ and exists $bal_skip_G{$bal};

					@xargs = ("-b $bal");
					push @xargs, '-R' unless exists $bal_skip_R{$bal};
					push @xargs, grep { $_ ne '' } $G, $H;
					$bid = join ' ', @xargs;
					print "    $bal balancer ('$bid'): ";
				}

				my @sat_args = (@args, @xargs);

				# A negative -T target is derived from the
				# single client rate, measured with the
				# target cleared.
				$wps_target = 0 if $wps_target_param < 0;

				my ($clients, $total, $single, $bwwps) =
					find_saturation_point($wrk, $r, 0, @sat_args);

				if ($wps_target_param < 0) {
					$wps_target = $single / -$wps_target_param;

					($clients, $total, $single, $bwwps) =
						find_saturation_point($wrk, $r, 0,
								      @sat_args);
				}

				my $mixed = ($multi_mode && $w_direct);

				if ($mixed) {
					die unless scalar(@multi_workloads) ==
						   scalar(@{$bwwps});
					die unless scalar(@multi_workloads) ==
						   scalar(keys %allwps);

					# Sum of the per workload rates of the
					# mixed run ...
					my $sum = 0;
					$sum += $_ for @{$bwwps};

					# ... normalized by the sum of the rates
					# each workload reached on its own with
					# its best balancer.
					my $tot = 0;
					$tot += $allwps{$_}->{$best_bid{$_}}
						for @multi_workloads;

					$total = 100 * $sum / $tot;

					# Fairness: average of the per workload
					# rates, each relative to its individual
					# best balancer run.
					$single = 0;
					for my $i (0 .. $#multi_workloads) {
						my $name = $multi_workloads[$i];

						$single += 100 * $bwwps->[$i] /
							   $allwps{$name}->{$best_bid{$name}};
					}
					$single /= scalar(@multi_workloads);

					say sprintf('Aggregate (normalized) %.2f%%; fairness %.2f%%',
						    $total, $single);
				} else {
					$allwps{$wrk} = \%wps;
				}

				$wps{$bid} = $total;
				$cwps{$bid} = $single;
				$mwps{$bid} = ($realtime_target > 0 || $wps_target_param > 0)
					      ? $total * $clients
					      : $total + $single;

				say "$clients clients ($total wps, $single wps single client, score=$mwps{$bid})."
				    unless $mixed;

				# One run is enough when the workload cannot
				# be balanced or no balancer is in use; some
				# balancers have no -H variant.
				last BALANCER unless $should_b;
				next BALANCER if $bal eq '';
				next VARIANT if exists $bal_skip_H{$bal};
			}
		}
	}

	$widx++;

	my @by_score = sort { $mwps{$b} <=> $mwps{$a} } keys %mwps;
	my @values = sort { $b <=> $a } values %mwps;
	my $range = 1 - $values[-1] / $values[0];

	$best_bid{$wrk} = $by_score[0];
	$best_bal{$wrk} = $values[0];

	# Individual runs in multi-workload mode only provide the baseline.
	next if $multi_mode and not $w_direct;

	say "  Best balancer is '$best_bid{$wrk}' (range=$range).";

	$results{$wrk} = \%mwps;

	add_points(\%wps, \%scores, \%wscores);
	add_points(\%mwps, \%mscores, \%mwscores);
	add_points(\%cwps, \%cscores, \%cwscores);
}
616
# Print a ranking of the balancers for one metric.
#
#   $n - title of the scoreboard
#   $h - hash ref: balancer id => accumulated points
#
# The entries are listed best first, each with its points relative to the
# best entry. The header shows the relative distance between the best and
# the worst entry. When the best entry has zero points (or the table is
# empty) nothing can be normalized; the range and the relative scores are
# then reported as 0 instead of dividing by zero.
#
# Returns the id of the best entry, or undef for an empty table.
sub dump_scoreboard
{
	my ($n, $h) = @_;
	my @ranked = sort { $h->{$b} <=> $h->{$a} } keys %{$h};
	my $max = @ranked ? $h->{$ranked[0]} : 0;
	my $range = 0;
	my $i = 1;

	$range = 1 - $h->{$ranked[-1]} / $max if $max;

	my $str = "$n rank (range=$range):";
	say "\n$str";
	say '=' x length($str);

	foreach my $w (@ranked) {
		my $score = $max ? $h->{$w} / $max : 0;

		say "  $i: '$w' ($score)";

		$i = $i + 1;
	}

	return $ranked[0];
}
644
# Print the scoreboards. The balancer used for the verification pass is
# the winner of the last combined scoreboard printed.
if ($multi_mode) {
	dump_scoreboard('Throughput', \%scores);
	dump_scoreboard('Fairness', \%cscores);
	$balancer = dump_scoreboard('Combined', \%mscores);
} else {
	dump_scoreboard('Total wps', \%scores);
	dump_scoreboard('Total weighted wps', \%wscores);
	dump_scoreboard('Per client wps', \%cscores);
	dump_scoreboard('Per client weighted wps', \%cwscores);
	$balancer = dump_scoreboard('Combined wps', \%mscores);
	$balancer = dump_scoreboard('Combined weighted wps', \%mwscores);
}
651
# Verification pass: every balanceable workload is calibrated, driven to
# its saturation point with the selected balancer and traced. The engine
# idleness figures of the trace decide between Pass, WARN and FAIL.
VERIFY:

my %problem_wrk;

die "No balancer was selected" unless defined $balancer;

say "\nBalancer is '$balancer'.";
say "Idleness tolerance is $idle_tolerance_pct%.";

if ($multi_mode) {
	$w_direct = '-w ' . join ' -w ', map("$wrk_root/$_", @workloads);
	@workloads = ($w_direct);
}

foreach my $wrk (@workloads) {
	my @args = ( "-n $nop" );
	my ($r, $error, $c, $wps);
	my $saturated = 0;
	my $result = 'Pass';
	# Key under which the second VCS engine is looked up in the trace
	# statistics; it differs in GT2 mode.
	my $vcs2 = $gt2 ? '1:0' : '2:1';
	my %problem;
	my $engines;

	next if not defined $w_direct and not can_balance_workload($wrk);

	push @args, $balancer unless $balancer eq '<none>';

	# Without results from the balancer search (-b was given) there is
	# nothing to compare against.
	if (scalar(keys %results)) {
		$r = $results{$wrk}->{$balancer} / $best_bal{$wrk} * 100.0;
	} else {
		$r = '---';
	}
	say "  \nProfiling '$wrk' ($r% of best)...";

	($r, $error) = calibrate_workload($wrk);
	say "      ${client_target_s}s is $r workloads. (error=$error)";

	($c, $wps) = find_saturation_point($wrk, $r, $verbose, @args);
	say "      Saturation at $c clients ($wps workloads/s).";

	$engines = trace_workload($wrk, $balancer, $r, $c);

	foreach my $key (keys %{$engines}) {
		next if $key eq 'gpu';
		$saturated = $saturated + 1
			     if $engines->{$key} < $idle_tolerance_pct;
	}

	if ($saturated == 0) {
		# Not a single saturated engine
		$result = 'FAIL';
	} elsif (not exists $engines->{'2:0'} or not exists $engines->{$vcs2}) {
		# VCS1 and VCS2 not present in a balancing workload
		$result = 'FAIL';
	} elsif ($saturated == 1 and
		 ($engines->{'2:0'} < $idle_tolerance_pct or
		  $engines->{$vcs2} < $idle_tolerance_pct)) {
		# Only one VCS saturated
		$result = 'WARN';
	}

	# An idle GPU is worth a warning, but it must not downgrade a
	# failure detected above to a mere warning.
	$result = 'WARN' if $result eq 'Pass' and
			    defined $engines->{'gpu'} and
			    $engines->{'gpu'} > $idle_tolerance_pct;

	if ($result ne 'Pass') {
		$problem{'c'} = $c;
		$problem{'r'} = $r;
		$problem{'stats'} = $engines;
		$problem_wrk{$wrk} = \%problem;
	}

	print "    $result [";
	print map " $_: $engines->{$_}%,", sort keys %{$engines};
	say " ]";
}
727
# Summary of every workload that did not pass, with the client and repeat
# counts it was traced at and the idleness statistics that were measured.
if (%problem_wrk) {
	say "\nProblematic workloads were:";
}

for my $wrk (sort keys %problem_wrk) {
	my $info = $problem_wrk{$wrk};
	my $stats = $info->{'stats'};
	my @cells = map { " $_: $stats->{$_}%," } sort keys %{$stats};

	print "   $wrk -c $info->{'c'} -r $info->{'r'} [";
	print @cells;
	say " ]";
}
737