1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors: Marek Olšák <maraeo@gmail.com>
24  *
25  */
26 
/* The GPU load is measured as follows.
 *
 * There is a thread which samples the GRBM_STATUS register at a certain
 * frequency. For each tracked status bit (SPI_BUSY and GUI_ACTIVE), either
 * the corresponding "busy" or "idle" counter is incremented based on
 * whether that bit is set or not.
 *
 * Then, the user can sample the counters twice and calculate the average
 * load between the two samples.
 */
36 
37 #include "r600_pipe_common.h"
38 #include "os/os_time.h"
39 
40 /* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
41  * fps (there are too few samples per frame). */
42 #define SAMPLES_PER_SEC 10000
43 
44 #define GRBM_STATUS		0x8010
45 #define SPI_BUSY(x)		(((x) >> 22) & 0x1)
46 #define GUI_ACTIVE(x)		(((x) >> 31) & 0x1)
47 
r600_update_grbm_counters(struct r600_common_screen * rscreen,union r600_grbm_counters * counters)48 static void r600_update_grbm_counters(struct r600_common_screen *rscreen,
49 				      union r600_grbm_counters *counters)
50 {
51 	uint32_t value = 0;
52 
53 	rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
54 
55 	if (SPI_BUSY(value))
56 		p_atomic_inc(&counters->named.spi_busy);
57 	else
58 		p_atomic_inc(&counters->named.spi_idle);
59 
60 	if (GUI_ACTIVE(value))
61 		p_atomic_inc(&counters->named.gui_busy);
62 	else
63 		p_atomic_inc(&counters->named.gui_idle);
64 }
65 
PIPE_THREAD_ROUTINE(r600_gpu_load_thread,param)66 static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param)
67 {
68 	struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
69 	const int period_us = 1000000 / SAMPLES_PER_SEC;
70 	int sleep_us = period_us;
71 	int64_t cur_time, last_time = os_time_get();
72 
73 	while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {
74 		if (sleep_us)
75 			os_time_sleep(sleep_us);
76 
77 		/* Make sure we sleep the ideal amount of time to match
78 		 * the expected frequency. */
79 		cur_time = os_time_get();
80 
81 		if (os_time_timeout(last_time, last_time + period_us,
82 				    cur_time))
83 			sleep_us = MAX2(sleep_us - 1, 1);
84 		else
85 			sleep_us += 1;
86 
87 		/*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/
88 		last_time = cur_time;
89 
90 		/* Update the counters. */
91 		r600_update_grbm_counters(rscreen, &rscreen->grbm_counters);
92 	}
93 	p_atomic_dec(&rscreen->gpu_load_stop_thread);
94 	return 0;
95 }
96 
r600_gpu_load_kill_thread(struct r600_common_screen * rscreen)97 void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
98 {
99 	if (!rscreen->gpu_load_thread)
100 		return;
101 
102 	p_atomic_inc(&rscreen->gpu_load_stop_thread);
103 	pipe_thread_wait(rscreen->gpu_load_thread);
104 	rscreen->gpu_load_thread = 0;
105 }
106 
r600_read_counter(struct r600_common_screen * rscreen,unsigned busy_index)107 static uint64_t r600_read_counter(struct r600_common_screen *rscreen,
108 				  unsigned busy_index)
109 {
110 	/* Start the thread if needed. */
111 	if (!rscreen->gpu_load_thread) {
112 		pipe_mutex_lock(rscreen->gpu_load_mutex);
113 		/* Check again inside the mutex. */
114 		if (!rscreen->gpu_load_thread)
115 			rscreen->gpu_load_thread =
116 				pipe_thread_create(r600_gpu_load_thread, rscreen);
117 		pipe_mutex_unlock(rscreen->gpu_load_mutex);
118 	}
119 
120 	unsigned busy = p_atomic_read(&rscreen->grbm_counters.array[busy_index]);
121 	unsigned idle = p_atomic_read(&rscreen->grbm_counters.array[busy_index + 1]);
122 
123 	return busy | ((uint64_t)idle << 32);
124 }
125 
r600_end_counter(struct r600_common_screen * rscreen,uint64_t begin,unsigned busy_index)126 static unsigned r600_end_counter(struct r600_common_screen *rscreen,
127 				 uint64_t begin, unsigned busy_index)
128 {
129 	uint64_t end = r600_read_counter(rscreen, busy_index);
130 	unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
131 	unsigned idle = (end >> 32) - (begin >> 32);
132 
133 	/* Calculate the % of time the busy counter was being incremented.
134 	 *
135 	 * If no counters were incremented, return the current counter status.
136 	 * It's for the case when the load is queried faster than
137 	 * the counters are updated.
138 	 */
139 	if (idle || busy) {
140 		return busy*100 / (busy + idle);
141 	} else {
142 		union r600_grbm_counters counters;
143 
144 		memset(&counters, 0, sizeof(counters));
145 		r600_update_grbm_counters(rscreen, &counters);
146 		return counters.array[busy_index] ? 100 : 0;
147 	}
148 }
149 
/* Index of the "<field>_busy" counter inside grbm_counters.array, obtained
 * as the element distance between the named member and the start of the
 * array member of the union. r600_read_counter() reads the matching idle
 * counter at this index + 1.
 *
 * The rscreen argument is parenthesized so that any pointer expression
 * (e.g. &screen) can be passed safely. */
#define BUSY_INDEX(rscreen, field) \
	(&(rscreen)->grbm_counters.named.field##_busy - \
	 (rscreen)->grbm_counters.array)
152 
r600_begin_counter_spi(struct r600_common_screen * rscreen)153 uint64_t r600_begin_counter_spi(struct r600_common_screen *rscreen)
154 {
155 	return r600_read_counter(rscreen, BUSY_INDEX(rscreen, spi));
156 }
157 
r600_end_counter_spi(struct r600_common_screen * rscreen,uint64_t begin)158 unsigned r600_end_counter_spi(struct r600_common_screen *rscreen, uint64_t begin)
159 {
160 	return r600_end_counter(rscreen, begin, BUSY_INDEX(rscreen, spi));
161 }
162 
r600_begin_counter_gui(struct r600_common_screen * rscreen)163 uint64_t r600_begin_counter_gui(struct r600_common_screen *rscreen)
164 {
165 	return r600_read_counter(rscreen, BUSY_INDEX(rscreen, gui));
166 }
167 
r600_end_counter_gui(struct r600_common_screen * rscreen,uint64_t begin)168 unsigned r600_end_counter_gui(struct r600_common_screen *rscreen, uint64_t begin)
169 {
170 	return r600_end_counter(rscreen, begin, BUSY_INDEX(rscreen, gui));
171 }
172