1 #include <binder/Binder.h>
2 #include <binder/IBinder.h>
3 #include <binder/IPCThreadState.h>
4 #include <binder/IServiceManager.h>
5 #include <string>
6 #include <cstring>
7 #include <cstdlib>
8 #include <cstdio>
9 
10 #include <iostream>
11 #include <vector>
12 #include <tuple>
13 
14 #include <unistd.h>
15 #include <sys/wait.h>
16 
17 using namespace std;
18 using namespace android;
19 
20 enum BinderWorkerServiceCode {
21     BINDER_NOP = IBinder::FIRST_CALL_TRANSACTION,
22 };
23 
// Terminates the process with EXIT_FAILURE when `cond` evaluates to false,
// after printing the enclosing function name, the line number and the text of
// the condition to stderr. Does nothing when `cond` is true.
//
// All names are fully qualified so that the expansion does not depend on a
// `using namespace std;` being in effect where the macro is used. The
// do/while(0) wrapper makes the macro behave as a single statement.
#define ASSERT_TRUE(cond) \
do { \
    if (!(cond)) {\
       std::cerr << __func__ << ":" << __LINE__ << " condition:" << #cond << " failed\n" << std::endl; \
       std::exit(EXIT_FAILURE); \
    } \
} while (0)
31 
32 class BinderWorkerService : public BBinder
33 {
34 public:
BinderWorkerService()35     BinderWorkerService() {}
~BinderWorkerService()36     ~BinderWorkerService() {}
onTransact(uint32_t code,const Parcel & data,Parcel * reply,uint32_t flags=0)37     virtual status_t onTransact(uint32_t code,
38                                 const Parcel& data, Parcel* reply,
39                                 uint32_t flags = 0) {
40         (void)flags;
41         (void)data;
42         (void)reply;
43         switch (code) {
44         case BINDER_NOP:
45             return NO_ERROR;
46         default:
47             return UNKNOWN_TRANSACTION;
48         };
49     }
50 };
51 
52 class Pipe {
53     int m_readFd;
54     int m_writeFd;
Pipe(int readFd,int writeFd)55     Pipe(int readFd, int writeFd) : m_readFd{readFd}, m_writeFd{writeFd} {}
56     Pipe(const Pipe &) = delete;
57     Pipe& operator=(const Pipe &) = delete;
58     Pipe& operator=(const Pipe &&) = delete;
59 public:
Pipe(Pipe && rval)60     Pipe(Pipe&& rval) noexcept {
61         m_readFd = rval.m_readFd;
62         m_writeFd = rval.m_writeFd;
63         rval.m_readFd = 0;
64         rval.m_writeFd = 0;
65     }
~Pipe()66     ~Pipe() {
67         if (m_readFd)
68             close(m_readFd);
69         if (m_writeFd)
70             close(m_writeFd);
71     }
signal()72     void signal() {
73         bool val = true;
74         int error = write(m_writeFd, &val, sizeof(val));
75         ASSERT_TRUE(error >= 0);
76     };
wait()77     void wait() {
78         bool val = false;
79         int error = read(m_readFd, &val, sizeof(val));
80         ASSERT_TRUE(error >= 0);
81     }
send(const T & v)82     template <typename T> void send(const T& v) {
83         int error = write(m_writeFd, &v, sizeof(T));
84         ASSERT_TRUE(error >= 0);
85     }
recv(T & v)86     template <typename T> void recv(T& v) {
87         int error = read(m_readFd, &v, sizeof(T));
88         ASSERT_TRUE(error >= 0);
89     }
createPipePair()90     static tuple<Pipe, Pipe> createPipePair() {
91         int a[2];
92         int b[2];
93 
94         int error1 = pipe(a);
95         int error2 = pipe(b);
96         ASSERT_TRUE(error1 >= 0);
97         ASSERT_TRUE(error2 >= 0);
98 
99         return make_tuple(Pipe(a[0], b[1]), Pipe(b[0], a[1]));
100     }
101 };
102 
// Number of histogram buckets kept in every ProcResults.
static const uint32_t num_buckets = 128;
// Latency range, in nanoseconds, spread over the histogram. Samples at or
// above it are counted in the last bucket.
static uint64_t max_time_bucket = 50ull * 1000000;
// Width of one bucket in nanoseconds. Must be recomputed whenever
// max_time_bucket is changed.
static uint64_t time_per_bucket = max_time_bucket / num_buckets;

// Latency statistics of a set of transactions. All times are nanoseconds.
struct ProcResults {
    uint64_t m_worst = 0;                   // largest sample seen
    uint32_t m_buckets[num_buckets] = {0};  // histogram of samples
    uint64_t m_transactions = 0;            // number of samples
    uint64_t m_long_transactions = 0;       // samples above max_time_bucket
    uint64_t m_total_time = 0;              // sum of all samples
    // Smallest sample seen. Starts at the largest representable value so that
    // the first sample always replaces it, even one above max_time_bucket.
    uint64_t m_best = ~uint64_t{0};

    // Records one sample of `time` nanoseconds.
    void add_time(uint64_t time) {
        if (time > max_time_bucket) {
            m_long_transactions++;
        }
        // Default to the last bucket, then refine for in-range samples. The
        // quotient is clamped as well: when max_time_bucket is not a multiple
        // of num_buckets the truncated bucket width lets an in-range sample
        // divide to num_buckets, one past the end of the array.
        uint64_t bucket = num_buckets - 1;
        if (time < max_time_bucket && time_per_bucket != 0) {
            bucket = std::min<uint64_t>(time / time_per_bucket, num_buckets - 1);
        }
        m_buckets[bucket] += 1;
        m_best = std::min(time, m_best);
        m_worst = std::max(time, m_worst);
        m_transactions += 1;
        m_total_time += time;
    }

    // Returns the statistics of the union of the samples in `a` and `b`.
    static ProcResults combine(const ProcResults& a, const ProcResults& b) {
        ProcResults ret;
        for (uint32_t i = 0; i < num_buckets; i++) {
            ret.m_buckets[i] = a.m_buckets[i] + b.m_buckets[i];
        }
        ret.m_worst = std::max(a.m_worst, b.m_worst);
        ret.m_best = std::min(a.m_best, b.m_best);
        ret.m_transactions = a.m_transactions + b.m_transactions;
        ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
        ret.m_total_time = a.m_total_time + b.m_total_time;
        return ret;
    }

    // Prints average/worst/best latency in milliseconds and the midpoints of
    // the buckets in which the 50th, 90th, 95th and 99th percentile fall.
    void dump() const {
        // Without samples the average is 0/0 and m_best is still its initial
        // placeholder, so there is nothing meaningful to print.
        if (m_transactions == 0) {
            std::cout << "no transactions recorded" << std::endl;
            return;
        }

        if (m_long_transactions > 0) {
            // Scaled by 100 because the value is labelled as a percentage.
            std::cout << 100.0 * m_long_transactions / m_transactions << "% of transactions took longer "
                "than estimated max latency. Consider setting -m to be higher than "
                 << m_worst / 1000 << " microseconds" << std::endl;
        }

        double best = static_cast<double>(m_best) / 1.0E6;
        double worst = static_cast<double>(m_worst) / 1.0E6;
        double average = static_cast<double>(m_total_time) / m_transactions / 1.0E6;
        std::cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << std::endl;

        static const struct {
            float fraction;
            const char* label;
        } marks[] = {
            {0.5f, "50%: "}, {0.9f, "90%: "}, {0.95f, "95%: "}, {0.99f, "99%: "},
        };

        uint64_t cur_total = 0;
        float time_per_bucket_ms = time_per_bucket / 1.0E6;
        for (uint32_t i = 0; i < num_buckets; i++) {
            float cur_time = time_per_bucket_ms * i + 0.5f * time_per_bucket_ms;
            // A percentile is reported at the bucket whose samples carry the
            // running count across that fraction of all transactions.
            for (const auto& mark : marks) {
                if ((cur_total < mark.fraction * m_transactions) &&
                    (cur_total + m_buckets[i] >= mark.fraction * m_transactions)) {
                    std::cout << mark.label << cur_time << " ";
                }
            }
            cur_total += m_buckets[i];
        }
        std::cout << std::endl;
    }
};
170 
generateServiceName(int num)171 String16 generateServiceName(int num)
172 {
173     char num_str[32];
174     snprintf(num_str, sizeof(num_str), "%d", num);
175     String16 serviceName = String16("binderWorker") + String16(num_str);
176     return serviceName;
177 }
178 
worker_fx(int num,int worker_count,int iterations,int payload_size,bool cs_pair,Pipe p)179 void worker_fx(int num,
180                int worker_count,
181                int iterations,
182                int payload_size,
183                bool cs_pair,
184                Pipe p)
185 {
186     // Create BinderWorkerService and for go.
187     ProcessState::self()->startThreadPool();
188     sp<IServiceManager> serviceMgr = defaultServiceManager();
189     sp<BinderWorkerService> service = new BinderWorkerService;
190     serviceMgr->addService(generateServiceName(num), service);
191 
192     srand(num);
193     p.signal();
194     p.wait();
195 
196     // If client/server pairs, then half the workers are
197     // servers and half are clients
198     int server_count = cs_pair ? worker_count / 2 : worker_count;
199 
200     // Get references to other binder services.
201     cout << "Created BinderWorker" << num << endl;
202     (void)worker_count;
203     vector<sp<IBinder> > workers;
204     for (int i = 0; i < server_count; i++) {
205         if (num == i)
206             continue;
207         workers.push_back(serviceMgr->getService(generateServiceName(i)));
208     }
209 
210     // Run the benchmark if client
211     ProcResults results;
212     chrono::time_point<chrono::high_resolution_clock> start, end;
213     for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) {
214         Parcel data, reply;
215         int target = cs_pair ? num % server_count : rand() % workers.size();
216         int sz = payload_size;
217 
218         while (sz >= sizeof(uint32_t)) {
219             data.writeInt32(0);
220             sz -= sizeof(uint32_t);
221         }
222         start = chrono::high_resolution_clock::now();
223         status_t ret = workers[target]->transact(BINDER_NOP, data, &reply);
224         end = chrono::high_resolution_clock::now();
225 
226         uint64_t cur_time = uint64_t(chrono::duration_cast<chrono::nanoseconds>(end - start).count());
227         results.add_time(cur_time);
228 
229         if (ret != NO_ERROR) {
230            cout << "thread " << num << " failed " << ret << "i : " << i << endl;
231            exit(EXIT_FAILURE);
232         }
233     }
234 
235     // Signal completion to master and wait.
236     p.signal();
237     p.wait();
238 
239     // Send results to master and wait for go to exit.
240     p.send(results);
241     p.wait();
242 
243     exit(EXIT_SUCCESS);
244 }
245 
make_worker(int num,int iterations,int worker_count,int payload_size,bool cs_pair)246 Pipe make_worker(int num, int iterations, int worker_count, int payload_size, bool cs_pair)
247 {
248     auto pipe_pair = Pipe::createPipePair();
249     pid_t pid = fork();
250     if (pid) {
251         /* parent */
252         return move(get<0>(pipe_pair));
253     } else {
254         /* child */
255         worker_fx(num, worker_count, iterations, payload_size, cs_pair, move(get<1>(pipe_pair)));
256         /* never get here */
257         return move(get<0>(pipe_pair));
258     }
259 
260 }
261 
wait_all(vector<Pipe> & v)262 void wait_all(vector<Pipe>& v)
263 {
264     for (int i = 0; i < v.size(); i++) {
265         v[i].wait();
266     }
267 }
268 
signal_all(vector<Pipe> & v)269 void signal_all(vector<Pipe>& v)
270 {
271     for (int i = 0; i < v.size(); i++) {
272         v[i].signal();
273     }
274 }
275 
run_main(int iterations,int workers,int payload_size,int cs_pair,bool training_round=false)276 void run_main(int iterations,
277               int workers,
278               int payload_size,
279               int cs_pair,
280               bool training_round=false)
281 {
282     vector<Pipe> pipes;
283     // Create all the workers and wait for them to spawn.
284     for (int i = 0; i < workers; i++) {
285         pipes.push_back(make_worker(i, iterations, workers, payload_size, cs_pair));
286     }
287     wait_all(pipes);
288 
289     // Run the workers and wait for completion.
290     chrono::time_point<chrono::high_resolution_clock> start, end;
291     cout << "waiting for workers to complete" << endl;
292     start = chrono::high_resolution_clock::now();
293     signal_all(pipes);
294     wait_all(pipes);
295     end = chrono::high_resolution_clock::now();
296 
297     // Calculate overall throughput.
298     double iterations_per_sec = double(iterations * workers) / (chrono::duration_cast<chrono::nanoseconds>(end - start).count() / 1.0E9);
299     cout << "iterations per sec: " << iterations_per_sec << endl;
300 
301     // Collect all results from the workers.
302     cout << "collecting results" << endl;
303     signal_all(pipes);
304     ProcResults tot_results;
305     for (int i = 0; i < workers; i++) {
306         ProcResults tmp_results;
307         pipes[i].recv(tmp_results);
308         tot_results = ProcResults::combine(tot_results, tmp_results);
309     }
310 
311     // Kill all the workers.
312     cout << "killing workers" << endl;
313     signal_all(pipes);
314     for (int i = 0; i < workers; i++) {
315         int status;
316         wait(&status);
317         if (status != 0) {
318             cout << "nonzero child status" << status << endl;
319         }
320     }
321     if (training_round) {
322         // sets max_time_bucket to 2 * m_worst from the training round.
323         // Also needs to adjust time_per_bucket accordingly.
324         max_time_bucket = 2 * tot_results.m_worst;
325         time_per_bucket = max_time_bucket / num_buckets;
326         cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl;
327     } else {
328             tot_results.dump();
329     }
330 }
331 
main(int argc,char * argv[])332 int main(int argc, char *argv[])
333 {
334     int workers = 2;
335     int iterations = 10000;
336     int payload_size = 0;
337     bool cs_pair = false;
338     bool training_round = false;
339     (void)argc;
340     (void)argv;
341 
342     // Parse arguments.
343     for (int i = 1; i < argc; i++) {
344         if (string(argv[i]) == "--help") {
345             cout << "Usage: binderThroughputTest [OPTIONS]" << endl;
346             cout << "\t-i N    : Specify number of iterations." << endl;
347             cout << "\t-m N    : Specify expected max latency in microseconds." << endl;
348             cout << "\t-p      : Split workers into client/server pairs." << endl;
349             cout << "\t-s N    : Specify payload size." << endl;
350             cout << "\t-t N    : Run training round." << endl;
351             cout << "\t-w N    : Specify total number of workers." << endl;
352             return 0;
353         }
354         if (string(argv[i]) == "-w") {
355             workers = atoi(argv[i+1]);
356             i++;
357             continue;
358         }
359         if (string(argv[i]) == "-i") {
360             iterations = atoi(argv[i+1]);
361             i++;
362             continue;
363         }
364         if (string(argv[i]) == "-s") {
365             payload_size = atoi(argv[i+1]);
366             i++;
367         }
368         if (string(argv[i]) == "-p") {
369             // client/server pairs instead of spreading
370             // requests to all workers. If true, half
371             // the workers become clients and half servers
372             cs_pair = true;
373         }
374         if (string(argv[i]) == "-t") {
375             // Run one training round before actually collecting data
376             // to get an approximation of max latency.
377             training_round = true;
378         }
379         if (string(argv[i]) == "-m") {
380             // Caller specified the max latency in microseconds.
381             // No need to run training round in this case.
382             if (atoi(argv[i+1]) > 0) {
383                 max_time_bucket = strtoull(argv[i+1], (char **)nullptr, 10) * 1000;
384                 time_per_bucket = max_time_bucket / num_buckets;
385                 i++;
386             } else {
387                 cout << "Max latency -m must be positive." << endl;
388                 exit(EXIT_FAILURE);
389             }
390         }
391     }
392 
393     if (training_round) {
394         cout << "Start training round" << endl;
395         run_main(iterations, workers, payload_size, cs_pair, training_round=true);
396         cout << "Completed training round" << endl << endl;
397     }
398 
399     run_main(iterations, workers, payload_size, cs_pair);
400     return 0;
401 }
402