#include <binder/Binder.h>
#include <binder/IBinder.h>
#include <binder/IPCThreadState.h>
#include <binder/IServiceManager.h>

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <tuple>
#include <vector>

#include <sys/wait.h>
#include <unistd.h>

using namespace std;
using namespace android;
20 
21 enum BinderWorkerServiceCode {
22     BINDER_NOP = IBinder::FIRST_CALL_TRANSACTION,
23 };
24 
// Terminates the process with EXIT_FAILURE when `cond` evaluates to false,
// after reporting the enclosing function, the line number and the text of the
// failed condition on stderr. Wrapped in do/while(0) so that it behaves as a
// single statement.
#define ASSERT_TRUE(cond)                                                     \
    do {                                                                      \
        if (!(cond)) {                                                        \
            std::cerr << __func__ << ":" << __LINE__ << " condition:" << #cond \
                      << " failed\n" << std::endl;                            \
            std::exit(EXIT_FAILURE);                                          \
        }                                                                     \
    } while (0)
32 
33 class BinderWorkerService : public BBinder
34 {
35 public:
BinderWorkerService()36     BinderWorkerService() {}
~BinderWorkerService()37     ~BinderWorkerService() {}
onTransact(uint32_t code,const Parcel & data,Parcel * reply,uint32_t flags=0)38     virtual status_t onTransact(uint32_t code,
39                                 const Parcel& data, Parcel* reply,
40                                 uint32_t flags = 0) {
41         (void)flags;
42         (void)data;
43         (void)reply;
44         switch (code) {
45         case BINDER_NOP:
46             return NO_ERROR;
47         default:
48             return UNKNOWN_TRANSACTION;
49         };
50     }
51 };
52 
// Latency, in nanoseconds, above which a transaction is counted as "longer
// than the estimated max latency" by ProcResults::dump(). Starts at the
// largest representable value, so nothing is flagged until it is lowered.
static uint64_t warn_latency = std::numeric_limits<uint64_t>::max();

/**
 * A collection of transaction latencies in nanoseconds, with helpers to merge
 * collections and to report summary statistics.
 */
struct ProcResults {
    std::vector<uint64_t> data;

    // Pre-allocates room for `capacity` samples; the collection starts empty.
    ProcResults(size_t capacity) { data.reserve(capacity); }

    // Appends one latency sample.
    void add_time(uint64_t time) { data.push_back(time); }

    // Appends every sample of `append` to this collection.
    void combine_with(const ProcResults& append) {
        data.insert(data.end(), append.data.begin(), append.data.end());
    }

    // Returns the largest sample, or 0 when there are no samples (taking the
    // maximum of an empty range would dereference the end iterator).
    uint64_t worst() const {
        if (data.empty()) {
            return 0;
        }
        return *std::max_element(data.begin(), data.end());
    }

    // Writes one sample per line to `filename`. Exits the process with
    // EXIT_FAILURE if the file cannot be opened.
    void dump_to_file(std::string filename) const {
        std::ofstream output(filename);
        if (!output.is_open()) {
            std::cerr << "Failed to open '" << filename << "'." << std::endl;
            std::exit(EXIT_FAILURE);
        }
        for (uint64_t value : data) {
            output << value << "\n";
        }
    }

    // Prints the share of samples above warn_latency (if any), then the
    // average/worst/best latency and the 50/90/95/99th percentiles, all in
    // milliseconds. Sorts `data` in place as a side effect.
    void dump() {
        if (data.empty()) {
            // This avoids index-out-of-bounds below.
            std::cout << "error: no data\n" << std::endl;
            return;
        }

        const auto num_long_transactions =
            std::count_if(data.begin(), data.end(),
                          [](uint64_t elem) { return elem > warn_latency; });

        if (num_long_transactions > 0) {
            // Scale the ratio by 100 so that the printed number really is the
            // percentage the message claims it is.
            const double percent_long =
                100.0 * static_cast<double>(num_long_transactions) / data.size();
            std::cout << percent_long << "% of transactions took longer "
                "than estimated max latency. Consider setting -m to be higher than "
                << worst() / 1000 << " microseconds" << std::endl;
        }

        std::sort(data.begin(), data.end());

        const uint64_t total_time =
            std::accumulate(data.begin(), data.end(), uint64_t{0});

        // After sorting, the extremes are the first and last elements.
        const double best_ms = static_cast<double>(data.front()) / 1.0E6;
        const double worst_ms = static_cast<double>(data.back()) / 1.0E6;
        const double average_ms = static_cast<double>(total_time) / data.size() / 1.0E6;
        std::cout << "average:" << average_ms << "ms worst:" << worst_ms
                  << "ms best:" << best_ms << "ms" << std::endl;

        // The index (pct * size) / 100 is always below size for pct < 100.
        const auto percentile_ms = [this](size_t pct) {
            return data[(pct * data.size()) / 100] / 1.0E6;
        };
        std::cout << "50%: " << percentile_ms(50) << " ";
        std::cout << "90%: " << percentile_ms(90) << " ";
        std::cout << "95%: " << percentile_ms(95) << " ";
        std::cout << "99%: " << percentile_ms(99) << std::endl;
    }
};
121 
122 class Pipe {
123     int m_readFd;
124     int m_writeFd;
Pipe(int readFd,int writeFd)125     Pipe(int readFd, int writeFd) : m_readFd{readFd}, m_writeFd{writeFd} {}
126     Pipe(const Pipe &) = delete;
127     Pipe& operator=(const Pipe &) = delete;
128     Pipe& operator=(const Pipe &&) = delete;
129 public:
Pipe(Pipe && rval)130     Pipe(Pipe&& rval) noexcept {
131         m_readFd = rval.m_readFd;
132         m_writeFd = rval.m_writeFd;
133         rval.m_readFd = 0;
134         rval.m_writeFd = 0;
135     }
~Pipe()136     ~Pipe() {
137         if (m_readFd)
138             close(m_readFd);
139         if (m_writeFd)
140             close(m_writeFd);
141     }
signal()142     void signal() {
143         bool val = true;
144         int error = write(m_writeFd, &val, sizeof(val));
145         ASSERT_TRUE(error >= 0);
146     };
wait()147     void wait() {
148         bool val = false;
149         int error = read(m_readFd, &val, sizeof(val));
150         ASSERT_TRUE(error >= 0);
151     }
send(const ProcResults & v)152     void send(const ProcResults& v) {
153         size_t num_elems = v.data.size();
154 
155         int error = write(m_writeFd, &num_elems, sizeof(size_t));
156         ASSERT_TRUE(error >= 0);
157 
158         char* to_write = (char*)v.data.data();
159         size_t num_bytes = sizeof(uint64_t) * num_elems;
160 
161         while (num_bytes > 0) {
162             int ret = write(m_writeFd, to_write, num_bytes);
163             ASSERT_TRUE(ret >= 0);
164             num_bytes -= ret;
165             to_write += ret;
166         }
167     }
recv(ProcResults & v)168     void recv(ProcResults& v) {
169         size_t num_elems = 0;
170         int error = read(m_readFd, &num_elems, sizeof(size_t));
171         ASSERT_TRUE(error >= 0);
172 
173         v.data.resize(num_elems);
174         char* read_to = (char*)v.data.data();
175         size_t num_bytes = sizeof(uint64_t) * num_elems;
176 
177         while (num_bytes > 0) {
178             int ret = read(m_readFd, read_to, num_bytes);
179             ASSERT_TRUE(ret >= 0);
180             num_bytes -= ret;
181             read_to += ret;
182         }
183     }
createPipePair()184     static tuple<Pipe, Pipe> createPipePair() {
185         int a[2];
186         int b[2];
187 
188         int error1 = pipe(a);
189         int error2 = pipe(b);
190         ASSERT_TRUE(error1 >= 0);
191         ASSERT_TRUE(error2 >= 0);
192 
193         return make_tuple(Pipe(a[0], b[1]), Pipe(b[0], a[1]));
194     }
195 };
196 
generateServiceName(int num)197 String16 generateServiceName(int num)
198 {
199     char num_str[32];
200     snprintf(num_str, sizeof(num_str), "%d", num);
201     String16 serviceName = String16("binderWorker") + String16(num_str);
202     return serviceName;
203 }
204 
worker_fx(int num,int worker_count,int iterations,int payload_size,bool cs_pair,Pipe p)205 void worker_fx(int num,
206                int worker_count,
207                int iterations,
208                int payload_size,
209                bool cs_pair,
210                Pipe p)
211 {
212     // Create BinderWorkerService and for go.
213     ProcessState::self()->startThreadPool();
214     sp<IServiceManager> serviceMgr = defaultServiceManager();
215     sp<BinderWorkerService> service = new BinderWorkerService;
216     serviceMgr->addService(generateServiceName(num), service);
217 
218     srand(num);
219     p.signal();
220     p.wait();
221 
222     // If client/server pairs, then half the workers are
223     // servers and half are clients
224     int server_count = cs_pair ? worker_count / 2 : worker_count;
225 
226     // Get references to other binder services.
227     cout << "Created BinderWorker" << num << endl;
228     (void)worker_count;
229     vector<sp<IBinder> > workers;
230     for (int i = 0; i < server_count; i++) {
231         if (num == i)
232             continue;
233         workers.push_back(serviceMgr->waitForService(generateServiceName(i)));
234     }
235 
236     p.signal();
237     p.wait();
238 
239     ProcResults results(iterations);
240     chrono::time_point<chrono::high_resolution_clock> start, end;
241 
242     // Skip the benchmark if server of a cs_pair.
243     if (!(cs_pair && num < server_count)) {
244         for (int i = 0; i < iterations; i++) {
245             Parcel data, reply;
246             int target = cs_pair ? num % server_count : rand() % workers.size();
247             int sz = payload_size;
248 
249             while (sz >= sizeof(uint32_t)) {
250                 data.writeInt32(0);
251                 sz -= sizeof(uint32_t);
252             }
253             start = chrono::high_resolution_clock::now();
254             status_t ret = workers[target]->transact(BINDER_NOP, data, &reply);
255             end = chrono::high_resolution_clock::now();
256 
257             uint64_t cur_time = uint64_t(chrono::duration_cast<chrono::nanoseconds>(end - start).count());
258             results.add_time(cur_time);
259 
260             if (ret != NO_ERROR) {
261                cout << "thread " << num << " failed " << ret << "i : " << i << endl;
262                exit(EXIT_FAILURE);
263             }
264         }
265     }
266 
267     // Signal completion to master and wait.
268     p.signal();
269     p.wait();
270 
271     // Send results to master and wait for go to exit.
272     p.send(results);
273     p.wait();
274 
275     exit(EXIT_SUCCESS);
276 }
277 
make_worker(int num,int iterations,int worker_count,int payload_size,bool cs_pair)278 Pipe make_worker(int num, int iterations, int worker_count, int payload_size, bool cs_pair)
279 {
280     auto pipe_pair = Pipe::createPipePair();
281     pid_t pid = fork();
282     if (pid) {
283         /* parent */
284         return std::move(get<0>(pipe_pair));
285     } else {
286         /* child */
287         worker_fx(num, worker_count, iterations, payload_size, cs_pair,
288                   std::move(get<1>(pipe_pair)));
289         /* never get here */
290         return std::move(get<0>(pipe_pair));
291     }
292 
293 }
294 
wait_all(vector<Pipe> & v)295 void wait_all(vector<Pipe>& v)
296 {
297     for (int i = 0; i < v.size(); i++) {
298         v[i].wait();
299     }
300 }
301 
signal_all(vector<Pipe> & v)302 void signal_all(vector<Pipe>& v)
303 {
304     for (int i = 0; i < v.size(); i++) {
305         v[i].signal();
306     }
307 }
308 
run_main(int iterations,int workers,int payload_size,int cs_pair,bool training_round=false,bool dump_to_file=false,string dump_filename="")309 void run_main(int iterations, int workers, int payload_size, int cs_pair,
310               bool training_round = false, bool dump_to_file = false, string dump_filename = "") {
311     vector<Pipe> pipes;
312     // Create all the workers and wait for them to spawn.
313     for (int i = 0; i < workers; i++) {
314         pipes.push_back(make_worker(i, iterations, workers, payload_size, cs_pair));
315     }
316     wait_all(pipes);
317     // All workers have now been spawned and added themselves to service
318     // manager. Signal each worker to obtain a handle to the server workers from
319     // servicemanager.
320     signal_all(pipes);
321     // Wait for each worker to finish obtaining a handle to all server workers
322     // from servicemanager.
323     wait_all(pipes);
324 
325     // Run the benchmark and wait for completion.
326     chrono::time_point<chrono::high_resolution_clock> start, end;
327     cout << "waiting for workers to complete" << endl;
328     start = chrono::high_resolution_clock::now();
329     signal_all(pipes);
330     wait_all(pipes);
331     end = chrono::high_resolution_clock::now();
332 
333     // Calculate overall throughput.
334     double iterations_per_sec = double(iterations * workers) / (chrono::duration_cast<chrono::nanoseconds>(end - start).count() / 1.0E9);
335     cout << "iterations per sec: " << iterations_per_sec << endl;
336 
337     // Collect all results from the workers.
338     cout << "collecting results" << endl;
339     signal_all(pipes);
340     ProcResults tot_results(0), tmp_results(0);
341     for (int i = 0; i < workers; i++) {
342         pipes[i].recv(tmp_results);
343         tot_results.combine_with(tmp_results);
344     }
345 
346     // Kill all the workers.
347     cout << "killing workers" << endl;
348     signal_all(pipes);
349     for (int i = 0; i < workers; i++) {
350         int status;
351         wait(&status);
352         if (status != 0) {
353             cout << "nonzero child status" << status << endl;
354         }
355     }
356     if (training_round) {
357         // Sets warn_latency to 2 * worst from the training round.
358         warn_latency = 2 * tot_results.worst();
359         cout << "Max latency during training: " << tot_results.worst() / 1.0E6 << "ms" << endl;
360     } else {
361         if (dump_to_file) {
362             tot_results.dump_to_file(dump_filename);
363         }
364         tot_results.dump();
365     }
366 }
367 
main(int argc,char * argv[])368 int main(int argc, char *argv[])
369 {
370     int workers = 2;
371     int iterations = 10000;
372     int payload_size = 0;
373     bool cs_pair = false;
374     bool training_round = false;
375     int max_time_us;
376     bool dump_to_file = false;
377     string dump_filename;
378 
379     // Parse arguments.
380     for (int i = 1; i < argc; i++) {
381         if (string(argv[i]) == "--help") {
382             cout << "Usage: binderThroughputTest [OPTIONS]" << endl;
383             cout << "\t-i N    : Specify number of iterations." << endl;
384             cout << "\t-m N    : Specify expected max latency in microseconds." << endl;
385             cout << "\t-p      : Split workers into client/server pairs." << endl;
386             cout << "\t-s N    : Specify payload size." << endl;
387             cout << "\t-t      : Run training round." << endl;
388             cout << "\t-w N    : Specify total number of workers." << endl;
389             cout << "\t-d FILE : Dump raw data to file." << endl;
390             return 0;
391         }
392         if (string(argv[i]) == "-w") {
393             if (i + 1 == argc) {
394                 cout << "-w requires an argument\n" << endl;
395                 exit(EXIT_FAILURE);
396             }
397             workers = atoi(argv[i+1]);
398             i++;
399             continue;
400         }
401         if (string(argv[i]) == "-i") {
402             if (i + 1 == argc) {
403                 cout << "-i requires an argument\n" << endl;
404                 exit(EXIT_FAILURE);
405             }
406             iterations = atoi(argv[i+1]);
407             i++;
408             continue;
409         }
410         if (string(argv[i]) == "-s") {
411             if (i + 1 == argc) {
412                 cout << "-s requires an argument\n" << endl;
413                 exit(EXIT_FAILURE);
414             }
415             payload_size = atoi(argv[i+1]);
416             i++;
417             continue;
418         }
419         if (string(argv[i]) == "-p") {
420             // client/server pairs instead of spreading
421             // requests to all workers. If true, half
422             // the workers become clients and half servers
423             cs_pair = true;
424             continue;
425         }
426         if (string(argv[i]) == "-t") {
427             // Run one training round before actually collecting data
428             // to get an approximation of max latency.
429             training_round = true;
430             continue;
431         }
432         if (string(argv[i]) == "-m") {
433             if (i + 1 == argc) {
434                 cout << "-m requires an argument\n" << endl;
435                 exit(EXIT_FAILURE);
436             }
437             // Caller specified the max latency in microseconds.
438             // No need to run training round in this case.
439             max_time_us = atoi(argv[i+1]);
440             if (max_time_us <= 0) {
441                 cout << "Max latency -m must be positive." << endl;
442                 exit(EXIT_FAILURE);
443             }
444             warn_latency = max_time_us * 1000ull;
445             i++;
446             continue;
447         }
448         if (string(argv[i]) == "-d") {
449             if (i + 1 == argc) {
450                 cout << "-d requires an argument\n" << endl;
451                 exit(EXIT_FAILURE);
452             }
453             dump_to_file = true;
454             dump_filename = argv[i + 1];
455             i++;
456             continue;
457         }
458     }
459 
460     if (training_round) {
461         cout << "Start training round" << endl;
462         run_main(iterations, workers, payload_size, cs_pair, true);
463         cout << "Completed training round" << endl << endl;
464     }
465 
466     run_main(iterations, workers, payload_size, cs_pair, false, dump_to_file, dump_filename);
467     return 0;
468 }
469