1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
18 #define ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
19 
20 #include "assembler.h"
21 
22 #include "common_runtime_test.h"  // For ScratchFile
23 
24 #include <cstdio>
25 #include <cstdlib>
26 #include <fstream>
27 #include <iostream>
28 #include <iterator>
29 #include <sys/stat.h>
30 
31 namespace art {
32 
// Use a global static variable to keep the same base name for all test data of a test. Otherwise
// we would spam the temp directory with differently named files.
// The name is assigned lazily in AssemblerTest::GetTmpnam() and cleared again in
// AssemblerTest::TearDown().
// NOTE(review): as a namespace-scope static in a header, every translation unit that includes
// this file gets its own copy of the variable.
static std::string tmpnam_;
36 
37 template<typename Ass, typename Reg, typename Imm>
38 class AssemblerTest : public testing::Test {
39  public:
GetAssembler()40   Ass* GetAssembler() {
41     return assembler_.get();
42   }
43 
44   typedef std::string (*TestFn)(Ass* assembler);
45 
DriverFn(TestFn f,std::string test_name)46   void DriverFn(TestFn f, std::string test_name) {
47     Driver(f(assembler_.get()), test_name);
48   }
49 
50   // This driver assumes the assembler has already been called.
DriverStr(std::string assembly_string,std::string test_name)51   void DriverStr(std::string assembly_string, std::string test_name) {
52     Driver(assembly_string, test_name);
53   }
54 
RepeatR(void (Ass::* f)(Reg),std::string fmt)55   std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
56     const std::vector<Reg*> registers = GetRegisters();
57     std::string str;
58     for (auto reg : registers) {
59       (assembler_.get()->*f)(*reg);
60       std::string base = fmt;
61 
62       size_t reg_index = base.find("{reg}");
63       if (reg_index != std::string::npos) {
64         std::ostringstream sreg;
65         sreg << *reg;
66         std::string reg_string = sreg.str();
67         base.replace(reg_index, 5, reg_string);
68       }
69 
70       if (str.size() > 0) {
71         str += "\n";
72       }
73       str += base;
74     }
75     // Add a newline at the end.
76     str += "\n";
77     return str;
78   }
79 
RepeatRR(void (Ass::* f)(Reg,Reg),std::string fmt)80   std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) {
81     const std::vector<Reg*> registers = GetRegisters();
82     std::string str;
83     for (auto reg1 : registers) {
84       for (auto reg2 : registers) {
85         (assembler_.get()->*f)(*reg1, *reg2);
86         std::string base = fmt;
87 
88         size_t reg1_index = base.find("{reg1}");
89         if (reg1_index != std::string::npos) {
90           std::ostringstream sreg;
91           sreg << *reg1;
92           std::string reg_string = sreg.str();
93           base.replace(reg1_index, 6, reg_string);
94         }
95 
96         size_t reg2_index = base.find("{reg2}");
97         if (reg2_index != std::string::npos) {
98           std::ostringstream sreg;
99           sreg << *reg2;
100           std::string reg_string = sreg.str();
101           base.replace(reg2_index, 6, reg_string);
102         }
103 
104         if (str.size() > 0) {
105           str += "\n";
106         }
107         str += base;
108       }
109     }
110     // Add a newline at the end.
111     str += "\n";
112     return str;
113   }
114 
RepeatRI(void (Ass::* f)(Reg,const Imm &),size_t imm_bytes,std::string fmt)115   std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
116     const std::vector<Reg*> registers = GetRegisters();
117     std::string str;
118     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
119     for (auto reg : registers) {
120       for (int64_t imm : imms) {
121         Imm* new_imm = CreateImmediate(imm);
122         (assembler_.get()->*f)(*reg, *new_imm);
123         delete new_imm;
124         std::string base = fmt;
125 
126         size_t reg_index = base.find("{reg}");
127         if (reg_index != std::string::npos) {
128           std::ostringstream sreg;
129           sreg << *reg;
130           std::string reg_string = sreg.str();
131           base.replace(reg_index, 5, reg_string);
132         }
133 
134         size_t imm_index = base.find("{imm}");
135         if (imm_index != std::string::npos) {
136           std::ostringstream sreg;
137           sreg << imm;
138           std::string imm_string = sreg.str();
139           base.replace(imm_index, 5, imm_string);
140         }
141 
142         if (str.size() > 0) {
143           str += "\n";
144         }
145         str += base;
146       }
147     }
148     // Add a newline at the end.
149     str += "\n";
150     return str;
151   }
152 
RepeatI(void (Ass::* f)(const Imm &),size_t imm_bytes,std::string fmt)153   std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt) {
154     std::string str;
155     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
156     for (int64_t imm : imms) {
157       Imm* new_imm = CreateImmediate(imm);
158       (assembler_.get()->*f)(*new_imm);
159       delete new_imm;
160       std::string base = fmt;
161 
162       size_t imm_index = base.find("{imm}");
163       if (imm_index != std::string::npos) {
164         std::ostringstream sreg;
165         sreg << imm;
166         std::string imm_string = sreg.str();
167         base.replace(imm_index, 5, imm_string);
168       }
169 
170       if (str.size() > 0) {
171         str += "\n";
172       }
173       str += base;
174     }
175     // Add a newline at the end.
176     str += "\n";
177     return str;
178   }
179 
180   // This is intended to be run as a test.
CheckTools()181   bool CheckTools() {
182     if (!FileExists(GetAssemblerCommand())) {
183       return false;
184     }
185     LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand();
186 
187     if (!FileExists(GetObjdumpCommand())) {
188       return false;
189     }
190     LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand();
191 
192     // Disassembly is optional.
193     std::string disassembler = GetDisassembleCommand();
194     if (disassembler.length() != 0) {
195       if (!FileExists(disassembler)) {
196         return false;
197       }
198       LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand();
199     } else {
200       LOG(INFO) << "No disassembler given.";
201     }
202 
203     return true;
204   }
205 
206  protected:
SetUp()207   void SetUp() OVERRIDE {
208     assembler_.reset(new Ass());
209 
210     // Fake a runtime test for ScratchFile
211     CommonRuntimeTest::SetUpAndroidData(android_data_);
212 
213     SetUpHelpers();
214   }
215 
TearDown()216   void TearDown() OVERRIDE {
217     // We leave temporaries in case this failed so we can debug issues.
218     CommonRuntimeTest::TearDownAndroidData(android_data_, false);
219     tmpnam_ = "";
220   }
221 
222   // Override this to set up any architecture-specific things, e.g., register vectors.
SetUpHelpers()223   virtual void SetUpHelpers() {}
224 
225   virtual std::vector<Reg*> GetRegisters() = 0;
226 
227   // Get the typically used name for this architecture, e.g., aarch64, x86_64, ...
228   virtual std::string GetArchitectureString() = 0;
229 
230   // Get the name of the assembler, e.g., "as" by default.
GetAssemblerCmdName()231   virtual std::string GetAssemblerCmdName() {
232     return "as";
233   }
234 
235   // Switches to the assembler command. Default none.
GetAssemblerParameters()236   virtual std::string GetAssemblerParameters() {
237     return "";
238   }
239 
240   // Return the host assembler command for this test.
GetAssemblerCommand()241   virtual std::string GetAssemblerCommand() {
242     // Already resolved it once?
243     if (resolved_assembler_cmd_.length() != 0) {
244       return resolved_assembler_cmd_;
245     }
246 
247     std::string line = FindTool(GetAssemblerCmdName());
248     if (line.length() == 0) {
249       return line;
250     }
251 
252     resolved_assembler_cmd_ = line + GetAssemblerParameters();
253 
254     return line;
255   }
256 
257   // Get the name of the objdump, e.g., "objdump" by default.
GetObjdumpCmdName()258   virtual std::string GetObjdumpCmdName() {
259     return "objdump";
260   }
261 
262   // Switches to the objdump command. Default is " -h".
GetObjdumpParameters()263   virtual std::string GetObjdumpParameters() {
264     return " -h";
265   }
266 
267   // Return the host objdump command for this test.
GetObjdumpCommand()268   virtual std::string GetObjdumpCommand() {
269     // Already resolved it once?
270     if (resolved_objdump_cmd_.length() != 0) {
271       return resolved_objdump_cmd_;
272     }
273 
274     std::string line = FindTool(GetObjdumpCmdName());
275     if (line.length() == 0) {
276       return line;
277     }
278 
279     resolved_objdump_cmd_ = line + GetObjdumpParameters();
280 
281     return line;
282   }
283 
284   // Get the name of the objdump, e.g., "objdump" by default.
GetDisassembleCmdName()285   virtual std::string GetDisassembleCmdName() {
286     return "objdump";
287   }
288 
289   // Switches to the objdump command. As it's a binary, one needs to push the architecture and
290   // such to objdump, so it's architecture-specific and there is no default.
291   virtual std::string GetDisassembleParameters() = 0;
292 
293   // Return the host disassembler command for this test.
GetDisassembleCommand()294   virtual std::string GetDisassembleCommand() {
295     // Already resolved it once?
296     if (resolved_disassemble_cmd_.length() != 0) {
297       return resolved_disassemble_cmd_;
298     }
299 
300     std::string line = FindTool(GetDisassembleCmdName());
301     if (line.length() == 0) {
302       return line;
303     }
304 
305     resolved_disassemble_cmd_ = line + GetDisassembleParameters();
306 
307     return line;
308   }
309 
310   // Create a couple of immediate values up to the number of bytes given.
CreateImmediateValues(size_t imm_bytes)311   virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes) {
312     std::vector<int64_t> res;
313     res.push_back(0);
314     res.push_back(-1);
315     res.push_back(0x12);
316     if (imm_bytes >= 2) {
317       res.push_back(0x1234);
318       res.push_back(-0x1234);
319       if (imm_bytes >= 4) {
320         res.push_back(0x12345678);
321         res.push_back(-0x12345678);
322         if (imm_bytes >= 6) {
323           res.push_back(0x123456789ABC);
324           res.push_back(-0x123456789ABC);
325           if (imm_bytes >= 8) {
326             res.push_back(0x123456789ABCDEF0);
327             res.push_back(-0x123456789ABCDEF0);
328           }
329         }
330       }
331     }
332     return res;
333   }
334 
335   // Create an immediate from the specific value.
336   virtual Imm* CreateImmediate(int64_t imm_value) = 0;
337 
338  private:
339   // Driver() assembles and compares the results. If the results are not equal and we have a
340   // disassembler, disassemble both and check whether they have the same mnemonics (in which case
341   // we just warn).
Driver(std::string assembly_text,std::string test_name)342   void Driver(std::string assembly_text, std::string test_name) {
343     EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
344 
345     NativeAssemblerResult res;
346     Compile(assembly_text, &res, test_name);
347 
348     EXPECT_TRUE(res.ok) << res.error_msg;
349     if (!res.ok) {
350       // No way of continuing.
351       return;
352     }
353 
354     size_t cs = assembler_->CodeSize();
355     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
356     MemoryRegion code(&(*data)[0], data->size());
357     assembler_->FinalizeInstructions(code);
358 
359     if (*data == *res.code) {
360       Clean(&res);
361     } else {
362       if (DisassembleBinaries(*data, *res.code, test_name)) {
363         if (data->size() > res.code->size()) {
364           // Fail this test with a fancy colored warning being printed.
365           EXPECT_TRUE(false) << "Assembly code is not identical, but disassembly of machine code "
366               "is equal: this implies sub-optimal encoding! Our code size=" << data->size() <<
367               ", gcc size=" << res.code->size();
368         } else {
369           // Otherwise just print an info message and clean up.
370           LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the "
371               "same.";
372           Clean(&res);
373         }
374       } else {
375         // This will output the assembly.
376         EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical.";
377       }
378     }
379   }
380 
381   // Structure to store intermediates and results.
382   struct NativeAssemblerResult {
383     bool ok;
384     std::string error_msg;
385     std::string base_name;
386     std::unique_ptr<std::vector<uint8_t>> code;
387     uintptr_t length;
388   };
389 
390   // Compile the assembly file from_file to a binary file to_file. Returns true on success.
Assemble(const char * from_file,const char * to_file,std::string * error_msg)391   bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) {
392     bool have_assembler = FileExists(GetAssemblerCommand());
393     EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand();
394     if (!have_assembler) {
395       return false;
396     }
397 
398     std::vector<std::string> args;
399 
400     args.push_back(GetAssemblerCommand());
401     args.push_back("-o");
402     args.push_back(to_file);
403     args.push_back(from_file);
404 
405     return Exec(args, error_msg);
406   }
407 
408   // Runs objdump -h on the binary file and extracts the first line with .text.
409   // Returns "" on failure.
Objdump(std::string file)410   std::string Objdump(std::string file) {
411     bool have_objdump = FileExists(GetObjdumpCommand());
412     EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
413     if (!have_objdump) {
414       return "";
415     }
416 
417     std::string error_msg;
418     std::vector<std::string> args;
419 
420     args.push_back(GetObjdumpCommand());
421     args.push_back(file);
422     args.push_back(">");
423     args.push_back(file+".dump");
424     std::string cmd = Join(args, ' ');
425 
426     args.clear();
427     args.push_back("/bin/sh");
428     args.push_back("-c");
429     args.push_back(cmd);
430 
431     if (!Exec(args, &error_msg)) {
432       EXPECT_TRUE(false) << error_msg;
433     }
434 
435     std::ifstream dump(file+".dump");
436 
437     std::string line;
438     bool found = false;
439     while (std::getline(dump, line)) {
440       if (line.find(".text") != line.npos) {
441         found = true;
442         break;
443       }
444     }
445 
446     dump.close();
447 
448     if (found) {
449       return line;
450     } else {
451       return "";
452     }
453   }
454 
455   // Disassemble both binaries and compare the text.
DisassembleBinaries(std::vector<uint8_t> & data,std::vector<uint8_t> & as,std::string test_name)456   bool DisassembleBinaries(std::vector<uint8_t>& data, std::vector<uint8_t>& as,
457                            std::string test_name) {
458     std::string disassembler = GetDisassembleCommand();
459     if (disassembler.length() == 0) {
460       LOG(WARNING) << "No dissassembler command.";
461       return false;
462     }
463 
464     std::string data_name = WriteToFile(data, test_name + ".ass");
465     std::string error_msg;
466     if (!DisassembleBinary(data_name, &error_msg)) {
467       LOG(INFO) << "Error disassembling: " << error_msg;
468       std::remove(data_name.c_str());
469       return false;
470     }
471 
472     std::string as_name = WriteToFile(as, test_name + ".gcc");
473     if (!DisassembleBinary(as_name, &error_msg)) {
474       LOG(INFO) << "Error disassembling: " << error_msg;
475       std::remove(data_name.c_str());
476       std::remove((data_name + ".dis").c_str());
477       std::remove(as_name.c_str());
478       return false;
479     }
480 
481     bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
482 
483     if (result) {
484       std::remove(data_name.c_str());
485       std::remove(as_name.c_str());
486       std::remove((data_name + ".dis").c_str());
487       std::remove((as_name + ".dis").c_str());
488     }
489 
490     return result;
491   }
492 
DisassembleBinary(std::string file,std::string * error_msg)493   bool DisassembleBinary(std::string file, std::string* error_msg) {
494     std::vector<std::string> args;
495 
496     args.push_back(GetDisassembleCommand());
497     args.push_back(file);
498     args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
499     args.push_back(">");
500     args.push_back(file+".dis");
501     std::string cmd = Join(args, ' ');
502 
503     args.clear();
504     args.push_back("/bin/sh");
505     args.push_back("-c");
506     args.push_back(cmd);
507 
508     return Exec(args, error_msg);
509   }
510 
WriteToFile(std::vector<uint8_t> & buffer,std::string test_name)511   std::string WriteToFile(std::vector<uint8_t>& buffer, std::string test_name) {
512     std::string file_name = GetTmpnam() + std::string("---") + test_name;
513     const char* data = reinterpret_cast<char*>(buffer.data());
514     std::ofstream s_out(file_name + ".o");
515     s_out.write(data, buffer.size());
516     s_out.close();
517     return file_name + ".o";
518   }
519 
CompareFiles(std::string f1,std::string f2)520   bool CompareFiles(std::string f1, std::string f2) {
521     std::ifstream f1_in(f1);
522     std::ifstream f2_in(f2);
523 
524     bool result = std::equal(std::istreambuf_iterator<char>(f1_in),
525                              std::istreambuf_iterator<char>(),
526                              std::istreambuf_iterator<char>(f2_in));
527 
528     f1_in.close();
529     f2_in.close();
530 
531     return result;
532   }
533 
534   // Compile the given assembly code and extract the binary, if possible. Put result into res.
Compile(std::string assembly_code,NativeAssemblerResult * res,std::string test_name)535   bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
536     res->ok = false;
537     res->code.reset(nullptr);
538 
539     res->base_name = GetTmpnam() + std::string("---") + test_name;
540 
541     // TODO: Lots of error checking.
542 
543     std::ofstream s_out(res->base_name + ".S");
544     s_out << assembly_code;
545     s_out.close();
546 
547     if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(),
548                   &res->error_msg)) {
549       res->error_msg = "Could not compile.";
550       return false;
551     }
552 
553     std::string odump = Objdump(res->base_name + ".o");
554     if (odump.length() == 0) {
555       res->error_msg = "Objdump failed.";
556       return false;
557     }
558 
559     std::istringstream iss(odump);
560     std::istream_iterator<std::string> start(iss);
561     std::istream_iterator<std::string> end;
562     std::vector<std::string> tokens(start, end);
563 
564     if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) {
565       res->error_msg = "Objdump output not recognized: too few tokens.";
566       return false;
567     }
568 
569     if (tokens[1] != ".text") {
570       res->error_msg = "Objdump output not recognized: .text not second token.";
571       return false;
572     }
573 
574     std::string lengthToken = "0x" + tokens[2];
575     std::istringstream(lengthToken) >> std::hex >> res->length;
576 
577     std::string offsetToken = "0x" + tokens[5];
578     uintptr_t offset;
579     std::istringstream(offsetToken) >> std::hex >> offset;
580 
581     std::ifstream obj(res->base_name + ".o");
582     obj.seekg(offset);
583     res->code.reset(new std::vector<uint8_t>(res->length));
584     obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length);
585     obj.close();
586 
587     res->ok = true;
588     return true;
589   }
590 
591   // Remove temporary files.
Clean(const NativeAssemblerResult * res)592   void Clean(const NativeAssemblerResult* res) {
593     std::remove((res->base_name + ".S").c_str());
594     std::remove((res->base_name + ".o").c_str());
595     std::remove((res->base_name + ".o.dump").c_str());
596   }
597 
598   // Check whether file exists. Is used for commands, so strips off any parameters: anything after
599   // the first space. We skip to the last slash for this, so it should work with directories with
600   // spaces.
FileExists(std::string file)601   static bool FileExists(std::string file) {
602     if (file.length() == 0) {
603       return false;
604     }
605 
606     // Need to strip any options.
607     size_t last_slash = file.find_last_of('/');
608     if (last_slash == std::string::npos) {
609       // No slash, start looking at the start.
610       last_slash = 0;
611     }
612     size_t space_index = file.find(' ', last_slash);
613 
614     if (space_index == std::string::npos) {
615       std::ifstream infile(file.c_str());
616       return infile.good();
617     } else {
618       std::string copy = file.substr(0, space_index - 1);
619 
620       struct stat buf;
621       return stat(copy.c_str(), &buf) == 0;
622     }
623   }
624 
GetGCCRootPath()625   static std::string GetGCCRootPath() {
626     return "prebuilts/gcc/linux-x86";
627   }
628 
GetRootPath()629   static std::string GetRootPath() {
630     // 1) Check ANDROID_BUILD_TOP
631     char* build_top = getenv("ANDROID_BUILD_TOP");
632     if (build_top != nullptr) {
633       return std::string(build_top) + "/";
634     }
635 
636     // 2) Do cwd
637     char temp[1024];
638     return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
639   }
640 
FindTool(std::string tool_name)641   std::string FindTool(std::string tool_name) {
642     // Find the current tool. Wild-card pattern is "arch-string*tool-name".
643     std::string gcc_path = GetRootPath() + GetGCCRootPath();
644     std::vector<std::string> args;
645     args.push_back("find");
646     args.push_back(gcc_path);
647     args.push_back("-name");
648     args.push_back(GetArchitectureString() + "*" + tool_name);
649     args.push_back("|");
650     args.push_back("sort");
651     args.push_back("|");
652     args.push_back("tail");
653     args.push_back("-n");
654     args.push_back("1");
655     std::string tmp_file = GetTmpnam();
656     args.push_back(">");
657     args.push_back(tmp_file);
658     std::string sh_args = Join(args, ' ');
659 
660     args.clear();
661     args.push_back("/bin/sh");
662     args.push_back("-c");
663     args.push_back(sh_args);
664 
665     std::string error_msg;
666     if (!Exec(args, &error_msg)) {
667       EXPECT_TRUE(false) << error_msg;
668       return "";
669     }
670 
671     std::ifstream in(tmp_file.c_str());
672     std::string line;
673     if (!std::getline(in, line)) {
674       in.close();
675       std::remove(tmp_file.c_str());
676       return "";
677     }
678     in.close();
679     std::remove(tmp_file.c_str());
680     return line;
681   }
682 
683   // Use a consistent tmpnam, so store it.
GetTmpnam()684   std::string GetTmpnam() {
685     if (tmpnam_.length() == 0) {
686       ScratchFile tmp;
687       tmpnam_ = tmp.GetFilename() + "asm";
688     }
689     return tmpnam_;
690   }
691 
692   std::unique_ptr<Ass> assembler_;
693 
694   std::string resolved_assembler_cmd_;
695   std::string resolved_objdump_cmd_;
696   std::string resolved_disassemble_cmd_;
697 
698   std::string android_data_;
699 
700   static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
701 };
702 
703 }  // namespace art
704 
705 #endif  // ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
706