/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "berberis/runtime_primitives/memory_region_reservation.h"

#include <array>
#include <atomic>

#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state_arch.h"

namespace berberis {

namespace {

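// Helper for reporting an unsupported Reservation size. The static_assert condition depends on a
// template parameter, so it only fires if this function is actually instantiated, i.e. when none
// of the sizes handled below matches.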
template <bool flag = false>
void static_bad_size() {
  static_assert(flag, "Expected Reservation to be of size 8 or 16");
}

template <typename ReservationType>
inline ReservationType MemoryRegionReservationLoadTemplate(GuestAddr addr,
                                                           std::memory_order mem_order) {
  if constexpr (sizeof(ReservationType) == 16) {
    // Intel doesn't have an atomic 128-bit load other than CMPXCHG16B, which is also
    // a store and doesn't work for read-only memory. We only support guests that
    // are similar to x86 in that a 128-bit load is two atomic 64-bit loads.
    ReservationType low =
        std::atomic_load_explicit(ToHostAddr<std::atomic<uint64_t>>(addr), mem_order);
    ReservationType high =
        std::atomic_load_explicit(ToHostAddr<std::atomic<uint64_t>>(addr + 8), mem_order);
    return (high << 64) | low;
  } else if constexpr (sizeof(ReservationType) == 8) {
    // Starting from the i486, all accesses for all instructions are atomic when they are used for
    // naturally-aligned variables of uint8_t, uint16_t and uint32_t types. But the situation is
    // not so straightforward when we are dealing with uint64_t.
    //
    // This is what the Intel manual says about atomicity of 64-bit memory operations:
    //   The Pentium processor (and newer processors since) guarantees that the following additional
    //   memory operations will always be carried out atomically:
    //     * Reading or writing a quadword aligned on a 64-bit boundary
    //
    // The AMD manual says the same thing:
    //   Single load or store operations (from instructions that do just a single load or store) are
    //   naturally atomic on any AMD64 processor as long as they do not cross an aligned 8-byte
    //   boundary. Accesses up to eight bytes in size which do cross such a boundary may be
    //   performed atomically using certain instructions with a lock prefix, such as XCHG, CMPXCHG
    //   or CMPXCHG8B, as long as all such accesses are done using the same technique.
    //
    // Fortunately, the RISC-V ISA manual agrees as well: only accesses to naturally aligned memory
    // are required to be performed atomically.
    //
    // Thus using a regular x86 movq is good enough for emulation of the RISC-V behavior.
    //
    // But std::atomic<uint64_t> would always use the heavy "lock cmpxchg8b" operation on the IA32
    // platform, because uint64_t is not guaranteed to be naturally-aligned on IA32!
    //
    // Not only is this slow, but it also fails when we are accessing read-only memory!
    //
    // Use the raw "movq" assembler instruction to circumvent that limitation of the IA32 ABI.
    ReservationType reservation;
    __asm__ __volatile__("movq (%1),%0" : "=x"(reservation) : "r"(addr));
    return reservation;
  } else {
    static_bad_size();
  }
}

inline Reservation MemoryRegionReservationLoad(GuestAddr addr, std::memory_order mem_order) {
  return MemoryRegionReservationLoadTemplate<Reservation>(addr, mem_order);
}

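// Reservation owners are tracked in a fixed-size table indexed by a hash of the aligned guest
// address. Distinct addresses may map to the same entry, so an entry identifies at most one
// reservation per hash bucket rather than one per address.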
MemoryRegionReservation::Entry& GetEntry(GuestAddr addr) {
  static constexpr size_t kHashSize = 4096;
  static std::array<MemoryRegionReservation::Entry, kHashSize> g_owners;

  return g_owners[(addr / sizeof(Reservation)) % kHashSize];
}

// Special owner to disallow stealing. Only used while an exclusive store is in progress.
int g_fake_cpu;
constexpr void* kLockedOwner = &g_fake_cpu;

}  // namespace

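// Called on the load-reserved path (see ReservationLoad below): makes `cpu` the owner of the
// reservation entry unless another thread currently holds it locked for an exclusive store.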
void MemoryRegionReservation::SetOwner(GuestAddr aligned_addr, void* cpu) {
  auto& entry = GetEntry(aligned_addr);

  // Try stealing. Fails if another thread is doing an exclusive store or wins the race to steal.
  // If stealing fails, then the subsequent exclusive store fails as well.
  auto prev = entry.load();
  if (prev != kLockedOwner) {
    entry.compare_exchange_strong(prev, cpu);
  }
}

MemoryRegionReservation::Entry* MemoryRegionReservation::TryLock(GuestAddr aligned_addr,
                                                                 void* cpu) {
  auto& entry = GetEntry(aligned_addr);

  // Try locking. Fails if the reservation load failed to steal the address or the address was
  // stolen afterwards.
  if (!entry.compare_exchange_strong(cpu, kLockedOwner)) {
    return nullptr;
  }

  return &entry;
}

void MemoryRegionReservation::Unlock(MemoryRegionReservation::Entry* entry) {
  // No need to compare and swap as the locked address cannot be stolen.
  entry->store(nullptr);
}

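// Load-reserved half of the emulation: records `cpu` as the owner of the reservation covering
// `aligned_addr`, then loads the region.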
Reservation MemoryRegionReservation::ReservationLoad(void* cpu,
                                                     GuestAddr aligned_addr,
                                                     std::memory_order mem_order) {
  SetOwner(aligned_addr, cpu);

  // ATTENTION!
  // For region sizes <= 8, the region load is atomic, so this always returns a consistent value.
  // For region sizes > 8, the region load is NOT atomic! The returned value might be inconsistent.
  //
  // To load a 16-byte value atomically, the guest architecture suggests performing a 16-byte
  // exclusive load and then an exclusive store of the loaded value. The loaded value can be used
  // only if the exclusive store succeeds.
  //
  // If developers are aware of the above and do not use the result of a 16-byte exclusive load
  // without a subsequent check by an exclusive store, an inconsistent return value here is safe.
  // Too bad if this is not the case...
  return MemoryRegionReservationLoad(aligned_addr, mem_order);
}

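// Store-conditional half of the emulation: succeeds only if this cpu still owns the reservation
// entry for `aligned_addr` and the memory still holds `expected`.
//
// A minimal sketch of how a caller might pair the two entry points (hypothetical code, not part of
// this file; the actual callers and memory orders depend on the guest instruction being emulated):
//
//   Reservation old_value = MemoryRegionReservation::ReservationLoad(
//       cpu, aligned_addr, std::memory_order_acquire);
//   Reservation new_value = Compute(old_value);  // Compute() is a placeholder.
//   bool success = MemoryRegionReservation::ReservationExchange(
//       cpu, aligned_addr, old_value, new_value, std::memory_order_release);
//   // If `success` is false, the guest's store-conditional reports failure and guest code retries.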
bool MemoryRegionReservation::ReservationExchange(void* cpu,
                                                  GuestAddr aligned_addr,
                                                  Reservation expected,
                                                  Reservation value,
                                                  std::memory_order mem_order) {
  auto* entry = TryLock(aligned_addr, cpu);

  if (!entry) {
    return false;
  }

  bool written = std::atomic_compare_exchange_strong_explicit(
      ToHostAddr<std::atomic<Reservation>>(aligned_addr),
      &expected,
      value,
      mem_order,
      std::memory_order_relaxed);

  Unlock(entry);

  return written;
}

}  // namespace berberis