1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | #ifndef LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H |
19 | #define LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H |
20 | |
21 | #include "llvm/ADT/DenseMap.h" |
22 | #include "llvm/ADT/DenseSet.h" |
23 | #include "llvm/ADT/PointerUnion.h" |
24 | #include "llvm/ADT/STLExtras.h" |
25 | #include "llvm/ADT/SmallVector.h" |
26 | #include "llvm/ADT/TinyPtrVector.h" |
27 | #include "llvm/ADT/iterator_range.h" |
28 | #include "llvm/Support/Endian.h" |
29 | #include "llvm/Support/EndianStream.h" |
30 | #include "llvm/Support/OnDiskHashTable.h" |
31 | #include "llvm/Support/raw_ostream.h" |
32 | #include <algorithm> |
33 | #include <cstdint> |
34 | #include <vector> |
35 | |
36 | namespace clang { |
37 | namespace serialization { |
38 | |
39 | |
40 | template<typename Info> class MultiOnDiskHashTable { |
41 | public: |
42 | |
43 | using file_type = typename Info::file_type; |
44 | |
45 | |
46 | using storage_type = const unsigned char *; |
47 | |
48 | using external_key_type = typename Info::external_key_type; |
49 | using internal_key_type = typename Info::internal_key_type; |
50 | using data_type = typename Info::data_type; |
51 | using data_type_builder = typename Info::data_type_builder; |
52 | using hash_value_type = unsigned; |
53 | |
54 | private: |
55 | |
56 | template<typename ReaderInfo, typename WriterInfo> |
57 | friend class MultiOnDiskHashTableGenerator; |
58 | |
59 | |
60 | struct OnDiskTable { |
61 | using HashTable = llvm::OnDiskIterableChainedHashTable<Info>; |
62 | |
63 | file_type File; |
64 | HashTable Table; |
65 | |
66 | OnDiskTable(file_type File, unsigned NumBuckets, unsigned NumEntries, |
67 | storage_type Buckets, storage_type Payload, storage_type Base, |
68 | const Info &InfoObj) |
69 | : File(File), |
70 | Table(NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj) {} |
71 | }; |
72 | |
73 | struct MergedTable { |
74 | std::vector<file_type> Files; |
75 | llvm::DenseMap<internal_key_type, data_type> Data; |
76 | }; |
77 | |
78 | using Table = llvm::PointerUnion<OnDiskTable *, MergedTable *>; |
79 | using TableVector = llvm::TinyPtrVector<void *>; |
80 | |
81 | |
82 | |
83 | |
84 | |
85 | |
86 | TableVector Tables; |
87 | |
88 | |
89 | |
90 | llvm::TinyPtrVector<file_type> PendingOverrides; |
91 | |
92 | struct AsOnDiskTable { |
93 | using result_type = OnDiskTable *; |
94 | |
95 | result_type operator()(void *P) const { |
96 | return Table::getFromOpaqueValue(P).template get<OnDiskTable *>(); |
97 | } |
98 | }; |
99 | |
100 | using table_iterator = |
101 | llvm::mapped_iterator<TableVector::iterator, AsOnDiskTable>; |
102 | using table_range = llvm::iterator_range<table_iterator>; |
103 | |
104 | |
105 | table_range tables() { |
106 | auto Begin = Tables.begin(), End = Tables.end(); |
107 | if (getMergedTable()) |
108 | ++Begin; |
109 | return llvm::make_range(llvm::map_iterator(Begin, AsOnDiskTable()), |
110 | llvm::map_iterator(End, AsOnDiskTable())); |
111 | } |
112 | |
113 | MergedTable *getMergedTable() const { |
114 | |
115 | return Tables.empty() ? nullptr : Table::getFromOpaqueValue(*Tables.begin()) |
116 | .template dyn_cast<MergedTable*>(); |
117 | } |
118 | |
119 | |
120 | void clear() { |
121 | for (auto *T : tables()) |
122 | delete T; |
123 | if (auto *M = getMergedTable()) |
124 | delete M; |
125 | Tables.clear(); |
126 | } |
127 | |
128 | void removeOverriddenTables() { |
129 | llvm::DenseSet<file_type> Files; |
130 | Files.insert(PendingOverrides.begin(), PendingOverrides.end()); |
131 | |
132 | auto ShouldRemove = [&Files](void *T) -> bool { |
133 | auto *ODT = Table::getFromOpaqueValue(T).template get<OnDiskTable *>(); |
134 | bool Remove = Files.count(ODT->File); |
135 | if (Remove) |
136 | delete ODT; |
137 | return Remove; |
138 | }; |
139 | Tables.erase(std::remove_if(tables().begin().getCurrent(), Tables.end(), |
140 | ShouldRemove), |
141 | Tables.end()); |
142 | PendingOverrides.clear(); |
143 | } |
144 | |
145 | void condense() { |
146 | MergedTable *Merged = getMergedTable(); |
147 | if (!Merged) |
148 | Merged = new MergedTable; |
149 | |
150 | |
151 | |
152 | for (auto *ODT : tables()) { |
153 | auto &HT = ODT->Table; |
154 | Info &InfoObj = HT.getInfoObj(); |
155 | |
156 | for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I) { |
157 | auto *LocalPtr = I.getItem(); |
158 | |
159 | |
160 | auto L = InfoObj.ReadKeyDataLength(LocalPtr); |
161 | const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first); |
162 | data_type_builder ValueBuilder(Merged->Data[Key]); |
163 | InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, |
164 | ValueBuilder); |
165 | } |
166 | |
167 | Merged->Files.push_back(ODT->File); |
168 | delete ODT; |
169 | } |
170 | |
171 | Tables.clear(); |
172 | Tables.push_back(Table(Merged).getOpaqueValue()); |
173 | } |
174 | |
175 | public: |
176 | MultiOnDiskHashTable() = default; |
177 | |
178 | MultiOnDiskHashTable(MultiOnDiskHashTable &&O) |
179 | : Tables(std::move(O.Tables)), |
180 | PendingOverrides(std::move(O.PendingOverrides)) { |
181 | O.Tables.clear(); |
182 | } |
183 | |
184 | MultiOnDiskHashTable &operator=(MultiOnDiskHashTable &&O) { |
185 | if (&O == this) |
186 | return *this; |
187 | clear(); |
188 | Tables = std::move(O.Tables); |
189 | O.Tables.clear(); |
190 | PendingOverrides = std::move(O.PendingOverrides); |
191 | return *this; |
192 | } |
193 | |
194 | ~MultiOnDiskHashTable() { clear(); } |
195 | |
196 | |
197 | void add(file_type File, storage_type Data, Info InfoObj = Info()) { |
198 | using namespace llvm::support; |
199 | |
200 | storage_type Ptr = Data; |
201 | |
202 | uint32_t BucketOffset = endian::readNext<uint32_t, little, unaligned>(Ptr); |
203 | |
204 | |
205 | uint32_t NumFiles = endian::readNext<uint32_t, little, unaligned>(Ptr); |
206 | |
207 | |
208 | llvm::SmallVector<file_type, 16> OverriddenFiles; |
209 | OverriddenFiles.reserve(NumFiles); |
210 | for (; NumFiles != 0; --NumFiles) |
211 | OverriddenFiles.push_back(InfoObj.ReadFileRef(Ptr)); |
212 | PendingOverrides.insert(PendingOverrides.end(), OverriddenFiles.begin(), |
213 | OverriddenFiles.end()); |
214 | |
215 | |
216 | storage_type Buckets = Data + BucketOffset; |
217 | auto NumBucketsAndEntries = |
218 | OnDiskTable::HashTable::readNumBucketsAndEntries(Buckets); |
219 | |
220 | |
221 | Table NewTable = new OnDiskTable(File, NumBucketsAndEntries.first, |
222 | NumBucketsAndEntries.second, |
223 | Buckets, Ptr, Data, std::move(InfoObj)); |
224 | Tables.push_back(NewTable.getOpaqueValue()); |
225 | } |
226 | |
227 | |
228 | data_type find(const external_key_type &EKey) { |
229 | data_type Result; |
230 | |
231 | if (!PendingOverrides.empty()) |
232 | removeOverriddenTables(); |
233 | |
234 | if (Tables.size() > static_cast<unsigned>(Info::MaxTables)) |
235 | condense(); |
236 | |
237 | internal_key_type Key = Info::GetInternalKey(EKey); |
238 | auto KeyHash = Info::ComputeHash(Key); |
239 | |
240 | if (MergedTable *M = getMergedTable()) { |
241 | auto It = M->Data.find(Key); |
242 | if (It != M->Data.end()) |
243 | Result = It->second; |
244 | } |
245 | |
246 | data_type_builder ResultBuilder(Result); |
247 | |
248 | for (auto *ODT : tables()) { |
249 | auto &HT = ODT->Table; |
250 | auto It = HT.find_hashed(Key, KeyHash); |
251 | if (It != HT.end()) |
252 | HT.getInfoObj().ReadDataInto(Key, It.getDataPtr(), It.getDataLen(), |
253 | ResultBuilder); |
254 | } |
255 | |
256 | return Result; |
257 | } |
258 | |
259 | |
260 | |
261 | data_type findAll() { |
262 | data_type Result; |
263 | data_type_builder ResultBuilder(Result); |
264 | |
265 | if (!PendingOverrides.empty()) |
266 | removeOverriddenTables(); |
267 | |
268 | if (MergedTable *M = getMergedTable()) { |
269 | for (auto &KV : M->Data) |
270 | Info::MergeDataInto(KV.second, ResultBuilder); |
271 | } |
272 | |
273 | for (auto *ODT : tables()) { |
274 | auto &HT = ODT->Table; |
275 | Info &InfoObj = HT.getInfoObj(); |
276 | for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I) { |
277 | auto *LocalPtr = I.getItem(); |
278 | |
279 | |
280 | auto L = InfoObj.ReadKeyDataLength(LocalPtr); |
281 | const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first); |
282 | InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, ResultBuilder); |
283 | } |
284 | } |
285 | |
286 | return Result; |
287 | } |
288 | }; |
289 | |
290 | |
291 | template<typename ReaderInfo, typename WriterInfo> |
292 | class MultiOnDiskHashTableGenerator { |
293 | using BaseTable = MultiOnDiskHashTable<ReaderInfo>; |
294 | using Generator = llvm::OnDiskChainedHashTableGenerator<WriterInfo>; |
295 | |
296 | Generator Gen; |
297 | |
298 | public: |
299 | MultiOnDiskHashTableGenerator() : Gen() {} |
300 | |
301 | void insert(typename WriterInfo::key_type_ref Key, |
302 | typename WriterInfo::data_type_ref Data, WriterInfo &Info) { |
303 | Gen.insert(Key, Data, Info); |
304 | } |
305 | |
306 | void emit(llvm::SmallVectorImpl<char> &Out, WriterInfo &Info, |
307 | const BaseTable *Base) { |
308 | using namespace llvm::support; |
309 | |
310 | llvm::raw_svector_ostream OutStream(Out); |
311 | |
312 | |
313 | { |
314 | endian::Writer Writer(OutStream, little); |
315 | |
316 | |
317 | Writer.write<uint32_t>(0); |
318 | |
319 | if (auto *Merged = Base ? Base->getMergedTable() : nullptr) { |
320 | |
321 | Writer.write<uint32_t>(Merged->Files.size()); |
322 | for (const auto &F : Merged->Files) |
323 | Info.EmitFileRef(OutStream, F); |
324 | |
325 | |
326 | for (auto &KV : Merged->Data) { |
327 | if (!Gen.contains(KV.first, Info)) |
328 | Gen.insert(KV.first, Info.ImportData(KV.second), Info); |
329 | } |
330 | } else { |
331 | Writer.write<uint32_t>(0); |
332 | } |
333 | } |
334 | |
335 | |
336 | uint32_t BucketOffset = Gen.Emit(OutStream, Info); |
337 | |
338 | |
339 | endian::write32le(Out.data(), BucketOffset); |
340 | } |
341 | }; |
342 | |
343 | } |
344 | } |
345 | |
346 | #endif |
347 | |