1 | //===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements a sort operation for JavaScript ES6 imports. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "SortJavaScriptImports.h" |
15 | #include "TokenAnalyzer.h" |
16 | #include "TokenAnnotator.h" |
17 | #include "clang/Basic/Diagnostic.h" |
18 | #include "clang/Basic/DiagnosticOptions.h" |
19 | #include "clang/Basic/LLVM.h" |
20 | #include "clang/Basic/SourceLocation.h" |
21 | #include "clang/Basic/SourceManager.h" |
22 | #include "clang/Format/Format.h" |
23 | #include "llvm/ADT/STLExtras.h" |
24 | #include "llvm/ADT/SmallVector.h" |
25 | #include "llvm/Support/Debug.h" |
26 | #include <algorithm> |
27 | #include <string> |
28 | |
29 | #define DEBUG_TYPE "format-formatter" |
30 | |
31 | namespace clang { |
32 | namespace format { |
33 | |
34 | class FormatTokenLexer; |
35 | |
36 | using clang::format::FormatStyle; |
37 | |
38 | // An imported symbol in a JavaScript ES6 import/export, possibly aliased. |
39 | struct JsImportedSymbol { |
40 | StringRef Symbol; |
41 | StringRef Alias; |
42 | SourceRange Range; |
43 | |
44 | bool operator==(const JsImportedSymbol &RHS) const { |
45 | // Ignore Range for comparison, it is only used to stitch code together, |
46 | // but imports at different code locations are still conceptually the same. |
47 | return Symbol == RHS.Symbol && Alias == RHS.Alias; |
48 | } |
49 | }; |
50 | |
51 | // An ES6 module reference. |
52 | // |
53 | // ES6 implements a module system, where individual modules (~= source files) |
54 | // can reference other modules, either importing symbols from them, or exporting |
55 | // symbols from them: |
56 | // import {foo} from 'foo'; |
57 | // export {foo}; |
58 | // export {bar} from 'bar'; |
59 | // |
60 | // `export`s with URLs are syntactic sugar for an import of the symbol from the |
61 | // URL, followed by an export of the symbol, allowing this code to treat both |
62 | // statements more or less identically, with the exception being that `export`s |
63 | // are sorted last. |
64 | // |
65 | // imports and exports support individual symbols, but also a wildcard syntax: |
66 | // import * as prefix from 'foo'; |
67 | // export * from 'bar'; |
68 | // |
69 | // This struct represents both exports and imports to build up the information |
70 | // required for sorting module references. |
71 | struct JsModuleReference { |
72 | bool IsExport = false; |
73 | // Module references are sorted into these categories, in order. |
74 | enum ReferenceCategory { |
75 | SIDE_EFFECT, // "import 'something';" |
76 | ABSOLUTE, // from 'something' |
77 | RELATIVE_PARENT, // from '../*' |
78 | RELATIVE, // from './*' |
79 | }; |
80 | ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT; |
81 | // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`. |
82 | StringRef URL; |
83 | // Prefix from "import * as prefix". Empty for symbol imports and `export *`. |
84 | // Implies an empty names list. |
85 | StringRef Prefix; |
86 | // Symbols from `import {SymbolA, SymbolB, ...} from ...;`. |
87 | SmallVector<JsImportedSymbol, 1> Symbols; |
88 | // Textual position of the import/export, including preceding and trailing |
89 | // comments. |
90 | SourceRange Range; |
91 | }; |
92 | |
93 | bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) { |
94 | if (LHS.IsExport != RHS.IsExport) |
95 | return LHS.IsExport < RHS.IsExport; |
96 | if (LHS.Category != RHS.Category) |
97 | return LHS.Category < RHS.Category; |
98 | if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT) |
99 | // Side effect imports might be ordering sensitive. Consider them equal so |
100 | // that they maintain their relative order in the stable sort below. |
101 | // This retains transitivity because LHS.Category == RHS.Category here. |
102 | return false; |
103 | // Empty URLs sort *last* (for export {...};). |
104 | if (LHS.URL.empty() != RHS.URL.empty()) |
105 | return LHS.URL.empty() < RHS.URL.empty(); |
106 | if (int Res = LHS.URL.compare_lower(RHS.URL)) |
107 | return Res < 0; |
108 | // '*' imports (with prefix) sort before {a, b, ...} imports. |
109 | if (LHS.Prefix.empty() != RHS.Prefix.empty()) |
110 | return LHS.Prefix.empty() < RHS.Prefix.empty(); |
111 | if (LHS.Prefix != RHS.Prefix) |
112 | return LHS.Prefix > RHS.Prefix; |
113 | return false; |
114 | } |
115 | |
116 | // JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is |
117 | // implemented as a TokenAnalyzer because ES6 imports have substantial syntactic |
118 | // structure, making it messy to sort them using regular expressions. |
119 | class JavaScriptImportSorter : public TokenAnalyzer { |
120 | public: |
121 | JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style) |
122 | : TokenAnalyzer(Env, Style), |
123 | FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} |
124 | |
125 | std::pair<tooling::Replacements, unsigned> |
126 | analyze(TokenAnnotator &Annotator, |
127 | SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, |
128 | FormatTokenLexer &Tokens) override { |
129 | tooling::Replacements Result; |
130 | AffectedRangeMgr.computeAffectedLines(AnnotatedLines); |
131 | |
132 | const AdditionalKeywords &Keywords = Tokens.getKeywords(); |
133 | SmallVector<JsModuleReference, 16> References; |
134 | AnnotatedLine *FirstNonImportLine; |
135 | std::tie(References, FirstNonImportLine) = |
136 | parseModuleReferences(Keywords, AnnotatedLines); |
137 | |
138 | if (References.empty()) |
139 | return {Result, 0}; |
140 | |
141 | SmallVector<unsigned, 16> Indices; |
142 | for (unsigned i = 0, e = References.size(); i != e; ++i) |
143 | Indices.push_back(i); |
144 | std::stable_sort(Indices.begin(), Indices.end(), |
145 | [&](unsigned LHSI, unsigned RHSI) { |
146 | return References[LHSI] < References[RHSI]; |
147 | }); |
148 | bool ReferencesInOrder = std::is_sorted(Indices.begin(), Indices.end()); |
149 | |
150 | std::string ReferencesText; |
151 | bool SymbolsInOrder = true; |
152 | for (unsigned i = 0, e = Indices.size(); i != e; ++i) { |
153 | JsModuleReference Reference = References[Indices[i]]; |
154 | if (appendReference(ReferencesText, Reference)) |
155 | SymbolsInOrder = false; |
156 | if (i + 1 < e) { |
157 | // Insert breaks between imports and exports. |
158 | ReferencesText += "\n"; |
159 | // Separate imports groups with two line breaks, but keep all exports |
160 | // in a single group. |
161 | if (!Reference.IsExport && |
162 | (Reference.IsExport != References[Indices[i + 1]].IsExport || |
163 | Reference.Category != References[Indices[i + 1]].Category)) |
164 | ReferencesText += "\n"; |
165 | } |
166 | } |
167 | |
168 | if (ReferencesInOrder && SymbolsInOrder) |
169 | return {Result, 0}; |
170 | |
171 | SourceRange InsertionPoint = References[0].Range; |
172 | InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); |
173 | |
174 | // The loop above might collapse previously existing line breaks between |
175 | // import blocks, and thus shrink the file. SortIncludes must not shrink |
176 | // overall source length as there is currently no re-calculation of ranges |
177 | // after applying source sorting. |
178 | // This loop just backfills trailing spaces after the imports, which are |
179 | // harmless and will be stripped by the subsequent formatting pass. |
180 | // FIXME: A better long term fix is to re-calculate Ranges after sorting. |
181 | unsigned PreviousSize = getSourceText(InsertionPoint).size(); |
182 | while (ReferencesText.size() < PreviousSize) { |
183 | ReferencesText += " "; |
184 | } |
185 | |
186 | // Separate references from the main code body of the file. |
187 | if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2) |
188 | ReferencesText += "\n"; |
189 | |
190 | LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n" |
191 | << getSourceText(InsertionPoint) << "\nwith:\n" |
192 | << ReferencesText << "\n"); |
193 | auto Err = Result.add(tooling::Replacement( |
194 | Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint), |
195 | ReferencesText)); |
196 | // FIXME: better error handling. For now, just print error message and skip |
197 | // the replacement for the release version. |
198 | if (Err) { |
199 | llvm::errs() << llvm::toString(std::move(Err)) << "\n"; |
200 | assert(false); |
201 | } |
202 | |
203 | return {Result, 0}; |
204 | } |
205 | |
206 | private: |
207 | FormatToken *Current; |
208 | FormatToken *LineEnd; |
209 | |
210 | FormatToken invalidToken; |
211 | |
212 | StringRef FileContents; |
213 | |
214 | void skipComments() { Current = skipComments(Current); } |
215 | |
216 | FormatToken *skipComments(FormatToken *Tok) { |
217 | while (Tok && Tok->is(tok::comment)) |
218 | Tok = Tok->Next; |
219 | return Tok; |
220 | } |
221 | |
222 | void nextToken() { |
223 | Current = Current->Next; |
224 | skipComments(); |
225 | if (!Current || Current == LineEnd->Next) { |
226 | // Set the current token to an invalid token, so that further parsing on |
227 | // this line fails. |
228 | invalidToken.Tok.setKind(tok::unknown); |
229 | Current = &invalidToken; |
230 | } |
231 | } |
232 | |
233 | StringRef getSourceText(SourceRange Range) { |
234 | return getSourceText(Range.getBegin(), Range.getEnd()); |
235 | } |
236 | |
237 | StringRef getSourceText(SourceLocation Begin, SourceLocation End) { |
238 | const SourceManager &SM = Env.getSourceManager(); |
239 | return FileContents.substr(SM.getFileOffset(Begin), |
240 | SM.getFileOffset(End) - SM.getFileOffset(Begin)); |
241 | } |
242 | |
243 | // Appends ``Reference`` to ``Buffer``, returning true if text within the |
244 | // ``Reference`` changed (e.g. symbol order). |
245 | bool appendReference(std::string &Buffer, JsModuleReference &Reference) { |
246 | // Sort the individual symbols within the import. |
247 | // E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';` |
248 | SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols; |
249 | std::stable_sort( |
250 | Symbols.begin(), Symbols.end(), |
251 | [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) { |
252 | return LHS.Symbol.compare_lower(RHS.Symbol) < 0; |
253 | }); |
254 | if (Symbols == Reference.Symbols) { |
255 | // No change in symbol order. |
256 | StringRef ReferenceStmt = getSourceText(Reference.Range); |
257 | Buffer += ReferenceStmt; |
258 | return false; |
259 | } |
260 | // Stitch together the module reference start... |
261 | SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin(); |
262 | SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd(); |
263 | Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart); |
264 | // ... then the references in order ... |
265 | for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) { |
266 | if (I != Symbols.begin()) |
267 | Buffer += ","; |
268 | Buffer += getSourceText(I->Range); |
269 | } |
270 | // ... followed by the module reference end. |
271 | Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd()); |
272 | return true; |
273 | } |
274 | |
275 | // Parses module references in the given lines. Returns the module references, |
276 | // and a pointer to the first "main code" line if that is adjacent to the |
277 | // affected lines of module references, nullptr otherwise. |
278 | std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *> |
279 | parseModuleReferences(const AdditionalKeywords &Keywords, |
280 | SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { |
281 | SmallVector<JsModuleReference, 16> References; |
282 | SourceLocation Start; |
283 | AnnotatedLine *FirstNonImportLine = nullptr; |
284 | bool AnyImportAffected = false; |
285 | for (auto Line : AnnotatedLines) { |
286 | Current = Line->First; |
287 | LineEnd = Line->Last; |
288 | skipComments(); |
289 | if (Start.isInvalid() || References.empty()) |
290 | // After the first file level comment, consider line comments to be part |
291 | // of the import that immediately follows them by using the previously |
292 | // set Start. |
293 | Start = Line->First->Tok.getLocation(); |
294 | if (!Current) { |
295 | // Only comments on this line. Could be the first non-import line. |
296 | FirstNonImportLine = Line; |
297 | continue; |
298 | } |
299 | JsModuleReference Reference; |
300 | Reference.Range.setBegin(Start); |
301 | if (!parseModuleReference(Keywords, Reference)) { |
302 | if (!FirstNonImportLine) |
303 | FirstNonImportLine = Line; // if no comment before. |
304 | break; |
305 | } |
306 | FirstNonImportLine = nullptr; |
307 | AnyImportAffected = AnyImportAffected || Line->Affected; |
308 | Reference.Range.setEnd(LineEnd->Tok.getEndLoc()); |
309 | LLVM_DEBUG({ |
310 | llvm::dbgs() << "JsModuleReference: {" |
311 | << "is_export: " << Reference.IsExport |
312 | << ", cat: " << Reference.Category |
313 | << ", url: " << Reference.URL |
314 | << ", prefix: " << Reference.Prefix; |
315 | for (size_t i = 0; i < Reference.Symbols.size(); ++i) |
316 | llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as " |
317 | << Reference.Symbols[i].Alias; |
318 | llvm::dbgs() << ", text: " << getSourceText(Reference.Range); |
319 | llvm::dbgs() << "}\n"; |
320 | }); |
321 | References.push_back(Reference); |
322 | Start = SourceLocation(); |
323 | } |
324 | // Sort imports if any import line was affected. |
325 | if (!AnyImportAffected) |
326 | References.clear(); |
327 | return std::make_pair(References, FirstNonImportLine); |
328 | } |
329 | |
330 | // Parses a JavaScript/ECMAScript 6 module reference. |
331 | // See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules |
332 | // for grammar EBNF (production ModuleItem). |
333 | bool parseModuleReference(const AdditionalKeywords &Keywords, |
334 | JsModuleReference &Reference) { |
335 | if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export)) |
336 | return false; |
337 | Reference.IsExport = Current->is(tok::kw_export); |
338 | |
339 | nextToken(); |
340 | if (Current->isStringLiteral() && !Reference.IsExport) { |
341 | // "import 'side-effect';" |
342 | Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT; |
343 | Reference.URL = |
344 | Current->TokenText.substr(1, Current->TokenText.size() - 2); |
345 | return true; |
346 | } |
347 | |
348 | if (!parseModuleBindings(Keywords, Reference)) |
349 | return false; |
350 | |
351 | if (Current->is(Keywords.kw_from)) { |
352 | // imports have a 'from' clause, exports might not. |
353 | nextToken(); |
354 | if (!Current->isStringLiteral()) |
355 | return false; |
356 | // URL = TokenText without the quotes. |
357 | Reference.URL = |
358 | Current->TokenText.substr(1, Current->TokenText.size() - 2); |
359 | if (Reference.URL.startswith("..")) |
360 | Reference.Category = |
361 | JsModuleReference::ReferenceCategory::RELATIVE_PARENT; |
362 | else if (Reference.URL.startswith(".")) |
363 | Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; |
364 | else |
365 | Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE; |
366 | } else { |
367 | // w/o URL groups with "empty". |
368 | Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; |
369 | } |
370 | return true; |
371 | } |
372 | |
373 | bool parseModuleBindings(const AdditionalKeywords &Keywords, |
374 | JsModuleReference &Reference) { |
375 | if (parseStarBinding(Keywords, Reference)) |
376 | return true; |
377 | return parseNamedBindings(Keywords, Reference); |
378 | } |
379 | |
380 | bool parseStarBinding(const AdditionalKeywords &Keywords, |
381 | JsModuleReference &Reference) { |
382 | // * as prefix from '...'; |
383 | if (Current->isNot(tok::star)) |
384 | return false; |
385 | nextToken(); |
386 | if (Current->isNot(Keywords.kw_as)) |
387 | return false; |
388 | nextToken(); |
389 | if (Current->isNot(tok::identifier)) |
390 | return false; |
391 | Reference.Prefix = Current->TokenText; |
392 | nextToken(); |
393 | return true; |
394 | } |
395 | |
396 | bool parseNamedBindings(const AdditionalKeywords &Keywords, |
397 | JsModuleReference &Reference) { |
398 | if (Current->is(tok::identifier)) { |
399 | nextToken(); |
400 | if (Current->is(Keywords.kw_from)) |
401 | return true; |
402 | if (Current->isNot(tok::comma)) |
403 | return false; |
404 | nextToken(); // eat comma. |
405 | } |
406 | if (Current->isNot(tok::l_brace)) |
407 | return false; |
408 | |
409 | // {sym as alias, sym2 as ...} from '...'; |
410 | while (Current->isNot(tok::r_brace)) { |
411 | nextToken(); |
412 | if (Current->is(tok::r_brace)) |
413 | break; |
414 | if (!Current->isOneOf(tok::identifier, tok::kw_default)) |
415 | return false; |
416 | |
417 | JsImportedSymbol Symbol; |
418 | Symbol.Symbol = Current->TokenText; |
419 | // Make sure to include any preceding comments. |
420 | Symbol.Range.setBegin( |
421 | Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin()); |
422 | nextToken(); |
423 | |
424 | if (Current->is(Keywords.kw_as)) { |
425 | nextToken(); |
426 | if (!Current->isOneOf(tok::identifier, tok::kw_default)) |
427 | return false; |
428 | Symbol.Alias = Current->TokenText; |
429 | nextToken(); |
430 | } |
431 | Symbol.Range.setEnd(Current->Tok.getLocation()); |
432 | Reference.Symbols.push_back(Symbol); |
433 | |
434 | if (!Current->isOneOf(tok::r_brace, tok::comma)) |
435 | return false; |
436 | } |
437 | nextToken(); // consume r_brace |
438 | return true; |
439 | } |
440 | }; |
441 | |
442 | tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, |
443 | StringRef Code, |
444 | ArrayRef<tooling::Range> Ranges, |
445 | StringRef FileName) { |
446 | // FIXME: Cursor support. |
447 | return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style) |
448 | .process() |
449 | .first; |
450 | } |
451 | |
452 | } // end namespace format |
453 | } // end namespace clang |
454 | |