| 1 | /* |
|---|---|
| 2 | * Copyright (C) 2007-2010 Júlio Vilmar Gesser. |
| 3 | * Copyright (C) 2011, 2013-2020 The JavaParser Team. |
| 4 | * |
| 5 | * This file is part of JavaParser. |
| 6 | * |
| 7 | * JavaParser can be used either under the terms of |
| 8 | * a) the GNU Lesser General Public License as published by |
| 9 | * the Free Software Foundation, either version 3 of the License, or |
| 10 | * (at your option) any later version. |
| 11 | * b) the terms of the Apache License |
| 12 | * |
| 13 | * You should have received a copy of both licenses in LICENCE.LGPL and |
| 14 | * LICENCE.APACHE. Please refer to those files for details. |
| 15 | * |
| 16 | * JavaParser is distributed in the hope that it will be useful, |
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 19 | * GNU Lesser General Public License for more details. |
| 20 | */ |
| 21 | package com.github.javaparser; |
| 22 | |
| 23 | import java.io.IOException; |
| 24 | import java.util.ArrayList; |
| 25 | import java.util.Collections; |
| 26 | import java.util.List; |
| 27 | |
| 28 | /** |
| 29 | * {@link Provider} un-escaping unicode escape sequences in the input sequence. |
| 30 | */ |
| 31 | public class UnicodeEscapeProcessingProvider implements Provider { |
| 32 | |
| 33 | private static final char LF = '\n'; |
| 34 | |
| 35 | private static final char CR = '\r'; |
| 36 | |
| 37 | private static final char BACKSLASH = '\\'; |
| 38 | |
| 39 | private static final int EOF = -1; |
| 40 | |
| 41 | private char[] _data; |
| 42 | |
| 43 | /** |
| 44 | * The number of characters in {@link #_data}. |
| 45 | */ |
| 46 | private int _len = 0; |
| 47 | |
| 48 | /** |
| 49 | * The position in {@link #_data} where to read the next source character from. |
| 50 | */ |
| 51 | private int _pos = 0; |
| 52 | |
| 53 | private boolean _backslashSeen; |
| 54 | |
| 55 | private final LineCounter _inputLine = new LineCounter(); |
| 56 | |
| 57 | private final LineCounter _outputLine = new LineCounter(); |
| 58 | |
| 59 | private final PositionMappingBuilder _mappingBuilder = new PositionMappingBuilder(_outputLine, _inputLine); |
| 60 | |
| 61 | private Provider _input; |
| 62 | |
| 63 | /** |
| 64 | * Creates a {@link UnicodeEscapeProcessingProvider}. |
| 65 | */ |
| 66 | public UnicodeEscapeProcessingProvider(Provider input) { |
| 67 | this(2048, input); |
| 68 | } |
| 69 | |
| 70 | /** |
| 71 | * Creates a {@link UnicodeEscapeProcessingProvider}. |
| 72 | */ |
| 73 | public UnicodeEscapeProcessingProvider(int bufferSize, Provider input) { |
| 74 | _input = input; |
| 75 | _data = new char[bufferSize]; |
| 76 | } |
| 77 | |
| 78 | /** |
| 79 | * The {@link LineCounter} of the input file. |
| 80 | */ |
| 81 | public LineCounter getInputCounter() { |
| 82 | return _inputLine; |
| 83 | } |
| 84 | |
| 85 | /** |
| 86 | * The {@link LineCounter} of the output file. |
| 87 | */ |
| 88 | public LineCounter getOutputCounter() { |
| 89 | return _outputLine; |
| 90 | } |
| 91 | |
| 92 | @Override |
| 93 | public int read(char[] buffer, final int offset, int len) throws IOException { |
| 94 | int pos = offset; |
| 95 | int stop = offset + len; |
| 96 | while (pos < stop) { |
| 97 | int ch = _outputLine.process(nextOutputChar()); |
| 98 | if (ch < 0) { |
| 99 | if (pos == offset) { |
| 100 | // Nothing read yet, this is the end of the stream. |
| 101 | return EOF; |
| 102 | } else { |
| 103 | break; |
| 104 | } |
| 105 | } else { |
| 106 | _mappingBuilder.update(); |
| 107 | buffer[pos++] = (char) ch; |
| 108 | } |
| 109 | } |
| 110 | return pos - offset; |
| 111 | } |
| 112 | |
| 113 | @Override |
| 114 | public void close() throws IOException { |
| 115 | _input.close(); |
| 116 | } |
| 117 | |
| 118 | /** |
| 119 | * Produces the next un-escaped character to be written to the output. |
| 120 | * |
| 121 | * @return The next character or {@code -1} if no more characters are available. |
| 122 | */ |
| 123 | private int nextOutputChar() throws IOException { |
| 124 | int next = nextInputChar(); |
| 125 | switch (next) { |
| 126 | case EOF: |
| 127 | return EOF; |
| 128 | case BACKSLASH: { |
| 129 | if (_backslashSeen) { |
| 130 | return clearBackSlashSeen(next); |
| 131 | } else { |
| 132 | return backSlashSeen(); |
| 133 | } |
| 134 | } |
| 135 | default: { |
| 136 | // An arbitrary character. |
| 137 | return clearBackSlashSeen(next); |
| 138 | } |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | private int clearBackSlashSeen(int next) { |
| 143 | _backslashSeen = false; |
| 144 | return next; |
| 145 | } |
| 146 | |
| 147 | private int backSlashSeen() throws IOException { |
| 148 | _backslashSeen = true; |
| 149 | |
| 150 | int next = nextInputChar(); |
| 151 | switch (next) { |
| 152 | case EOF: |
| 153 | // End of file after backslash produces the backslash itself. |
| 154 | return BACKSLASH; |
| 155 | case 'u': { |
| 156 | return unicodeStartSeen(); |
| 157 | } |
| 158 | default: { |
| 159 | pushBack(next); |
| 160 | return BACKSLASH; |
| 161 | } |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | private int unicodeStartSeen() throws IOException { |
| 166 | int uCnt = 1; |
| 167 | while (true) { |
| 168 | int next = nextInputChar(); |
| 169 | switch (next) { |
| 170 | case EOF: { |
| 171 | pushBackUs(uCnt); |
| 172 | return BACKSLASH; |
| 173 | } |
| 174 | case 'u': { |
| 175 | uCnt++; |
| 176 | continue; |
| 177 | } |
| 178 | default: { |
| 179 | return readDigits(uCnt, next); |
| 180 | } |
| 181 | } |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | private int readDigits(int uCnt, int next3) throws IOException { |
| 186 | int digit3 = digit(next3); |
| 187 | if (digit3 < 0) { |
| 188 | pushBack(next3); |
| 189 | pushBackUs(uCnt); |
| 190 | return BACKSLASH; |
| 191 | } |
| 192 | |
| 193 | int next2 = nextInputChar(); |
| 194 | int digit2 = digit(next2); |
| 195 | if (digit2 < 0) { |
| 196 | pushBack(next2); |
| 197 | pushBack(next3); |
| 198 | pushBackUs(uCnt); |
| 199 | return BACKSLASH; |
| 200 | } |
| 201 | |
| 202 | int next1 = nextInputChar(); |
| 203 | int digit1 = digit(next1); |
| 204 | if (digit1 < 0) { |
| 205 | pushBack(next1); |
| 206 | pushBack(next2); |
| 207 | pushBack(next3); |
| 208 | pushBackUs(uCnt); |
| 209 | return BACKSLASH; |
| 210 | } |
| 211 | |
| 212 | int next0 = nextInputChar(); |
| 213 | int digit0 = digit(next0); |
| 214 | if (digit0 < 0) { |
| 215 | pushBack(next0); |
| 216 | pushBack(next1); |
| 217 | pushBack(next2); |
| 218 | pushBack(next3); |
| 219 | pushBackUs(uCnt); |
| 220 | return BACKSLASH; |
| 221 | } |
| 222 | |
| 223 | int ch = digit3 << 12 | digit2 << 8 | digit1 << 4 | digit0; |
| 224 | return clearBackSlashSeen(ch); |
| 225 | } |
| 226 | |
| 227 | private void pushBackUs(int cnt) { |
| 228 | for (int n = 0; n < cnt; n++) { |
| 229 | pushBack('u'); |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | private static int digit(int ch) { |
| 234 | if (ch >= '0' && ch <= '9') { |
| 235 | return ch - '0'; |
| 236 | } |
| 237 | if (ch >= 'A' && ch <= 'F') { |
| 238 | return 10 + ch - 'A'; |
| 239 | } |
| 240 | if (ch >= 'a' && ch <= 'f') { |
| 241 | return 10 + ch - 'a'; |
| 242 | } |
| 243 | return -1; |
| 244 | } |
| 245 | |
| 246 | /** |
| 247 | * Processes column/line information from the input file. |
| 248 | * |
| 249 | * @return The next character or {@code -1} if no more input is available. |
| 250 | */ |
| 251 | private int nextInputChar() throws IOException { |
| 252 | int result = nextBufferedChar(); |
| 253 | return _inputLine.process(result); |
| 254 | } |
| 255 | |
| 256 | /** |
| 257 | * Retrieves the next un-escaped character from the buffered {@link #_input}. |
| 258 | * |
| 259 | * @return The next character or {@code -1} if no more input is available. |
| 260 | */ |
| 261 | private int nextBufferedChar() throws IOException { |
| 262 | while (isBufferEmpty()) { |
| 263 | int direct = fillBuffer(); |
| 264 | if (direct < 0) { |
| 265 | return EOF; |
| 266 | } |
| 267 | } |
| 268 | return _data[_pos++]; |
| 269 | } |
| 270 | |
| 271 | private boolean isBufferEmpty() { |
| 272 | return _pos >= _len; |
| 273 | } |
| 274 | |
| 275 | private int fillBuffer() throws IOException { |
| 276 | _pos = 0; |
| 277 | int direct = _input.read(_data, 0, _data.length); |
| 278 | if (direct != 0) { |
| 279 | _len = direct; |
| 280 | } |
| 281 | return direct; |
| 282 | } |
| 283 | |
| 284 | private void pushBack(int ch) { |
| 285 | if (ch < 0) { |
| 286 | return; |
| 287 | } |
| 288 | |
| 289 | if (isBufferEmpty()) { |
| 290 | _pos = _data.length; |
| 291 | _len = _data.length; |
| 292 | } else if (_pos == 0) { |
| 293 | if (_len == _data.length) { |
| 294 | // Buffer is completely full, no push possible, enlarge buffer. |
| 295 | char[] newData = new char[_data.length + 1024]; |
| 296 | _len = newData.length; |
| 297 | _pos = newData.length - _data.length; |
| 298 | System.arraycopy(_data, 0, newData, _pos, _data.length); |
| 299 | _data = newData; |
| 300 | } else { |
| 301 | // Move contents to the right. |
| 302 | int cnt = _len - _pos; |
| 303 | _pos = _data.length - _len; |
| 304 | _len = _data.length; |
| 305 | System.arraycopy(_data, 0, _data, _pos, cnt); |
| 306 | } |
| 307 | } |
| 308 | _data[--_pos] = (char) ch; |
| 309 | } |
| 310 | |
| 311 | /** |
| 312 | * The {@link PositionMapping} being built during processing the file. |
| 313 | */ |
| 314 | public PositionMapping getPositionMapping() { |
| 315 | return _mappingBuilder.getMapping(); |
| 316 | } |
| 317 | |
| 318 | /** |
| 319 | * An algorithm mapping {@link Position} form two corresponding files. |
| 320 | */ |
| 321 | public static final class PositionMapping { |
| 322 | |
| 323 | private final List<DeltaInfo> _deltas = new ArrayList<>(); |
| 324 | |
| 325 | /** |
| 326 | * Creates a {@link UnicodeEscapeProcessingProvider.PositionMapping}. |
| 327 | */ |
| 328 | public PositionMapping() { |
| 329 | super(); |
| 330 | } |
| 331 | |
| 332 | /** |
| 333 | * Whether this is the identity transformation. |
| 334 | */ |
| 335 | public boolean isEmpty() { |
| 336 | return _deltas.isEmpty(); |
| 337 | } |
| 338 | |
| 339 | void add(int line, int column, int lineDelta, int columnDelta) { |
| 340 | _deltas.add(new DeltaInfo(line, column, lineDelta, columnDelta)); |
| 341 | } |
| 342 | |
| 343 | /** |
| 344 | * Looks up the {@link PositionUpdate} for the given Position. |
| 345 | */ |
| 346 | public PositionUpdate lookup(Position position) { |
| 347 | int result = Collections.binarySearch(_deltas, position); |
| 348 | if (result >= 0) { |
| 349 | return _deltas.get(result); |
| 350 | } else { |
| 351 | int insertIndex = -result - 1; |
| 352 | if (insertIndex == 0) { |
| 353 | // Before the first delta info, identity mapping. |
| 354 | return PositionUpdate.NONE; |
| 355 | } else { |
| 356 | // The relevant update is the one with the position smaller |
| 357 | // than the requested position. |
| 358 | return _deltas.get(insertIndex - 1); |
| 359 | } |
| 360 | } |
| 361 | } |
| 362 | |
| 363 | /** |
| 364 | * Algorithm updating a {@link Position} from one file to a |
| 365 | * {@link Position} in a corresponding file. |
| 366 | */ |
| 367 | public static interface PositionUpdate { |
| 368 | |
| 369 | /** |
| 370 | * The identity position mapping. |
| 371 | */ |
| 372 | PositionUpdate NONE = new PositionUpdate() { |
| 373 | @Override |
| 374 | public int transformLine(int line) { |
| 375 | return line; |
| 376 | } |
| 377 | |
| 378 | @Override |
| 379 | public int transformColumn(int column) { |
| 380 | return column; |
| 381 | } |
| 382 | |
| 383 | @Override |
| 384 | public Position transform(Position pos) { |
| 385 | return pos; |
| 386 | } |
| 387 | }; |
| 388 | |
| 389 | /** |
| 390 | * Maps the given line to an original line. |
| 391 | */ |
| 392 | int transformLine(int line); |
| 393 | |
| 394 | /** |
| 395 | * Maps the given column to an original column. |
| 396 | */ |
| 397 | int transformColumn(int column); |
| 398 | |
| 399 | /** |
| 400 | * The transformed position. |
| 401 | */ |
| 402 | default Position transform(Position pos) { |
| 403 | int line = pos.line; |
| 404 | int column = pos.column; |
| 405 | int transformedLine = transformLine(line); |
| 406 | int transformedColumn = transformColumn(column); |
| 407 | return new Position(transformedLine, transformedColumn); |
| 408 | } |
| 409 | |
| 410 | } |
| 411 | |
| 412 | private static final class DeltaInfo extends Position implements PositionUpdate { |
| 413 | |
| 414 | /** |
| 415 | * The offset to add to the {@link #line} and all following source |
| 416 | * positions up to the next {@link PositionUpdate}. |
| 417 | */ |
| 418 | private final int _lineDelta; |
| 419 | |
| 420 | /** |
| 421 | * The offset to add to the {@link #column} and all following |
| 422 | * source positions up to the next {@link PositionUpdate}. |
| 423 | */ |
| 424 | private final int _columnDelta; |
| 425 | |
| 426 | /** |
| 427 | * Creates a {@link PositionUpdate}. |
| 428 | */ |
| 429 | public DeltaInfo(int line, int column, int lineDelta, |
| 430 | int columnDelta) { |
| 431 | super(line, column); |
| 432 | _lineDelta = lineDelta; |
| 433 | _columnDelta = columnDelta; |
| 434 | } |
| 435 | |
| 436 | @Override |
| 437 | public int transformLine(int sourceLine) { |
| 438 | return sourceLine + _lineDelta; |
| 439 | } |
| 440 | |
| 441 | @Override |
| 442 | public int transformColumn(int sourceColumn) { |
| 443 | return sourceColumn + _columnDelta; |
| 444 | } |
| 445 | |
| 446 | @Override |
| 447 | public String toString() { |
| 448 | return "(" + line + ", " + column + ": " + _lineDelta + ", " + _columnDelta + ")"; |
| 449 | } |
| 450 | |
| 451 | } |
| 452 | |
| 453 | /** |
| 454 | * Transforms the given {@link Position}. |
| 455 | */ |
| 456 | public Position transform(Position pos) { |
| 457 | return lookup(pos).transform(pos); |
| 458 | } |
| 459 | |
| 460 | /** |
| 461 | * Transforms the given {@link Range}. |
| 462 | */ |
| 463 | public Range transform(Range range) { |
| 464 | Position begin = transform(range.begin); |
| 465 | Position end = transform(range.end); |
| 466 | if (begin == range.begin && end == range.end) { |
| 467 | // No change. |
| 468 | return range; |
| 469 | } |
| 470 | return new Range(begin, end); |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | private static final class PositionMappingBuilder { |
| 475 | |
| 476 | private LineCounter _left; |
| 477 | |
| 478 | private LineCounter _right; |
| 479 | |
| 480 | private final PositionMapping _mapping = new PositionMapping(); |
| 481 | |
| 482 | private int _lineDelta = 0; |
| 483 | private int _columnDelta = 0; |
| 484 | |
| 485 | /** |
| 486 | * Creates a {@link PositionMappingBuilder}. |
| 487 | * |
| 488 | * @param left The source {@link LineCounter}. |
| 489 | * @param right The target {@link LineCounter}. |
| 490 | */ |
| 491 | public PositionMappingBuilder(LineCounter left, LineCounter right) { |
| 492 | _left = left; |
| 493 | _right = right; |
| 494 | update(); |
| 495 | } |
| 496 | |
| 497 | /** |
| 498 | * The built {@link PositionMapping}. |
| 499 | */ |
| 500 | public PositionMapping getMapping() { |
| 501 | return _mapping; |
| 502 | } |
| 503 | |
| 504 | public void update() { |
| 505 | int lineDelta = _right.getLine() - _left.getLine(); |
| 506 | int columnDelta = _right.getColumn() - _left.getColumn(); |
| 507 | |
| 508 | if (lineDelta != _lineDelta || columnDelta != _columnDelta) { |
| 509 | _mapping.add(_left.getLine(), _left.getColumn(), lineDelta, columnDelta); |
| 510 | |
| 511 | _lineDelta = lineDelta; |
| 512 | _columnDelta = columnDelta; |
| 513 | } |
| 514 | } |
| 515 | |
| 516 | } |
| 517 | |
| 518 | /** |
| 519 | * Processor keeping track of the current line and column in a stream of |
| 520 | * incoming characters. |
| 521 | * |
| 522 | * @see #process(int) |
| 523 | */ |
| 524 | public static final class LineCounter { |
| 525 | |
| 526 | /** |
| 527 | * Whether {@link #CR} has been seen on the input as last character. |
| 528 | */ |
| 529 | private boolean _crSeen; |
| 530 | |
| 531 | private int _line = 1; |
| 532 | |
| 533 | private int _column = 1; |
| 534 | |
| 535 | /** |
| 536 | * Creates a {@link UnicodeEscapeProcessingProvider.LineCounter}. |
| 537 | */ |
| 538 | public LineCounter() { |
| 539 | super(); |
| 540 | } |
| 541 | |
| 542 | /** |
| 543 | * The line of the currently processed input character. |
| 544 | */ |
| 545 | public int getLine() { |
| 546 | return _line; |
| 547 | } |
| 548 | |
| 549 | /** |
| 550 | * The column of the currently processed input character. |
| 551 | */ |
| 552 | public int getColumn() { |
| 553 | return _column; |
| 554 | } |
| 555 | |
| 556 | /** |
| 557 | * The current position. |
| 558 | */ |
| 559 | public Position getPosition() { |
| 560 | return new Position(getLine(), getColumn()); |
| 561 | } |
| 562 | |
| 563 | /** |
| 564 | * Analyzes the given character for line feed. |
| 565 | */ |
| 566 | public int process(int ch) { |
| 567 | switch (ch) { |
| 568 | case EOF: { |
| 569 | break; |
| 570 | } |
| 571 | case CR: { |
| 572 | incLine(); |
| 573 | _crSeen = true; |
| 574 | break; |
| 575 | } |
| 576 | case LF: { |
| 577 | // CR LF does only count as a single line terminator. |
| 578 | if (_crSeen) { |
| 579 | _crSeen = false; |
| 580 | } else { |
| 581 | incLine(); |
| 582 | } |
| 583 | break; |
| 584 | } |
| 585 | default: { |
| 586 | _crSeen = false; |
| 587 | _column++; |
| 588 | } |
| 589 | } |
| 590 | return ch; |
| 591 | } |
| 592 | |
| 593 | private void incLine() { |
| 594 | _line++; |
| 595 | _column = 1; |
| 596 | } |
| 597 | |
| 598 | } |
| 599 | |
| 600 | } |
| 601 |
Members