JavaParser Source Viewer

Home|JavaParser/com/github/javaparser/UnicodeEscapeProcessingProvider.java
1/*
2 * Copyright (C) 2007-2010 Júlio Vilmar Gesser.
3 * Copyright (C) 2011, 2013-2020 The JavaParser Team.
4 *
5 * This file is part of JavaParser.
6 *
7 * JavaParser can be used either under the terms of
8 * a) the GNU Lesser General Public License as published by
9 *     the Free Software Foundation, either version 3 of the License, or
10 *     (at your option) any later version.
11 * b) the terms of the Apache License
12 *
13 * You should have received a copy of both licenses in LICENCE.LGPL and
14 * LICENCE.APACHE. Please refer to those files for details.
15 *
16 * JavaParser is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU Lesser General Public License for more details.
20 */
21package com.github.javaparser;
22
23import java.io.IOException;
24import java.util.ArrayList;
25import java.util.Collections;
26import java.util.List;
27
28/**
29 * {@link Provider} un-escaping unicode escape sequences in the input sequence.
30 */
31public class UnicodeEscapeProcessingProvider implements Provider {
32    
33    private static final char LF = '\n';
34
35    private static final char CR = '\r';
36
37    private static final char BACKSLASH = '\\';
38
39    private static final int EOF = -1;
40    
41    private char[] _data;
42    
43    /**
44     * The number of characters in {@link #_data}.
45     */
46    private int _len = 0;
47    
48    /**
49     * The position in {@link #_data} where to read the next source character from.
50     */
51    private int _pos = 0;
52
53    private boolean _backslashSeen;
54    
55    private final LineCounter _inputLine = new LineCounter();
56
57    private final LineCounter _outputLine = new LineCounter();
58    
59    private final PositionMappingBuilder _mappingBuilder = new PositionMappingBuilder(_outputLine_inputLine);
60    
61    private Provider _input;
62
63    /** 
64     * Creates a {@link UnicodeEscapeProcessingProvider}.
65     */
66    public UnicodeEscapeProcessingProvider(Provider input) {
67        this(2048input);
68    }
69
70    /** 
71     * Creates a {@link UnicodeEscapeProcessingProvider}.
72     */
73    public UnicodeEscapeProcessingProvider(int bufferSizeProvider input) {
74        _input = input;
75        _data = new char[bufferSize];
76    }
77    
78    /**
79     * The {@link LineCounter} of the input file.
80     */
81    public LineCounter getInputCounter() {
82        return _inputLine;
83    }
84    
85    /**
86     * The {@link LineCounter} of the output file.
87     */
88    public LineCounter getOutputCounter() {
89        return _outputLine;
90    }
91
92    @Override
93    public int read(char[] bufferfinal int offsetint len) throws IOException {
94        int pos = offset;
95        int stop = offset + len;
96        while (pos < stop) {
97            int ch = _outputLine.process(nextOutputChar());
98            if (ch < 0) {
99                if (pos == offset) {
100                    // Nothing read yet, this is the end of the stream.
101                    return EOF;
102                } else {
103                    break;
104                }
105            } else {
106                _mappingBuilder.update();
107                buffer[pos++] = (charch;
108            }
109        }
110        return pos - offset;
111    }
112
113    @Override
114    public void close() throws IOException {
115        _input.close();
116    }
117
118    /** 
119     * Produces the next un-escaped character to be written to the output.
120     * 
121     * @return The next character or {@code -1} if no more characters are available.
122     */
123    private int nextOutputChar() throws IOException {
124        int next = nextInputChar();
125        switch (next) {
126            case EOF:
127                return EOF;
128            case BACKSLASH: {
129                if (_backslashSeen) {
130                    return clearBackSlashSeen(next);
131                } else {
132                    return backSlashSeen();
133                }
134            }
135            default: {
136                // An arbitrary character.
137                return clearBackSlashSeen(next);
138            }
139        }
140    }
141
142    private int clearBackSlashSeen(int next) {
143        _backslashSeen = false;
144        return next;
145    }
146
147    private int backSlashSeen() throws IOException {
148        _backslashSeen = true;
149        
150        int next = nextInputChar();
151        switch (next) {
152            case EOF:
153                // End of file after backslash produces the backslash itself.
154                return BACKSLASH;
155            case 'u': {
156                return unicodeStartSeen();
157            }
158            default: {
159                pushBack(next);
160                return BACKSLASH;
161            }
162        }
163    }
164
165    private int unicodeStartSeen() throws IOException {
166        int uCnt = 1;
167        while (true) {
168            int next = nextInputChar();
169            switch (next) {
170                case EOF: {
171                    pushBackUs(uCnt);
172                    return BACKSLASH;
173                }
174                case 'u': {
175                    uCnt++;
176                    continue;
177                }
178                default: {
179                    return readDigits(uCntnext);
180                }
181            }
182        }
183    }
184
185    private int readDigits(int uCntint next3) throws IOException {
186        int digit3 = digit(next3);
187        if (digit3 < 0) {
188            pushBack(next3);
189            pushBackUs(uCnt);
190            return BACKSLASH;
191        }
192        
193        int next2 = nextInputChar();
194        int digit2 = digit(next2);
195        if (digit2 < 0) {
196            pushBack(next2);
197            pushBack(next3);
198            pushBackUs(uCnt);
199            return BACKSLASH;
200        }
201        
202        int next1 = nextInputChar();
203        int digit1 = digit(next1);
204        if (digit1 < 0) {
205            pushBack(next1);
206            pushBack(next2);
207            pushBack(next3);
208            pushBackUs(uCnt);
209            return BACKSLASH;
210        }
211        
212        int next0 = nextInputChar();
213        int digit0 = digit(next0);
214        if (digit0 < 0) {
215            pushBack(next0);
216            pushBack(next1);
217            pushBack(next2);
218            pushBack(next3);
219            pushBackUs(uCnt);
220            return BACKSLASH;
221        }
222
223        int ch = digit3 << 12 | digit2 << 8 | digit1 << 4 | digit0;
224        return clearBackSlashSeen(ch);
225    }
226
227    private void pushBackUs(int cnt) {
228        for (int n = 0n < cntn++) {
229            pushBack('u');
230        }
231    }
232
233    private static int digit(int ch) {
234        if (ch >= '0' && ch <= '9') {
235            return ch - '0';
236        }
237        if (ch >= 'A' && ch <= 'F') {
238            return 10 + ch - 'A';
239        }
240        if (ch >= 'a' && ch <= 'f') {
241            return 10 + ch - 'a';
242        }
243        return -1;
244    }
245
246    /** 
247     * Processes column/line information from the input file.
248     * 
249     * @return The next character or {@code -1} if no more input is available.
250     */
251    private int nextInputChar() throws IOException {
252        int result = nextBufferedChar();
253        return _inputLine.process(result);
254    }
255
256    /** 
257     * Retrieves the next un-escaped character from the buffered {@link #_input}.
258     * 
259     * @return The next character or {@code -1} if no more input is available.
260     */
261    private int nextBufferedChar() throws IOException {
262        while (isBufferEmpty()) {
263            int direct = fillBuffer();
264            if (direct < 0) {
265                return EOF;
266            }
267        }
268        return _data[_pos++];
269    }
270
271    private boolean isBufferEmpty() {
272        return _pos >= _len;
273    }
274
275    private int fillBuffer() throws IOException {
276        _pos = 0;
277        int direct = _input.read(_data0_data.length);
278        if (direct != 0) {
279            _len = direct;
280        }
281        return direct;
282    }
283
284    private void pushBack(int ch) {
285        if (ch < 0) {
286            return;
287        }
288        
289        if (isBufferEmpty()) {
290            _pos = _data.length;
291            _len = _data.length;
292        } else if (_pos == 0) {
293            if (_len == _data.length) {
294                // Buffer is completely full, no push possible, enlarge buffer.
295                char[] newData = new char[_data.length + 1024];
296                _len = newData.length;
297                _pos = newData.length - _data.length;
298                System.arraycopy(_data0newData_pos_data.length);
299                _data = newData;
300            } else {
301                // Move contents to the right.
302                int cnt = _len - _pos;
303                _pos = _data.length - _len;
304                _len = _data.length;
305                System.arraycopy(_data0_data_poscnt);
306            }
307        }
308        _data[--_pos] = (charch;
309    }
310    
311    /**
312     * The {@link PositionMapping} being built during processing the file.
313     */
314    public PositionMapping getPositionMapping() {
315        return _mappingBuilder.getMapping();
316    }
317    
318    /**
319     * An algorithm mapping {@link Position} form two corresponding files.
320     */
321    public static final class PositionMapping {
322        
323        private final List<DeltaInfo_deltas = new ArrayList<>();
324        
325        /** 
326         * Creates a {@link UnicodeEscapeProcessingProvider.PositionMapping}.
327         */
328        public PositionMapping() {
329            super();
330        }
331        
332        /**
333         * Whether this is the identity transformation.
334         */
335        public boolean isEmpty() {
336            return _deltas.isEmpty();
337        }
338
339        void add(int lineint columnint lineDeltaint columnDelta) {
340            _deltas.add(new DeltaInfo(linecolumnlineDeltacolumnDelta));
341        }
342        
343        /**
344         * Looks up the {@link PositionUpdate} for the given Position.
345         */
346        public PositionUpdate lookup(Position position) {
347            int result = Collections.binarySearch(_deltasposition);
348            if (result >= 0) {
349                return _deltas.get(result);
350            } else {
351                int insertIndex = -result - 1;
352                if (insertIndex == 0) {
353                    // Before the first delta info, identity mapping.
354                    return PositionUpdate.NONE;
355                } else {
356                    // The relevant update is the one with the position smaller
357                    // than the requested position.
358                    return _deltas.get(insertIndex - 1);
359                }
360            }
361        }
362        
363        /**
364         * Algorithm updating a {@link Position} from one file to a
365         * {@link Position} in a corresponding file.
366         */
367        public static interface PositionUpdate {
368            
369            /**
370             * The identity position mapping.
371             */
372            PositionUpdate NONE = new PositionUpdate() {
373                @Override
374                public int transformLine(int line) {
375                    return line;
376                }
377                
378                @Override
379                public int transformColumn(int column) {
380                    return column;
381                }
382                
383                @Override
384                public Position transform(Position pos) {
385                    return pos;
386                }
387            };
388
389            /** 
390             * Maps the given line to an original line.
391             */
392            int transformLine(int line);
393
394            /** 
395             * Maps the given column to an original column.
396             */
397            int transformColumn(int column);
398
399            /**
400             * The transformed position.
401             */
402            default Position transform(Position pos) {
403                int line = pos.line;
404                int column = pos.column;
405                int transformedLine = transformLine(line);
406                int transformedColumn = transformColumn(column);
407                return new Position(transformedLinetransformedColumn);
408            }
409            
410        }
411        
412        private static final class DeltaInfo extends Position implements PositionUpdate {
413
414            /**
415             * The offset to add to the {@link #line} and all following source
416             * positions up to the next {@link PositionUpdate}.
417             */
418            private final int _lineDelta;
419            
420            /**
421             * The offset to add to the {@link #column} and all following
422             * source positions up to the next {@link PositionUpdate}.
423             */
424            private final int _columnDelta;
425
426            /** 
427             * Creates a {@link PositionUpdate}.
428             */
429            public DeltaInfo(int lineint columnint lineDelta,
430                    int columnDelta) {
431                super(linecolumn);
432                _lineDelta = lineDelta;
433                _columnDelta = columnDelta;
434            }
435            
436            @Override
437            public int transformLine(int sourceLine) {
438                return sourceLine + _lineDelta;
439            }
440            
441            @Override
442            public int transformColumn(int sourceColumn) {
443                return sourceColumn + _columnDelta;
444            }
445            
446            @Override
447            public String toString() {
448                return "(" + line + ", " + column + ": " + _lineDelta + ", " + _columnDelta + ")";
449            }
450
451        }
452
453        /** 
454         * Transforms the given {@link Position}.
455         */
456        public Position transform(Position pos) {
457            return lookup(pos).transform(pos);
458        }
459
460        /** 
461         * Transforms the given {@link Range}.
462         */
463        public Range transform(Range range) {
464            Position begin = transform(range.begin);
465            Position end = transform(range.end);
466            if (begin == range.begin && end == range.end) {
467                // No change.
468                return range;
469            }
470            return new Range(beginend);
471        }
472    }
473    
474    private static final class PositionMappingBuilder {
475        
476        private LineCounter _left;
477        
478        private LineCounter _right;
479        
480        private final PositionMapping _mapping = new PositionMapping();
481        
482        private int _lineDelta = 0;
483        private int _columnDelta = 0;
484        
485        /** 
486         * Creates a {@link PositionMappingBuilder}.
487         *
488         * @param left The source {@link LineCounter}.
489         * @param right The target {@link LineCounter}.
490         */
491        public PositionMappingBuilder(LineCounter leftLineCounter right) {
492            _left = left;
493            _right = right;
494            update();
495        }
496        
497        /**
498         * The built {@link PositionMapping}.
499         */
500        public PositionMapping getMapping() {
501            return _mapping;
502        }
503        
504        public void update() {
505            int lineDelta = _right.getLine() - _left.getLine();
506            int columnDelta = _right.getColumn() - _left.getColumn();
507            
508            if (lineDelta != _lineDelta || columnDelta != _columnDelta) {
509                _mapping.add(_left.getLine(), _left.getColumn(), lineDeltacolumnDelta);
510                
511                _lineDelta = lineDelta;
512                _columnDelta = columnDelta;
513            }
514        }
515        
516    }
517    
518    /**
519     * Processor keeping track of the current line and column in a stream of
520     * incoming characters.
521     * 
522     * @see #process(int)
523     */
524    public static final class LineCounter {
525        
526        /**
527         * Whether {@link #CR} has been seen on the input as last character.
528         */
529        private boolean _crSeen;
530
531        private int _line = 1;
532
533        private int _column = 1;
534
535        /** 
536         * Creates a {@link UnicodeEscapeProcessingProvider.LineCounter}.
537         */
538        public LineCounter() {
539            super();
540        }
541        
542        /**
543         * The line of the currently processed input character.
544         */
545        public int getLine() {
546            return _line;
547        }
548        
549        /**
550         * The column of the currently processed input character.
551         */
552        public int getColumn() {
553            return _column;
554        }
555        
556        /** 
557         * The current position.
558         */
559        public Position getPosition() {
560            return new Position(getLine(), getColumn());
561        }
562
563        /** 
564         * Analyzes the given character for line feed.
565         */
566        public int process(int ch) {
567            switch (ch) {
568                case EOF: {
569                    break;
570                }
571                case CR: {
572                    incLine();
573                    _crSeen = true;
574                    break;
575                }
576                case LF: {
577                    // CR LF does only count as a single line terminator.
578                    if (_crSeen) {
579                        _crSeen = false;
580                    } else {
581                        incLine();
582                    }
583                    break;
584                }
585                default: {
586                    _crSeen = false;
587                    _column++;
588                }
589            }
590            return ch;
591        }
592
593        private void incLine() {
594            _line++;
595            _column = 1;
596        }
597
598    }
599    
600}
601
MembersX
UnicodeEscapeProcessingProvider:PositionMapping:DeltaInfo:transformLine
UnicodeEscapeProcessingProvider:PositionMapping:PositionUpdate:transform:Block:transformedLine
UnicodeEscapeProcessingProvider:PositionMapping:DeltaInfo:_columnDelta
UnicodeEscapeProcessingProvider:LineCounter:getColumn
UnicodeEscapeProcessingProvider:PositionMapping:lookup
UnicodeEscapeProcessingProvider:digit
UnicodeEscapeProcessingProvider:PositionMapping:DeltaInfo:toString
UnicodeEscapeProcessingProvider:PositionMappingBuilder:update
UnicodeEscapeProcessingProvider:LineCounter:_column
UnicodeEscapeProcessingProvider:read:Block:pos
UnicodeEscapeProcessingProvider:PositionMapping:add
UnicodeEscapeProcessingProvider:PositionMapping:PositionUpdate:transform
UnicodeEscapeProcessingProvider:close
UnicodeEscapeProcessingProvider:_len
UnicodeEscapeProcessingProvider:PositionMapping:PositionUpdate:transform:Block:line
UnicodeEscapeProcessingProvider:PositionMapping:transform:Block:end
UnicodeEscapeProcessingProvider:CR
UnicodeEscapeProcessingProvider:PositionMapping:PositionUpdate:transform:Block:column
UnicodeEscapeProcessingProvider:PositionMappingBuilder:_lineDelta
UnicodeEscapeProcessingProvider:PositionMappingBuilder:getMapping
UnicodeEscapeProcessingProvider:LineCounter:getPosition
UnicodeEscapeProcessingProvider:_inputLine
UnicodeEscapeProcessingProvider:pushBack:Block:Block:Block:newData
UnicodeEscapeProcessingProvider:fillBuffer:Block:direct
UnicodeEscapeProcessingProvider:LineCounter:LineCounter
UnicodeEscapeProcessingProvider:pushBack:Block:Block:Block:cnt
UnicodeEscapeProcessingProvider:PositionMappingBuilder:_mapping
UnicodeEscapeProcessingProvider:nextOutputChar
UnicodeEscapeProcessingProvider:LineCounter:getLine
UnicodeEscapeProcessingProvider:PositionMapping:lookup:Block:Block:insertIndex
UnicodeEscapeProcessingProvider:LF
UnicodeEscapeProcessingProvider:fillBuffer
UnicodeEscapeProcessingProvider:_input
UnicodeEscapeProcessingProvider:PositionMapping:transform:Block:begin
UnicodeEscapeProcessingProvider:getPositionMapping
UnicodeEscapeProcessingProvider:nextBufferedChar:Block:Block:direct
UnicodeEscapeProcessingProvider:PositionMappingBuilder:update:Block:lineDelta
UnicodeEscapeProcessingProvider:PositionMappingBuilder:_columnDelta
UnicodeEscapeProcessingProvider:LineCounter:process
UnicodeEscapeProcessingProvider:PositionMappingBuilder:PositionMappingBuilder
UnicodeEscapeProcessingProvider:readDigits:Block:next0
UnicodeEscapeProcessingProvider:readDigits:Block:next1
UnicodeEscapeProcessingProvider:readDigits:Block:next2
UnicodeEscapeProcessingProvider:readDigits:Block:digit0
UnicodeEscapeProcessingProvider:getInputCounter
UnicodeEscapeProcessingProvider:unicodeStartSeen:Block:uCnt
UnicodeEscapeProcessingProvider:PositionMapping:_deltas
UnicodeEscapeProcessingProvider:LineCounter:incLine
UnicodeEscapeProcessingProvider:readDigits
UnicodeEscapeProcessingProvider:read:Block:stop
UnicodeEscapeProcessingProvider:readDigits:Block:digit3
UnicodeEscapeProcessingProvider:PositionMapping:DeltaInfo:DeltaInfo
UnicodeEscapeProcessingProvider:readDigits:Block:digit1
UnicodeEscapeProcessingProvider:PositionMapping:lookup:Block:result
UnicodeEscapeProcessingProvider:readDigits:Block:digit2
UnicodeEscapeProcessingProvider:PositionMapping:PositionMapping
UnicodeEscapeProcessingProvider:PositionMapping:PositionUpdate:NONE
UnicodeEscapeProcessingProvider:unicodeStartSeen:Block:Block:next
UnicodeEscapeProcessingProvider:clearBackSlashSeen
UnicodeEscapeProcessingProvider:_mappingBuilder
UnicodeEscapeProcessingProvider:backSlashSeen:Block:next
UnicodeEscapeProcessingProvider:BACKSLASH
UnicodeEscapeProcessingProvider:pushBack
UnicodeEscapeProcessingProvider:PositionMappingBuilder:update:Block:columnDelta
UnicodeEscapeProcessingProvider:UnicodeEscapeProcessingProvider
UnicodeEscapeProcessingProvider:getOutputCounter
UnicodeEscapeProcessingProvider:_outputLine
UnicodeEscapeProcessingProvider:PositionMapping:DeltaInfo:_lineDelta
UnicodeEscapeProcessingProvider:PositionMapping:isEmpty
UnicodeEscapeProcessingProvider:PositionMapping:PositionUpdate:transformLine
UnicodeEscapeProcessingProvider:isBufferEmpty
UnicodeEscapeProcessingProvider:unicodeStartSeen
UnicodeEscapeProcessingProvider:PositionMapping:transform
UnicodeEscapeProcessingProvider:nextBufferedChar
UnicodeEscapeProcessingProvider:PositionMapping:PositionUpdate:transformColumn
UnicodeEscapeProcessingProvider:nextInputChar
UnicodeEscapeProcessingProvider:nextOutputChar:Block:next
UnicodeEscapeProcessingProvider:LineCounter:_line
UnicodeEscapeProcessingProvider:PositionMappingBuilder:_right
UnicodeEscapeProcessingProvider:pushBackUs
UnicodeEscapeProcessingProvider:PositionMappingBuilder:_left
UnicodeEscapeProcessingProvider:read:Block:Block:ch
UnicodeEscapeProcessingProvider:PositionMapping:PositionUpdate:transform:Block:transformedColumn
UnicodeEscapeProcessingProvider:_pos
UnicodeEscapeProcessingProvider:_backslashSeen
UnicodeEscapeProcessingProvider:PositionMapping:DeltaInfo:transformColumn
UnicodeEscapeProcessingProvider:EOF
UnicodeEscapeProcessingProvider:LineCounter:_crSeen
UnicodeEscapeProcessingProvider:nextInputChar:Block:result
UnicodeEscapeProcessingProvider:backSlashSeen
UnicodeEscapeProcessingProvider:readDigits:Block:ch
UnicodeEscapeProcessingProvider:read
UnicodeEscapeProcessingProvider:_data
Members
X