1 | /* |
---|---|
2 | * Copyright (C) 2007-2010 Júlio Vilmar Gesser. |
3 | * Copyright (C) 2011, 2013-2020 The JavaParser Team. |
4 | * |
5 | * This file is part of JavaParser. |
6 | * |
7 | * JavaParser can be used either under the terms of |
8 | * a) the GNU Lesser General Public License as published by |
9 | * the Free Software Foundation, either version 3 of the License, or |
10 | * (at your option) any later version. |
11 | * b) the terms of the Apache License |
12 | * |
13 | * You should have received a copy of both licenses in LICENCE.LGPL and |
14 | * LICENCE.APACHE. Please refer to those files for details. |
15 | * |
16 | * JavaParser is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | * GNU Lesser General Public License for more details. |
20 | */ |
21 | package com.github.javaparser; |
22 | |
23 | import com.github.javaparser.utils.LineSeparator; |
24 | |
25 | import java.io.IOException; |
26 | import java.util.HashMap; |
27 | import java.util.Map; |
28 | import java.util.Optional; |
29 | |
30 | /** |
31 | * {@link Provider} un-escaping unicode escape sequences in the input sequence. |
32 | */ |
33 | public class LineEndingProcessingProvider implements Provider { |
34 | |
35 | private static final int EOF = -1; |
36 | |
37 | private static final int DEFAULT_BUFFER_SIZE = 2048; |
38 | |
39 | /** |
40 | * The "other" provider which we are wrapping around / reading from. |
41 | */ |
42 | private final Provider _input; |
43 | |
44 | /** |
45 | * The buffer that we're storing data within. |
46 | */ |
47 | private final char[] _data; |
48 | |
49 | /** |
50 | * The number of characters in {@link #_data}. |
51 | */ |
52 | private int _len = 0; |
53 | |
54 | /** |
55 | * The position in {@link #_data} where to read the next source character from. |
56 | */ |
57 | private int _pos = 0; |
58 | |
59 | private final Map<LineSeparator, Integer> eolCounts = new HashMap<>(); |
60 | |
61 | public LineEndingProcessingProvider(Provider input) { |
62 | this(DEFAULT_BUFFER_SIZE, input); |
63 | } |
64 | |
65 | public LineEndingProcessingProvider(int bufferSize, Provider input) { |
66 | _input = input; |
67 | _data = new char[bufferSize]; |
68 | } |
69 | |
70 | @Override |
71 | public void close() throws IOException { |
72 | _input.close(); |
73 | } |
74 | |
75 | private int fillBuffer() throws IOException { |
76 | _pos = 0; |
77 | int direct = _input.read(_data, 0, _data.length); |
78 | if (direct != 0) { |
79 | _len = direct; |
80 | } |
81 | return direct; |
82 | } |
83 | |
84 | public LineSeparator getDetectedLineEnding() { |
85 | return LineSeparator.getLineEnding( |
86 | eolCounts.getOrDefault(LineSeparator.CR, 0), |
87 | eolCounts.getOrDefault(LineSeparator.LF, 0), |
88 | eolCounts.getOrDefault(LineSeparator.CRLF, 0) |
89 | ); |
90 | } |
91 | |
92 | private boolean isBufferEmpty() { |
93 | return _pos >= _len; |
94 | } |
95 | |
96 | /** |
97 | * Retrieves the next un-escaped character from the buffered {@link #_input}. |
98 | * |
99 | * @return The next character or {@code -1} if no more input is available. |
100 | */ |
101 | private int nextBufferedChar() throws IOException { |
102 | while (isBufferEmpty()) { |
103 | int direct = fillBuffer(); |
104 | if (direct < 0) { |
105 | return EOF; |
106 | } |
107 | } |
108 | return _data[_pos++]; |
109 | } |
110 | |
111 | @Override |
112 | public int read(char[] buffer, final int offset, int len) throws IOException { |
113 | int pos = offset; |
114 | int stop = offset + len; |
115 | LineSeparator previousLineSeparator = null; |
116 | while (pos < stop) { |
117 | int ch = nextBufferedChar(); |
118 | if (ch < 0) { |
119 | if (pos == offset) { |
120 | // Nothing read yet, this is the end of the stream. |
121 | return EOF; |
122 | } else { |
123 | break; |
124 | } |
125 | } else { |
126 | String str = String.valueOf((char) ch); |
127 | Optional<LineSeparator> lookup = LineSeparator.lookup(str); |
128 | |
129 | if (lookup.isPresent()) { |
130 | LineSeparator lineSeparator = lookup.get(); |
131 | |
132 | // Track the number of times this character is found.. |
133 | eolCounts.putIfAbsent(lineSeparator, 0); |
134 | eolCounts.put(lineSeparator, eolCounts.get(lineSeparator) + 1); |
135 | |
136 | // Handle line separators of length two (specifically CRLF) |
137 | // TODO: Make this more generic than just CRLF (e.g. track the previous char rather than the previous line separator |
138 | if (lineSeparator == LineSeparator.LF) { |
139 | if (previousLineSeparator == LineSeparator.CR) { |
140 | eolCounts.putIfAbsent(LineSeparator.CRLF, 0); |
141 | eolCounts.put(LineSeparator.CRLF, eolCounts.get(LineSeparator.CRLF) + 1); |
142 | } |
143 | } |
144 | |
145 | // If "this" (current) char <strong>is</strong> a line separator, set the next loop's "previous" to this |
146 | previousLineSeparator = lineSeparator; |
147 | } else { |
148 | // If "this" (current) char <strong>is not</strong> a line separator, set the next loop's "previous" to null |
149 | previousLineSeparator = null; |
150 | } |
151 | |
152 | // Move to next character |
153 | buffer[pos++] = (char) ch; |
154 | } |
155 | } |
156 | return pos - offset; |
157 | } |
158 | |
159 | } |
160 |
Members