1 module source.lexstring;
2 
3 mixin template LexStringImpl(Token, alias StringSuffixes, alias CustomStringSuffixes = null) {
4 	/**
5 	 * Character literals.
6 	 */
7 	Token lexCharacter(string s : `'`)() {
8 		uint l = s.length;
9 		auto t = lexDecodedString!('\'')(index - l);
10 		if (t.type != TokenType.Invalid) {
11 			t.type = TokenType.CharacterLiteral;
12 		}
13 		
14 		return t;
15 	}
16 	
17 	/**
18 	 * String literals.
19 	 */
20 	auto lexStrignSuffix(uint begin) {
21 		return lexLiteralSuffix!(StringSuffixes, CustomStringSuffixes)(begin);
22 	}
23 	
24 	Token buildRawString(uint begin, size_t start, size_t stop) {
25 		auto t = lexStrignSuffix(begin);
26 		if (t.type == TokenType.Invalid) {
27 			// Bubble up errors.
28 			return t;
29 		}
30 
31 		if (decodeStrings) {
32 			t.name = context.getName(content[start .. stop]);
33 		}
34 
35 		return t;
36 	}
37 	
38 	Token lexRawString(char Delimiter = '`')(uint begin) {
39 		size_t start = index;
40 		
41 		auto c = frontChar;
42 		while (c != Delimiter && c != '\0') {
43 			popChar();
44 			c = frontChar;
45 		}
46 
47 		if (c == '\0') {
48 			return getError(begin, "Unexpected end of file.");
49 		}
50 		
51 		uint end = index;
52 		popChar();
53 		
54 		return buildRawString(begin, start, end);
55 	}
56 	
57 	Token lexString(string s : "`")() {
58 		uint l = s.length;
59 		return lexRawString!'`'(index - l);
60 	}
61 
62 	Token lexString(string s : "'")() {
63 		uint l = s.length;
64 		return lexRawString!'\''(index - l);
65 	}
66 
	/**
	 * Lexes a string in which backslash escape sequences are recognized.
	 *
	 * Characters are consumed until `Delimiter` or a '\0' character is
	 * found. When decodeStrings is set, escape sequences are decoded via
	 * lexEscapeSequence and the decoded text becomes the token's name;
	 * otherwise the character following each backslash is simply skipped.
	 *
	 * Params:
	 *     begin = passed to getError and lexStrignSuffix.
	 *
	 * Returns: the token produced by lexStrignSuffix, or an error token
	 *     when the input ends before the delimiter or an escape sequence
	 *     is rejected by lexEscapeSequence.
	 */
	Token lexDecodedString(char Delimiter = '"')(uint begin) {
		// Start of the pending run of characters not yet copied into `decoded`.
		size_t start = index;
		string decoded;
		
		auto c = frontChar;
		while (c != Delimiter && c != '\0') {
			// Anything but a backslash is part of the current verbatim run.
			if (c != '\\') {
				popChar();
				c = frontChar;
				continue;
			}
			
			if (!decodeStrings) {
				// Not decoding: skip the backslash and the character after it,
				// so that an escaped delimiter does not end the string.
				popChar();
				
				c = frontChar;
				if (c == '\0') {
					// Let the end of input check after the loop report the error.
					break;
				}
				
				popChar();
				c = frontChar;
				continue;
			}
			
			// NOTE(review): beginEscape is not used anywhere in this function.
			const beginEscape = index;
			// When this iteration's scope exits without throwing, the next
			// verbatim run starts at the current index.
			scope(success) {
				start = index;
			}
			
			// Flush the verbatim run that precedes the backslash.
			// Workaround for https://issues.dlang.org/show_bug.cgi?id=22271
			if (decoded == "") {
				decoded = content[start .. index];
			} else {
				decoded ~= content[start .. index];
			}
			
			// Consume the backslash; lexEscapeSequence handles what follows.
			popChar();
			if (!lexEscapeSequence(decoded)) {
				return getError(begin, "Invalid escape sequence.");
			}
			
			c = frontChar;
		}
		
		if (c == '\0') {
			return getError(begin, "Unexpected end of file.");
		}
		
		// Record where the content stops, then consume the closing delimiter.
		uint end = index;
		popChar();
		
		auto t = lexStrignSuffix(begin);
		if (t.type == TokenType.Invalid) {
			// Propagate errors.
			return t;
		}
		
		if (decodeStrings) {
			// Flush the final verbatim run, up to the closing delimiter.
			// Workaround for https://issues.dlang.org/show_bug.cgi?id=22271
			if (decoded == "") {
				decoded = content[start .. end];
			} else {
				decoded ~= content[start .. end];
			}
			
			t.name = context.getName(decoded);
		}
		
		return t;
	}
138 	
139 	Token lexString(string s : `"`)() {
140 		uint l = s.length;
141 		return lexDecodedString!'"'(index - l);
142 	}
143 
144 	/**
145 	 * Escape sequences.
146 	 */
147 	bool lexEscapeSequence(ref string decoded) {
148 		char c = frontChar;
149 		
150 		switch (c) {
151 			case '\'', '"', '\\':
152 				// Noop.
153 				break;
154 			
155 			case '?':
156 				assert(0, "WTF is \\?");
157 			
158 			case '0':
159 				c = '\0';
160 				break;
161 			
162 			case 'a':
163 				c = '\a';
164 				break;
165 			
166 			case 'b':
167 				c = '\b';
168 				break;
169 			
170 			case 'f':
171 				c = '\f';
172 				break;
173 			
174 			case 'r':
175 				c = '\r';
176 				break;
177 			
178 			case 'n':
179 				c = '\n';
180 				break;
181 			
182 			case 't':
183 				c = '\t';
184 				break;
185 			
186 			case 'v':
187 				c = '\v';
188 				break;
189 			
190 			case 'u', 'U':
191 				popChar();
192 				
193 				uint v = 0;
194 				
195 				auto length = 4 * (c == 'U') + 4;
196 				foreach (i; 0 .. length) {
197 					c = frontChar;
198 					
199 					uint d = c - '0';
200 					uint h = ((c | 0x20) - 'a') & 0xff;
201 					uint n = (d < 10) ? d : (h + 10);
202 					
203 					if (n >= 16) {
204 						return false;
205 					}
206 					
207 					v |= n << (4 * (length - i - 1));
208 					popChar();
209 				}
210 				
211 				char[4] buf;
212 				
213 				import std.utf;
214 				auto i = encode(buf, v);
215 				
216 				decoded ~= buf[0 .. i];
217 				return true;
218 			
219 			case '&':
220 				assert(0, "HTML5 named character references not implemented");
221 			
222 			default:
223 				return false;
224 		}
225 		
226 		popChar();
227 		decoded ~= c;
228 		return true;
229 	}
230 }