// Lexer for JSON-style input, assembled from the project's generic lexer
// mixins (LexBaseImpl / LexNumericImpl / LexStringImpl).
module source.jsonlexer;

import source.context;
import source.location;

// Token kinds produced by the lexer.
// NOTE: ordering matters — Token.toString treats every type >= Identifier
// as carrying its text in the interned `name` field.
enum TokenType {
	Invalid = 0,

	Begin,
	End,

	// Comments
	Comment,

	// Literals
	StringLiteral,
	// Character literals are folded into string literals: JSON draws no
	// distinction between '...' and "..." here.
	CharacterLiteral = StringLiteral,
	IntegerLiteral,
	FloatLiteral,

	// Identifier
	Identifier,

	// Keywords
	Null, True, False,

	// Operators.
	OpenParen, // (
	CloseParen, // )
	OpenBracket, // [
	CloseBracket, // ]
	OpenBrace, // {
	CloseBrace, // }
	Comma, // ,
	Colon, // :
}

// One lexed token: its source range, kind, and — for identifier-like
// tokens — an interned name.
struct Token {
	import source.location;
	Location location;

	TokenType type;

	import source.name;
	Name name;

	import source.context;
	// Render the token's text. Identifier-like tokens (type >= Identifier,
	// i.e. identifiers and keywords) carry their text as an interned Name;
	// everything else is read back from the source slice that `location`
	// covers.
	string toString(Context context) {
		return (type >= TokenType.Identifier)
			? name.toString(context)
			: location.getFullLocation(context).getSlice();
	}
}

/**
 * Build a JsonLexer over the source that contains `base`, primed so that
 * the first token it yields is TokenType.Begin (with an empty location at
 * the starting offset).
 */
auto lex(Position base, Context context) {
	auto lexer = JsonLexer();

	lexer.content = base.getFullPosition(context).getSource().getContent();
	lexer.t.type = TokenType.Begin;

	lexer.context = context;
	lexer.base = base;
	lexer.previous = base;

	// Begin token covers the zero-length range at the current offset.
	lexer.t.location = Location(base, base.getWithOffset(lexer.index));
	return lexer;
}

struct JsonLexer {
	// Dispatch table consumed by LexBaseImpl: maps a token prefix to the
	// name of the lexer method that handles it.
	// NOTE(review): the "?tokenizeComments:lexComment|popComment" entries
	// appear to pick one of two handlers based on a tokenizeComments flag;
	// the exact string format is defined by LexBaseImpl — confirm there.
	enum BaseMap = () {
		auto ret = [
			// Comments
			"//" : "?tokenizeComments:lexComment|popComment",
			"/*" : "?tokenizeComments:lexComment|popComment",
			"/+" : "?tokenizeComments:lexComment|popComment",

			// Integer literals.
			"0b" : "lexNumeric",
			"0B" : "lexNumeric",
			"0x" : "lexNumeric",
			"0X" : "lexNumeric",

			// String literals.
			`"` : "lexString",
			"'" : "lexString",
		];

		// Every decimal digit also starts a numeric literal.
		foreach (i; 0 .. 10) {
			import std.conv;
			ret[to!string(i)] = "lexNumeric";
		}

		return ret;
	}();

	// The three JSON keywords.
	enum KeywordMap = [
		"null" : TokenType.Null,
		"true" : TokenType.True,
		"false" : TokenType.False,
	];

	// Single-character punctuation; '\0' marks end of input (testlexer
	// below appends it explicitly).
	enum OperatorMap = [
		"(" : TokenType.OpenParen,
		")" : TokenType.CloseParen,
		"[" : TokenType.OpenBracket,
		"]" : TokenType.CloseBracket,
		"{" : TokenType.OpenBrace,
		"}" : TokenType.CloseBrace,
		"," : TokenType.Comma,
		":" : TokenType.Colon,
		"\0" : TokenType.End,
	];

	// Generic lexer machinery (range interface: front/popFront, etc. —
	// exercised by the unittest below).
	import source.lexbase;
	mixin LexBaseImpl!(Token, BaseMap, KeywordMap, OperatorMap);

	// Numeric literal lexing. The empty-string keys map un-suffixed
	// integer/float literals to their token types.
	import source.lexnumeric;
	mixin LexNumericImpl!(Token, [
		"" : TokenType.IntegerLiteral,
	], [
		"" : TokenType.FloatLiteral,
	]);

	// String literal lexing, covering both quote styles.
	import source.lexstring;
	mixin LexStringImpl!(Token, [
		"" : TokenType.StringLiteral,
	]);
}

unittest {
	auto context = new Context();

	// Register `s` (NUL-terminated, as OperatorMap expects) as a mixin
	// source and return a lexer over it.
	auto testlexer(string s) {
		import source.name;
		auto base = context.registerMixin(Location.init, s ~ '\0');
		return lex(base, context);
	}

	import source.parserutil;

	// Empty input: just Begin then End.
	{
		auto lex = testlexer("");
		lex.match(TokenType.Begin);
		assert(lex.front.type == TokenType.End);
	}

	// Keywords, punctuation and identifiers. The lexer does not check
	// bracket balance; tokens come out in source order.
	{
		auto lex = testlexer("null(aa[{]true})false");
		lex.match(TokenType.Begin);
		lex.match(TokenType.Null);
		lex.match(TokenType.OpenParen);

		auto t = lex.front;
		assert(t.type == TokenType.Identifier);
		assert(t.toString(context) == "aa");

		lex.popFront();
		lex.match(TokenType.OpenBracket);
		lex.match(TokenType.OpenBrace);
		lex.match(TokenType.CloseBracket);
		lex.match(TokenType.True);
		lex.match(TokenType.CloseBrace);
		lex.match(TokenType.CloseParen);
		lex.match(TokenType.False);

		assert(lex.front.type == TokenType.End);
	}

	// Both quote styles lex as StringLiteral; toString yields the literal
	// including its quotes, and each style may embed the other's quote.
	{
		auto lex = testlexer(`"""foobar"'''balibalo'"\""'"'"'"`);
		lex.match(TokenType.Begin);

		foreach (expected; [`""`, `"foobar"`, `''`, `'balibalo'`, `"\""`,
		                    `'"'`, `"'"`]) {
			auto t = lex.front;

			assert(t.type == TokenType.StringLiteral);
			assert(t.toString(context) == expected);
			lex.popFront();
		}

		assert(lex.front.type == TokenType.End);
	}

	// Check unterminated strings.
	{
		auto lex = testlexer(`"`);
		lex.match(TokenType.Begin);

		auto t = lex.front;
		assert(t.type == TokenType.Invalid);
	}

	// Unterminated string ending in a bare escape.
	{
		auto lex = testlexer(`"\`);
		lex.match(TokenType.Begin);

		auto t = lex.front;
		assert(t.type == TokenType.Invalid);
	}

	{
		auto lex = testlexer(`'`);
		lex.match(TokenType.Begin);

		auto t = lex.front;
		assert(t.type == TokenType.Invalid);
	}

	{
		auto lex = testlexer(`'\`);
		lex.match(TokenType.Begin);

		auto t = lex.front;
		assert(t.type == TokenType.Invalid);
	}

	// Check unicode support
	// \uXXXX and \UXXXXXXXX escapes decode to the code points they name;
	// raw UTF-8 passes through. `name` holds the decoded string content
	// (no quotes), unlike toString which returns the raw slice.
	{
		auto lex =
			testlexer(`"\U0001F0BD\u0393α\u1FD6\u03B1\U0001FA01🙈🙉🙊\U0001F71A"`);
		lex.match(TokenType.Begin);

		auto t = lex.front;

		assert(t.type == TokenType.StringLiteral);
		assert(t.name.toString(context) == "🂽Γαῖα🨁🙈🙉🙊🜚");
		lex.popFront();

		assert(lex.front.type == TokenType.End);
	}

	// Malformed unicode escapes are rejected: too few digits...
	{
		auto lex = testlexer(`"\U0001F0B"`);
		lex.match(TokenType.Begin);
		assert(lex.front.type == TokenType.Invalid);
	}

	{
		auto lex = testlexer(`"\u039"`);
		lex.match(TokenType.Begin);
		assert(lex.front.type == TokenType.Invalid);
	}

	// ...or a non-hex character inside the escape.
	{
		auto lex = testlexer(`"\u039G"`);
		lex.match(TokenType.Begin);
		assert(lex.front.type == TokenType.Invalid);
	}

	{
		auto lex = testlexer(`"\u03@3"`);
		lex.match(TokenType.Begin);
		assert(lex.front.type == TokenType.Invalid);
	}
}