diff options
Diffstat (limited to 'src/monkey/seaspider/C.jj')
-rw-r--r-- | src/monkey/seaspider/C.jj | 541 |
1 files changed, 541 insertions, 0 deletions
diff --git a/src/monkey/seaspider/C.jj b/src/monkey/seaspider/C.jj new file mode 100644 index 000000000..ba10f4261 --- /dev/null +++ b/src/monkey/seaspider/C.jj | |||
@@ -0,0 +1,541 @@ | |||
1 | /* | ||
2 | |||
3 | C grammar definition for use with JavaCC | ||
4 | Contributed by Doug South (dsouth@squirrel.com.au) 21/3/97 | ||
5 | |||
6 | This parser assumes that the C source file has been preprocessed : all | ||
7 | #includes have been included and all macros have been expanded. I accomplish | ||
8 | this with "gcc -P -E <source file> > <output file>". | ||
9 | |||
10 | There is a problem with compiler specific types, such as __signed, __const, | ||
11 | __inline__, etc. These types can be added as typedef types before the parser | ||
12 | is run on a file. See main() for an example. I have also found a strange little | ||
13 | compiler specific "type" if you can call it that. It is __attribute__, but it | ||
14 | does not seem to be used as a type. I found that just deleting the __attribute__ | ||
15 | and the following "offensive" code works. | ||
16 | |||
17 | This grammar also prints out all the types defined while parsing the file. This | ||
18 | is done via a call to printTypes() when the parser is complete. If you do not want | ||
19 | this, just comment out the printTypes() method call in the production rule | ||
20 | TranslationUnit(), which BTW is the root node for parsing a C source file. | ||
21 | |||
22 | I have not in any way extensively tested this grammar; in fact it is barely tested, | ||
23 | but I imagine it is better to have a starting point for a C grammar other than from | ||
24 | scratch. It has not been optimized in any way; my main aim was to get a parser that | ||
25 | works. Lookahead may not be optimum at choice points and may even be insufficient at | ||
26 | times. I choose to err on the side of not optimum if I made a choice at all. | ||
27 | |||
28 | If you use this grammar, I would appreciate hearing from you. I will try to maintain | ||
29 | this grammar to the best of my ability, but at this point in time, this is only a side | ||
30 | hobby (unless someone wants to pay me for doing JavaCC work!). In that regard, I am | ||
31 | interested in hearing bugs and comments. | ||
32 | |||
33 | TODO: | ||
34 | |||
35 | Insert the appropriate code to enable C source trees from this grammar. | ||
36 | |||
37 | ============================================= | ||
38 | 3/2/06: Modified by Tom Copeland | ||
39 | - STRING_LITERAL now handles embedded escaped newlines, thanks to J.Chris Findlay for the patch | ||
40 | - Works with JavaCC 4.0 | ||
41 | - Preprocessor directives are now simply SKIP'd, so no need to run C files through GCC first | ||
42 | |||
43 | 31/8/10: Modified heavily by Christian Grothoff | ||
44 | - No more tracking of type names (so we can run without preprocessing) | ||
45 | - Support certain gcc-isms (unsigned long long, 33LL, etc.) | ||
46 | - No support for certain older C constructs | ||
47 | - Support for magic "GNUNET_PACKED" construct (extra "IDENTIFIER" in struct) | ||
48 | */ | ||
49 | |||
50 | PARSER_BEGIN(CParser) | ||
51 | |||
52 | import java.util.*; | ||
53 | |||
54 | public class CParser{ | ||
55 | |||
56 | // Run the parser | ||
57 | public static void main ( String args [ ] ) { | ||
58 | CParser parser ; | ||
59 | |||
60 | if(args.length == 0){ | ||
61 | System.out.println("C Parser Version 0.1Alpha: Reading from standard input . . ."); | ||
62 | parser = new CParser(System.in); | ||
63 | } | ||
64 | else if(args.length == 1){ | ||
65 | System.out.println("C Parser Version 0.1Alpha: Reading from file " + args[0] + " . . ." ); | ||
66 | try { | ||
67 | parser = new CParser(new java.io.FileInputStream(args[0])); | ||
68 | } | ||
69 | catch(java.io.FileNotFoundException e){ | ||
70 | System.out.println("C Parser Version 0.1: File " + args[0] + " not found."); | ||
71 | return ; | ||
72 | } | ||
73 | } | ||
74 | else { | ||
75 | System.out.println("C Parser Version 0.1Alpha: Usage is one of:"); | ||
76 | System.out.println(" java CParser < inputfile"); | ||
77 | System.out.println("OR"); | ||
78 | System.out.println(" java CParser inputfile"); | ||
79 | return ; | ||
80 | } | ||
81 | try { | ||
82 | parser.TranslationUnit(); | ||
83 | System.out.println("C Parser Version 0.1Alpha: Java program parsed successfully."); | ||
84 | } | ||
85 | catch(ParseException e){ | ||
86 | System.out.println("C Parser Version 0.1Alpha: Encountered errors during parse."); | ||
87 | e.printStackTrace(); | ||
88 | } | ||
89 | } | ||
90 | } | ||
91 | |||
92 | PARSER_END(CParser) | ||
93 | |||
/*
 * Input discarded in the DEFAULT lexical state: blanks, tabs, line breaks,
 * both comment forms, and the "#" that opens a preprocessor directive (which
 * also switches the lexer to the PREPROCESSOR_OUTPUT state).
 */
SKIP : {
  " "
| "\t"
| "\n"
| "\r"
  /* The line terminator is optional so that a "//" comment on the last line
     of a file without a trailing newline is still skipped instead of
     producing a lexical error. */
| <"//" (~["\n","\r"])* ("\n" | "\r" | "\r\n")?>
| <"/*" (~["*"])* "*" ("*" | ~["*","/"] (~["*"])* "*")* "/">
| "#" : PREPROCESSOR_OUTPUT
}
103 | |||
/* A newline ends the preprocessor directive: the match is skipped and the
   lexer returns to the DEFAULT state. */
<PREPROCESSOR_OUTPUT>
SKIP : { "\n" : DEFAULT }
108 | |||
/* Inside a directive every character is accumulated with MORE. A backslash
   directly followed by a line break is matched as one unit, so a continued
   line does not end the directive. */
<PREPROCESSOR_OUTPUT>
MORE : {
    "\\\n"
  | "\\\r\n"
  | < ~[] >
}
117 | |||
118 | |||
/*
 * Literal tokens: integer, floating-point, character and string constants.
 * Names prefixed with '#' are private helper expressions, not tokens.
 *
 * Compared with the earlier definitions this accepts a superset of inputs:
 *  - integer constants may carry an unsigned suffix (33U, 33UL, 33ULL, 33LU);
 *  - floating constants containing a "." or an exponent may end in l/L;
 *  - escapes additionally cover \a, \v, \? and hexadecimal \x.. sequences;
 *  - adjacent string segments may be separated by tabs as well as blanks
 *    and line breaks.
 */
TOKEN : {
  <INTEGER_LITERAL:
      <DECIMAL_LITERAL> (<INTEGER_SUFFIX>)?
    | <HEX_LITERAL> (<INTEGER_SUFFIX>)?
    | <OCTAL_LITERAL> (<INTEGER_SUFFIX>)? >
| <#DECIMAL_LITERAL: ["1"-"9"] (["0"-"9"])* >
| <#HEX_LITERAL: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+ >
| <#OCTAL_LITERAL: "0" (["0"-"7"])* >
  /* Either u/U followed by up to two l/L, or one or two l/L optionally
     followed by u/U. Every suffix accepted before is still accepted. */
| <#INTEGER_SUFFIX:
      ["u","U"] (["l","L"])? (["l","L"])?
    | ["l","L"] (["l","L"])? (["u","U"])? >
  /* The last alternative (digits with a mandatory suffix) deliberately keeps
     its original suffix set so that "1L" remains an INTEGER_LITERAL only. */
| <FLOATING_POINT_LITERAL:
      (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)? (["f","F","d","D","l","L"])?
    | "." (["0"-"9"])+ (<EXPONENT>)? (["f","F","d","D","l","L"])?
    | (["0"-"9"])+ <EXPONENT> (["f","F","d","D","l","L"])?
    | (["0"-"9"])+ (<EXPONENT>)? ["f","F","d","D"] >
| <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
| <CHARACTER_LITERAL: "\'" ( ~["\'","\\","\n","\r"] | <ESCAPE_SEQUENCE> ) "\'" >
  /* One or more quoted segments; consecutive segments are folded into a
     single token. */
| <STRING_LITERAL: <STRING_SEGMENT> ( ( ["\r","\n"," ","\t"] )* <STRING_SEGMENT> )* >
  /* A quoted run of ordinary characters, escapes, and backslash-newline
     continuations. */
| <#STRING_SEGMENT:
      "\"" (   ~["\"","\\","\n","\r"]
             | <ESCAPE_SEQUENCE>
             | "\\" ( ["\n","\r"] | "\r\n" ) )* "\"" >
| <#ESCAPE_SEQUENCE:
      "\\" (   ["n","t","b","r","f","a","v","?","\\","\'","\""]
             | ["0"-"7"] (["0"-"7"])?
             | ["0"-"3"] ["0"-"7"] ["0"-"7"]
             | "x" (["0"-"9","a"-"f","A"-"F"])+ ) >
}
129 | |||
/* Reserved words. They are declared ahead of IDENTIFIER so that a keyword
   spelling is reported as its keyword token. */
TOKEN : {
    <CONTINUE: "continue">
  | <VOLATILE: "volatile">
  | <REGISTER: "register">
  | <UNSIGNED: "unsigned">
  | <TYPEDEF:  "typedef">
  | <DFLT:     "default">
  | <DOUBLE:   "double">
  | <SIZEOF:   "sizeof">
  | <SWITCH:   "switch">
  | <RETURN:   "return">
  | <EXTERN:   "extern">
  | <STRUCT:   "struct">
  | <STATIC:   "static">
  | <SIGNED:   "signed">
  | <WHILE:    "while">
  | <BREAK:    "break">
  | <UNION:    "union">
  | <CONST:    "const">
  | <FLOAT:    "float">
  | <SHORT:    "short">
  | <ELSE:     "else">
  | <CASE:     "case">
  | <LONG:     "long">
  | <ENUM:     "enum">
  | <AUTO:     "auto">
  | <VOID:     "void">
  | <CHAR:     "char">
  | <GOTO:     "goto">
  | <FOR:      "for">
  | <INT:      "int">
  | <IF:       "if">
  | <DO:       "do">
}
164 | |||
/* Identifiers: a letter (including "$" and "_") followed by any number of
   letters or digits. LETTER and DIGIT are private helpers. */
TOKEN : {
    <IDENTIFIER: <LETTER> ( <LETTER> | <DIGIT> )* >
  | <#LETTER: ["$","A"-"Z","_","a"-"z"] >
  | <#DIGIT: ["0"-"9"] >
}
170 | |||
/* Root production: one or more external declarations. */
void TranslationUnit() :
{}
{
  ExternalDeclaration() ( ExternalDeclaration() )*
}
175 | |||
/* One top-level construct: any number of storage-class keywords followed by
   a function, a struct/union specifier, a variable declaration or a typedef.
   Each alternative is guarded by a syntactic lookahead of itself and the
   alternatives are tried in the order written. */
void ExternalDeclaration() :
{}
{
  ( StorageClassSpecifier() )*
  (
      LOOKAHEAD( FunctionDeclaration() )    FunctionDeclaration()
    | LOOKAHEAD( StructOrUnionSpecifier() ) StructOrUnionSpecifier()
    | LOOKAHEAD( VariableDeclaration() )    VariableDeclaration()
    | LOOKAHEAD( TypeDeclaration() )        TypeDeclaration()
  )
}
186 | |||
/* A function prototype (ending in ";") or a function definition (ending in
   a compound statement). */
void FunctionDeclaration() :
{}
{
  TypeSpecifier()
  <IDENTIFIER>
  "(" ( ParameterList() )? ")"
  (
      ";"
    | CompoundStatement()
  )
}
194 | |||
/* A single storage-class keyword: "static" or "extern". */
void StorageClassSpecifier() :
{}
{
    <STATIC>
  | <EXTERN>
}
199 | |||
/* A typedef: either a struct/union data type or a function-pointer type,
   terminated by ";". The data-type form is chosen only when the lookahead
   sees it followed directly by ";". */
void TypeDeclaration() :
{}
{
  <TYPEDEF>
  (
      LOOKAHEAD( DataType() ";" ) DataType()
    | FunctionType()
  )
  ";"
}
205 | |||
/* Struct/union specifier followed by the name being introduced. */
void DataType() :
{}
{
  StructOrUnionSpecifier()
  <IDENTIFIER>
}
210 | |||
/* Function-pointer type of the shape: ret (*name)(params). */
void FunctionType() :
{}
{
  TypeSpecifier()
  "(" "*" <IDENTIFIER> ")"
  "(" ( ParameterList() )? ")"
}
215 | |||
/* Comma-separated parameters, optionally ending in ", ...". The two-token
   lookahead keeps the loop from consuming the comma that starts ", ...". */
void ParameterList() :
{}
{
  ParameterDeclaration()
  ( LOOKAHEAD(2) "," ParameterDeclaration() )*
  ( "," "..." )?
}
220 | |||
/* One named parameter: type, identifier, optional array suffix. */
void ParameterDeclaration() :
{}
{
  TypeSpecifier()
  <IDENTIFIER>
  ( Array() )?
}
225 | |||
/* Variable declaration: optional auto/register keywords, a type, and one or
   more declarators, terminated by ";". */
void VariableDeclaration() :
{}
{
  VariableClassSpecifier()
  TypeSpecifier()
  InitDeclaratorList()
  ";"
}
232 | |||
/* A variable declaration inside a block, optionally prefixed by "static". */
void LocalVariableDeclaration() :
{}
{
  ( <STATIC> )?
  VariableDeclaration()
}
237 | |||
/* Zero or more "auto" / "register" keywords. */
void VariableClassSpecifier() :
{}
{
  (
      <AUTO>
    | <REGISTER>
  )*
}
242 | |||
/* A type: optional "const", then a built-in type, a struct/union or enum
   specifier, or a plain identifier (assumed to name a typedef), followed by
   an optional pointer and an optional array suffix. */
void TypeSpecifier() :
{}
{
  ( <CONST> )?
  (
      <VOID>
    | <CHAR>
    | <SHORT> ( <INT> )?
    | <INT>
    | <LONG> ( <LONG> )?
    | <FLOAT>
    | <DOUBLE>
    | ( <SIGNED> | <UNSIGNED> )
      (
          <CHAR>
        | <SHORT> ( <INT> )?
        | <INT>
        | <LONG> ( <LONG> )?
      )?
    | StructOrUnionSpecifier()
    | EnumSpecifier()
    | <IDENTIFIER>
  )
  ( Pointer() )?
  ( Array() )?
}
263 | |||
/* Same as TypeSpecifier but without the bare-identifier alternative.
   This is needed for 'va_arg', where a type appears as an argument: after
   'typedef int FOO' a use of the type 'FOO' cannot be told apart from a
   variable named 'FOO', hence this hack. */
void NoIdentifierTypeSpecifier() :
{}
{
  ( <CONST> )?
  (
      <VOID>
    | <CHAR>
    | <SHORT> ( <INT> )?
    | <INT>
    | <LONG> ( <LONG> )?
    | <FLOAT>
    | <DOUBLE>
    | ( <SIGNED> | <UNSIGNED> )
      (
          <CHAR>
        | <SHORT> ( <INT> )?
        | <INT>
        | <LONG> ( <LONG> )?
      )?
    | StructOrUnionSpecifier()
    | EnumSpecifier()
  )
  ( Pointer() )?
  ( Array() )?
}
287 | |||
/* Either a struct/union with a member list (tag optional) or a reference to
   a tagged struct/union. Three tokens of lookahead decide whether a "{"
   follows. */
void StructOrUnionSpecifier() :
{}
{
    LOOKAHEAD(3)
    StructOrUnion() ( <IDENTIFIER> )? "{" StructDeclarationList() "}"
  | StructOrUnion() <IDENTIFIER>
}
294 | |||
/* The keyword "struct" or "union". */
void StructOrUnion() :
{}
{
    <STRUCT>
  | <UNION>
}
299 | |||
/* One or more member declarations. */
void StructDeclarationList() :
{}
{
  StructDeclaration() ( StructDeclaration() )*
}
304 | |||
/* Comma-separated list of declarators. */
void InitDeclaratorList() :
{}
{
  InitDeclarator()
  ( "," InitDeclarator() )*
}
309 | |||
/* A declared name with optional array suffix and optional "= initializer". */
void InitDeclarator() :
{}
{
  <IDENTIFIER>
  ( Array() )?
  ( "=" Initializer() )?
}
314 | |||
/* One struct/union member: type and name, then either an array suffix or a
   ":" bit-field width, then an optional extra identifier (the header notes
   this is for the "GNUNET_PACKED" construct), terminated by ";". */
void StructDeclaration() :
{}
{
  TypeSpecifier()
  <IDENTIFIER>
  (
      Array()
    | ":" ConstantExpression()
  )?
  ( <IDENTIFIER> )?
  ";"
}
319 | |||
/* "enum" followed by either a braced enumerator list (tag optional) or just
   a tag. */
void EnumSpecifier() :
{}
{
  <ENUM>
  (
      LOOKAHEAD(3) ( <IDENTIFIER> )? "{" EnumeratorList() "}"
    | <IDENTIFIER>
  )
}
324 | |||
/* Comma-separated enumerators, with an optional trailing comma as in
   "enum { A, B, }". The two-token lookahead stops the loop when the comma
   is not followed by another enumerator, the same pattern InitializerList
   and Initializer use for a trailing comma in braces. */
void EnumeratorList() :
{}
{
  Enumerator()
  ( LOOKAHEAD(2) "," Enumerator() )*
  ( "," )?
}
329 | |||
/* An enumeration constant with an optional "= value". */
void Enumerator() :
{}
{
  <IDENTIFIER>
  ( "=" ConstantExpression() )?
}
334 | |||
/* One or more "*", each optionally followed by "const". */
void Pointer() :
{}
{
  "*"
  ( <CONST> )?
  ( Pointer() )?
}
339 | |||
/* Comma-separated identifiers. */
void IdentifierList() :
{}
{
  <IDENTIFIER>
  ( "," <IDENTIFIER> )*
}
344 | |||
/* Either a single expression or a braced initializer list that may end in a
   trailing comma. */
void Initializer() :
{}
{
    AssignmentExpression()
  | "{" InitializerList() ( "," )? "}"
}
350 | |||
/* Comma-separated initializers. Two tokens of lookahead leave a trailing
   comma for the enclosing Initializer. */
void InitializerList() :
{}
{
  Initializer()
  ( LOOKAHEAD(2) "," Initializer() )*
}
355 | |||
356 | |||
/* A single array suffix: "[" optional-size "]". */
void Array() :
{}
{
  "["
  ( ConstantExpression() )?
  "]"
}
361 | |||
/* Any statement. The labeled form is tried first with two tokens of
   lookahead. */
void Statement() :
{}
{
    LOOKAHEAD(2) LabeledStatement()
  | ExpressionStatement()
  | CompoundStatement()
  | SelectionStatement()
  | IterationStatement()
  | JumpStatement()
}
371 | |||
/* A statement prefixed by a goto label, a "case" label or "default". */
void LabeledStatement() :
{}
{
    <IDENTIFIER> ":" Statement()
  | <CASE> ConstantExpression() ":" Statement()
  | <DFLT> ":" Statement()
}
378 | |||
/* An optional expression followed by ";" (so a lone ";" is accepted). */
void ExpressionStatement() :
{}
{
  ( Expression() )?
  ";"
}
383 | |||
/* A braced block holding any mix of local variable declarations and
   statements; a declaration is chosen whenever the lookahead can match one. */
void CompoundStatement() :
{}
{
  "{"
  (
      LOOKAHEAD( LocalVariableDeclaration() ) LocalVariableDeclaration()
    | Statement()
  )*
  "}"
}
390 | |||
/* "if" with optional "else", or "switch". */
void SelectionStatement() :
{}
{
    <IF> "(" Expression() ")" Statement()
    ( LOOKAHEAD(2) <ELSE> Statement() )?
  | <SWITCH> "(" Expression() ")" Statement()
}
396 | |||
/* The three loop forms: while, do-while and for (each "for" clause is
   optional). */
void IterationStatement() :
{}
{
    <WHILE> "(" Expression() ")" Statement()
  | <DO> Statement() <WHILE> "(" Expression() ")" ";"
  | <FOR> "("
      ( Expression() )? ";"
      ( Expression() )? ";"
      ( Expression() )?
    ")" Statement()
}
403 | |||
/* goto, continue, break, or return with an optional value. */
void JumpStatement() :
{}
{
    <GOTO> <IDENTIFIER> ";"
  | <CONTINUE> ";"
  | <BREAK> ";"
  | <RETURN> ( Expression() )? ";"
}
411 | |||
/* One or more assignment expressions separated by the comma operator. */
void Expression() :
{}
{
  AssignmentExpression()
  ( "," AssignmentExpression() )*
}
416 | |||
/* An assignment (chosen only when a unary expression followed by an
   assignment operator can be matched ahead) or a conditional expression. */
void AssignmentExpression() :
{}
{
    LOOKAHEAD( UnaryExpression() AssignmentOperator() )
    UnaryExpression() AssignmentOperator() AssignmentExpression()
  | LOOKAHEAD(3)
    ConditionalExpression()
}
422 | |||
/* Plain and compound assignment operators. */
void AssignmentOperator() :
{}
{
    "="
  | "*="
  | "/="
  | "%="
  | "+="
  | "-="
  | "<<="
  | ">>="
  | "&="
  | "^="
  | "|="
}
427 | |||
/* A logical-OR expression with an optional "? expr : conditional" tail. */
void ConditionalExpression() :
{}
{
  LogicalORExpression()
  ( "?" Expression() ":" ConditionalExpression() )?
}
432 | |||
/* Syntactically the same as a conditional expression. */
void ConstantExpression() :
{}
{
  ConditionalExpression()
}
437 | |||
/* Operands joined by "||" (right-recursive). */
void LogicalORExpression() :
{}
{
  LogicalANDExpression()
  ( "||" LogicalORExpression() )?
}
442 | |||
/* Operands joined by "&&" (right-recursive). */
void LogicalANDExpression() :
{}
{
  InclusiveORExpression()
  ( "&&" LogicalANDExpression() )?
}
447 | |||
/* Operands joined by bitwise "|" (right-recursive). */
void InclusiveORExpression() :
{}
{
  ExclusiveORExpression()
  ( "|" InclusiveORExpression() )?
}
452 | |||
/* Operands joined by bitwise "^" (right-recursive). */
void ExclusiveORExpression() :
{}
{
  ANDExpression()
  ( "^" ExclusiveORExpression() )?
}
457 | |||
/* Operands joined by bitwise "&" (right-recursive). */
void ANDExpression() :
{}
{
  EqualityExpression()
  ( "&" ANDExpression() )?
}
462 | |||
/* Operands joined by "==" or "!=" (right-recursive). */
void EqualityExpression() :
{}
{
  RelationalExpression()
  (
    ( "==" | "!=" )
    EqualityExpression()
  )?
}
467 | |||
/* Operands joined by "<", ">", "<=" or ">=" (right-recursive). */
void RelationalExpression() :
{}
{
  ShiftExpression()
  (
    ( "<" | ">" | "<=" | ">=" )
    RelationalExpression()
  )?
}
472 | |||
/* Operands joined by "<<" or ">>" (right-recursive). */
void ShiftExpression() :
{}
{
  AdditiveExpression()
  (
    ( "<<" | ">>" )
    ShiftExpression()
  )?
}
477 | |||
/* Operands joined by "+" or "-" (right-recursive). */
void AdditiveExpression() :
{}
{
  MultiplicativeExpression()
  (
    ( "+" | "-" )
    AdditiveExpression()
  )?
}
482 | |||
/* Operands joined by "*", "/" or "%" (right-recursive). */
void MultiplicativeExpression() :
{}
{
  CastExpression()
  (
    ( "*" | "/" | "%" )
    MultiplicativeExpression()
  )?
}
487 | |||
/* A parenthesised type followed by the expression being cast, or a unary
   expression. The cast form is taken only when the whole cast can be
   matched ahead. */
void CastExpression() :
{}
{
    LOOKAHEAD( "(" TypeSpecifier() ")" CastExpression() )
    "(" TypeSpecifier() ")" CastExpression()
  | UnaryExpression()
}
493 | |||
/* Postfix expression, prefix ++/--, a unary operator applied to a cast
   expression, or sizeof applied to an expression or a parenthesised type. */
void UnaryExpression() :
{}
{
    LOOKAHEAD(3) PostfixExpression()
  | "++" UnaryExpression()
  | "--" UnaryExpression()
  | UnaryOperator() CastExpression()
  | <SIZEOF>
    (
        LOOKAHEAD( UnaryExpression() ) UnaryExpression()
      | "(" TypeSpecifier() ")"
    )
}
502 | |||
/* The single-character prefix operators. */
void UnaryOperator() :
{}
{
    "&"
  | "*"
  | "+"
  | "-"
  | "~"
  | "!"
}
507 | |||
/* A primary expression followed by any number of postfix operations:
   subscript, call, member access via "." or "->", and postfix ++/--. */
void PostfixExpression() :
{}
{
  PrimaryExpression()
  (
      "[" Expression() "]"
    | "(" ( LOOKAHEAD( ArgumentExpressionList() ) ArgumentExpressionList() )? ")"
    | "." <IDENTIFIER>
    | "->" <IDENTIFIER>
    | "++"
    | "--"
  )*
}
517 | |||
/* An identifier, a constant, or a parenthesised expression. */
void PrimaryExpression() :
{}
{
    <IDENTIFIER>
  | Constant()
  | "(" Expression() ")"
}
524 | |||
/* Comma-separated call arguments; each may be an expression or a type. */
void ArgumentExpressionList() :
{}
{
  AssignmentOrTypeExpression()
  ( "," AssignmentOrTypeExpression() )*
}
529 | |||
530 | |||
/* A call argument: a type that is not a bare identifier, or an assignment
   expression. */
void AssignmentOrTypeExpression() :
{}
{
    NoIdentifierTypeSpecifier()
  | AssignmentExpression()
}
536 | |||
/* Any literal token. */
void Constant() :
{}
{
    <INTEGER_LITERAL>
  | <FLOATING_POINT_LITERAL>
  | <CHARACTER_LITERAL>
  | <STRING_LITERAL>
}
541 | |||