/* clexer -- show lexical structure of a C file.
   Copyright (C) 2010 Sergey Poznyakoff

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

%option noyywrap

%top {
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
}

%{
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Output state.  The output macros below print only while the state is
   not s_init; main() moves between the states according to the tokens
   returned by yylex().  */
enum state
  {
    s_init,               /* output suppressed */
    s_multiline,          /* entered on "{" directly after a cookie */
    s_cookie_multiline,   /* entered on a cookie seen in s_multiline */
    s_cookie              /* entered on a cookie seen in s_init */
  };

/* Values returned by yylex().  They start above 255; main() stops
   reading when yylex() returns a value that is not positive.  */
enum token
  {
    t_token = 256,        /* any token without special meaning for main() */
    t_open,               /* "{" */
    t_close,              /* "}" */
    t_cookie              /* two carets, optionally separated by whitespace */
  };

const char *progname;     /* argv[0], used as a prefix in diagnostics */
char *infilename;         /* input file name, or "stdin" */
char *outfilename;        /* output file name; NULL means standard output */
int guile_snarfer_mode;   /* non-zero when -s/--snarfer was given */
enum state state;         /* current output state */
FILE *outfile;            /* stream all token output is written to */
unsigned line_no = 1;     /* current input line, for diagnostics */

/* Add the number of newline characters found in TEXT to line_no.  */
static void
advance_line (const char *text)
{
  for (; *text; text++)
    if (*text == '\n')
      line_no++;
}

/* Print TEXT on a line of its own, unless output is suppressed.  */
#define outstr(text)                            \
  do                                            \
    {                                           \
      if (state != s_init)                      \
        fprintf (outfile, "%s\n", (text));      \
    }                                           \
  while (0)

/* Print the pair (TNAME . "TVAL"), unless output is suppressed.
   TVAL is printed as is; quotes within it are not escaped.  */
#define outtok(tname,tval)                      \
  do                                            \
    {                                           \
      if (state != s_init)                      \
        fprintf (outfile, "(%s . \"%s\")\n",    \
                 (tname), (tval));              \
    }                                           \
  while (0)

/* Print the token name T and return t_token from yylex().  */
#define RETTOK(t) { outstr (#t); return t_token; }
/* Print the token name T paired with text S and return t_token.  */
#define RETTXT(t,s) { outtok (#t, s); return t_token; }
%}

N  [0-9]
X  [0-9a-fA-F]
O  [0-7]
ID [a-zA-Z_][a-zA-Z_0-9]*
IQ (l|L|ll|LL|lL|Ll|u|U)
FQ [fFlL]
E  [Ee][+-]?{N}+
WS [ \t\v\f]

%%
        /* C comment.  The pattern accepts any run of asterisks before
           the closing slash, so a comment ending in several asterisks
           is terminated where it should be.  */
"/*"([^*]|\*+[^*/])*\*+"/"  {
                  advance_line (yytext);
                  outtok ("comment", yytext);
                }
#.*\n           { outstr ("hash"); line_no++; }
\n              { outstr ("eol"); line_no++; }
{WS}+           ;
\\              ;
{ID}            RETTXT (id, yytext);
0[xX]{X}+{IQ}?  RETTXT (int_hex, yytext + 2);
0{O}+{IQ}?      RETTXT (int_oct, yytext + 1);
{N}+{IQ}?       RETTXT (int_dec, yytext);
L?\'[^\\']\'            |
L?\'\\[^0xX]\'          |
L?\'\\{O}{1,3}\'        |
L?\'\\[xX]{X}{1,2}\'    RETTXT (char, yytext);
{N}+{E}{FQ}?                    |
{N}*"."{N}+({E})?{FQ}?          |
{N}+"."{N}*({E})?{FQ}?          RETTXT (flo_dec, yytext);
L?\"([^\\\"]|\\.|\\\n)*\"       {
                  outstr (yytext);
                  advance_line (yytext);
                }
"^"{WS}*"^"     {
                  /* Written unconditionally, even while other output
                     is suppressed.  */
                  fputs ("snarf_cookie\n", outfile);
                  return t_cookie;
                }
"{"             { outstr ("brace_open"); return t_open; }
"}"             { outstr ("brace_close"); return t_close; }
","             RETTOK (comma);
":"             RETTOK (colon);
"="             RETTOK (assign);
"("             RETTOK (paren_open);
")"             RETTOK (paren_close);
"["             RETTOK (bracket_open);
"]"             RETTOK (bracket_close);
"&"             RETTOK (amp);
"^"             RETTOK (caret);
"|"             RETTOK (pipe);
"?"             RETTOK (question);
"!"             RETTOK (bang);
"~"             RETTOK (tilde);
"-"             RETTOK (minus);
"+"             RETTOK (plus);
"*"             RETTOK (star);
"/"             RETTOK (slash);
"%"             RETTOK (percent);
"..."           RETTOK (ellipsis);
"->"            RETTOK (ptr);
"."             RETTOK (dot);
"*="            RETTOK (mul_assign);
"/="            RETTOK (div_assign);
"%="            RETTOK (mod_assign);
"+="            RETTOK (add_assign);
"-="            RETTOK (sub_assign);
"<<="           RETTOK (shift_left_assign);
">>="           RETTOK (shift_right_assign);
"&="            RETTOK (logand_assign);
"|="            RETTOK (logior_assign);
"^="            RETTOK (logxor_assign);
"||"            RETTOK (or);
"&&"            RETTOK (and);
"=="            RETTOK (eq);
"!="            RETTOK (neq);
">="            RETTOK (ge);
">>"            RETTOK (right_shift);
">"             RETTOK (gt);
"<="            RETTOK (le);
"<<"            RETTOK (left_shift);
"<"             RETTOK (lt);
"++"            RETTOK (inc);
"--"            RETTOK (dec);
";"             RETTOK (semicolon);
.               {
                  /* Convert to unsigned char before calling a <ctype.h>
                     function: plain char may be negative.  */
                  unsigned char c = (unsigned char) yytext[0];

                  if (c < 0x80 && isprint (c))
                    fprintf (stderr, "%s:%u: stray character %c\n",
                             infilename, line_no, c);
                  else
                    fprintf (stderr, "%s:%u: stray character \\%03o\n",
                             infilename, line_no, (unsigned) c);
                  return t_token;
                }
%%

/* Print a usage summary on standard output.  */
void
usage (void)
{
  printf ("usage: clexer [OPTIONS] [FILE]\n"
          "\n"
          "Show lexical structure of a C source. When given the --snarfer option,\n"
          "display only lexical tokens produced by Guile snarfer. The output may be\n"
          "piped to the `guile-tools snarf-check-and-output-texi' command.\n"
          "\n"
          "OPTIONS are:\n"
          "  -s, --snarfer           filter out Guile docstrings\n"
          "  -o, --output=FILE       write output to FILE\n"
          "  -h, --help              show this help summary\n"
          "\n");
  /* NOTE(review): the bug-report address appears to be missing from this
     message -- confirm against the project's contact address.  */
  printf ("Report bugs to \n");
}

/* Parse the command line, open the input and output streams and run the
   scanner over the input.

   Without -s the state is fixed at s_cookie, so every token is printed.
   With -s the state starts at s_init (output suppressed) and is updated
   after each token returned by yylex().  Rules that do not return, such
   as those for comments, preprocessor lines, newlines and string
   literals, therefore never affect the state or last_tok.

   Exits with status 0 on success and 1 on a usage, open or write error.  */
int
main (int argc, char **argv)
{
  int i;
  int write_failed;
  enum token tok, last_tok;

  progname = argv[0];

  for (i = 1; i < argc; i++)
    {
      if (!strcmp (argv[i], "-s")
          || !strcmp (argv[i], "--snarfer")
          || !strcmp (argv[i], "--guile-snarfer"))
        {
          guile_snarfer_mode = 1;
        }
      else if (!strcmp (argv[i], "-h") || !strcmp (argv[i], "--help"))
        {
          usage ();
          exit (0);
        }
#if 0
      else if (!strcmp (argv[i], "-v") || !strcmp (argv[i], "--version"))
        {
          version ();
          exit (0);
        }
#endif
      else if (!strncmp (argv[i], "-o", 2))
        {
          /* Accept both "-oFILE" and "-o FILE".  */
          if (argv[i][2])
            outfilename = argv[i] + 2;
          else if (++i < argc)
            outfilename = argv[i];
          else
            {
              fprintf (stderr, "%s: option '-o' requires an argument\n",
                       progname);
              exit (1);
            }
        }
      else if (!strncmp (argv[i], "--output=", 9))
        outfilename = argv[i] + 9;
      else if (!strcmp (argv[i], "--output"))
        {
          /* The index is advanced exactly once, so the word following
             "--output" is the one taken as the file name.  */
          if (++i < argc)
            outfilename = argv[i];
          else
            {
              fprintf (stderr,
                       "%s: option '--output' requires an argument\n",
                       progname);
              exit (1);
            }
        }
      else if (!strcmp (argv[i], "--"))
        {
          /* End of options: the next word, if any, is the input file.  */
          if (++i < argc)
            infilename = argv[i];
          break;
        }
      else if (argv[i][0] == '-')
        {
          fprintf (stderr, "%s: unknown option: %s\n", progname, argv[i]);
          exit (1);
        }
      else
        {
          infilename = argv[i];
        }
    }

  /* No file name, or "-", means the scanner keeps its default input.  */
  if (infilename && strcmp (infilename, "-"))
    {
      FILE *fp = fopen (infilename, "r");
      if (!fp)
        {
          fprintf (stderr, "%s: cannot open file `%s': %s\n",
                   progname, infilename, strerror (errno));
          exit (1);
        }
      yyrestart (fp);
    }
  else
    infilename = "stdin";

  if (outfilename)
    {
      outfile = fopen (outfilename, "w");
      if (!outfile)
        {
          fprintf (stderr, "%s: cannot open file `%s': %s\n",
                   progname, outfilename, strerror (errno));
          exit (1);
        }
    }
  else
    outfile = stdout;

  state = guile_snarfer_mode ? s_init : s_cookie;
  last_tok = t_token;

  while ((tok = yylex ()) > 0)
    {
      if (!guile_snarfer_mode)
        continue;

      switch (tok)
        {
        case t_token:
          break;

        case t_open:
          /* "{" immediately after a cookie.  */
          if (last_tok == t_cookie && state == s_cookie)
            state = s_multiline;
          break;

        case t_close:
          /* "}" immediately after a cookie.  */
          if (last_tok == t_cookie && state == s_cookie_multiline)
            state = s_init;
          break;

        case t_cookie:
          switch (state)
            {
            case s_init:
              state = s_cookie;
              break;

            case s_multiline:
            case s_cookie_multiline:
              state = s_cookie_multiline;
              break;

            case s_cookie:
              state = s_init;
              break;
            }
          break;
        }
      last_tok = tok;
    }

  /* fclose flushes buffered output, so a failure here means that the
     output is incomplete.  */
  write_failed = ferror (outfile);
  if (fclose (outfile) != 0)
    write_failed = 1;
  if (write_failed)
    {
      fprintf (stderr, "%s: write error on `%s'\n",
               progname, outfilename ? outfilename : "stdout");
      exit (1);
    }
  exit (0);
}