/* clexer -- show lexical structure of a C file.
Copyright (C) 2010 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
%option noyywrap
%top {
/* Pull in the build configuration, when there is one, before any other
   header gets included by the generated scanner.  */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
}
%{
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Output states driven by main().  The output macros print nothing
   while the state is s_init; in every other state they print.  */
enum state
  {
    s_init = 0,             /* start state in snarfer mode; output is off */
    s_multiline = 1,        /* entered on `{' right after a cookie */
    s_cookie_multiline = 2, /* entered on a cookie seen in s_multiline */
    s_cookie = 3            /* entered on a cookie seen in s_init; also the
                               fixed state when not in snarfer mode */
  };
/* Token codes returned by yylex() to main().  Only the cookie and the
   two braces are told apart; everything else is a plain t_token.  */
enum token
  {
    t_token = 256,  /* any token without a code of its own */
    t_open = 257,   /* `{' */
    t_close = 258,  /* `}' */
    t_cookie = 259  /* snarf cookie */
  };
/* Program name (argv[0]); used as the prefix of diagnostics in main().  */
const char *progname;
/* Name of the input file; main() sets it to "stdin" when no file (or "-")
   is given.  Also used in the stray-character diagnostic.  */
char *infilename;
/* Name of the output file taken from -o/--output; when it stays unset,
   main() writes to stdout.  */
char *outfilename;
/* Non-zero when -s/--snarfer/--guile-snarfer was given.  */
int guile_snarfer_mode;
/* Current output state; the output macros print nothing in s_init.  */
enum state state;
/* Stream that receives the token listing.  */
FILE *outfile;
/* Number of the input line being scanned; counting starts at 1.  */
unsigned line_no = 1;

/* Add to line_no the number of newline characters found in TEXT,
   a NUL-terminated string.  */
static void
advance_line (const char *text)
{
  const char *p = text;

  while (*p != '\0')
    {
      if (*p == '\n')
        ++line_no;
      ++p;
    }
}
/* Print TEXT on a line of its own, unless the state is s_init.  */
#define outstr(text)                                    \
  do                                                    \
    {                                                   \
      if (state != s_init)                              \
        fprintf (outfile, "%s\n", (text));              \
    }                                                   \
  while (0)

/* Print the pair (TNAME . "TVAL") on a line of its own, unless the
   state is s_init.  TVAL is printed as is, without any escaping.  */
#define outtok(tname,tval)                              \
  do                                                    \
    {                                                   \
      if (state != s_init)                              \
        fprintf (outfile, "(%s . \"%s\")\n",            \
                 (tname), (tval));                      \
    }                                                   \
  while (0)

/* Scanner actions: print the token name T (with the text S for RETTXT)
   and return the generic token code to the caller of yylex().  */
#define RETTOK(t)                                       \
  {                                                     \
    outstr (#t);                                        \
    return t_token;                                     \
  }
#define RETTXT(t,s)                                     \
  {                                                     \
    outtok (#t, s);                                     \
    return t_token;                                     \
  }
%}
/* Named patterns used by the rules: decimal, hexadecimal and octal
   digits, identifiers, integer and floating suffixes, exponent part and
   horizontal whitespace.  */
N [0-9]
X [0-9a-fA-F]
O [0-7]
ID [a-zA-Z_][a-zA-Z_0-9]*
/* Integer suffix.  An unsigned marker may be combined with a long or
   long long marker in either order, so that a constant such as 10UL
   stays a single token instead of being split into 10U and L.  */
IQ ([uU]([lL]{1,2})?|[lL]{1,2}[uU]?)
FQ [fFlL]
E [Ee][+-]?{N}+
WS [ \t\v\f]
%%
"/*"([^*]|\*+[^*/])*\*+"/" { /* A C comment.  The body is any run of
                                non-star characters, or stars followed
                                by something other than a star or slash,
                                so the match always stops at the first
                                closing marker, even when the comment
                                ends in several stars.  */
                             advance_line (yytext);
                             outtok ("comment", yytext); }
#.*\n { /* text from `#' through the end of the line: one "hash" token */ outstr ("hash"); line_no++; }
\n { /* line breaks are reported and counted */ outstr ("eol"); line_no++; }
{WS}+ ; /* horizontal whitespace is discarded */
\\ ; /* a lone backslash is discarded */
{ID} RETTXT (id, yytext); /* identifiers; no separate keyword rules are visible here */
0[xX]{X}+{IQ}? RETTXT (int_hex, yytext + 2); /* printed without the 0x prefix */
0{O}+{IQ}? RETTXT (int_oct, yytext + 1); /* printed without the leading 0 */
{N}+{IQ}? RETTXT (int_dec, yytext); /* also matches a lone 0 */
L?\'[^\\']\' |
L?\'\\[^0xX]\' |
L?\'\\{O}{1,3}\' |
L?\'\\[xX]{X}{1,2}\' { /* Character constants, optionally wide: a plain
                          character, a simple escape, an octal escape
                          or a hexadecimal escape.  Each alternative
                          ends with the closing quote, so that the quote
                          is never left over as a stray character.  */
                       RETTXT (char, yytext); }
{N}+{E}{FQ}? |
{N}*"."{N}+({E})?{FQ}? |
{N}+"."{N}*({E})?{FQ}? { /* Floating constants: digits with an exponent,
                            or digits around a decimal point with an
                            optional exponent; a float suffix may
                            follow.  The first form needs the {E}
                            pattern, not a literal letter E.  */
                         RETTXT (flo_dec, yytext); }
L?\"([^\\\"]|\\.|\\\n)*\" { /* string literal: printed as is; nothing is returned to main */ outstr (yytext); advance_line (yytext); }
"^"{WS}*"^" { /* snarf cookie: printed whatever the current state is */ fputs ("snarf_cookie\n", outfile);
return t_cookie; }
"{" { /* braces return codes of their own, which main inspects */ outstr ("brace_open"); return t_open; }
"}" { outstr ("brace_close"); return t_close; }
"," RETTOK (comma); /* from here on: each operator is printed by name and returned as t_token */
":" RETTOK (colon);
"=" RETTOK (assign);
"(" RETTOK (paren_open);
")" RETTOK (paren_close);
"[" RETTOK (bracket_open);
"]" RETTOK (bracket_close);
"&" RETTOK (amp);
"^" RETTOK (caret);
"|" RETTOK (pipe);
"?" RETTOK (question);
"!" RETTOK (bang);
"~" RETTOK (tilde);
"-" RETTOK (minus);
"+" RETTOK (plus);
"*" RETTOK (star);
"/" RETTOK (slash);
"%" RETTOK (percent);
"..." RETTOK (ellipsis);
"->" RETTOK (ptr);
"." RETTOK (dot);
"*=" RETTOK (mul_assign);
"/=" RETTOK (div_assign);
"%=" RETTOK (mod_assign);
"+=" RETTOK (add_assign);
"-=" RETTOK (sub_assign);
"<<=" RETTOK (shift_left_assign);
">>=" RETTOK (shift_right_assign);
"&=" RETTOK (logand_assign);
"|=" RETTOK (logior_assign);
"^=" RETTOK (logxor_assign);
"||" RETTOK (or);
"&&" RETTOK (and);
"==" RETTOK (eq);
"!=" RETTOK (neq);
">=" RETTOK (ge);
">>" RETTOK (right_shift);
">" RETTOK (gt);
"<=" RETTOK (le);
"<<" RETTOK (left_shift);
"<" RETTOK (lt);
"++" RETTOK (inc);
"--" RETTOK (dec);
";" RETTOK (semicolon);
. { /* Anything no other rule accepts.  Report it on stderr with the
       input file name and line number: printable ASCII characters are
       shown as is, anything else as a three-digit octal escape.  Both
       messages end with a newline.  The character still counts as a
       token for the caller.  */
    unsigned char ch = (unsigned char) yytext[0];

    if (isascii (ch) && isprint (ch))
      fprintf (stderr, "%s:%u: stray character %c\n",
               infilename, line_no, ch);
    else
      fprintf (stderr, "%s:%u: stray character \\%03o\n",
               infilename, line_no, ch);
    return t_token; }
%%
/* Print the command line summary on standard output.  */
void
usage()
{
  static const char help_text[] =
    "usage: clexer [OPTIONS] [FILE]\n"
    "\n"
    "Show lexical structure of a C source. When given the --snarfer option,\n"
    "display only lexical tokens produced by Guile snarfer. The output may be\n"
    "piped to the `guile-tools snarf-check-and-output-texi' command.\n"
    "\n"
    "OPTIONS are:\n"
    "-s, --snarfer filter out Guile docstrings\n"
    "-o, --output=FILE write output to FILE\n"
    "-h, --help show this help summary\n"
    "\n";

  fputs (help_text, stdout);
  /* NOTE(review): the address appears to be missing from this message;
     confirm the intended bug-report address.  */
  fputs ("Report bugs to \n", stdout);
}
/* Parse the command line, open the input and output streams and run the
   scanner until yylex() stops returning a positive token code.

   Without --snarfer the state is pinned to s_cookie, so every token is
   printed.  With --snarfer the scan starts in s_init (nothing printed)
   and the cookie and brace tokens returned by yylex() drive the state
   machine below, which switches the output on and off.

   ARGV[0] is used as the program name in diagnostics.  The process
   exits with status 0 on success and 1 on a usage or I/O error.  */
int
main (int argc, char **argv)
{
  int i;
  enum token tok, last_tok;

  progname = argv[0];
  for (i = 1; i < argc; i++)
    {
      if (!strcmp (argv[i], "-s") ||
          !strcmp (argv[i], "--snarfer") ||
          !strcmp (argv[i], "--guile-snarfer"))
        {
          guile_snarfer_mode = 1;
        }
      else if (!strcmp (argv[i], "-h") ||
               !strcmp (argv[i], "--help"))
        {
          usage ();
          exit (0);
        }
#if 0
      else if (!strcmp (argv[i], "-v") ||
               !strcmp (argv[i], "--version"))
        {
          version ();
          exit (0);
        }
#endif
      else if (!strncmp (argv[i], "-o", 2))
        {
          /* Both `-oFILE' and `-o FILE' are accepted.  */
          if (argv[i][2])
            outfilename = argv[i] + 2;
          else if (++i < argc)
            outfilename = argv[i];
          else
            {
              fprintf (stderr, "%s: option '-o' requires an argument\n",
                       progname);
              exit (1);
            }
        }
      else if (!strncmp (argv[i], "--output=", 9))
        outfilename = argv[i] + 9;
      else if (!strcmp (argv[i], "--output"))
        {
          /* The test below already moves I to the argument; advancing
             it a second time would skip the file name.  */
          if (++i < argc)
            outfilename = argv[i];
          else
            {
              fprintf (stderr, "%s: option '--output' requires an argument\n",
                       progname);
              exit (1);
            }
        }
      else if (!strcmp (argv[i], "--"))
        {
          /* End of options: the next argument, if any, is the input.  */
          if (++i < argc)
            infilename = argv[i];
          break;
        }
      else if (argv[i][0] == '-')
        {
          fprintf (stderr, "%s: unknown option: %s\n",
                   progname, argv[i]);
          exit (1);
        }
      else
        {
          /* When several file names are given, the last one wins.  */
          infilename = argv[i];
        }
    }

  /* No file name, or "-", leaves the scanner on its default input.  */
  if (infilename && strcmp (infilename, "-"))
    {
      FILE *fp = fopen (infilename, "r");
      if (!fp)
        {
          fprintf (stderr, "%s: cannot open file `%s': %s\n",
                   progname, infilename, strerror (errno));
          exit (1);
        }
      yyrestart (fp);
    }
  else
    infilename = "stdin";

  if (outfilename)
    {
      outfile = fopen (outfilename, "w");
      if (!outfile)
        {
          fprintf (stderr, "%s: cannot open file `%s': %s\n",
                   progname, outfilename, strerror (errno));
          exit (1);
        }
    }
  else
    outfile = stdout;

  state = guile_snarfer_mode ? s_init : s_cookie;
  last_tok = t_token;
  while ((tok = yylex ()) > 0)
    {
      /* Outside snarfer mode the state never changes.  */
      if (!guile_snarfer_mode)
        continue;

      switch (tok)
        {
        case t_token:
          break;

        case t_open:
          /* cookie `{' opens a multi-line section.  */
          if (last_tok == t_cookie && state == s_cookie)
            state = s_multiline;
          break;

        case t_close:
          /* cookie `}' closes it and switches the output off.  */
          if (last_tok == t_cookie && state == s_cookie_multiline)
            state = s_init;
          break;

        case t_cookie:
          switch (state)
            {
            case s_init:
              state = s_cookie;
              break;

            case s_multiline:
            case s_cookie_multiline:
              state = s_cookie_multiline;
              break;

            case s_cookie:
              state = s_init;
              break;
            }
          break;
        }
      last_tok = tok;
    }

  /* fclose flushes buffered output, so a failure here means the listing
     may be incomplete.  */
  if (fclose (outfile))
    {
      fprintf (stderr, "%s: error writing to `%s': %s\n",
               progname, outfilename ? outfilename : "stdout",
               strerror (errno));
      exit (1);
    }
  exit (0);
}