Progress on k&r syntax checker

master
Tibor Bizjak 2023-06-02 13:16:37 +02:00
parent 5304a59ba7
commit 4a0efd617c
1 changed files with 143 additions and 179 deletions

View File

@ -1,224 +1,197 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#define TABSTOP 8 #define MAX_NEST 100 // max ([{ nesting
#define BUFFSIZE 100
#define ESCAPED "0bnt\\"
#define LINEN 5
#define ERRMSG_SIZE 100 #define ERRORS \
#define ERR_ESC_EOF "expected escape character, got EOF" X(ERR_COMMENT_TERM, "non-terminating comment") \
#define ERR_ESC_INV "invalid escape character" X(ERR_STRING_EXP, "expected string termination") \
#define ERR_STR_TRM "expected string termination" X(ERR_CHAR_EMPTY, "empty character constant") \
#define ERR_CHAR_EOF "expected character constant, got EOF" X(ERR_CHAR_EXP, "expected character in character constant") \
#define ERR_CHAR_TRM "expected character constant termination" X(ERR_CHAR_TERM, "expected character constant termination") \
#define ERR_COMM_TRM "expected multi-line comment termination" X(ERR_ESCAPE_INV, "invalid escape sequence") \
X(ERR_HEX_INV, "invalid hex number") \
X(ERR_OCT_INV, "invalid octal number")
/* global input line and character counting */ #define X(err, msg) err,
int ln = 1; enum { OK, ERRORS };
int cn = 0; #undef X
int ln_getchar()
{
int c;
if ((c = getchar()) == '\n' || c == EOF) {
cn = 0;
++ln;
} else
++cn;
return c;
}
#define getchar() ln_getchar()
int print_context(); #define BRACKETS \
X('(', ')') \
X('[', ']') \
X('{', '}')
int check_escaped(char errmsg[], int d); #define QUOTES \
int check_multiline(char errmsg[]); X('"', check_string) \
int check_char_const(char errmsg[]); X('\'', check_char_const)
int check_string(char errmsg[]);
int getcharb(); #define INVALID_CHAR_CONSTS \
int ithchar(int i); X('\a') X('\b') X('\f') X('\n') \
int prevchar(); X('\r') X('\t') X('\v') X('\'')
int print_buff(int n);
#define ESCAPE_SEQS \
X('a') X('b') X('f') X('n') X('r') \
X('t') X('v') X('\\') X('?') X('\'')
int check();
int check_comment();
int check_string();
int check_char_const();
int check_escape_seq();
int check_oct();
int check_hex();
void print_error(int err);
void print_line(); void print_line();
void ignore_line(); void ignore_line();
void copy(char from[], char to[]);
/* check c program on input for unbalanced parentheses, /* check c program on input for unbalanced parentheses,
* string termination, comment termination and char constant * string termination, comment termination and char constant
* validity */ * validity */
int main() int main()
{ {
int i, c; print_error(check());
int err = 0;
char errmsg[ERRMSG_SIZE];
while (!err && (c = getcharb()) != EOF) {
if (c == '"' && check_string(errmsg)) {
err = 1;
} else if (c == '\'' && check_char_const(errmsg)) {
err = 2;
} else if (prevchar() == '/' && c == '*' && check_multiline(errmsg)) {
err = 3;
} else if (c == '/' && prevchar() == '/') {
ignore_line();
}
}
if (!err)
return 0;
int li = print_context();
printf("%d:%d: error: %s\n", ln-1, li+1, errmsg);
return 0; return 0;
} }
void print_error(int err)
/* print context of error and arrow pointing to cursor
* position where error was raised */
int print_context()
{ {
int i, li = print_buff(LINEN); printf("error %d-%d: ", 0, 0);
switch (err) {
if (li < 0) { #define X(E, MSG) case E : printf("%s\n", MSG); break;
for (i = li; i < -1; ++i) ERRORS
putchar(' '); #undef X
} else {
print_line();
for (i = li; i > 0; --i)
putchar(' ');
} }
printf("^\n");
return li;
} }
// ---------------------- validation functions ------------------------ // ---------------------- validation functions ------------------------
/* main validation function */
int check()
{
int c, err = 0;
int bi = 0;
char brackets[MAX_NEST];
while (!err && (c = getchar()) != EOF)
switch (c) {
case '/':
if ((c = getchar()) == '/') {
ignore_line();
break;
} else if (c == '*') {
err = check_comment();
break;
}
#define X(Q, CHECK) \
case Q: \
err = CHECK(); \
break;
QUOTES
#undef X
#define X(ERR, L, R) \
case L: \
if (bi < MAX_NEST) \
brackets[i] = L; \
++bi; \
break; \
case R: \
if (bi <= MAX_NEST && brackets[bi-1] != L) \
err = ERR; \
--bi; \
break;
BRACKETS
#undef X
}
if (!err && bi)
switch (brackets[bi-1]) {
#define X(ERR, L, R) case L: err = ERR; break;
BRACKETS
#undef X
}
return err;
}
/* check multiline comment termination */ /* check multiline comment termination */
int check_multiline(char errmsg[]) int check_comment()
{
int c = getchar();
while (c != EOF)
if (c == '/') {
if ((c = getchar()) == '*')
return OK;
} else
c = getchar();
return ERR_COMMENT_TERM;
}
/* check string termination */
int check_string()
{ {
int c; int c;
while ((c = getcharb()) != EOF) while ((c = getchar()) != EOF && c != '\n') {
if (prevchar() == '*' && c == '/') if (c == '"')
return 0; return OK;
copy(ERR_COMM_TRM, errmsg); else if (c == '\\' && (c = check_escape_seq()))
return 1; return c;
}
return ERR_STRING_TERM;
} }
/* check char constant validity and termination */ /* check char constant validity and termination */
int check_char_const(char errmsg[]) int check_char_const(char errmsg[])
{ {
int c; int err;
if ((c = getcharb()) == EOF) { switch (getchar()) {
copy(ERR_CHAR_EOF, errmsg); case EOF:
return 1; return ERR_CHAR_EXP;
case '\n':
return ERR_CHAR_EXP;
case '\'':
return ERR_CHAR_EMPTY;
case '\\':
if ((err = check_escape_seq()))
return err;
} }
if (c == '\\' && check_escaped(errmsg, '\'')) if (getchar() != '\'')
return 1; return ERR_CHAR_TERM;
if ((c = getcharb()) == EOF || c != '\'') { return OK;
copy(ERR_CHAR_TRM, errmsg);
return 1;
}
return 0;
} }
/* check string termination */ /* check if \(next input char) makes a valid escape char */
int check_string(char errmsg[]) int check_escape_seq()
{ {
int c; int c;
while ((c = getcharb()) != EOF && c != '\n') {
if (c == '\\' && check_escaped(errmsg, '"')) switch (c = getchar()) {
return 1; #define X(C) case C: return 0;
else if (c == '"') ESCAPE_SEQS
return 0; #undef X
case 'x':
return check_hex();
} }
copy(ERR_STR_TRM, errmsg); if ('0' <= c && c < '8')
return 1; return check_oct();
return ERR_ESCAPE_INV;
} }
/* check if \(next input char) makes a valid escape char int check_hex()
* \'d' is considered valid, pass negative 'd' to make optional */
int check_escaped(char errmsg[], int d)
{
int i, c = getcharb();
if (c == EOF) {
copy(ERR_ESC_EOF, errmsg);
return 1;
}
if (c == d)
return 0;
for (i = 0; i < strlen(ESCAPED); ++i)
if (ESCAPED[i] == c)
return 0;
copy(ERR_ESC_INV, errmsg);
return 1;
}
// ------------------------- buffered input -------------------------
int ibi = 0;
int iblen = 0;
int ibtrunc = 0
/* cyclic input stream buffer
* 'ibi' is the current buffer index
* 'iblen' is the buffer length
* 'ibtrunc' is the number of characters truncated
* from first line in buffer */
char in_buff[BUFFSIZE];
/* get char from input, put it in buffer and return it
* otherwise same as getchar */
int getcharb()
{ {
int c; int c;
if ((c = getchar()) == EOF) if ((c = getchar()))
return EOF;
ibtrunc = (iblen == BUFFSIZE && in_buff[ibi] == '\n') ? 0 : ibtrunc + 1;
in_buff[ibi] = c;
++ibi;
if (iblen < BUFFSIZE)
++iblen;
else if (ibi >= BUFFSIZE)
ibi = 0;
return c;
} }
/* fetch the character 'i' positions after 'ibi' in the cyclic buffer,
 * wrapping around once when that position runs past 'iblen'
 * 'i' is not validated */
int ithchar(int i)
{
    int pos = ibi + i;

    if (pos >= iblen)
        pos -= iblen;   /* wrap back to the front of the buffer */
    return in_buff[pos];
}
/* second most recently buffered input character,
 * or -1 when fewer than two characters are buffered */
int prevchar()
{
    if (iblen <= 1)
        return -1;
    return ithchar(iblen - 2);
}
/* return the (relative) index of the start of relevant context in buffer
 *
 * NOTE: the search that should use 'n' is not implemented yet. Until it
 * is, 0 is returned so that callers receive a defined value; flowing off
 * the end of a non-void function and using the result is undefined.
 * TODO: implement the context search */
int context_start(int n)
{
    (void)n;    /* unused until the search is implemented */
    return 0;
}
// ----------------------- utility functions -------------------------- // ----------------------- utility functions --------------------------
/* ignore line of input */ /* ignore line of input */
void ignore_line() void ignore_line()
{ {
int c; int c;
while ((c = getcharb()) != EOF && c != '\n') while ((c = getchar()) != EOF && c != '\n')
; ;
} }
@ -231,12 +204,3 @@ void print_line()
putchar('\n'); putchar('\n');
} }
/* copy the string 'from' into 'to', terminating '\0' included
 * 'to' must be large enough */
void copy(char from[], char to[])
{
    char *src = from;
    char *dst = to;

    /* the assignment yields the copied character, so the loop stops
     * right after the terminator has been written */
    while ((*dst++ = *src++) != '\0')
        ;
}