Progress on k&r syntax checker

master
Tibor Bizjak 2023-06-02 13:16:37 +02:00
parent 5304a59ba7
commit 4a0efd617c
1 changed files with 143 additions and 179 deletions

View File

@ -1,224 +1,197 @@
#include <stdio.h>
#include <string.h>
#define TABSTOP 8
/* capacity of the cyclic input buffer in_buff */
#define BUFFSIZE 100
/* characters check_escaped() accepts after a backslash */
#define ESCAPED "0bnt\\"
/* argument passed to print_buff() when the error context is printed */
#define LINEN 5
#define MAX_NEST 100 // max ([{ nesting; size of the bracket stack in check()
/* size of the errmsg buffer declared in main() */
#define ERRMSG_SIZE 100
/* message strings that the errmsg-based checkers copy() into errmsg */
#define ERR_ESC_EOF "expected escape character, got EOF"
#define ERR_ESC_INV "invalid escape character"
#define ERR_STR_TRM "expected string termination"
#define ERR_CHAR_EOF "expected character constant, got EOF"
#define ERR_CHAR_TRM "expected character constant termination"
#define ERR_COMM_TRM "expected multi-line comment termination"
/* X-macro list of (error code, message) pairs.
 * Define X(err, msg) before expanding ERRORS: the enum below keeps the
 * first column as error codes, print_error() turns each pair into a
 * switch case that prints the message. */
#define ERRORS \
X(ERR_COMMENT_TERM, "non-terminating comment") \
X(ERR_STRING_EXP, "expected string termination") \
X(ERR_CHAR_EMPTY, "empty character constant") \
X(ERR_CHAR_EXP, "expected character in character constant") \
X(ERR_CHAR_TERM, "expected character constant termination") \
X(ERR_ESCAPE_INV, "invalid escape sequence") \
X(ERR_HEX_INV, "invalid hex number") \
X(ERR_OCT_INV, "invalid octal number")
/* position counters for the input stream:
 * 'ln' is the current line number (starts at 1)
 * 'cn' is the number of characters read on the current line */
int ln = 1;
int cn = 0;

/* read one character from standard input and update 'ln' and 'cn'
 * a newline or EOF resets 'cn' and advances 'ln', any other
 * character advances 'cn'; the character read is returned unchanged */
int ln_getchar()
{
    int ch = getchar();

    if (ch != '\n' && ch != EOF) {
        ++cn;
        return ch;
    }
    cn = 0;
    ++ln;
    return ch;
}
/* from here on every getchar() call in this file expands to ln_getchar() */
#define getchar() ln_getchar()
/* error codes: OK is 0, followed by one constant per ERRORS entry */
#define X(err, msg) err,
enum { OK, ERRORS };
#undef X
int print_context();
/* X-macro list of (opening, closing) bracket pairs
 * NOTE(review): check() expands BRACKETS with a three-parameter
 * X(ERR, L, R) while the entries here carry two arguments - confirm
 * which shape is intended */
#define BRACKETS \
X('(', ')') \
X('[', ']') \
X('{', '}')
/* checkers that report through an errmsg buffer */
int check_escaped(char errmsg[], int d);
int check_multiline(char errmsg[]);
int check_char_const(char errmsg[]);
int check_string(char errmsg[]);
/* X-macro list of (quote character, checker function) pairs;
 * check() turns each entry into a case that calls the checker */
#define QUOTES \
X('"', check_string) \
X('\'', check_char_const)
/* buffered input helpers */
int getcharb();
int ithchar(int i);
int prevchar();
int print_buff(int n);
/* single-argument X-macro list of character values; it is not expanded
 * anywhere in the visible part of this file */
#define INVALID_CHAR_CONSTS \
X('\a') X('\b') X('\f') X('\n') \
X('\r') X('\t') X('\v') X('\'')
/* single-argument X-macro list of the characters check_escape_seq()
 * accepts directly after a backslash */
#define ESCAPE_SEQS \
X('a') X('b') X('f') X('n') X('r') \
X('t') X('v') X('\\') X('?') X('\'')
/* checkers that return an error code (OK or one of ERRORS)
 * NOTE(review): check_string and check_char_const are declared here with
 * an empty parameter list and above with a char errmsg[] parameter */
int check();
int check_comment();
int check_string();
int check_char_const();
int check_escape_seq();
int check_oct();
int check_hex();
void print_error(int err);
void print_line();
void ignore_line();
void copy(char from[], char to[]);
/* check c program on input for unabalanced parantheses
* string termination, comment termination and char constant
* validity */
int main()
{
int i, c;
int err = 0;
char errmsg[ERRMSG_SIZE];
while (!err && (c = getcharb()) != EOF) {
if (c == '"' && check_string(errmsg)) {
err = 1;
} else if (c == '\'' && check_char_const(errmsg)) {
err = 2;
} else if (prevchar() == '/' && c == '*' && check_multiline(errmsg)) {
err = 3;
} else if (c == '/' && prevchar() == '/') {
ignore_line();
}
}
if (!err)
return 0;
int li = print_context();
printf("%d:%d: error: %s\n", ln-1, li+1, errmsg);
print_error(check());
return 0;
}
/* print context of error and arrow pointing to cursor
 * position where error was raised */
/* NOTE(review): two function headers (print_context and print_error)
 * share the single body below. The body mixes the print_buff()/caret
 * logic that ends in 'return li' with a switch on 'err', which is only a
 * parameter of print_error, and its braces do not balance. It looks like
 * both sides of a change were kept - split it back into two functions
 * before building. */
int print_context()
void print_error(int err)
{
int i, li = print_buff(LINEN);
if (li < 0) {
for (i = li; i < -1; ++i)
putchar(' ');
} else {
print_line();
for (i = li; i > 0; --i)
putchar(' ');
/* NOTE(review): both position arguments are the constant 0 */
printf("error %d-%d: ", 0, 0);
switch (err) {
/* one case per ERRORS entry, printing that entry's message */
#define X(E, MSG) case E : printf("%s\n", MSG); break;
ERRORS
#undef X
}
printf("^\n");
return li;
}
// ---------------------- validation functions ------------------------
/* main validation function */
int check()
{
int c, err = 0;
int bi = 0;
char brackets[MAX_NEST];
while (!err && (c = getchar()) != EOF)
switch (c) {
case '/':
if ((c = getchar()) == '/') {
ignore_line();
break;
} else if (c == '*') {
err = check_comment();
break;
}
#define X(Q, CHECK) \
case Q: \
err = CHECK(); \
break;
QUOTES
#undef X
#define X(ERR, L, R) \
case L: \
if (bi < MAX_NEST) \
brackets[i] = L; \
++bi; \
break; \
case R: \
if (bi <= MAX_NEST && brackets[bi-1] != L) \
err = ERR; \
--bi; \
break;
BRACKETS
#undef X
}
if (!err && bi)
switch (brackets[bi-1]) {
#define X(ERR, L, R) case L: err = ERR; break;
BRACKETS
#undef X
}
return err;
}
/* check multiline comment termination */
int check_multiline(char errmsg[])
/* check block comment termination
 * called after the opening slash-star has been read; consumes input up
 * to and including the closing star-slash
 * returns OK when the terminator is found, ERR_COMMENT_TERM on EOF */
int check_comment()
{
    int c = getchar();

    while (c != EOF) {
        if (c != '*') {
            c = getchar();
            continue;
        }
        /* saw '*': a '/' right after it closes the comment; any other
         * character is examined again, so a run of stars followed by
         * '/' still terminates */
        c = getchar();
        if (c == '/')
            return OK;
    }
    return ERR_COMMENT_TERM;
}
/* check string termination */
int check_string()
{
int c;
while ((c = getcharb()) != EOF)
if (prevchar() == '*' && c == '/')
return 0;
copy(ERR_COMM_TRM, errmsg);
return 1;
while ((c = getchar()) != EOF && c != '\n') {
if (c == '"')
return OK;
else if (c == '\\' && (c = check_escape_seq()))
return c;
}
return ERR_STRING_TERM;
}
/* validate a character constant and its termination
 * reads the character after the opening quote, lets check_escape_seq()
 * validate a backslash escape, then expects the closing quote
 * returns OK or the matching error code; 'errmsg' is not used here */
int check_char_const(char errmsg[])
{
    int first = getchar();

    if (first == EOF || first == '\n')
        return ERR_CHAR_EXP;
    if (first == '\'')
        return ERR_CHAR_EMPTY;
    if (first == '\\') {
        int status = check_escape_seq();

        if (status)
            return status;
    }
    return (getchar() == '\'') ? OK : ERR_CHAR_TERM;
}
/* check if \(next input char) starts a valid escape sequence
 * called after the backslash has been read
 * returns OK for the single-character escapes, the result of
 * check_hex() after 'x', the result of check_oct() after an octal
 * digit, and ERR_ESCAPE_INV for anything else */
int check_escape_seq()
{
    int c = getchar();

    switch (c) {
#define X(C) case C: return OK;
    ESCAPE_SEQS
#undef X
    /* a double quote may be escaped as well; it is handled here because
     * ESCAPE_SEQS does not list it (drop this case if it gets added) */
    case '"':
        return OK;
    case 'x':
        return check_hex();
    default:
        break;
    }
    if ('0' <= c && c < '8')
        return check_oct();
    return ERR_ESCAPE_INV;
}
/* check string termination */
/* NOTE(review): two function headers (check_string(char errmsg[]) and
 * check_hex()) share the body below. The loop is string-termination
 * logic built on getcharb(), check_escaped() and errmsg; the final 'if'
 * has no statement, and nothing here inspects hex digits or returns
 * ERR_HEX_INV. check_hex() still needs an implementation - TODO. */
int check_string(char errmsg[])
int check_hex()
{
int c;
/* scan to the end of the line; a valid closing quote returns 0 */
while ((c = getcharb()) != EOF && c != '\n') {
if (c == '\\' && check_escaped(errmsg, '"'))
return 1;
else if (c == '"')
return 0;
}
copy(ERR_STR_TRM, errmsg);
return 1;
if ((c = getchar()))
}
/* validate the input character that follows a backslash
 * the characters in ESCAPED are accepted, and so is 'd'; pass a
 * negative 'd' when there is no extra character to accept
 * returns 0 when the character is valid, otherwise copies a message
 * into 'errmsg' and returns 1 */
int check_escaped(char errmsg[], int d)
{
    int c = getcharb();

    if (c == EOF) {
        copy(ERR_ESC_EOF, errmsg);
        return 1;
    }
    /* strchr() also matches the terminating NUL, so rule that out */
    if (c == d || (c != '\0' && strchr(ESCAPED, c) != NULL))
        return 0;
    copy(ERR_ESC_INV, errmsg);
    return 1;
}
// ------------------------- buffered input -------------------------
int ibi = 0;
int iblen = 0;
int ibtrunc = 0
/* cyclic input stream buffer
* 'ibi' is the current buffer index
* 'iblen' is the buffer length
* 'ibtrunc' is the number of characters truncated
* from first line in buffer */
char in_buff[BUFFSIZE];
/* get char from input, put it in buffer and return it
 * otherwise same as getchar
 * EOF is returned without touching the buffer; once the buffer holds
 * BUFFSIZE characters each new character overwrites the oldest one */
int getcharb()
{
    int c = getchar();

    if (c == EOF)
        return EOF;
    ibtrunc = (iblen == BUFFSIZE && in_buff[ibi] == '\n') ? 0 : ibtrunc + 1;
    in_buff[ibi] = c;
    if (iblen < BUFFSIZE)
        ++iblen;
    /* wrap on every call, including the one that fills the buffer, so
     * 'ibi' is always a valid index into in_buff on the next call */
    if (++ibi >= BUFFSIZE)
        ibi = 0;
    return c;
}
/* return the i-th buffered character, counted from 'ibi'
 * the index wraps around at 'iblen'
 * does not check the validity of i */
int ithchar(int i)
{
    int pos = ibi + i;

    if (pos >= iblen)
        pos -= iblen;
    return in_buff[pos];
}
/* return the second-to-last buffered input character,
 * or -1 when fewer than two characters are buffered */
int prevchar()
{
    if (iblen <= 1)
        return -1;
    return ithchar(iblen - 2);
}
/* return the (relative) index of the start of relevant context in buffer */
/* NOTE(review): the body is empty, so this int function returns no value
 * and 'n' is unused - the implementation is still missing (TODO) */
int context_start(int n)
{
}
// ----------------------- utility functions --------------------------
/* ignore line of input
 * consumes characters with getchar() up to and including the next
 * newline, or up to EOF if the input ends first */
void ignore_line()
{
    int c;

    do {
        c = getchar();
    } while (c != EOF && c != '\n');
}
@ -231,12 +204,3 @@ void print_line()
putchar('\n');
}
/* copy the NUL-terminated string 'from' into 'to', terminator included
 * 'to' must be large enough */
void copy(char from[], char to[])
{
    char *src = from;
    char *dst = to;

    while ((*dst = *src) != '\0') {
        ++src;
        ++dst;
    }
}