/*
 * LANGCOMPILE  - Language compiler
 *
 * Author:
 * Emile van Bergen, emile@evbergen.xs4all.nl
 *
 * Permission to redistribute an original or modified version of this program
 * in source, intermediate or object code form is hereby granted exclusively
 * under the terms of the GNU General Public License, version 2. Please see the
 * file COPYING for details, or refer to http://www.gnu.org/copyleft/gpl.html.
 *
 * History:
 * 2001/04/27 - EvB - Created
 * 2002/04/23 - EvB - Sanitized conversion operators
 * 2005/12/08 - EvB - Big cleanup: reads whole files, ported to evblib,
 * 		      better const name handling, functions
 */

char langcompile_id[] = "LANGCOMPILE - Copyright (C) 2001 Emile van Bergen.";


/*
 * INCLUDES & DEFINES
 */


#include <stdlib.h>	/* For strtol */
#include <string.h>	/* For strncpy */

#include <srvtypes.h>	/* For IFACE */
#include <language.h>
#include <debug.h>

#include <evblib/misc/misc.h>	/* for MIN */
#include <evblib/str/str.h>	/* for STR, pstr, memspn, memcspn, cptstr */
#include <evblib/strio/strio.h>	/* for strtopath, readstr */
#include <evblib/db/list.h>	/* for LIST_ */


/* (Shortened) type definitions for use in the operator table */

#define NON		-1	/* As context: no type
				   	(expecting term or unary prefix op)
			 	   Not used as left term's cvt tgt
				   As right term's cvt tgt: don't recurse
					(unary postfix op)
				   As reccompile req_ctx: pop term from stack */

#define ANY		-2	/* As context: any type 
				   As left term's cvt tgt: don't convert
				   As right cvt tgt: don't convert
				   As reccompile req_ctx: don't convert */

#define INT		MT_INTEGER
#define IPA		MT_IPADDR
#define DAT		MT_DATE
#define STR		MT_STRING

/* Left or right associating */

#define L		0
#define R		1


/*
 * TYPES
 */


/* Standard operator definition */

typedef struct stdop {
	char *name;	/* Operator name */
	int namel;	/* Operator name length */
	int ctx_l;	/* Op applies in this context only */
	int prec;	/* Precedence */
	int assoc;	/* Right associating operator if nonzero */
	int cvt_l;	/* Convert left term to type */
	int cvt_r;	/* Required type for right term */
	int newctx;	/* Set context to this */
	int op;		/* Opcode to emit */
} STDOP;


/*
 * GLOBALS
 */


/* Standard operators tested before tokenizing - no named identifiers or
 * operators may start with these strings, because they will be shadowed */

static STDOP stdops[] = {

	/* Unary prefix operators. All have same precedence and associate 
	 * right, otherwise behaviour would be quite counterintuitive; the
	 * one closest to the term should always be evaluated first. */

	{ pstr("~"),		NON, 28, R,   ANY, INT,   INT, OP_NOT },
	{ pstr("-"),		NON, 28, R,   ANY, INT,   INT, OP_NEG },
	{ pstr("!"),		NON, 28, R,   ANY, ANY,   INT, OP_BOOLNOT },

	/* Binary operators */

	{ pstr("*"),		ANY, 25, L,   INT, INT,   INT, OP_MUL },
	{ pstr("/"),		IPA, 25, L,   ANY, INT,   IPA, OP_CIDRMASK },
	{ pstr("/"),		ANY, 25, L,   INT, INT,   INT, OP_DIV },
	{ pstr("%"),		ANY, 25, L,   INT, INT,   INT, OP_MOD },

	{ pstr("+"),		DAT, 24, L,   ANY, INT,   DAT, OP_ADD },
	{ pstr("+"),		IPA, 24, L,   ANY, INT,   IPA, OP_ADD },
	{ pstr("+"),		ANY, 24, L,   INT, INT,   INT, OP_ADD },
	{ pstr("-"),		ANY, 24, L,   INT, INT,   INT, OP_SUB },

	{ pstr(">>"),		ANY, 23, L,   INT, INT,   INT, OP_SHR },
	{ pstr("<<"),		ANY, 23, L,   INT, INT,   INT, OP_SHL },

	{ pstr("^"),		INT, 22, L,   ANY, INT,   INT, OP_XOR },
	{ pstr("^"),		IPA, 22, L,   ANY, IPA,   IPA, OP_XOR },
	{ pstr("^"),		DAT, 22, L,   ANY, DAT,   DAT, OP_XOR },
	{ pstr("^"),		STR, 22, L,   ANY, STR,   STR, OP_XORSTR },

	{ pstr("&"),		ANY, 21, L,   INT, INT,   INT, OP_AND },

	{ pstr("|"),		ANY, 20, L,   INT, INT,   INT, OP_OR },

	{ pstr("."),		ANY, 14, L,   STR, STR,   STR, OP_CONCAT },

	{ pstr(">="),		STR, 12, L,   ANY, STR,   INT, OP_GESTR },
	{ pstr(">="),		ANY, 12, L,   INT, INT,   INT, OP_GE },
	{ pstr("<="),		STR, 12, L,   ANY, STR,   INT, OP_LESTR },
	{ pstr("<="),		ANY, 12, L,   INT, INT,   INT, OP_LE },
	{ pstr(">"),		STR, 12, L,   ANY, STR,   INT, OP_GTSTR },
	{ pstr(">"),		ANY, 12, L,   INT, INT,   INT, OP_GT },
	{ pstr("<"),		STR, 12, L,   ANY, STR,   INT, OP_LTSTR },
	{ pstr("<"),		ANY, 12, L,   INT, INT,   INT, OP_LT },

	{ pstr("!="),		STR, 11, L,   ANY, STR,   INT, OP_NESTR },
	{ pstr("!="),		ANY, 11, L,   ANY, ANY,   INT, OP_NE },
	{ pstr("=="),		STR, 11, L,   ANY, STR,   INT, OP_EQSTR },
	{ pstr("=="),		ANY, 11, L,   ANY, ANY,   INT, OP_EQ },

	{ pstr("="),		INT,  7, R,   ANY, INT,   INT, OP_ADDAV },
	{ pstr("="),		IPA,  7, R,   ANY, IPA,   IPA, OP_ADDAV },
	{ pstr("="),		DAT,  7, R,   ANY, DAT,   DAT, OP_ADDAV },
	{ pstr("="),		STR,  7, R,   ANY, STR,   STR, OP_ADDAV },
	{ pstr(":="),		INT,  7, R,   ANY, INT,   INT, OP_REPLACEAV },
	{ pstr(":="),		IPA,  7, R,   ANY, IPA,   IPA, OP_REPLACEAV },
	{ pstr(":="),		DAT,  7, R,   ANY, DAT,   DAT, OP_REPLACEAV },
	{ pstr(":="),		STR,  7, R,   ANY, STR,   STR, OP_REPLACEAV },
};

#define STDOP_CNT	(sizeof(stdops) / sizeof(STDOP))


/* Standard operators tested after tokenizing by taking A-Za-z0-9:- span */

static STDOP namedops[] = {

	/* Halting 'terms' - don't recurse, apply only in 'NON' and pretend
	 * they return an integer on the stack, so that the comma will work. */

	{ pstr("halt"),	 	NON, 32, L,   ANY, NON,   INT, OP_HALT },
	{ pstr("abort"),	NON, 32, L,   ANY, NON,   INT, OP_ABORT },
	{ pstr("accept"),	NON, 32, L,   ANY, NON,   INT, OP_ACCEPT },
	{ pstr("reject"),	NON, 32, L,   ANY, NON,   INT, OP_REJECT },
	{ pstr("acctresp"),	NON, 32, L,   ANY, NON,   INT, OP_ACCTRESP },

	/* Unary postfix operators */

	{ pstr("exists"),	ANY, 30, L,   ANY, NON,   INT, OP_EXISTS },
	/* typecasts / auto-conversion (uses autodetected format for ints) */
	{ pstr("toint"),	ANY, 30, L,   INT, NON,   INT, OP_NOP },
	{ pstr("toip"),	 	ANY, 30, L,   IPA, NON,   IPA, OP_NOP },
	{ pstr("todate"),	ANY, 30, L,   DAT, NON,   DAT, OP_NOP },
	{ pstr("tostr"),	ANY, 30, L,   STR, NON,   STR, OP_NOP },
	/* convert specific string format to int (oct/dec/hex/raw -> int) */
	{ pstr("fromoct"),	ANY, 30, L,   STR, NON,   INT, OP_OCTSTR2ORD },
	{ pstr("fromdec"),	ANY, 30, L,   STR, NON,   INT, OP_DECSTR2ORD },
	{ pstr("fromhex"),	ANY, 30, L,   STR, NON,   INT, OP_HEXSTR2ORD },
	{ pstr("fromraw"),	ANY, 30, L,   STR, NON,   INT, OP_RAWSTR2ORD },
	/* convert int to specific string format (int -> oct/dec/hex/raw) */
	{ pstr("tooct"),	ANY, 30, L,   INT, NON,   STR, OP_ORD2OCTSTR },
	{ pstr("todec"),	ANY, 30, L,   INT, NON,   STR, OP_ORD2DECSTR },
	{ pstr("tohex"),	ANY, 30, L,   INT, NON,   STR, OP_ORD2HEXSTR },
	{ pstr("toraw"),	ANY, 30, L,   INT, NON,   STR, OP_ORD2RAWSTR },

	/* Unary prefix operators. All have same precedence and associate 
	 * right, otherwise behaviour would be quite counterintuitive; the
	 * one closest to the term should always be evaluated first. */

	{ pstr("delall"),	NON, 28, R,   ANY, ANY,   INT, OP_DELALLAV },
	{ pstr("del"),	 	NON, 28, R,   ANY, ANY,   INT, OP_DELAV },
	{ pstr("moveall"),	NON, 28, R,   ANY, ANY,   INT, OP_MOVEALLAV },
	{ pstr("all"),	 	NON, 28, R,   ANY, ANY,   STR, OP_JOINSTR },
	{ pstr("return"), 	NON, 28, R,   ANY, INT,   INT, OP_RETFUNC },
	{ pstr("dictencode"),	NON, 28, R,   ANY, ANY,   STR, OP_DICTENC },
	{ pstr("not"),	 	NON, 28, R,   ANY, ANY,   INT, OP_BOOLNOT },
	{ pstr("no"),		NON, 28, R,   ANY, ANY,   INT, OP_BOOLNOT },
	{ pstr("random"),	NON, 28, R,   ANY, ANY,   STR, OP_RANDOM },
	{ pstr("md5"),	 	NON, 28, R,   ANY, STR,   STR, OP_MD5 },
	{ pstr("hex"),	 	NON, 28, R,   ANY, STR,   STR, OP_HEX },
	{ pstr("upper"),	NON, 28, R,   ANY, STR,   STR, OP_UPPER },
	{ pstr("lower"),	NON, 28, R,   ANY, STR,   STR, OP_LOWER },

	/* Binary operators */

	{ pstr("asmac"),	ANY, 18, L,   STR, STR,   STR, OP_STR2MAC },
	{ pstr("papdecrypt"),	ANY, 18, L,   STR, STR,   STR, OP_PAPDECR },
	{ pstr("papencrypt"),	ANY, 18, L,   STR, STR,   STR, OP_PAPENCR },
	{ pstr("saltencrypt"),	ANY, 18, L,   STR, STR,   STR, OP_SALTENCR },
	{ pstr("unixcrypt"),	ANY, 18, L,   STR, STR,   STR, OP_UNIXCRYPT },
	{ pstr("hmacmd5"),	ANY, 18, L,   STR, STR,   STR, OP_HMACMD5 },
	{ pstr("dictdecode"),	ANY, 18, L,   STR, ANY,   INT, OP_DICTDEC },
	{ pstr("as"),		ANY, 18, L,   DAT, STR,   STR, OP_ORD2DFMSTR },

	{ pstr("beforefirst"),	ANY, 16, L,   STR, STR,   STR, OP_BF },
	{ pstr("afterfirst"),	ANY, 16, L,   STR, STR,   STR, OP_AF },
	{ pstr("beforelast"),	ANY, 16, L,   STR, STR,   STR, OP_BL },
	{ pstr("afterlast"),	ANY, 16, L,   STR, STR,   STR, OP_AL },
	{ pstr("firstof"),	ANY, 16, R,   INT, STR,   STR, OP_FO },
	{ pstr("lastof"),	ANY, 16, R,   INT, STR,   STR, OP_LO },

	{ pstr("pokedwith"),	STR,  7, R,   ANY, STR,   STR, OP_POKEAV }
};

#define NAMEDOP_CNT	(sizeof(namedops) / sizeof(STDOP))


/*
 * FUNCTIONS
 */


/* Returns auto conversion opcode based on source and destination type */

static int cvtop(int srctype, int dsttype)
{
	if (srctype == STR) {
		switch(dsttype) {
		  case INT: return OP_INTSTR2ORD;
		  case IPA: return OP_IPASTR2ORD;
		  case DAT: return OP_DATSTR2ORD;
		}
	}
	else if (dsttype == STR) {
		switch(srctype) {
		  case INT: return OP_ORD2DECSTR;
		  case IPA: return OP_ORD2IPASTR;
		  case DAT: return OP_ORD2DATSTR;
		}
	}

	/* All other conversions are really only typecasts */
	return OP_NOP;
}


/* Add a function at the head of the function list (later overrides earlier) */

static void func_add(FUNC_T **fhead, FUNC_T **ftail, char *name, ssize_t namel,
		     INSN *code)
{
	FUNC_T *f;

	f = safe_malloc(sizeof(FUNC_T));
	memset(f, 0, sizeof(FUNC_T));
	f->namel = cptstr(f->name, sizeof(f->name), name, namel);
	f->code = code;
	LIST_ADDHEAD(fhead, ftail, f);
}


/* Get a function's address from the function list based on name */

static INSN *func_getaddr(FUNC_T **fhead, char *name, ssize_t namel)
{
	FUNC_T *f;

	for(f = *fhead; 
	    f && !str_eq(f->name, f->namel, name, namel);
	    f = f->next);
	if (f) return f->code;
	return 0;
}


/* Forward declaration; used by reccompile for 'include' */

static ssize_t reccompilefile(META *m, IFACE *ifaces, 
			      char *path, ssize_t pathl,
			      char *fname, ssize_t fnamel,
			      INSN **buf, ssize_t bufsize, 
			      int precedence, int req_ctx, 
			      META_ITEM *ctx_item, int *ret_ctx,
			      FUNC_T **funchead, FUNC_T **functail);


/* Compiler. Recursively called for each subexpression 
 * and called by reccompilefile. */

#define C		(**source)
#define I(n)		((n) < *sourcel ? ((*source)[(n)]) : -1)
#define S(n)		((*source) += (n), (*sourcel) -= (n))
#define GETSTR(s,n,rl)	meta_prttoa(*source, *sourcel, 0, 0, (rl), (s), (n))
#define GETIP(rl)	meta_atoip(*source, *sourcel, 0, 0, (rl))
#define GETORDN(b,r,n)  meta_atoord(*source, MIN(*sourcel, (n)), 0, 0, (r), (b))
#define GETORD(ba,rl)	meta_atoord(*source, *sourcel, 0, 0, (rl), (ba))
#define TOKEN()		memspn(*source, *sourcel, pstr("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789:-"))
#define FEQ(s, l)	((l) > *sourcel || memcmp(*source, (s), (l)))  /*begin*/
#define EQ(sl, s, l)	((l) - (sl) || memcmp(*source, (s), (l)))      /*to sl*/
#define DBGCVT(n)	dbg_cvtstr(*source, (n))


static ssize_t reccompile(META *m, IFACE *ifaces, 
			  char **source, ssize_t *sourcel, 
			  INSN **buf, ssize_t bufsize, 
			  int precedence, int req_ctx, 
			  META_ITEM *ctx_item, int *ret_ctx, int allow_non, 
			  char *fullpath, ssize_t pathl, 
			  FUNC_T **funchead, FUNC_T **functail)
{
	signed char *s, c;
	INSN *o, *saved_o;
	ssize_t l, len, ret;
	int context, n, e;
	META_VAL *v;
	STDOP *so;
	IFACE *ifc;
	STR_T src, str;

	if (!m || !source || !buf || !*buf || !bufsize) {
		msg(F_MISC, L_ERR, "reccompile: BUG: Invalid argument(s)!\n");
		return 0;
	}
	D1(msg(F_LANG, L_DEBUG, "reccompile: c=%c, prec=%d, req_ctx=%d, ctx_item=%s\n", C, precedence, req_ctx, ctx_item ? ctx_item->name:"-"));

	/* We always start each recursive invocation in context 'NON', ie.
	 * expecting a term from the source. And we always return a term, or
	 * NON if that's what the parent required (req_ctx NON) or allowed
	 * (allow_non true).
	 *
	 * Whether or not we process the operator after the term we find on the
	 * current level, or whether we return first to leave it to the parent,
	 * depends on its precedence level relative to the parent's. */

	context = NON;
	o = *buf;
	ret = 0;

	/* We leave guard space of min. 16 INSNs so that we can safely emit
	 * a couple of them inside the loop, only testing for long ones like
	 * PUSHSTRs. */

	while(*sourcel > 0 && ret < bufsize - (16 * sizeof(INSN))) {

		/* Get first character */
		c = C;

		/* Whitespace */
		if (!c || c == ' ' || c == '\n' || c == '\r' || c == '\t') {
			S(1);
			continue;
		}

		/* Comment */
		if (c == '#') {
			S(1); 
			l = memcspn(*source, *sourcel, "\n\r", 2); 
			S(l);
			continue;
		}

		/*
		 * IMMEDIATE TERMS
		 */

		/* If we have an attribute context in scope, we search the
		 * constant value list first, because some vendor's 
		 * dictionaries contain stuff like 1200-75-BPS (yuck!) */

		len = -1;
		if (ctx_item && (len = TOKEN())) {
			if (I(len - 1) == ':') len--; 
			if ((v = meta_getvalbyname(m, ctx_item, *source, len))){
				S(len);
				context = INT;
				o->imm.ord = v->nr;
				o->op = OP_PUSHINT; o++; ret += sizeof(INSN);
				continue;
			}
		}

		/* Numeric term. Also supports dotted-decimal IP addresses. */
		if (((c >= '0' && c <= '9') ||
		     (c == '-' && I(1) >= '0' && I(1) <= '9')) && 
		    context == NON) {

			o->op = OP_PUSHINT; 
			o->imm.ord = GETIP(&l);
			context = IPA;
			if (!l) { 
				o->imm.ord = GETORD(0, &l);
				context = INT;
			}
			S(l);
			o++; ret += sizeof(INSN);
			continue;
		}

		/* String term. Supports hex and octal escaped chars. */
		if ((c == '\"' || c == '\'') && context == NON) {

			o->op = OP_PUSHSTR;
			s = (char *)(o + 1);	/* the space after this insn */
			n = GETSTR(s, bufsize - ret - sizeof(INSN), &l); 
			S(l);

			/* Set string length */
			o->imm.d.str_len = n;

			/* Calculate length as insn count and save in disp */
			n = (n + sizeof(INSN) - 1) / sizeof(INSN);
			o->imm.d.disp = n;

			/* On to the next insn and skip the displacement */
			o += 1 + n; ret += (1 + n) * sizeof(INSN);
			context = STR;
			continue;
		}


		/*
		 * SPECIAL OPERATORS
		 */

		/* Parens reset the precedence level so that we may continue
		   undisturbed until we meet the first closing one. At that
		   point, return until we're up at the precedence level the 
		   opening one brought us, then skip it. */

		if (c == '(' && context == NON) {
			S(1);
			/* We don't pass the attribute context beyond ( */
			/* The type of this term is determined by the type
			 * of the subexpression. Don't autoconvert; we'll
			 * do that at this level if we needed something
			 * else. Pass parent's allow_non to subexpression. */

			l = reccompile(m, ifaces, source, sourcel, 
				       &o, bufsize - ret, 
				       0, ANY, 0, &context, allow_non,
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;
			ret += l;
			continue;
		}
		if (c == ')' && (context != NON || allow_non)) {
			if (precedence > 0) break;
			S(1);
			break;
		}

		/* A comma is a no-op (we don't even emit a NOP) if we're still
		 * expecting a term. Otherwise, it's a POP that we'll replace
		 * with a NOP if the right hand side didn't give us any term.
		 * Why this complexity? It allows (3,) to return 3, making the
		 * language much more user friendly with respect to comma 
		 * placement. */

		if (c == ',') {
		    if (precedence >= 1) break;
		    S(1);
		    ctx_item = 0;		/* Comma always resets 
						   attribute context */
		    if (context != NON) {

			/* Emit POP, possibly removed after reccompile */
			saved_o = o;
			o->op = OP_POP; o++; ret += sizeof(INSN);

			/* We don't pass the attribute context; we don't
			 * autoconvert but want to know the returned context,
			 * we allow_non. */

			l = reccompile(m, ifaces, source, sourcel,
				       &o, bufsize - ret, 
				       1, ANY, 0, &n, 1,
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;
			ret += l;

			/* If reccompile returned nothing at all, we can remove
			 * the POP by backing up. If it returned some INSNs but
			 * didn't push a term (in whatever way), we replace the
			 * POP with a NOP (and keep the current term and its
			 * context). In all other cases, we keep the POP and
			 * take on the returned context. */

			if (!l) o--, ret -= sizeof(INSN);
			else if (n == NON) saved_o->op = OP_NOP;
			else context = n;
		    }
		    continue;
		}

		/*
		 * STANDARD AND NAMED OPERATORS
		 */

		/* Search in standard operator table for first match of name
		   and context (or any context). The table must be ordered,
		   with the more specific names and contexts first; the amount
		   of text we test depends entirely on the operator. */

		for(so = stdops, n = 0, e = STDOP_CNT; 
		    n < e && (FEQ(so->name, so->namel) ||
			      !(so->ctx_l == context || (so->ctx_l == ANY && context != NON)));
		    n++, so++);

		if (n == e) {

			/* The terms and operators we will search for next are
			 * all tokens that consist entirely of letters, digits,
			 * dashes and colons. If the last character is a colon,
			 * we leave that out, so that 'int:=3' works as
			 * expected. */

			/* Tokenize now if we haven't already done so and
			 * complain if that doesn't give us anything. */

			if (len == -1) {
				len = TOKEN();
				if (len && I(len - 1) == ':') len--; 
			}
			if (len <= 0) goto operror;

			/* Search the named operator table for first
			 * operator matching name and context. */

			for(so = namedops, n = 0, e = NAMEDOP_CNT; 
			    n < e && (EQ(len, so->name, so->namel) ||
				      !(so->ctx_l == context || (so->ctx_l == ANY && context != NON)));
			    n++, so++);

		}

		/* Short and named operators are processed the same way */

		if (n < e) {

			/* Check precedence. Associating right means don't
			   break if same precedence but continue recursing. */
			if (precedence >= so->prec + (so->assoc == R)) break;

			/* We're now processing this operator, so remove it */
			S(so->namel);

			/* Convert stack top to required left hand type */
			if (so->cvt_l != ANY && so->cvt_l != context) {
				o->op = cvtop(context, so->cvt_l);
				if (o->op != OP_NOP) o++, ret += sizeof(INSN);
			}

			/* Recurse, specifying required right hand type */
			if (so->cvt_r != NON) {
				l = reccompile(m, ifaces, source, sourcel,
					&o, bufsize - ret, 
					so->prec, so->cvt_r, ctx_item, 0, 0,
					fullpath, pathl, funchead, functail);
				if (l == -1) return -1;	
				ret += l;
			}

			/* Emit opcode */
			if (so->op != OP_NOP) {
				o->op = so->op; o++; ret += sizeof(INSN);
			}

			/* Set new context */
			context = so->newctx;
			continue;
		}


		/*
		 * NAMED SPECIAL OPERATORS
		 */

		/* These boolean operators use short-circuit evaluation, so
		 * 'A and B' evaluates B only if A is true, and 
		 * 'A or B', evaluates B only if A is false. 
		 *
		 * They always return the last evaluated term as the result, so
		 * 'A and B' returns either false or B, and 
		 * 'A or B' returns A if that's true and B otherwise. 
		 *
		 * Note that these have a very low precedence, and 'or' is
		 * just below 'and'. */

		if (context != NON && !EQ(len, "and", 3)) {
			if (precedence >= 5) break;
			S(2 + (c == 'a'));
			saved_o = o;
			o->op = OP_JMPZ; o++;
			o->op = OP_POP; o++; 
			/* Funky feature: 'and' does not require the left hand
			 * term to be any specific type, because to the VM, a
			 * false int looks exactly like a false str, so if we
			 * return that, then that's what the stack has anyway.
			 * In all cases, the returned context is the right 
			 * hand's type. */
			l = reccompile(m, ifaces, source, sourcel,
				       &o, bufsize - ret, 
				       5, ANY, ctx_item, &context, 0,
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;	
			saved_o->imm.d.disp = l / sizeof(INSN) + 1;
			ret += l + 2 * sizeof(INSN);
			continue;
		}

		if (context != NON && !EQ(len, "or", 2)) {
			if (precedence >= 4) break;
			S(2);
			saved_o = o;
			o->op = OP_JMPNZ; o++;
			o->op = OP_POP; o++; 
			/* Require the right term to be the same type as the
			 * left: pass current context as required. */
			l = reccompile(m, ifaces, source, sourcel,
				       &o, bufsize - ret, 
				       4, context, ctx_item, 0, 0,
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;	
			saved_o->imm.d.disp = l / sizeof(INSN) + 1;
			ret += l + 2 * sizeof(INSN);
			continue;
		}

		/* All next searches only apply in context NON; if we are still
		 * looking for a typed binary or postfix operator here, it
		 * won't be found, and we can already complain. */

		if (context != NON) goto operror;

		/* Include files */

		if (!EQ(len, "include", 7)) {
			S(7);
			src.p = *source; src.l = *sourcel;
			str = str_getword(&src, pstr(" \n\r\t,"));
			*source = src.p; *sourcel = src.l;

			/* We are at NON, start subexpression at NON (we always
			 * do), expect NON, and stay at NON. Yeah. */

			l = reccompilefile(m, ifaces, 
					   fullpath, pathl, str.p, str.l, 
					   &o, bufsize - ret, 
					   0, NON, 0, 0, funchead, functail);
			if (l == -1) return -1;	
			ret += l;
			continue;
		}

		/* Function definitions. These emit a jump over the
		 * function code (cf. the short cut boolean operators) and add
		 * an entry to the function list. */

		if (!EQ(len, "function", 8)) {
			S(8);

			/* Get name */
			src.p = *source; src.l = *sourcel;
			str = str_getword(&src, pstr(" \n\r\t"));
			*source = src.p; *sourcel = src.l;

			/* Emit jump, set displacement after reccompile */
			saved_o = o;
			o->op = OP_JMP; o++; ret += sizeof(INSN);

			/* Add entry to function table */
			func_add(funchead, functail, str.p, str.l, o);

			/* Compile at precedence 28 (unary prefix ops),
			 * make function body auto-convert to int, in
			 * in case there's no explicit return (which also
			 * converts to int */
	
			l = reccompile(m, ifaces, source, sourcel,
				       &o, bufsize - ret, 
				       28, INT, 0, 0, 0,
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;	

			/* Emit return in function in case none was present */
			o->op = OP_RETFUNC; o++; l += sizeof(INSN);
			ret += l;

			/* Fixup jump over function */
			saved_o->imm.d.disp = l / sizeof(INSN);

			/* After the function definition, the context is 
			 * exactly as it was. It's as if it never occured. */
			continue;
		}


		/*
		 * FUNCTION CALLS
		 *
		 * These behave as unary prefix operators that evaluate and 
		 * discard an optional right hand term, and return the integer
		 * returned by the function itself. We set allow_non to allow
		 * reccompile to terminate early, without obtaining a term.
		 */

		saved_o = func_getaddr(funchead, *source, len);
		if (saved_o) {
			if (precedence > 28) break;
			S(len);
			l = reccompile(m, ifaces, source, sourcel, 
				       &o, bufsize - ret, 
				       28, NON, 0, 0, 1,
				       fullpath, pathl, 
				       funchead, functail);
			if (l == -1) return -1;	
			ret += l;

			/* All functions leave an int on stack */
			context = INT;
			o->op = OP_CALLFUNC;
			o->imm.func = saved_o;
			o++; ret += sizeof(INSN);
			continue;
		}


		/*
		 * INTERFACE CALLS
		 */

		/* Search callable interfaces. Interface calls evaluate
		 * the right hand term, if any, then call the interface,
		 * and return the right hand term, or NON if none was
		 * given. We set allow_non to allow reccompile to terminate
		 * without obtaining a term at all. */

		for(ifc = ifaces; 
		    ifc && EQ(len, ifc->name, ifc->namel);
		    ifc = ifc->next);
		if (ifc) {
			if (precedence > 28) break;
			S(len);
			l = reccompile(m, ifaces, source, sourcel, 
				       &o, bufsize - ret, 
				       28, ANY, 0, &context, 1,
				       fullpath, pathl, 
				       funchead, functail);
			if (l == -1) return -1;	
			ret += l;

			/* Emit opcode */
			o->op = OP_CALLIFACE; 
			o->imm.iface = ifc;
			o++; ret += sizeof(INSN); 
			continue;
		}


		/*
		 * DICTIONARY TERMS
		 */

		o->imm.i.item = meta_getitembyextspec(m, *source, len,
						      &o->imm.i.ref);
		if (o->imm.i.item) {
			S(len);
			context = o->imm.i.item->val_type;
			ctx_item = o->imm.i.item;
			o->op = OP_PUSHAV; o++; ret += sizeof(INSN);
			continue;
		}


		/*
		 * NOTHING FOUND
		 */

		/* Special funky feature: if the parent allows us to
		 * push nothing, then just return without an error; let the
		 * parent deal with whatever we got and whatever's left ahead.
		 * Note that we're always in NON if we even get here. 
		 *
		 * Using this, we allow binary operators to terminate optional
		 * subexpressions. Use sparingly. We only use optional
		 * subexpressions for interface- and function calls. */

		if (allow_non) break;

		/* Error. Message depends on whether we have an attribute 
		 * context in scope. */

		if (ctx_item) {
			msg(F_LANG, L_ERR, "compile: ERROR: Unknown "
					   "term or unary operator '%s' (for "
					   "%s:%s:%s)!\n",
			    DBGCVT(len), ctx_item->spc->name,
			    meta_getvndbynr(m, ctx_item->vnd)->name,
			    ctx_item->name);
		}
		else {
			msg(F_LANG, L_ERR, "compile: ERROR: Unknown "
					   "term or unary operator '%s'!\n", 
			    DBGCVT(len));
		}
		goto error;
	}

	/* If parent didn't want a term (req_ctx NON) and we pushed one, pop
	 * it.
	 *
	 * If parent required a term (req_ctx not NON and allow_non clear)
	 * flag an error if we didn't push any. 
	 *
	 * If parent required a term and we pushed one, autoconvert if
	 * necessary.
	 */

	if (req_ctx == NON) {
		if (context != NON) {
			o->op = OP_POP; 
			o++, ret += sizeof(INSN);
		}
	}
	else {					/* req_ctx != NON */
		if (context != NON) {
			o->op = cvtop(context, req_ctx);
			if (o->op != OP_NOP) o++, ret += sizeof(INSN);
		}
		else if (allow_non == 0) {	/* context NON */
			msg(F_LANG, L_ERR, "compile: ERROR: term expected!\n");
			goto error;
		}
	}

	*buf = o;
	if (ret_ctx) *ret_ctx = context;
	D1(msg(F_LANG, L_DEBUG, "reccompile: Returned.\n"));
	return ret;

operror:
	msg(F_LANG, L_ERR, "compile: ERROR: no valid operator found!"
			   " (missing comma?)\n");
error:
	l = *sourcel; s = " <eof>";
	if (l > 20) l = 20, s = "...";
	if (fullpath) {
		msg(F_LANG, L_ERR, "compile: in %s near '%s'%s\n", 
		    fullpath, DBGCVT(l), s);
		return -1;
	}
	msg(F_LANG, L_ERR, "compile: near '%s'%s\n", DBGCVT(l), s);
	return -1;
}


/* Called by lang_compilefile and when an 'include' operator is found */

static ssize_t reccompilefile(META *m, IFACE *ifaces, 
			      char *path, ssize_t pathl,
			      char *fname, ssize_t fnamel,
			      INSN **buf, ssize_t bufsize, 
			      int precedence, int req_ctx, 
			      META_ITEM *ctx_item, int *ret_ctx,
			      FUNC_T **funchead, FUNC_T **functail)
{
	ssize_t ret;
	char *fullpath, *freep;
	STR_T text;

	ret = -1;

	/* strtopath assembles base path and absolute or relative pathname in
	 * fname into one full path and puts it in a newly allocated string. 
	 * It returns the length of the dirname part in pathl, so that this
	 * part of fullpath can become the new base path for subsequent
	 * includes. */

	fullpath = strtopath(path, pathl, fname, fnamel, &pathl);
	if (!fullpath) { msg(F_LANG, L_ERR, "reccompile: strtopath failed!\n");
			 return -1; }

	msg(F_MISC, L_NOTICE, "compile: Opening %s\n", fullpath);
	text = readstr(fullpath);
	if (!text.p) {
		msg(F_LANG, L_ERR, "compile: Can't read %s: %s!\n",
		    fullpath, strerror(-text.l));
		free(fullpath);
		return -1;
	}

	freep = text.p;
	ret = reccompile(m, ifaces, &text.p, &text.l, buf, bufsize, 
			 precedence, req_ctx, ctx_item, ret_ctx, 0,
			 fullpath, pathl, funchead, functail);
	free(freep);
	free(fullpath);

	return ret;
}


/* Called by other modules */

ssize_t lang_compilefile(META *m, IFACE *ifaces, 
			 char *path, ssize_t pathl,
			 char *fname, ssize_t fnamel,
			 INSN *buf, ssize_t bufsize,
			 FUNC_T **funchead, FUNC_T **functail)
{
	ssize_t ret;
	ret = reccompilefile(m, ifaces, 
			     path, pathl, 
			     fname, fnamel, 
			     &buf, bufsize, 
			     0, NON, 0, 0,
			     funchead, functail);

	/* Terminate with a HALT to be sure */

	if (ret != -1 && ret < bufsize - sizeof(INSN)) {
		buf->op = OP_HALT; ret += sizeof(INSN);
	}

	return ret;
}


/* Mainly for testing */

ssize_t lang_compile(META *m, IFACE *ifaces, 
		     char *source, ssize_t sourcel,
		     INSN *buf, ssize_t bufsize,
		     FUNC_T **funchead, FUNC_T **functail)
{
	ssize_t ret;
	ret = reccompile(m, ifaces, &source, &sourcel, &buf, bufsize,
			 0, NON, 0, 0, 0,
			 0, 0, funchead, functail);

	/* Terminate with a HALT to be sure */

	if (ret != -1 && ret < bufsize - sizeof(INSN)) {
		buf->op = OP_HALT; ret += sizeof(INSN);
	}

	return ret;
}

