/*
 * LANGCOMPILE  - Language compiler
 *
 * Author:
 * Emile van Bergen, emile@evbergen.xs4all.nl
 *
 * Permission to redistribute an original or modified version of this program
 * in source, intermediate or object code form is hereby granted exclusively
 * under the terms of the GNU General Public License, version 2. Please see the
 * file COPYING for details, or refer to http://www.gnu.org/copyleft/gpl.html.
 *
 * History:
 * 2001/04/27 - EvB - Created
 * 2002/04/23 - EvB - Sanitized conversion operators
 * 2005/12/08 - EvB - Big cleanup: reads whole files, ported to evblib,
 * 		      better const name handling, functions
 */

char langcompile_id[] = "LANGCOMPILE - Copyright (C) 2001 Emile van Bergen.";


/*
 * INCLUDES & DEFINES
 */


#include <stdlib.h>	/* For strtol */
#include <string.h>	/* For strncpy */

#include <srvtypes.h>	/* For IFACE */
#include <language.h>
#include <debug.h>

#include <evblib/misc/misc.h>	/* for MIN */
#include <evblib/str/str.h>	/* for STR, pstr, memspn, memcspn, cptstr */
#include <evblib/strio/strio.h>	/* for strtopath, readstr */
#include <evblib/db/list.h>	/* for LIST_ */


/* (Shortened) type definitions for use in the standard operator table */

#define NON		-1	/* As context: no type
				   	(expecting term or unary prefix op)
			 	   Not used as left term's cvt tgt
				   As right term's cvt tgt: don't recurse
					(unary postfix op)
				   As reccompile req_ctx: pop term from stack */

#define ANY		-2	/* As context: any type 
				   As left term's cvt tgt: don't convert
				   As right cvt tgt: don't convert
				   As reccompile req_ctx: don't convert */

#define INT		MT_INTEGER
#define IPA		MT_IPADDR
#define DAT		MT_DATE
#define STR		MT_STRING

/* Left or right associating */

#define L		0
#define R		1


/*
 * TYPES
 */


/* Standard operator definition */

typedef struct stdop {
	char *name;	/* Operator name */
	int namel;	/* Operator name length */
	int ctx_l;	/* Op applies in this context only */
	int prec;	/* Precedence */
	int assoc;	/* Right associating operator if nonzero */
	int cvt_l;	/* Convert left term to type */
	int cvt_r;	/* Required type for right term */
	int newctx;	/* Set context to this */
	int op;		/* Opcode to emit */
} STDOP;


/*
 * GLOBALS
 */


/* The standard operators we know */

static STDOP stdops[] = {

	/* Halting 'terms' - don't recurse, apply only in 'NON' and pretend
	 * they return an integer on the stack, so that the comma will work. */
	{ pstr("halt"),	 	NON, 32, L,   ANY, NON,   INT, OP_HALT },
	{ pstr("abort"),	NON, 32, L,   ANY, NON,   INT, OP_ABORT },
	{ pstr("accept"),	NON, 32, L,   ANY, NON,   INT, OP_ACCEPT },
	{ pstr("reject"),	NON, 32, L,   ANY, NON,   INT, OP_REJECT },
	{ pstr("acctresp"),	NON, 32, L,   ANY, NON,   INT, OP_ACCTRESP },

	/* Unary postfix operators */
	{ pstr("exists"),	ANY, 30, L,   ANY, NON,   INT, OP_EXISTS },
	/* Typecasts / auto-conversion (uses autodetected format for ints) */
	{ pstr("toint"),	ANY, 30, L,   INT, NON,   INT, OP_NOP },
	{ pstr("toip"),	 	ANY, 30, L,   IPA, NON,   IPA, OP_NOP },
	{ pstr("todate"),	ANY, 30, L,   DAT, NON,   DAT, OP_NOP },
	{ pstr("tostr"),	ANY, 30, L,   STR, NON,   STR, OP_NOP },
	/* Convert specific string format to int (oct/dec/hex/raw -> int) */
	{ pstr("fromoct"),	ANY, 30, L,   STR, NON,   INT, OP_OCTSTR2ORD },
	{ pstr("fromdec"),	ANY, 30, L,   STR, NON,   INT, OP_DECSTR2ORD },
	{ pstr("fromhex"),	ANY, 30, L,   STR, NON,   INT, OP_HEXSTR2ORD },
	{ pstr("fromraw"),	ANY, 30, L,   STR, NON,   INT, OP_RAWSTR2ORD },
	/* Convert int to specific string format (int -> oct/dec/hex/raw) */
	{ pstr("tooct"),	ANY, 30, L,   INT, NON,   STR, OP_ORD2OCTSTR },
	{ pstr("todec"),	ANY, 30, L,   INT, NON,   STR, OP_ORD2DECSTR },
	{ pstr("tohex"),	ANY, 30, L,   INT, NON,   STR, OP_ORD2HEXSTR },
	{ pstr("toraw"),	ANY, 30, L,   INT, NON,   STR, OP_ORD2RAWSTR },

	/* Unary prefix operators. All have same precedence and associate 
	 * right, otherwise behaviour would be quite counterintuitive; the
	 * one closest to the term is always evaluated first. */
	{ pstr("delall"),	NON, 28, R,   ANY, ANY,   INT, OP_DELALLAV },
	{ pstr("del"),	 	NON, 28, R,   ANY, ANY,   INT, OP_DELAV },
	{ pstr("moveall"),	NON, 28, R,   ANY, ANY,   INT, OP_MOVEALLAV },
	{ pstr("all"),	 	NON, 28, R,   ANY, ANY,   STR, OP_JOINSTR },
	{ pstr("return"), 	NON, 28, R,   ANY, INT,   INT, OP_RETFUNC },
	{ pstr("~"),		NON, 28, R,   ANY, INT,   INT, OP_NOT },
	{ pstr("-"),		NON, 28, R,   ANY, INT,   INT, OP_NEG },
	{ pstr("dictencode"),	NON, 28, R,   ANY, ANY,   STR, OP_DICTENC },
	{ pstr("!"),		NON, 28, R,   ANY, ANY,   INT, OP_BOOLNOT },
	{ pstr("not"),	 	NON, 28, R,   ANY, ANY,   INT, OP_BOOLNOT },
	{ pstr("no"),		NON, 28, R,   ANY, ANY,   INT, OP_BOOLNOT },
	{ pstr("random"),	NON, 28, R,   ANY, ANY,   STR, OP_RANDOM },
	{ pstr("md5"),	 	NON, 28, R,   ANY, STR,   STR, OP_MD5 },
	{ pstr("hex"),	 	NON, 28, R,   ANY, STR,   STR, OP_HEX },
	{ pstr("upper"),	NON, 28, R,   ANY, STR,   STR, OP_UPPER },
	{ pstr("lower"),	NON, 28, R,   ANY, STR,   STR, OP_LOWER },
	/* Binary operators */
	{ pstr("*"),		ANY, 25, L,   INT, INT,   INT, OP_MUL },
	{ pstr("/"),		IPA, 25, L,   ANY, INT,   IPA, OP_CIDRMASK },
	{ pstr("/"),		ANY, 25, L,   INT, INT,   INT, OP_DIV },
	{ pstr("%"),		ANY, 25, L,   INT, INT,   INT, OP_MOD },
	{ pstr("+"),		DAT, 24, L,   ANY, INT,   DAT, OP_ADD },
	{ pstr("+"),		IPA, 24, L,   ANY, INT,   IPA, OP_ADD },
	{ pstr("+"),		ANY, 24, L,   INT, INT,   INT, OP_ADD },
	{ pstr("-"),		ANY, 24, L,   INT, INT,   INT, OP_SUB },
	{ pstr(">>"),		ANY, 23, L,   INT, INT,   INT, OP_SHR },
	{ pstr("<<"),		ANY, 23, L,   INT, INT,   INT, OP_SHL },
	{ pstr("^"),		INT, 22, L,   ANY, INT,   INT, OP_XOR },
	{ pstr("^"),		IPA, 22, L,   ANY, IPA,   IPA, OP_XOR },
	{ pstr("^"),		DAT, 22, L,   ANY, DAT,   DAT, OP_XOR },
	{ pstr("^"),		STR, 22, L,   ANY, STR,   STR, OP_XORSTR },
	{ pstr("&"),		ANY, 21, L,   INT, INT,   INT, OP_AND },
	{ pstr("|"),		ANY, 20, L,   INT, INT,   INT, OP_OR },
	{ pstr("asmac"),	ANY, 18, L,   STR, STR,   STR, OP_STR2MAC },
	{ pstr("papdecrypt"),	ANY, 18, L,   STR, STR,   STR, OP_PAPDECR },
	{ pstr("papencrypt"),	ANY, 18, L,   STR, STR,   STR, OP_PAPENCR },
	{ pstr("unixcrypt"),	ANY, 18, L,   STR, STR,   STR, OP_UNIXCRYPT },
	{ pstr("hmacmd5"),	ANY, 18, L,   STR, STR,   STR, OP_HMACMD5 },
	{ pstr("dictdecode"),	ANY, 18, L,   STR, ANY,   INT, OP_DICTDEC },
	/* Convert to run-time specified format (only implemented for dates) */
	{ pstr("as"),		ANY, 18, L,   DAT, STR,   STR, OP_ORD2DFMSTR },
	{ pstr("beforefirst"),	ANY, 16, L,   STR, STR,   STR, OP_BF },
	{ pstr("afterfirst"),	ANY, 16, L,   STR, STR,   STR, OP_AF },
	{ pstr("beforelast"),	ANY, 16, L,   STR, STR,   STR, OP_BL },
	{ pstr("afterlast"),	ANY, 16, L,   STR, STR,   STR, OP_AL },
	{ pstr("firstof"),	ANY, 16, R,   INT, STR,   STR, OP_FO },
	{ pstr("lastof"),	ANY, 16, R,   INT, STR,   STR, OP_LO },
	{ pstr("."),		ANY, 14, L,   STR, STR,   STR, OP_CONCAT },
	{ pstr(">="),		STR, 12, L,   ANY, STR,   INT, OP_GESTR },
	{ pstr(">="),		ANY, 12, L,   INT, INT,   INT, OP_GE },
	{ pstr("<="),		STR, 12, L,   ANY, STR,   INT, OP_LESTR },
	{ pstr("<="),		ANY, 12, L,   INT, INT,   INT, OP_LE },
	{ pstr(">"),		STR, 12, L,   ANY, STR,   INT, OP_GTSTR },
	{ pstr(">"),		ANY, 12, L,   INT, INT,   INT, OP_GT },
	{ pstr("<"),		STR, 12, L,   ANY, STR,   INT, OP_LTSTR },
	{ pstr("<"),		ANY, 12, L,   INT, INT,   INT, OP_LT },
	{ pstr("!="),		STR, 11, L,   ANY, STR,   INT, OP_NESTR },
	{ pstr("!="),		ANY, 11, L,   ANY, ANY,   INT, OP_NE },
	{ pstr("=="),		STR, 11, L,   ANY, STR,   INT, OP_EQSTR },
	{ pstr("=="),		ANY, 11, L,   ANY, ANY,   INT, OP_EQ },
	{ pstr("="),		INT,  7, R,   ANY, INT,   INT, OP_ADDAV },
	{ pstr("="),		IPA,  7, R,   ANY, IPA,   IPA, OP_ADDAV },
	{ pstr("="),		DAT,  7, R,   ANY, DAT,   DAT, OP_ADDAV },
	{ pstr("="),		STR,  7, R,   ANY, STR,   STR, OP_ADDAV },
	{ pstr(":="),		INT,  7, R,   ANY, INT,   INT, OP_REPLACEAV },
	{ pstr(":="),		IPA,  7, R,   ANY, IPA,   IPA, OP_REPLACEAV },
	{ pstr(":="),		DAT,  7, R,   ANY, DAT,   DAT, OP_REPLACEAV },
	{ pstr(":="),		STR,  7, R,   ANY, STR,   STR, OP_REPLACEAV },
	{ pstr("pokedwith"),	STR,  7, R,   ANY, STR,   STR, OP_POKEAV },
	{ pstr(","),		NON,  1, L,   ANY, NON,   NON, OP_NOP },
	{ pstr(","),		ANY,  1, L,   ANY, NON,   NON, OP_POP }
};

#define STDOP_CNT	(sizeof(stdops) / sizeof(STDOP))


/*
 * FUNCTIONS
 */


/* Returns auto conversion opcode based on source and destination type */

static int cvtop(int srctype, int dsttype)
{
	if (srctype == STR) {
		switch(dsttype) {
		  case INT: return OP_INTSTR2ORD;
		  case IPA: return OP_IPASTR2ORD;
		  case DAT: return OP_DATSTR2ORD;
		}
	}
	else if (dsttype == STR) {
		switch(srctype) {
		  case INT: return OP_ORD2DECSTR;
		  case IPA: return OP_ORD2IPASTR;
		  case DAT: return OP_ORD2DATSTR;
		}
	}

	/* All other conversions are really only typecasts */
	return OP_NOP;
}


/* Add a function to the function list */

static void func_add(FUNC_T **fhead, FUNC_T **ftail, char *name, ssize_t namel,
		     INSN *code)
{
	FUNC_T *f;

	f = safe_malloc(sizeof(FUNC_T));
	memset(f, 0, sizeof(FUNC_T));
	f->namel = cptstr(f->name, sizeof(f->name), name, namel);
	f->code = code;
	LIST_ADDTAIL(fhead, ftail, f);
}


/* Get a function's address from the function list based on name */

static INSN *func_getaddr(FUNC_T **fhead, char *name, ssize_t namel)
{
	FUNC_T *f;

	for(f = *fhead; 
	    f && !str_eq(f->name, f->namel, name, namel);
	    f = f->next);
	if (f) return f->code;
	return 0;
}


/* Forward declaration; used by reccompile for 'include' */

static ssize_t reccompilefile(META *m, IFACE *ifaces, 
			      char *path, ssize_t pathl,
			      char *fname, ssize_t fnamel,
			      INSN **buf, ssize_t bufsize, 
			      int precedence, int req_ctx, 
			      META_ITEM *ctx_item, int *ret_ctx,
			      FUNC_T **funchead, FUNC_T **functail);


/* Compiler. Recursively called for each subexpression 
 * and called by reccompilefile. */

#define C		(**source)
#define I(n)		((n) < *sourcel ? ((*source)[(n)]) : -1)
#define S(n)		((*source) += (n), (*sourcel) -= (n))
#define GETSTR(s,n,rl)	meta_prttoa(*source, *sourcel, 0, 0, (rl), (s), (n))
#define GETIP(rl)	meta_atoip(*source, *sourcel, 0, 0, (rl))
#define GETORDN(b,r,n)  meta_atoord(*source, MIN(*sourcel, (n)), 0, 0, (r), (b))
#define GETORD(ba,rl)	meta_atoord(*source, *sourcel, 0, 0, (rl), (ba))
#define SPN()		memspn(*source, *sourcel, pstr("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789:-"))
#define FEQ(s, l)	((l) > *sourcel || memcmp(*source, (s), (l)))  /*begin*/
#define EQ(sl, s, l)	((l) - (sl) || memcmp(*source, (s), (l)))      /*to sl*/
#define DBGCVT(n)	dbg_cvtstr(*source, (n))


static ssize_t reccompile(META *m, IFACE *ifaces, 
			  char **source, ssize_t *sourcel,
			  INSN **buf, ssize_t bufsize, 
			  int precedence, int req_ctx, 
			  META_ITEM *ctx_item, int *ret_ctx,
			  char *fullpath, ssize_t pathl,
			  FUNC_T **funchead, FUNC_T **functail)
{
	signed char *s, c;
	INSN *o, *saved_o;
	ssize_t l, len, ret;
	int context, n;
	META_VAL *v;
	STDOP *so;
	IFACE *ifc;
	STR_T src, str;

	if (!m || !source || !buf || !*buf || !bufsize) {
		msg(F_MISC, L_ERR, "reccompile: BUG: Invalid argument(s)!\n");
		return 0;
	}

	context = NON;
	o = *buf;
	ret = 0;
	D1(msg(F_LANG, L_DEBUG, "reccompile: c=%c, prec=%d, req_ctx=%d, "
				"context=%d, ctx_item=%s\n",
	       C, precedence, req_ctx, context, ctx_item ? ctx_item->name:"-"));

	/* We leave guard space of min. 16 insns so that we can safely emit
	 * inside this loop. */
	while(*sourcel > 0 && ret < bufsize - (16 * sizeof(INSN))) {

		/* Get first character */
		c = C;

		/* Whitespace */
		if (!c || c == ' ' || c == '\n' || c == '\r' || c == '\t') {
			S(1);
			continue;
		}

		/* Comment */
		if (c == '#') {
			S(1); 
			l = memcspn(*source, *sourcel, "\n\r", 2); 
			S(l);
			continue;
		}

		/*
		 * IMMEDIATE TERMS
		 */

		/* If we have an attribute context in scope, we search the
		 * constant value list first, because some vendor's 
		 * dictionaries contain stuff like 1200-75-BPS (yuck!) */

		len = -1;
		if (ctx_item &&
		    (len = SPN()) &&
		    (v = meta_getvalbyname(m, ctx_item, *source, len))) {
			S(len);
			context = INT;
			o->imm.ord = v->nr;
			o->op = OP_PUSHINT; o++; ret += sizeof(INSN);
			continue;
		}

		/* Numeric term. Also supports dotted-decimal IP addresses. */
		if (((c >= '0' && c <= '9') ||
		     (c == '-' && I(1) >= '0' && I(1) <= '9')) && 
		    context == NON) {

			o->op = OP_PUSHINT; 
			o->imm.ord = GETIP(&l);
			context = IPA;
			if (!l) { 
				o->imm.ord = GETORD(0, &l);
				context = INT;
			}
			S(l);
			o++; ret += sizeof(INSN);
			continue;
		}

		/* String term. Supports hex and octal escaped chars. */
		if ((c == '\"' || c == '\'') && context == NON) {

			o->op = OP_PUSHSTR;
			s = (char *)(o + 1);	/* the space after this insn */
			n = GETSTR(s, bufsize - ret - sizeof(INSN), &l); 
			S(l);

			/* Set string length */
			o->imm.d.str_len = n;

			/* Calculate length as insn count and save in disp */
			n = (n + sizeof(INSN) - 1) / sizeof(INSN);
			o->imm.d.disp = n;

			/* On to the next insn and skip the displacement */
			o += 1 + n; ret += (1 + n) * sizeof(INSN);
			context = STR;
			continue;
		}


		/*
		 * SPECIAL OPERATORS
		 */

		/* Parens reset the precedence level so that we may continue
		   undisturbed until we meet the first closing one. At that
		   point, return until we're up at the precedence level the 
		   opening one brought us, then skip it. */
		if (c == '(' && context == NON) {
			S(1);
			/* We don't pass the attribute context beyond ( */
			/* Also, our returned type is determined by the
			 * subexpression. Don't autoconvert. */
			l = reccompile(m, ifaces, source, sourcel, 
				       &o, bufsize - ret, 0,
				       ANY, 0, &context, 
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;
			ret += l;
			continue;
		}
		if (c == ')') {
			if (precedence > 0) break;
			S(1);
			break;
		}

		/* These boolean operators use short-circuit evaluation.
		   They return the last evaluated term as the result. This
		   makes constructs possible like ArgA || ArgB, returning
		   ArgA if that has a true value, or ArgB if ArgA is false. */
		if (((c == '&' && I(1) == '&') ||
		     (c == 'a' && I(2) == 'd' && I(1) == 'n')) && 
		    context != NON) { 
			if (precedence > 5) break;
			S(2 + (c == 'a'));
			saved_o = o;
			o->op = OP_JMPZ; o++;
			o->op = OP_POP; o++; 
			/* Funky feature: 'and' does not require the left hand
			 * term to be any specific type, because to the VM, a
			 * false int looks exactly like a false str, so if we
			 * return that, then that's what the stack has anyway.
			 * In all cases we can set the context after an 'and'
			 * to the right hand's type. */
			context = req_ctx; ctx_item = 0;
			l = reccompile(m, ifaces, source, sourcel,
				       &o, bufsize - ret, 5,
				       ANY, ctx_item, &context,
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;	
			saved_o->imm.d.disp = l / sizeof(INSN) + 1;
			ret += l + 2 * sizeof(INSN);
			continue;
		}

		if (((c == '|' && I(1) == '|') ||
		     (c == 'o' && I(1) == 'r')) && context != NON) { 
			if (precedence > 4) break;
			S(2);
			saved_o = o;
			o->op = OP_JMPNZ; o++;
			o->op = OP_POP; o++; 
			/* Or causes rhs to be converted to lhs' type */
			l = reccompile(m, ifaces, source, sourcel,
				       &o, bufsize - ret, 
				       4, context, ctx_item, 0,
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;	
			saved_o->imm.d.disp = l / sizeof(INSN) + 1;
			ret += l + 2 * sizeof(INSN);
			continue;
		}

		/*
		 * STANDARD OPERATORS
		 */

		/* Search in standard operator table for first match of name
		   and context (or any context). The table must be ordered,
		   with the more specific names and contexts first; the amount
		   of text we test depends entirely on the operator. So be sure
		   not to clash with the start of dictionary items or
		   interfaces; a unary prefix operator 'in' (ie. valid in
		   context 'NON') would clobber the dictionary item 'int', for
		   example! We'll fix that by splitting named and symbolic
		   operators, but not now. */

		for(n = 0, so = stdops; 
		    n < STDOP_CNT &&
		    (FEQ(so->name, so->namel) ||
		     !(so->ctx_l == context || (so->ctx_l == ANY && 
		     			        context != NON)));
		    n++, so++);

		if (n < STDOP_CNT) {

			/* Check precedence. Associating right means don't
			   break if same precedence but continue recursing. */
			if (precedence >= so->prec + (so->assoc == R)) break;

			/* We're now processing this operator, so remove it */
			S(so->namel);

			/* Convert stack top to required left hand type */
			if (so->cvt_l != ANY && so->cvt_l != context) {
				o->op = cvtop(context, so->cvt_l);
				if (o->op != OP_NOP) o++, ret += sizeof(INSN);
			}

			/* Recurse, specifying required right hand type */
			if (so->cvt_r != NON) {
				l = reccompile(m, ifaces, source, sourcel,
					       &o, bufsize - ret, 
					       so->prec, so->cvt_r, ctx_item, 0,
					       fullpath, pathl, 
					       funchead, functail);
				if (l == -1) return -1;	
				ret += l;
			}

			/* Emit opcode */
			if (so->op != OP_NOP) {
				o->op = so->op; o++; ret += sizeof(INSN);
			}

			/* Set new context; reset ctx_item if none (comma) */
			context = so->newctx;
			if (context == NON) ctx_item = 0;
			continue;
		}

		/* The searches below only apply in context NON. So if we are
		 * still looking for a typed binary or postfix operator here,
		 * it won't be found, and we can complain that we don't 
		 * know acalssbout the operator used in the source.
		 *
		 * The terms and unary prefix operators we look for are all
		 * named and consist entirely of letters, digits, dashes and
		 * colons and are ended by any other character. If the last
		 * character of the name is a colon or a dash, we leave that
		 * out, so that 'int:=3' and 'int-3' work as expected.  This
		 * should be changed, because there are attributes that have
		 * digits after a dash, and even values that *start* with
		 * digits and then a dash, eg. 1200-75-BPS. Yuck. We'll fix
		 * that some other time, by always testing for constants first
		 * if there's an attribute in context and we expect a term. */

		if (context != NON || !((len == -1 && (len = SPN())), len)) {
			msg(F_LANG, L_ERR, "compile: ERROR: no valid operator found after term! (missing comma?)\n");
			goto error; 
		}
		if (I(len - 1) == ':' || I(len - 1) == '-') l--;

		/* Include files */

		if (!EQ(len, "include", 7)) {
			S(7);
			src.p = *source; src.l = *sourcel;
			str = str_getword(&src, pstr(" \n\r\t,"));
			*source = src.p; *sourcel = src.l;

			/* We don't autoconvert the subexpression in the file
			 * but inherit the context here */
			l = reccompilefile(m, ifaces, 
					   fullpath, pathl, str.p, str.l, 
					   &o, bufsize - ret, 
					   0, ANY, 0, &context,
					   funchead, functail);
			if (l == -1) return -1;	
			ret += l;
			continue;
		}

		/* Function definitions. These emit a jump over the
		 * function code (cf. the short cut boolean operators) and add
		 * an entry to the function list. */

		if (!EQ(len, "function", 8)) {
			S(8);

			/* Get name */
			src.p = *source; src.l = *sourcel;
			str = str_getword(&src, pstr(" \n\r\t"));
			*source = src.p; *sourcel = src.l;

			/* Emit jump, set displacement after reccompile */
			saved_o = o;
			o->op = OP_JMP; o++; ret += sizeof(INSN);

			/* Add entry to function table */
			func_add(funchead, functail, str.p, str.l, o);

			/* Compile at precedence 28 (unary prefix ops),
			 * make function auto-convert stack top to int
			 * in case there's no explicit return (requiring int),
			 * reset ctx_item, don't care about returned ctx. */
			l = reccompile(m, ifaces, source, sourcel,
				       &o, bufsize - ret, 
				       28, INT, 0, 0,
				       fullpath, pathl, funchead, functail);
			if (l == -1) return -1;	

			/* Emit return in function in case none was present */
			o->op = OP_RETFUNC; o++; l += sizeof(INSN);
			ret += l;

			/* Fixup jump over function */
			saved_o->imm.d.disp = l / sizeof(INSN);

			/* After the function definition, the context is 
			 * exactly as it was. It's as if it never occured. */
			continue;
		}


		/*
		 * FUNCTION CALLS
		 *
		 * These behave as unary prefix operators that
		 * evaluate but pop (NON) the right hand side and return 
		 * the integer returned by the function itself.
		 */

		saved_o = func_getaddr(funchead, *source, len);
		if (saved_o) {
			if (precedence >= 28) break;
			S(len);
			l = reccompile(m, ifaces, source, sourcel, 
				       &o, bufsize - ret, 
				       28, NON, 0, 0,
				       fullpath, pathl, 
				       funchead, functail);
			if (l == -1) return -1;	
			ret += l;

			/* All functions leave an int on stack */
			context = INT;
			o->op = OP_CALLFUNC;
			o->imm.func = saved_o;
			o++; ret += sizeof(INSN);
			continue;
		}


		/*
		 * INTERFACE CALLS
		 */

		/* Search callable interfaces, which behave as unary
		 * prefix operators that evaluate but pop the right hand
		 * side and keep the context at 'NON' (term required) */

		for(ifc = ifaces; 
		    ifc && EQ(len, ifc->name, ifc->namel);
		    ifc = ifc->next);
		if (ifc) {
			if (precedence >= 28) break;
			S(len);
			l = reccompile(m, ifaces, source, sourcel, 
				       &o, bufsize - ret, 
				       28, NON, 0, 0,
				       fullpath, pathl, 
				       funchead, functail);
			if (l == -1) return -1;	
			ret += l;

			/* Emit opcode */
			o->op = OP_CALLIFACE; 
			o->imm.iface = ifc;
			o++; ret += sizeof(INSN); 
			continue;
		}

		/*
		 * DICTIONARY TERMS
		 */

		o->imm.i.item = meta_getitembyextspec(m, *source, len,
						      &o->imm.i.ref);
		if (o->imm.i.item) {
			S(len);

			/* This is a hack. If the reference doesn't specify
			 * a list, record the case of the first letter of the
			 * reference so that the default can be dependent on
			 * that. 
			 *
			 * A lowercase letter causes the opcode's default
			 * target list to be inverted. Used for 'str', etc.,
			 * that you generally set on the request list and test
			 * on the reply list. 
			 *
			 * This will go away. It successfully decreases the
			 * number of prefixes in behaviour files, but is a bit
			 * too hard to automate mentally. */

			if ((o->imm.i.ref.have & AVR_HAVE_LIST) == 0 && 
			    ISLWR(o->imm.i.item->name[0]))
				o->imm.i.ref.list = AVR_LIST_REP;
				/* note that we don't set AVR_HAVE_LIST */

			context = o->imm.i.item->val_type;
			ctx_item = o->imm.i.item;
			o->op = OP_PUSHAV; o++; ret += sizeof(INSN);
			continue;
		}

		/* Error. See if we have an itemized context or not. */
		if (ctx_item) {
			msg(F_LANG, L_ERR, "compile: ERROR: Unknown "
					   "term or operator '%s' (for "
					   "%s:%s:%s)!\n",
			    DBGCVT(len), ctx_item->spc->name,
			    meta_getvndbynr(m, ctx_item->vnd)->name,
			    ctx_item->name);
		}
		else {
			msg(F_LANG, L_ERR, "compile: ERROR: Unknown "
					   "term or operator '%s'!\n", 
			    DBGCVT(len));
		}
		goto error;
	}

	/* Add conversion operator if requested context doesn't match current.
	 * If we were requested a term (req_ctx != NON) and we or our children 
	 * didn't push any, complain. If we were requested no term, and we
	 * or our children pushed one, pop it. Otherwise, autoconvert. */

	if (req_ctx != context) {
		if (context == NON) {
			msg(F_LANG, L_ERR, "compile: ERROR: term expected!\n");
			goto error;
		}
		if (req_ctx == NON) {
			o->op = OP_POP; 
			o++, ret += sizeof(INSN);
		}
		else {
			o->op = cvtop(context, req_ctx);
			if (o->op != OP_NOP) o++, ret += sizeof(INSN);
		}
	}

	*buf = o;
	if (ret_ctx) *ret_ctx = context;
	D1(msg(F_LANG, L_DEBUG, "reccompile: Returned.\n"));
	return ret;

error:
	l = *sourcel; s = " <eof>";
	if (l > 20) l = 20, s = "...";
	if (fullpath) {
		msg(F_LANG, L_ERR, "compile: in %s near '%s'%s\n", 
		    fullpath, DBGCVT(l), s);
		return -1;
	}
	msg(F_LANG, L_ERR, "compile: near '%s'%s\n", DBGCVT(l), s);
	return -1;
}


/* Called by lang_compilefile and when an 'include' operator is found */

static ssize_t reccompilefile(META *m, IFACE *ifaces, 
			      char *path, ssize_t pathl,
			      char *fname, ssize_t fnamel,
			      INSN **buf, ssize_t bufsize, 
			      int precedence, int req_ctx, 
			      META_ITEM *ctx_item, int *ret_ctx,
			      FUNC_T **funchead, FUNC_T **functail)
{
	ssize_t ret;
	char *fullpath;
	STR_T text, src;

	ret = -1;

	/* strtopath assembles base path and absolute or relative pathname in
	 * fname into one full path and puts it in a newly allocated string. 
	 * It returns the length of the dirname part in pathl, so that this
	 * part of fullpath can become the new base path for subsequent
	 * includes. */

	fullpath = strtopath(path, pathl, fname, fnamel, &pathl);
	if (!fullpath) { msg(F_LANG, L_ERR, "reccompile: strtopath failed!\n");
			 return -1; }

	msg(F_MISC, L_NOTICE, "compile: Opening %s\n", fullpath);
	text = readstr(fullpath);
	if (!text.p) {
		msg(F_LANG, L_ERR, "compile: Can't read %s: %s!\n",
		    fullpath, strerror(-text.l));
		free(fullpath);
		return -1;
	}

	src = text;
	ret = reccompile(m, ifaces, &src.p, &src.l, buf, bufsize, 
			 precedence, req_ctx, ctx_item, ret_ctx,
			 fullpath, pathl, funchead, functail);
	free(text.p);
	free(fullpath);
	return ret;
}



/* Called by other modules */

ssize_t lang_compilefile(META *m, IFACE *ifaces, 
			 char *path, ssize_t pathl,
			 char *fname, ssize_t fnamel,
			 INSN *buf, ssize_t bufsize,
			 FUNC_T **funchead, FUNC_T **functail)
{
	ssize_t ret;
	ret = reccompilefile(m, ifaces, 
			     path, pathl, 
			     fname, fnamel, 
			     &buf, bufsize, 
			     0, NON, 0, 0,
			     funchead, functail);

	if (ret != -1 && ret < bufsize - sizeof(INSN)) {
		buf->op = OP_HALT; ret += sizeof(INSN);
	}

	return ret;
}


/* Mainly for testing */

ssize_t lang_compile(META *m, IFACE *ifaces, 
		     char *source, ssize_t sourcel,
		     INSN *buf, ssize_t bufsize,
		     FUNC_T **funchead, FUNC_T **functail)
{
	ssize_t ret;
	ret = reccompile(m, ifaces, &source, &sourcel, &buf, bufsize,
			 0, NON, 0, 0, 0, 0, funchead, functail);

	if (ret != -1 && ret < bufsize - sizeof(INSN)) {
		buf->op = OP_HALT; ret += sizeof(INSN);
	}

	return ret;
}

