/*
 * STRIO - Functions to format and output p, l strings
 *
 * Author:
 * Emile van Bergen, emile@evbergen.xs4all.nl
 *
 * Permission to redistribute an original or modified version of this program
 * in source, intermediate or object code form is hereby granted exclusively
 * under the terms of the GNU General Public License, version 2. Please see the
 * file COPYING for details, or refer to http://www.gnu.org/copyleft/gpl.html.
 *
 * History:
 * 2005/11/04 - EvB - Created
 */


#ifndef _STRIO_H
#define _STRIO_H 1


/*
 * INCLUDES & DEFINES
 */


#include <unistd.h>		/* For write */
#include <fcntl.h>		/* For O_RDONLY */
#include <errno.h>		/* For errno */
#include <string.h>		/* For strerror */
#include <stdarg.h>		/* For va_list */

#include <evblib/sysdefs/sysdefs-libc.h>
#include <evblib/str/str.h>


/* Write a constant C string to a file descriptor, resp. stdout or stderr.
 *
 * fdputcs(fd, s) expands to write(fd, pstr(s)), so the value of the expression
 * is whatever write() returns; checking it is left to the caller. pstr is not
 * defined in this header -- presumably it comes from <evblib/str/str.h> and
 * expands to a pointer, length argument pair for s (TODO confirm).
 *
 * outputcs(s) and errputcs(s) are fdputcs on descriptor 1 resp. 2. */

#define fdputcs(fd, s)	    write(fd, pstr(s))
#define outputcs(s)	    fdputcs(1, (s))
#define errputcs(s)	    fdputcs(2, (s))


/* 
 * FUNCTIONS
 */


/* Read an entire file into a newly allocated string. If it fails, l is
 * negative (-errno after the system call that failed). Use l to find or report
 * the reason, not errno, as that may have been overwritten by cleanup calls. p
 * must be freed if non-NULL. p will be NULL if l is zero or negative.
 *
 * Three entry points share this contract:
 *
 * readstr	takes the file by name (fname) and uses readstrfd
 * readstrfd	takes a file descriptor; uses seek to get the size and seeks
 *		to start
 * reststreof	takes a file descriptor; uses reallocations, no seek.
 *		NOTE(review): guesslen is presumably the initial size estimate
 *		for the buffer -- confirm against the implementation. */

STR_T readstr(char *fname);	/* uses readstrfd */
STR_T readstrfd(int fd);	/* uses seek to get size and seeks to start */
STR_T reststreof(int fd, ssize_t guesslen);
				/* uses reallocations, no seek */


/* Form an ASCIIz full path name based on two STR_T strings, and return
 * length of directory component.
 *
 * The two input strings are passed as pointer, length pairs: path, pathl and
 * fname, fnamel.
 *
 * NOTE(review): the length of the directory component is presumably stored
 * through retpathlen and the returned char * is presumably the resulting
 * path; who owns that memory cannot be told from this header -- confirm
 * against the implementation. */

char *strtopath(char *path, ssize_t pathl, char *fname, ssize_t fnamel,
		ssize_t *retpathlen);


/* Conversion from STR_T to integers. These are here not because I like to
 * reinvent the strtoul wheel, but because there's no such thing as a
 * strntoul. strtoul requires zero termination and cannot deal with length
 * limited strings. We can do either length terminated or zero terminated
 * strings (NUL is always a non-digit, so always stops the conversion anyway);
 * specify -1 for l if you can guarantee that the string at p is terminated
 * with a nondigit. Note that these functions are required to always set *got
 * if got is non-NULL, regardless of termination cause. If no bytes are taken,
 * these return 0.
 *
 * NOTE(review): *got presumably receives the number of bytes taken from p,
 * and base is presumably the numeric base of the digits -- confirm against
 * the implementation.
 */

/* Parameter list shared by all four conversion functions below, so that their
 * prototypes cannot drift apart. */
#define STRTOPROTO	char *p, ssize_t l, ssize_t *got, int base

int strtoint(STRTOPROTO);
long strtolong(STRTOPROTO);
unsigned int strtouint(STRTOPROTO);
unsigned long strtoulong(STRTOPROTO);


/* [v]formatstr, [v]formatfd - format integers and arguments to strings or file
 * descriptor output
 *
 *
 * INTRODUCTION
 *
 * The functions devised here share some similarities with the stdio printf
 * family, but are not the same. The problem is that the widely portable subset
 * of stdio features is small and leaves many problems unsolved, the least of
 * which being field truncation. Also, snprintf, asprintf and (f)dprintf aren't
 * available everywhere, and emulation using sprintf is unattractive and causes
 * security problems. Lastly, the C99 specification of printf and especially
 * glibc's implementation have gone way beyond the baroque stage and firmly
 * into rococo, in my humble opinion.
 *
 * The properties that are different from the stdio printf family are:
 *
 * - restartable: if output runs out of space, we're able to resume; we always
 *   either output a value completely or not at all; we return the number of
 *   arguments processed, and we allow the caller to specify the number of 
 *   arguments to be skipped. That means you don't have to perform again all
 *   conversion from the start if it turns out you need to extend a buffer, as
 *   is the case with [v]asprintf implementations.
 *
 * - no mixing of data and metadata: this helps to avoid security problems.
 *   Format specifiers are just that. They are never output. String values are
 *   just that, they are never interpreted. Arguments alternate between format
 *   strings and values.
 *
 * - environment variables do not influence output. That also avoids many
 *   security problems. It also means no i18n, but that should be done one
 *   level above this one anyway, because different languages mean different
 *   field orders in most cases. The %m$ and *m$ formats are nice kludges, but
 *   don't exist in C99 and require considerable complexity from the 
 *   implementation, especially if we'd want to combine those with the 
 *   restartable property.
 *
 *   Our functions are mostly intended to glue values of different type
 *   together into one string or write call, and more focused on alignment,
 *   padding and truncation than on the conversion of individual fields.
 *
 * - guaranteed maximum output length: absolute maximum widths can be specified
 *   for all argument types. Precision specifiers are not overloaded to allow
 *   a maximum width to be specified for certain conversions, but not for
 *   others (most importantly, not for the numeric ones in printf). Thus, if a
 *   maximum is specified for each field, the length of the output is
 *   guaranteed to be smaller or equal to the exact sum of field widths.
 *
 * - features not in printf:
 *   - supports char *, ssize_t strings
 *   - supports printing binary strings using C style escapes 
 *   - supports aligning around any given character, such as decimal separators
 *   - supports truncation with a user specified truncation indicator, such 
 *     as + or ..., either inside or just beyond the field into the next or 
 *     previous, where one is available.
 *   - powerful floating point support: the number of significant digits can be
 *     specified and exponential notations can be customized. Right side
 *     zero padding after the decimal point will never cause more digits to
 *     be output than the given number of significant digits; beyond that,
 *     spaces will be used instead.
 *
 *
 * FORMAT STRINGS
 *
 * Format strings consist of an optional field width specifier, followed by
 * an argument type specifier, followed by any number of formatting parameters.
 *
 * THE FIELD WIDTH SPECIFIER
 *
 * Field width specifiers are formatted as follows:
 *
 * [min][.max]	    minimum and maximum number of octets to cover in output 
 * 		    by the converted argument; min and max being positive 
 * 		    integers, written in decimal
 *
 * THE ARGUMENT TYPE SPECIFIER
 *
 * The argument type specifier is a string not containing the following
 * characters ; * NUL.
 *
 * The following argument type specifiers are understood. These are intended
 * firstly to specify the C type to be taken from the variable argument list,
 * secondly to specify which conversion to perform, and lastly to specify 
 * certain default formatting parameters for the conversion.
 *
 * Integers (converted in base 10 by default)
 *
 * - c	    signed char (int8_t, see note regarding 'char' at the end)
 * - C	    unsigned char (uint8_t, idem)
 * - h	    int16_t
 * - H	    uint16_t
 * - l	    int32_t
 * - L	    uint32_t
 * - q	    int64_t
 * - Q	    uint64_t
 * - d, i   host's int 
 * - u	    host's unsigned int
 * - x	    same as u, but sets base to 16
 * - X	    same as u, but sets base to 16, uppercase letters
 *
 * Floating point values
 *
 * - F	    double, sets alignment around . at 2 octets from right
 * - D	    long double, alignment around . at 4 octets from right
 *
 * Strings
 *
 * - s	    binary string at char *p, zero terminated
 * - S	    binary string at char *p, ssize_t l
 * - m	    same as s, but uses strerror(errno) and does not take an argument
 * - M	    same as m, but takes integer argument for use instead of errno
 *
 * Addresses
 *
 * - a	    void * address, base 16 by default
 * - A	    void * address, base 16 by default, uppercase letters
 * - z	    ssize_t
 * - Z	    size_t
 * - .	    none; used for outputting padding characters and the like
 *
 * From the above set, only m and . don't take an argument from the argument
 * list. All others do. If an implementation does not understand an argument
 * type character, it does not know what argument to take, if any, and so we
 * must return an error as we've lost synchronisation. It's too bad stdarg
 * does not preserve argument boundaries. The only alternative is to pass an
 * array of unions or to use GCC and specify a format where every ... argument
 * is promoted to the union's width first. But arrays are a bad alternative, 
 * because non-constant immediate arrays are not widely supported.
 *
 * FORMATTING PARAMETERS
 *
 * Formatting parameters are written as follows:
 *
 * ;a 	specifies parameter flag 'a'
 * ;a*	specifies parameter 'a' with value v taken from next argument
 * ;a=v	specifies parameter 'a' with value v
 *
 * The parameter a is named by a string that may contain all characters 
 * except NUL ; * =
 *
 * If the value 'v' is numeric, it is written in decimal after '='; if
 * the value is a string, any character may occur after '=' with the
 * exception of the delimiter ';'. If the value is a string on the argument
 * list, it is given as a char *p, ssize_t l; if l is -1, the string at p
 * must be zero-terminated.
 *
 * The following formatting parameters are supported. An 'n' indicates
 * that the parameter takes a numeric value, an 's' indicates that it takes
 * a string value, a 'c' indicates a character if taken from the parameter
 * string and a numeric value if taken from the argument list.
 *
 * For all types:
 *
 * - Xp s   where X is l or r; specifies padding string s to be used, wrapping
 *   	    around s if there's more to be padded than the string is long, on 
 *   	    the left resp. right instead of the default, which is a single 
 *   	    space on both sides
 * - Xt s   specifies that truncation must be indicated using a string.
 *   	    Default values if specified without them: l="{", r="}"
 * - Xe s   like t, but overwrites the last byte(s) of the previous field or 
 *   	    the first byte(s) of the next, if that exists and has sufficient
 *   	    width. Up to half (rounded up) of the indicator is moved into the 
 *   	    adjacent field.
 * - < n    specifies left alignment instead of default, which is left for
 *   	    strings and right for numeric values; padding and truncation 
 *   	    happens on the right. Argument value specifies the distance
 *          of the alignment character (start of string or given by f or l)
 *          from the left
 * - > n    same as <, but exchange all occurrences of left and right and
 *          start of string with end of string
 * - f c    align the first occurrence of the specified character, dot
 *          by default (padding and truncation happens on both sides)
 * - l c    same as f, but align the last occurrence of given character
 *
 * For integer types:
 *
 * - b n    sets base to n, where 2 <= n <= 36, output characters 0-9a-z
 * - B n    same as b, but uses uppercase letters
 * - ber    specifies a BER compressed integer (base 128, bit 7 set in all
 *          output octets except the last)
 * - n	    output as binary in big-endian (network) order
 * - v	    output as binary in little-endian (VAX) order
 *
 * For floating point types:
 *
 * - d n    specifies number of significant digits; padding after decimal
 *          point beyond significant digits happens with spaces instead of 
 *          zeroes; if the number of significant digits is reached before
 *          the decimal point is reached, exponential notation is used.
 * - e n    specifies exponential notation with a mantissa that has n digits
 *   	    before the decimal point (1 by default)
 * - em n   same as e, using a mantissa with up to n digits (3 by default)
 *	    before the decimal point, so that the exponent is always a
 *	    multiple of n
 * - E s    specifies string to be used to introduce the exponent instead of
 *   	    "e", eg. to create |     -142.55   x 10^+43 | (s=" x 10^")
 *   	    instead of         |          -142.55  e+43 | (s="e", default)
 *   	    if there are 5 significant digits and the decimal point is aligned 
 *   	    4 positions from the right (the exponent doesn't count there)
 *
 * For string types:
 *
 * - p	    creates 'printable' output without loss of information, using C 
 *   	    style hex escapes (\xaa) for the characters 0-31 \ 127-255
 * - q	    creates 'quotable' output without loss of information, using C 
 *   	    style hex escapes for the characters 0-31 " ' \ 127-255
 * - x	    transforms each octet into its value as two hexadecimal digits
 * - X	    same as x, but uses uppercase characters
 *
 *
 * A note regarding the C char type
 *
 * I know that a C char is formally not required to be 8 bits, and that it
 * refers to a character value instead of a small integer. But in practice,
 * compiler writers never take advantage of that freedom, at least not anymore.
 * Not in the least perhaps because sizeof(char) == 1 by ANSI definition, so
 * that the other types can only be a multiple of sizeof(char) wide, making it
 * impractical to ever use a single char to store a 32-bit unicode value, which
 * is the only wide character encoding that would ever gain enough momentum to
 * influence future compiler writers.
 *
 * Also, there are so many assumptions about char being 8 bits in virtually all
 * C programs in existence, that I don't think it's worth it to obsessively
 * write all references to unaligned buffers containing packed octets as int8_t
 * * or uint8_t *. Everyone does so using char *, and I gladly continue that
 * tradition. Perhaps a bit sloppy from a formal point of view, but a conscious
 * decision nonetheless. 
 */


/* Format the variable arguments, which alternate between format strings and
 * values as described in the comment preceding these declarations.
 * formatstr takes them as ... arguments, vformatstr as a va_list.
 *
 * NOTE(review): p and l are presumably the output buffer and its size, and
 * skip presumably carries the number of arguments to skip / the number
 * processed, which is what makes the call restartable; the exact meaning of
 * the ssize_t return value cannot be told from this header -- confirm against
 * the implementation. The formatfd variants mentioned in that comment are not
 * declared here. */

ssize_t formatstr(char *p, ssize_t l, int *skip, ...);

ssize_t vformatstr(char *p, ssize_t l, int *skip, va_list ap);


#endif	/* _STRIO_H */


/*
 * vim:softtabstop=4:sw=4
 */
