/**********************************************************************
*
* DLMTOP Christine Caulfield <ccaulfie@redhat.com>
*
* (c) 2008 Red Hat Inc
*
* $ gcc dlmtop.c -o dlmtop -lpcap -DRHEL4
*
**********************************************************************/

/*
 * TODO:
 *  - Print lockspace name rather than number (if possible)
 *  - Print node name rather than address
 *  - Fix bug where PIDs appear in both lists!
 */

#include <pcap.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <ifaddrs.h>
#include <getopt.h>
#include <errno.h>
#include <limits.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netinet/if_ether.h>
#include <time.h>

/* Capture filter handed to pcap_compile(); main() points it at a
   "port <n>" string of its own when -p is given */
static char *dlm_filter = "port 21064";

/* Version string printed by -V and in the display header */
#define VERSION "v0.2"
/* Number of entries in each of local_procs[] and remote_procs[] */
#define MAX_PROCESSES 512
/* Size of the name buffers used for a process's command line */
#define NAME_LENGTH 1024

#ifdef RHEL4
/*
 * Message layout, command codes and version selected by building with
 * -DRHEL4.  my_callback() casts captured packet bytes to this struct, so
 * the member order together with the packed attribute fixes the byte
 * offsets it reads.  Only m_version, m_cmd, m_length, m_lockspace and
 * m_pid are used by this program.
 */
#define DLM_LVB_LEN            (32)

struct dlm_message {
	uint32_t		m_version;
	uint8_t			m_cmd;		/* What we are */
	uint8_t			m_flags;	/* maybe just a pad */
	uint16_t		m_length;	/* Length of struct (so we can
						   send many in 1 message) */
	uint32_t		m_lkid;     	/* Lock ID tag: ie the local
						   (requesting) lock ID */
	uint32_t		m_lockspace;	/* Lockspace ID */
	uint32_t		m_remlkid;	/* Remote lock ID */
	uint32_t		m_remparid;	/* Parent's remote lock ID */
	uint32_t		m_lflags;	/* Flags from lock/convert req*/
	uint64_t		m_range_start;  /* Yes, these are in the right
						   place... */
	uint64_t		m_range_end;
	uint32_t		m_status;	/* Status to return if this is
						   an AST request */
        uint32_t                m_pid;          /* Owner PID of lock */
	uint32_t		m_lvbseq;
	uint8_t			m_rqmode;	/* Requested lock mode */
	uint8_t			m_asts;         /* Whether the LKB has ASTs */
	char			m_lvb[DLM_LVB_LEN];
	char			m_name[1];	/* As long as needs be. Only
						   used for directory lookups.
						   The length of this can be
						   worked out from the packet
						   length */
} __attribute__((packed));

/* Values of m_cmd that my_callback() attributes to a process; messages
   with any other command are not added to a per-process counter */
#define DLM_MSG_LOCK     2
#define DLM_MSG_UNLOCK   3
#define DLM_MSG_CONVERT  4

/* Expected m_version; my_callback() counts anything else as non-dlm */
#define DLM_VERSION      0x00010001

#endif

#if defined(RHEL5) || defined(STABLE2) || defined(STABLE3)

/*
 * Message layout, command codes and version selected by building with
 * -DRHEL5, -DSTABLE2 or -DSTABLE3.  my_callback() casts captured packet
 * bytes to this struct, so the member order fixes the offsets it reads.
 * Only m_version, m_lockspace, m_length, m_cmd and m_pid are used by this
 * program; m_msgtype is not examined.
 */
struct dlm_message {
	uint32_t		m_version;
	uint32_t		m_lockspace;
	uint32_t		m_nodeid;	/* nodeid of sender */
	uint16_t		m_length;
	uint8_t			m_msgtype;		/* DLM_MSG, DLM_RCOM */
	uint8_t			m_pad;
	uint32_t		m_cmd;		/* DLM_MSG_ */
	uint32_t		m_rnodeid;
	uint32_t		m_pid;
	uint32_t		m_lkid;		/* lkid on sender */
	uint32_t		m_remid;	/* lkid on receiver */
	uint32_t		m_parent_lkid;
	uint32_t		m_parent_remid;
	uint32_t		m_exflags;
	uint32_t		m_sbflags;
	uint32_t		m_flags;
	uint32_t		m_lvbseq;
	uint32_t		m_hash;
	int			m_status;
	int			m_grmode;
	int			m_rqmode;
	int			m_bastmode;
	int			m_asts;
	int			m_result;	/* 0 or -EXXX */
	char			m_name[0];	/* name or lvb */
};

/* Values of m_cmd that my_callback() attributes to a process; messages
   with any other command are not added to a per-process counter */
#define DLM_MSG_LOCK     1
#define DLM_MSG_UNLOCK   3
#define DLM_MSG_CONVERT  2

/* Expected m_version; my_callback() counts anything else as non-dlm */
#define DLM_VERSION      0x00030000

#endif

#ifndef DLM_MSG_LOCK
#error You must build for RHEL4, RHEL5, STABLE2 or STABLE3
#endif

/*
 * Struct for a saved process: one row of the local or remote table.
 *
 *   name        command line from get_command(); only filled in for
 *               entries in local_procs[]
 *   pid         m_pid from the first message seen for this entry
 *   lockspace   m_lockspace from that same message
 *   nodeaddr    the 4 bytes copied from packet offset 0x1A of that packet
 *   locks       DLM_MSG_LOCK messages counted
 *   convs       DLM_MSG_CONVERT messages counted
 *   unlocks     DLM_MSG_UNLOCK messages counted
 *   others      messages with any other command (my_callback() only
 *               reaches its switch for the three commands above)
 *   resources   incremented per lock, decremented (not below 0) per unlock
 *   last_*      copies of locks/convs/unlocks taken by update_deltas(),
 *               used to show per-interval figures
 */
struct locking_process
{
	char name[NAME_LENGTH];
	pid_t pid;
	uint32_t lockspace;
	in_addr_t nodeaddr;
	unsigned int locks;
	unsigned int convs;
	unsigned int unlocks;
	unsigned int others;
	unsigned int resources;
	unsigned int last_locks;
	unsigned int last_convs;
	unsigned int last_unlocks;
};

/* Totals printed in the header line by display() */
/* messages whose m_version did not equal DLM_VERSION */
static int nondlm = 0;
/* calls to my_callback(), i.e. captured packets */
static int packets = 0;
/* messages my_callback() classified as local (is_local_ip() was true) */
static int l_locks = 0;
/* messages my_callback() classified as remote */
static int r_locks = 0;
/* messages rejected because m_length was 0 or greater than 200 */
static int errors = 0;
/* set by main() just before capture starts; not read elsewhere in this file */
static struct timeval starttime;
/* Sort key used by proc_compare(): -l / -r */
typedef enum {SORT_LOCKS, SORT_RESOURCES} sort_type_t;
static sort_type_t sort_type = SORT_LOCKS;
/* Whether counters are shown as running totals (-a) or per-refresh deltas */
typedef enum {DISPLAY_ACCUM, DISPLAY_DELTA} display_type_t;
static display_type_t display_type = DISPLAY_DELTA;

/* Seconds between refreshes; passed to alarm() */
static int interval = 1;
/* Debug verbosity, incremented once per -d */
static int debug = 0;

/* Terminal size from TIOCGWINSZ, or 80x24 if the ioctl fails */
static int term_width;
static int term_height;

/* Per-process tables and the number of entries in use in each.
   find_proc() and sort_proc() tell the tables apart by address. */
static struct locking_process local_procs[MAX_PROCESSES];
static struct locking_process remote_procs[MAX_PROCESSES];
static int num_local_procs;
static int num_remote_procs;

/*
 * Find the entry for 'pid' in 'array', creating one if it is not there.
 *
 * 'array' must be local_procs or remote_procs; the matching element count
 * is chosen by comparing the pointer against local_procs.
 *
 * Returns the existing entry, or a zeroed entry for the caller to fill in.
 * This always succeeds: when the table is full the last slot is recycled
 * instead of failing.
 */
static struct locking_process *find_proc(struct locking_process *array, pid_t pid)
{
	int *count = (array == &local_procs[0]) ? &num_local_procs
						 : &num_remote_procs;
	struct locking_process *proc;
	int i;

	for (i = 0; i < *count; i++) {
		if (array[i].pid == pid)
			return &array[i];
	}

	if (*count < MAX_PROCESSES) {
		/* Room left: hand out the next unused slot */
		proc = &array[*count];
		(*count)++;
	} else {
		/* Table full: discard the last element in the hope that it's
		   not much used anyway.  The last valid index is
		   MAX_PROCESSES - 1; array[MAX_PROCESSES] is past the end. */
		proc = &array[MAX_PROCESSES - 1];
	}

	/* Clear out new entry in case it is being reused */
	memset(proc, 0, sizeof(*proc));

	return proc;
}

static void update_deltas(struct locking_process *array, int size)
{
	int i;

	for (i=0; i<size; i++) {
		array[i].last_locks = array[i].locks;
		array[i].last_unlocks = array[i].unlocks;
		array[i].last_convs = array[i].convs;
	}
}

/*
 * qsort() comparator that orders entries from busiest to least busy.
 *
 * The key is 'resources' when sorting by resources; otherwise it is
 * 'locks', or the growth in 'locks' since the last update_deltas() when
 * deltas are being displayed.
 *
 * The keys are unsigned, so they are compared rather than subtracted: a
 * difference converted to int has the wrong sign once the gap is large.
 */
static int proc_compare(const void *a, const void *b)
{
	const struct locking_process *pa = a;
	const struct locking_process *pb = b;
	unsigned int key_a;
	unsigned int key_b;

	if (sort_type != SORT_LOCKS) {
		key_a = pa->resources;
		key_b = pb->resources;
	}
	else if (display_type == DISPLAY_DELTA) {
		key_a = pa->locks - pa->last_locks;
		key_b = pb->locks - pb->last_locks;
	}
	else {
		key_a = pa->locks;
		key_b = pb->locks;
	}

	/* Descending order: the larger key sorts first */
	return (key_b > key_a) - (key_b < key_a);
}

static void sort_proc(struct locking_process *array)
{
	int array_size;

	if (array == &local_procs[0])
		array_size = num_local_procs;
	else
		array_size = num_remote_procs;

	qsort(array, array_size, sizeof(struct locking_process), proc_compare);
}

/*
 * Get the command line for a PID.
 *
 * Reads /proc/<pid>/cmdline and joins the NUL-separated arguments with
 * spaces.  Returns a pointer to a static buffer that is overwritten by the
 * next call; the string is empty if the file cannot be opened or read, or
 * if it starts with a NUL.
 */
static char *get_command(pid_t pid)
{
	char path[64];
	static char cmd[1024];
	size_t i;
	FILE *f;

	/* The buffer is static, so wipe it completely: bytes left over from
	   a previous, longer command line would otherwise be joined onto
	   this one by the loop below. */
	memset(cmd, 0, sizeof(cmd));

	snprintf(path, sizeof(path), "/proc/%d/cmdline", (int)pid);
	f = fopen(path, "r");
	if (!f)
		return cmd;

	if (fgets(cmd, sizeof(cmd), f) == NULL)
		memset(cmd, 0, sizeof(cmd));
	fclose(f);

	/* Cope with empty names */
	if (cmd[0] == '\0')
		return cmd;

	/* Return the full command line: a NUL followed by more text is an
	   argument separator.  Stop one short of the end so that cmd[i+1]
	   stays inside the buffer. */
	for (i = 0; i + 1 < sizeof(cmd); i++)
		if ((cmd[i] == 0) &&
		    (cmd[i+1] != 0))
			cmd[i] = ' ';

	return cmd;
}

/*
 * Report whether 'test_ip' (an IPv4 address as stored in sin_addr.s_addr)
 * belongs to this host.
 *
 * We assume that the local host only has 1 IPv4 address: the first address
 * that matches an interface is remembered, and from then on only that
 * address is treated as local, without asking the system again.
 *
 * Returns 1 for a local address, 0 otherwise (including when getifaddrs()
 * fails).
 */
static int is_local_ip(unsigned int test_ip)
{
	struct ifaddrs *ifa, *ifa_list;
	static unsigned int local_ip = 0;
	int found = 0;

	if (local_ip)
		return local_ip == test_ip;

	if (getifaddrs(&ifa_list))
		return 0;

	for (ifa = ifa_list; ifa; ifa = ifa->ifa_next) {
		const struct sockaddr_in *sin;

		if (!ifa->ifa_addr || ifa->ifa_addr->sa_family != AF_INET)
			continue;

		sin = (const struct sockaddr_in *)ifa->ifa_addr;
		if (test_ip == sin->sin_addr.s_addr) {
			local_ip = test_ip;
			found = 1;
			break;
		}
	}

	/* Release the list on every path, including a successful match */
	freeifaddrs(ifa_list);
	return found;
}

/* Maybe we should use ncurses */
/*
 * Print the table of local processes: a heading line followed by up to
 * term_height/2 - 5 rows taken from the start of local_procs[].
 *
 * Counters are shown as totals in DISPLAY_ACCUM mode and otherwise as the
 * change since the last update_deltas().  The command is cut to the space
 * left on the line.
 */
static void display_local_procs(void)
{
	int num_procs = term_height/2 - 5;
	int name_max = term_width - 57; /* col 57 is where the command starts */
	int i;
	char name[NAME_LENGTH];

	/* Keep the copy length inside name[]: a terminal narrower than 57
	   columns would make it negative, a very wide one too large. */
	if (name_max < 0)
		name_max = 0;
	if (name_max > NAME_LENGTH - 1)
		name_max = NAME_LENGTH - 1;

	printf("  pid   lockspace    locks   unlcks    convs      res   cmd\n");

	for (i=0; i<num_procs && i<num_local_procs; i++) {

		/* Don't overflow the line; snprintf always terminates */
		snprintf(name, (size_t)name_max + 1, "%s", local_procs[i].name);

		printf("%5d   %08x   %7d  %7d  %7d  %7d   %s \033[K\n", // esc[K erases to EOL
		       local_procs[i].pid,
		       local_procs[i].lockspace,
		       display_type == DISPLAY_ACCUM ? local_procs[i].locks:local_procs[i].locks-local_procs[i].last_locks,
		       display_type == DISPLAY_ACCUM ? local_procs[i].unlocks:local_procs[i].unlocks-local_procs[i].last_unlocks,
		       display_type == DISPLAY_ACCUM ? local_procs[i].convs:local_procs[i].convs-local_procs[i].last_convs,
		       local_procs[i].resources,
		       name);
	}
}

static void display_remote_procs(void)
{
	int num_procs = term_height/2 - 5;
	int i;

	printf("  pid   lockspace    locks   unlcks    convs      res   node\n");

	for (i=0; i<num_procs; i++) {
		if (i < num_remote_procs) {
			struct in_addr addr;
			addr.s_addr = remote_procs[i].nodeaddr;
			printf("%5d   %08x   %7d  %7d  %7d  %7d   %s\033[K\n", // esc[K erases to EOL
			       remote_procs[i].pid,
			       remote_procs[i].lockspace,
			       display_type == DISPLAY_ACCUM ? remote_procs[i].locks:remote_procs[i].locks-remote_procs[i].last_locks,
			       display_type == DISPLAY_ACCUM ? remote_procs[i].unlocks:remote_procs[i].unlocks-remote_procs[i].last_unlocks,
			       display_type == DISPLAY_ACCUM ? remote_procs[i].convs:remote_procs[i].convs-remote_procs[i].last_convs,
			       remote_procs[i].resources,
			       inet_ntoa(addr)); // TODO name
		}
	}
}

/* Clear the screen and print the title line and the global counters */
static void display_header(void)
{
	char stamp[1024];
	time_t secs = time(NULL);
	struct tm *tm_now = localtime(&secs);

	strftime(stamp, sizeof(stamp), "%d-%b-%Y  %H:%M:%S", tm_now);

	/* Clear screen and display the header */
	printf("\033[H\033[2Jdlmtop %s     %s ", VERSION, stamp);
	printf("\n");
	printf("\nSeen %d packets, %d locals, %d remote,  %d non-dlm, %d errors\n", packets, l_locks, r_locks, nondlm, errors);
	printf("\n");
}

/*
 * Show what we have seen.
 *
 * Installed by main() as the SIGALRM handler and also called directly by
 * new_term_size(); the argument is not used.  Sorts both tables, redraws
 * the whole screen, takes a new delta baseline when deltas are being
 * displayed, and re-arms the alarm for the next refresh.
 */
static void display(int ignored)
{
	/* Keep them in order */
	sort_proc(remote_procs);
	sort_proc(local_procs);

	display_header();

	printf("Local\n");
	display_local_procs();

	/* Move the cursor to the lower half of the screen for the remote table */
	printf("\033[%d;0H", term_height/2+2);
	printf("\nRemote\n");
	display_remote_procs();

	/* Only delta mode needs a fresh baseline after each refresh */
	if (display_type == DISPLAY_DELTA) {
		update_deltas(remote_procs, num_remote_procs);
		update_deltas(local_procs, num_local_procs);
	}

	fflush(stdout);
	alarm(interval);
}

/*
 * SIGWINCH handler, called when a user resizes the screen.
 * Re-reads the terminal size from stdout, falling back to 80x24 if the
 * ioctl fails, then redraws straight away.
 */
static void new_term_size(int sig)
{
	struct winsize ws;

	if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) != 0) {
		perror("ioctl on stdout");
		term_height = 24;
		term_width = 80;
	}
	else {
		term_height = ws.ws_row;
		term_width = ws.ws_col;
	}

	display(0);
}


static u_char save_buffer[4096]; /* More than enough for two packets */
static int save_len = 0;

/* Process a packet */
static void my_callback(u_char *useless,const struct pcap_pkthdr* pkthdr,
			const u_char* packet)
{
	struct dlm_message *req;
	int offset = 0x42;    /* Skip past the TCP/IP header */
	int ip_offset = 0x1A; /* Location of the local IP address */
	int len;
	struct locking_process *proc = NULL;
	struct locking_process *proc_array;
	unsigned int ip_addr;

	memcpy(&ip_addr, packet+ip_offset, sizeof(ip_addr));

	if (save_len) {
		if (debug>1)
			fprintf(stderr, "CC: Brought forward %d bytes. pktlen=%d\n", save_len, pkthdr->caplen);
		memcpy(save_buffer+save_len, packet+offset, pkthdr->caplen-offset);
		packet = save_buffer;
		len = pkthdr->caplen + save_len - offset;
		save_len = 0;
		offset = 0;
	}
	else {
		len = pkthdr->caplen;
		memset(save_buffer, 0, sizeof(save_buffer));
	}

	packets++;
	while (offset < len) {
		req = (struct dlm_message *)((char *)packet+offset);

		/* The packet remaining is too small to be of use */
		if (len - offset < offsetof(struct dlm_message, m_length))
			return;

		/* Non-dlm packet */
		if (req->m_version != DLM_VERSION) {
			if (debug)
				fprintf(stderr, "CC: error non-dlm version = %x\n", req->m_version);
			nondlm++;
			return;
		}

		/* Ignore error packets */
		if (req->m_length <= 0 || req->m_length > 200) {
			if (debug)
				fprintf(stderr, "CC: error packet length = %d\n", req->m_length);
			errors++;
			save_len = 0;
			return;
		}

		/* Carry forward incomplete packets */
		if (offset+req->m_length > len) {
			save_len = len - offset;
			memcpy(save_buffer, packet+offset, save_len);
			if (debug>1)
				fprintf(stderr, "CC: Carried forward %d bytes. pktlen=%d, reqlen=%d, offset=%d\n", save_len, len, req->m_length, offset);
			return;
		}

		/* Is it a local or remote packet ? */
		if (is_local_ip(ip_addr)) {
			proc_array = local_procs;
			l_locks++;
		}
		else {
			proc_array = remote_procs;
			r_locks++;
		}

		if (debug > 2)
			fprintf(stderr, "%s PACKET (IP:%x): cmd: %d, pid: %d\n", proc_array==&local_procs[0]?"Local ":"Remote", ip_addr, req->m_cmd, req->m_pid);

		if (req->m_cmd == DLM_MSG_LOCK ||
		    req->m_cmd == DLM_MSG_UNLOCK ||
		    req->m_cmd == DLM_MSG_CONVERT) {

			/* Not sure what's happening here */
			if (req->m_pid > 32768) {
				if (debug)
					fprintf(stderr, "CC: odd pid %d seen. set to zero\n", req->m_pid);
				req->m_pid = 0; // ???
			}

			proc = find_proc(proc_array, req->m_pid);

			/* If this is a new struct, fill in the details */
			if (!proc->pid) {
				proc->pid = req->m_pid;
				proc->lockspace = req->m_lockspace;
				if (proc_array == &local_procs[0]) {
					strcpy(proc->name, get_command(req->m_pid));
				}
				proc->nodeaddr = ip_addr;
			}

			switch (req->m_cmd) {
			case DLM_MSG_LOCK:
				proc->locks++;
				proc->resources++;
				break;
			case DLM_MSG_UNLOCK:
				proc->unlocks++;
				if (proc->resources > 0)
					proc->resources--;
				break;
			case DLM_MSG_CONVERT:
				proc->convs++;
				break;
			default:
				proc->others++;
				break;
			}
		}
		offset += req->m_length;
	}
}

/*
 * Write the usage summary to 'f'.
 * 'cmd' is printed as the program name in the synopsis line.
 */
static void usage(char *cmd, FILE *f)
{
	static const char *const option_help[] = {
		"   -r        Sort output by resources (default lock ops)\n",
		"   -l        Sort by lock operations (default)\n",
		"   -a        Show accumulated counters rather than per <interval> operations\n",
		"   -i <secs> Set the refresh interval (default 1 second)\n",
		"   -p <port> Change the TCP port to listen on (default 21064)\n",
		"   -d[ddd]   Enable/increase debugging messages to stderr\n",
		"   -h        Show this help message\n",
	};
	size_t line;

	fprintf(f, "Usage:\n");
	fprintf(f, "%s [hVrladip]\n", cmd);
	fprintf(f, "\n");
	fprintf(f, "Options\n");

	/* One line per option, in the order listed above */
	for (line = 0; line < sizeof(option_help) / sizeof(option_help[0]); line++)
		fputs(option_help[line], f);

	fprintf(f, "\n");
}

int main(int argc,char **argv)
{
    char *dev;
    char errbuf[PCAP_ERRBUF_SIZE];
    char bpf_text[132];
    pcap_t* descr;
    struct bpf_program fp;      /* hold compiled program     */
    signed char opt;
    struct winsize w;
    int st;

    /* Deal with command-line arguments */
    opterr = 0;
    optind = 0;
    while ((opt=getopt(argc,argv,"?hVrladi:p:")) != EOF)
    {
	switch(opt)
	{
	case 'h':
		usage(argv[0], stdout);
		exit(0);
	case 'V':
		fprintf(stderr, "dlmtop version " VERSION "\n");
		exit(0);
	case 'r':
		sort_type = SORT_RESOURCES;
		break;
	case 'l':
		sort_type = SORT_LOCKS;
		break;
	case 'a':
		display_type = DISPLAY_ACCUM;
		break;
	case 'i':
		interval = atoi(optarg);
		break;
	case 'p':
		sprintf(bpf_text, "port %d", atoi(optarg));
		dlm_filter = bpf_text;
		break;
	case 'd':
		debug++;
		break;
	default:
		usage(argv[0], stderr);
		exit(0);
	}
    }

    signal(SIGALRM, display);

    /* Get (and maintain) terminal size */
    signal(SIGWINCH, new_term_size);
    st = ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
    if (st)
    {
            perror("ioctl on stdout");
            term_width = 80;
	    term_height = 24;
    }
    else
    {
            term_width = w.ws_col;
	    term_height = w.ws_row;
    }

    memset(local_procs, 0, sizeof(local_procs));
    memset(remote_procs, 0, sizeof(remote_procs));

    /* grab a device to peek into... */
    dev = pcap_lookupdev(errbuf);
    if(dev == NULL)
    { fprintf(stderr,"%s\n",errbuf); exit(1); }

    /* open device for reading this time lets set it in promiscuous
     * mode so we can monitor traffic to another machine             */
    descr = pcap_open_live(dev,65536,1,-1,errbuf);
    if(descr == NULL)
    { printf("pcap_open_live(): %s\n",errbuf); exit(1); }

    /* Lets try and compile the program.. non-optimized */
    if(pcap_compile(descr,&fp,dlm_filter,0,-1) == -1)
    { fprintf(stderr,"Error calling pcap_compile\n"); exit(1); }

    /* set the compiled program as the filter */
    if(pcap_setfilter(descr,&fp) == -1)
    { fprintf(stderr,"Error setting filter\n"); exit(1); }

    /* ... and loop */
    gettimeofday(&starttime, NULL);
    alarm(interval);
    fprintf(stderr, "Listening for DLM packets ...\n");
    pcap_loop(descr,-1,my_callback,NULL);

    return 0;
}
