;;; -*- mode:lisp; base:10.; package:user; -*- ;;;
;;; $Header: /ct/debug/dfind.l,v 1.31 84/11/28 16:52:14 bill Exp $
(putprop 'dfind "$Revision: 1.31 $" 'rcs_revision)


;;; d-find

;;; This file contains code to allow the easy retrieval of a best
;;; fitting Diana node.

;;;  The challenge is:  given a source position within a user program
;;; (a count of bytes from the beginning of the file), find the diana
;;; node which best contains the position (each diana node has a range
;;; associated with it), and is also a member of some class.

;;;  Searching a diana tree at run time is expensive.  There are many
;;; (hundreds or thousands of) nodes, and the work required to look at
;;; all the nodes is non-trivial.  

;;;  Each time we look at a new diana-tree, we calculate a flattened
;;; list that includes source position and a pointer to the correct 
;;; node.  This list is then used to build several lists of interest.
;;; Each of these sublists contains only nodes of a certain type or
;;; group of types.  

;;;  In use, if someone asks for the best node of type '(foo bar)
;;; we check first to see if we are looking at the right diana tree.
;;; If not, we get the flattened form of the diana tree. We then
;;; check to see if we have the correct sublist for the type-list
;;; submitted.  If we don't have it, we calculate it.  Finally, we
;;; return the best fitting node.





;;;  ****************************************************************
;;;  Constants, other needed goodies, specials, etc.
;;;  ****************************************************************


#+franz(declare (macros t))			;in case we want macros.
#+franz (setq *flavor-expand-macros* t)

(eval-when (compile load eval) (ct_load 'compat))
(eval-when (compile load eval) (ct_load 'diana))
(eval-when (compile load eval) (ct_load 'ferec))

;;;
;;;  A tree is built on the assumption that the nodes can be arranged in
;;; a hierarchy; any given node is wholly contained by another node.
;;; The tree consists of POINTS, each of which is a list of a node
;;; and any subordinate points.  The leaves of the tree are of the
;;; same format, but there are no subordinate points.
;;;

;;;Specials from other modules
(declare (special *db%hidden_files*))

;;; for debugging.
(defvar *d-find-diana-tree* nil)

#+franz
(declare (localf d-find-flatten-tree
		 d-find-walk-tree
		 maybe-attach-flat-tree
		 int-d-find-walk-tree
		 d-find-make-tree
		 add-to-tree
		 contains
		 contains-point
		 search-tree
	 ))

(defconst *plus-infinity* 9999999999.)




;;;  ****************************************************************
;;;  User accessible functions, macros, etc.
;;;  ****************************************************************

;;; Flatten a group of diana trees. Loop through the list and call the flattener on
;;; each. The list must be self containing. That is, any node reachable from any
;;; tree should have a tree in the list with the corresponding path.

(defun flatten-diana-trees (diana-trees)
  ;; just in case we don't have a list
  (or (listp diana-trees) (setq diana-trees (list diana-trees)))

  ;; make sure things look good
  (loop for diana-tree in diana-trees
	if (not (diana_nodep diana-tree))
	do (lose 'bad-tree 'flatten-diana-trees '("Bad diana tree"))
	for old-source-region = (diana_get diana-tree 'lx_srcpos)
	do (%= (source_region%startchar old-source-region) (- *plus-infinity* -1))
	do (%= (source_region%endchar old-source-region) (1- *plus-infinity*))
	do (diana_put diana-tree old-source-region 'lx_srcpos))

  (loop for diana-tree in diana-trees
	;; for debugging, keep pointer to last tree looked at.
	do (setq *d-find-diana-tree* diana-tree)
  
	;; finally flatten the sucker
	do (d-find-flatten-tree diana-tree diana-tree diana-trees)))


;;;  Returns the best fitting nodes for the source position and type.
;;;  This guarantees a correct answer, even if it has to update tables.

(defun get-best-nodes (diana-tree source-pos node-type-list &aux result)
  ;; for debugging, keep pointer to last tree looked at.
  (setq *d-find-diana-tree* diana-tree)
  
  ;; Check to see that things look ok.
  (ct_if (not (and (diana_nodep diana-tree)
		   (diana_late_get diana-tree ':d-find-flat-tree)))
	 (lose 'bad_get_best 'get-best-nodes '("Bad tree passed to get best nodes")))

  ;; If we don't have an entry yet for this tree and this node-type-list,
  ;; create one.
  (ct_if (and node-type-list
	      (not (assoc node-type-list (diana_late_get diana-tree ':d-find-a-list))))
	 (d-find-make-tree diana-tree node-type-list))

  ;; Finally, at this point, we are guaranteed to have the right tree 
  ;; ready for searching.
  (and source-pos
       node-type-list
       (setq result (search-tree source-pos
				 (cdr (assoc node-type-list
					     (diana_late_get diana-tree ':d-find-a-list)))))
       (not (memq diana-tree result))
       result))

;;; A predicate to determine if a given diana node is a member of a particular
;;; diana tree (in the sense that we could find it if we used the source position
;;; searching mechanism.) Then just
;;; look for the node in the flat tree

(defun tree-memberp (diana-node diana-tree)
  ;; for debugging, keep pointer to last tree looked at.
  (setq *d-find-diana-tree* diana-tree)
  
  ;; Check to make sure things look ok.
  (ct_if (not (and (diana_nodep diana-node)
		   (diana_nodep diana-tree)
		   (diana_late_get diana-tree ':d-find-flat-tree)))
	 (lose 'bad_tree_memberp 'tree-memberp '("Bad diana flat tree")))

  ;; Finally check to see if the flat tree list contains the node.
  (memq diana-node (diana_late_get diana-tree ':d-find-flat-tree)))




;;;  ****************************************************************
;;;  Internal functions, etc.
;;;  ****************************************************************

;; A few macros to make it easier to get fields out of diana nodes.

(defmacro d-find-start (node)
  `(source_region%startchar (diana_get ,node 'lx_srcpos)))

(defmacro d-find-end (node)
  `(source_region%endchar (diana_get ,node 'lx_srcpos)))

(defmacro d-find-path (node)
  `(source_region%path (diana_get ,node 'lx_srcpos)))

(defmacro d-find-nodetype (node)
  `(diana_nodetype_get ,node))


;;; A function to flatten a diana tree. Tree is a subtree of a diana tree. Root-tree
;;; is the root of the tree we are flattening. Other-trees is a list of other
;;; diana tree roots. The basic idea is to recurse down the tree looking at each
;;; node. If we have already visited a node, then we are done. If not, then 
;;; we add the node to the list and look at its children. Two cases occur. Either
;;; the child is part of the same source file or part of a different source file.
;;; If we have a different file, first find the correct root and keep recursing
;;; with it.

(defun d-find-flatten-tree (tree root-tree other-trees &aux nodes)
  (cond ((not (diana_nodep tree)))
	((memq tree (setq nodes (diana_late_get root-tree ':d-find-flat-tree))))
	(t
	 (diana_late_put root-tree (cons tree nodes) ':d-find-flat-tree)
	 (loop for thing in (diana_children tree)
	       for child-path = (d-find-path thing)
	       do (cond ((member child-path *db%hidden_files*))
			((not (equal (d-find-path root-tree) child-path))
			 (d-find-flatten-tree thing (path-to-root child-path other-trees)
					      other-trees))
			(t
			 (d-find-flatten-tree thing root-tree other-trees)))))))

;;; Find the root corresponding to a given path.

(defun path-to-root (path trees)
  (loop for root in trees
	if (equal path (d-find-path root))
	collect root into possibles
	finally (cond ((neq (length possibles) 1)
		       (return (lose 'bad-tree-list 'path-to-tree '("Bad path to root"))))
		      (t (return (first possibles))))))

;;; Generates a new tree.  This finds all nodes in a flattened tree
;;; that are of certain type.  It
;;; returns them as a tree, which makes the assumption that the nodes
;;; can all be arranged that way.  Each POINT in the tree is a list
;;; of a node and subordinate POINTS.

(defun d-find-make-tree (root-tree type-list)
  (loop with flat-tree = (diana_late_get root-tree ':d-find-flat-tree)
	with new-tree = (list root-tree)
	for node in flat-tree
	if (and (memq (d-find-nodetype node) type-list)
		(plusp (node-width node))
		(neq node root-tree))
	do (setq new-tree (add-to-tree node new-tree))
	finally (diana_late_put root-tree
				(cons (cons type-list new-tree)
				      (diana_late_get root-tree ':d-find-a-list))
				':d-find-a-list)))

;;; Adds a node to a tree. Note, the tree is started with a root node which should
;;; have a range that will cover every thing. This is so that we don't need to worry
;;; about the case where two nodes have adjacent ranges but we haven't found their
;;; superior yet.
(defun add-to-tree (node tree)
  (cond

    ;; If the tree is empty, return a tree which is one point, which
    ;; is a list of one node.
    ((null tree)
     (list node))

    ;; If the node supplied is a superior to the tree, return a new
    ;; tree with the node as root.  This should only be used at top
    ;; level, since other cases would catch first during recursion.
    ((contains node (car tree))
     (list node tree))

    ;; If the new node is contained in one of the inferiors, we will
    ;; want to recurse.  We loop down tails to allow RPLAC'ing.
    ((loop for inferior-tail on (cdr tree)
	   ;; if the inferior contains the node
	   if (contains (first (car inferior-tail)) node)
	   do (rplaca inferior-tail (add-to-tree node (car inferior-tail)))
	   and return tree))

    ;; If none of the above are true, the new node is a direct inferior
    ;; of the tree, BUT, existing inferiors may become inferior to the
    ;; new node, or remain inferiors of the tree.
    (t
     (loop with new-inferiors = nil
	   with previous-inferiors = tree
	   for inferiors = (cdr tree) then next-inferiors
	   while inferiors
	   for next-inferiors = (cdr inferiors)
	   if (contains node (first (car inferiors)))
	   do (progn (rplacd previous-inferiors next-inferiors)
		     (setq new-inferiors (rplacd inferiors new-inferiors)))
	   else do (setq previous-inferiors inferiors)
	   finally (progn (setq new-inferiors (cons node new-inferiors))
			  (nconc tree (list new-inferiors))
			  (return tree))))))


;;; The contains function is handed two nodes.  We merely check the order of
;;; left and right srcpos components.
(defun contains (node1 node2)
  (let ((start1 (d-find-start node1))
	(end1 (d-find-end node1))
	(start2 (d-find-start node2))
	(end2 (d-find-end node2)))
    (and start1 start2 end1 end2
	 (<= start1 start2 end2 end1))))


;;;  Returns T if the node contains a point
(defun contains-point (node point)
  (let ((start (d-find-start node))
	(end (d-find-end node)))
    (and start point end
	 (<= start point (1- end)))))

;;;  Returns the width of the node at this point in the tree.
(defun node-width (node)
  (let ((start (d-find-start node))
	(end (d-find-end node)))
    (ct_if (and start end)
	   (- end start)
	   *plus-infinity*)))

;;;  Checks to see if two trees/points cover the same range
(defun same-range (node1 node2)
  (let ((start1 (d-find-start node1))
	(end1 (d-find-end node1))
	(start2 (d-find-start node2))
	(end2 (d-find-end node2)))
    (and start1 start2 end1 end2
	 (equal start1 start2)
	 (equal end1 end2))))


;;;  An alternative definition for search tree.  This one assumes that
;;; children of a node can overlap, but are still each contained by
;;; the parent.  It is not sufficient to find the first child that
;;; contains a point, but to look at ALL of them, and select the BEST
;;; one.  We still have a recursive definition.

(defun search-tree (source-pos tree)
  (cond
    ;; If no children, return this node if it contains, nil otherwise.
    ((null (cdr tree))
     (and (contains-point (first tree) source-pos)
	  (ncons (first tree))))
    ;; If there are children, find the best containing node of all
    ;; the children (and this one too.)  If no nodes are good, return
    ;; nil so we don't recurse infinitely.
    (t (let ((best-point (best-contains source-pos (cons tree (cdr tree)))) best-search)
	 (cond
	   ;; If the best node is just the current one, don't
	   ;; bother recursing.  Just return the right diana-tree.
	   ((eq best-point tree)
	    (and (contains-point (first tree) source-pos)
		 (ncons (first tree))))
	   ;; If no node is good, return nil.
	   ((null best-point) nil)
	   ;; If it is one of the children, recurse.
	   (t
	    (setq best-search (search-tree source-pos best-point))
	    (ct_if (and best-search
			(not (memq (first best-point) best-search))
			(same-range (first best-search) (first best-point)))
		   (cons (first best-point) best-search)
		   best-search)))))))

;;; Best contains expects a list of "trees".  Car of a tree is a node,
;;; and fifth of a node is a pointer into a diana-tree.  Anyway, we
;;; compare the trees to the source-position, and return the one that
;;; best contains the position.
(defun best-contains (posn tree-list)
    (loop for tree in tree-list
	  with best-tree
	  with best-width = *plus-infinity*
	  for width = (node-width (first tree))
	  if (and (contains-point (car tree) posn)
		  (<= width best-width))
	  do (setq best-tree tree
		   best-width width)
	  finally (return best-tree)))









;;;  ****************************************************************
;;;  Functions for testing.
;;;  ****************************************************************


;;; Prints out a nice copy of the internal data structure that we
;;; will be searching.
;;;

(defun print-a-list (&optional (tree *d-find-diana-tree*))
  (format t "~%Pathname: ~A" (d-find-path tree))
  (loop for (type-list . tree) in (diana_late_get tree ':d-find-a-list)
	do (format t "~%  TypeList: ~A" type-list)
	do (print-tree tree 4)))

(defun print-tree (tree spacing)
  (cond ((null tree))
	(t (format t "~%~V@T[~D ~D]~A ~A"
		   spacing (d-find-start (first tree)) (d-find-end (first tree))
		   (diana_get (first tree) 'ct_id) (d-find-nodetype (first tree)))
	   (loop for sub-tree in (cdr tree)
		 do (print-tree sub-tree (+ spacing 2))))))


(defun print-flat-tree (&optional (tree *d-find-diana-tree*))
  (loop	initially (format t "~%Pathname: ~A" (d-find-path tree))
	for node in (diana_late_get tree ':d-find-flat-tree)
	do (format t "~% [~D..~D]~15T~A  ~A"
		   (d-find-start node) (d-find-end node)
		   (d-find-nodetype node) (diana_get node 'ct_id))))
