head	1.23;
access;
symbols
	V0-99-8:1.23
	V0-99-7:1.22
	V0-99-6:1.21
	V0-99-5:1.21
	V0-99-4:1.21
	V0-99-2:1.20
	V0-99-1:1.19
	V0-93-14:1.18
	V0-93-13:1.18
	V0-93-12:1.18
	V0-93-11:1.14
	V0-93-10:1.13
	V0-93-9:1.12
	V0-93-8:1.12
	V0-93-7:1.12
	V0-93-5:1.12
	V80:1.5;
locks; strict;
comment	@ * @;


1.23
date	96.08.13.04.16.10;	author dumoulin;	state Exp;
branches;
next	1.22;

1.22
date	95.11.07.22.55.03;	author jcooper;	state Exp;
branches;
next	1.21;

1.21
date	95.05.19.22.46.16;	author dumoulin;	state Exp;
branches;
next	1.20;

1.20
date	95.03.29.00.36.04;	author dumoulin;	state Exp;
branches;
next	1.19;

1.19
date	95.03.01.19.16.21;	author rushing;	state Exp;
branches;
next	1.18;

1.18
date	95.01.25.03.26.51;	author rushing;	state Exp;
branches;
next	1.17;

1.17
date	95.01.24.03.18.30;	author rushing;	state Exp;
branches;
next	1.16;

1.16
date	95.01.19.02.29.59;	author rushing;	state Exp;
branches;
next	1.15;

1.15
date	95.01.03.20.26.40;	author brydon;	state Exp;
branches;
next	1.14;

1.14
date	94.12.21.16.12.49;	author jpiraino;	state Exp;
branches;
next	1.13;

1.13
date	94.12.12.19.39.13;	author jcooper;	state Exp;
branches;
next	1.12;

1.12
date	94.11.10.01.57.36;	author rushing;	state Exp;
branches;
next	;


desc
@routines for handling the header and thread index arrays
@


1.23
log
@add support to save sort options between sessions
@
text
@/* -*- C++ -*-
 * headarry.cpp
 * author: Sam Rushing
 * $Id: headarry.cpp 1.22 1995/11/07 22:55:03 jcooper Exp $
 */

#include <windows.h>
#include <windowsx.h>
extern "C"
{
#include "wvglob.h"
#include "winvn.h"
}
#pragma hdrstop
#include <ctype.h>	// for isspace

/* The header and thread arrays are set up as follows:
 * When we allocate space for the TypHeader array, we leave room for a
 * pointer at the very front of that space, which will either indicate 
 * that there is no thread index array (== NULL) or will point to that
 * array.  Since windows can move both of these arrays around, this
 * slot is updated whenever lock_headers is called.
 * 
 * After initialize_header_array is called, this sequence of
 * operations can be used to access the header array:
 * 1. header_p headers = lock_headers (header_handle, thread_handle);
 * 2. header_elt (headers, index);
 * 3. unlock_headers (header_handle, thread_handle);
 *
 * The thread_index array is an array of longs, each an index into
 * the real header array.  If thread_index is allocated and filled,
 * header_elt will indirect through it.  Sorting the array of indices
 * will sort the headers accordingly.
 */

/* globals, yuck! */
/* Scratch state shared by the threading code: sort_by_threads() fills
 * these in and the recursive thread_sort() reads them.
 *   g_headers - the locked header array
 *   g_index   - the thread index array being rebuilt
 *   g_parent  - article index -> parent article index (-1 for a root)
 *   g_parsort - article indices sorted by parent index
 *   g_length  - number of entries in each of the arrays above
 */
header_p g_headers;
thread_array g_index, g_parent, g_parsort;
long g_length;

/* Lock the header array, and the thread index array when one exists,
 * in memory for access.
 *
 * The block behind header_handle starts with one pointer-sized slot
 * followed by the headers themselves.  The slot is refreshed on every
 * call: it receives the locked address of the thread index array, or
 * NULL when thread_handle is 0 (or its lock fails).
 *
 * Returns a pointer to the first header, or NULL if the header block
 * itself cannot be locked.
 */
header_p
lock_headers (HANDLE header_handle, HANDLE thread_handle)
{
  thread_array_p indirect;

  /* lock the header block in position */
  indirect = (thread_array_p) GlobalLock (header_handle);
  if (!indirect)
    return NULL;		/* nothing was locked; do not write through NULL */

  /* if we've a valid thread_handle, lock the thread_array, too */
  if (thread_handle)
    *indirect = (thread_array) GlobalLock (thread_handle);
  else
    *indirect = NULL;

  /* the headers begin just past the slot */
  return (header_p) ((char_p) indirect + sizeof (char_p));
}

/* Release the locks taken by lock_headers: always the header block,
 * and the thread index array too when a handle for it was supplied. */
void
unlock_headers (HANDLE header_handle, HANDLE thread_handle)
{
  GlobalUnlock (header_handle);

  if (!thread_handle)
    return;			/* no thread index array to release */

  GlobalUnlock (thread_handle);
}


/* Hand the header block back to Windows, followed by the thread index
 * array when a handle for it was supplied. */
void
free_headers (HANDLE header_handle, HANDLE thread_handle)
{
  GlobalFree (header_handle);

  if (!thread_handle)
    return;			/* there never was a thread index array */

  GlobalFree (thread_handle);
}


/* Reset the thread index array to the identity mapping
 * (thread_index[i] == i for 0 <= i < length), so that header_elt()
 * returns the headers in their stored order.  Does nothing to the
 * index when thread_handle is 0.
 *
 * The arrays are locked only for the duration of the call.
 */
void
set_index_to_identity (HANDLE header_handle, HANDLE thread_handle, long length)
{
  long i;
  header_p headers;
  thread_array thread_index;

  headers = lock_headers (header_handle, thread_handle);

  if (thread_handle) {
    /* the slot just in front of the headers holds the index array */
    thread_index = *((thread_array_p) ((char_p) headers - sizeof (char_p)));

    /* Initialize with identity */
    for (i = 0; i < length; i++)
      thread_index[i] = i;
  }

  /* balance the lock taken above; the original never released it */
  unlock_headers (header_handle, thread_handle);
}


/* Prepare freshly allocated arrays for use: the thread index array
 * (when thread_handle is non-zero) becomes the identity mapping and
 * every one of the 'length' headers is cleared to all-zero bytes. */
void
initialize_header_array (HANDLE header_handle, HANDLE thread_handle, long length)
{
  long n;
  header_p headers = lock_headers (header_handle, thread_handle);

  if (thread_handle) {
    /* the index array pointer lives in the slot ahead of the headers */
    thread_array identity =
      *((thread_array_p) ((char_p) headers - sizeof (char_p)));

    for (n = 0; n < length; n++)
      identity[n] = n;
  }

  /* zero the headers one element at a time */
  for (n = 0; n < length; n++)
    memset (headers + n, 0, sizeof (*headers));

  unlock_headers (header_handle, thread_handle);
}


/* Linear search of the raw header array (no thread-index indirection)
 * for the header whose 'number' field equals artindex.
 * Returns its position in the header array, or -1 when no header
 * among the first 'length' entries matches. */
long
find_index_from_artnum (long artindex, HANDLE header_handle, HANDLE thread_handle, long length)
{
  long pos;
  long hit = -1;
  header_p headers = lock_headers (header_handle, thread_handle);

  for (pos = 0; pos < length; pos++) {
    if ((long) headers[pos].number == artindex) {
      hit = pos;
      break;
    }
  }

  unlock_headers (header_handle, thread_handle);
  return hit;
}

/* Accessor for one element of a locked header array.  'headers' must
 * be the pointer returned by lock_headers.  When the slot in front of
 * the headers holds a thread index array, 'index' is first mapped
 * through it; otherwise 'index' addresses the header array directly. */

header_p
header_elt (header_p headers, long index)
{
  thread_array_p slot = (thread_array_p) ((char_p) headers - sizeof (char_p));
  thread_array thread_index = *slot;
  long real_index = index;

  if (thread_index)
    real_index = thread_index[index];

  return headers + real_index;
}

/* Three-way comparison of two headers by their 'number' field.
 * elem1/elem2 are direct indices into 'headers'.
 * Returns -1, 0 or 1. */
int
compare_artnum (header_p headers,
                long elem1, long elem2)
{
  long lhs = headers[elem1].number;
  long rhs = headers[elem2].number;

  if (lhs < rhs)
    return -1;
  return (lhs == rhs) ? 0 : 1;
}


/* Three-way comparison of two headers by their 'lines' field.
 * elem1/elem2 are direct indices into 'headers'.
 * Returns -1, 0 or 1. */
int
compare_lines (header_p headers,
               long elem1, long elem2)
{
  long lhs = headers[elem1].lines;
  long rhs = headers[elem2].lines;

  if (lhs < rhs)
    return -1;
  return (lhs == rhs) ? 0 : 1;
}

/* Order two headers by message_id using an exact, case-sensitive
 * string comparison.  Returns the strcmp result. */
int
compare_message_id (header_p headers,
                    long elem1, long elem2)
{
  char *left = headers[elem1].message_id;
  char *right = headers[elem2].message_id;

  return strcmp (left, right);
}


/* Compare two subject lines for threading purposes.
 *
 * Any number of leading "Re:" markers (matched without regard to
 * case), each together with the white space that follows it, is
 * skipped on both sides; what remains is compared case-insensitively.
 * Returns <0, 0 or >0 like stricmp.
 */
int
_compare_subject (char * p1, char * p2)
{
  // trim leading 're:' notation
  // (isspace needs an unsigned char value: subjects may carry 8-bit
  // characters, which are negative when plain char is signed)
  while (*p1 && !_strnicmp(p1, "Re:", 3)) {
    p1 += 3;
    while (*p1 && isspace((unsigned char) *p1)) p1++;
  }
  while (*p2 && !_strnicmp(p2, "Re:", 3)) {
    p2 += 3;
    while (*p2 && isspace((unsigned char) *p2)) p2++;
  }
  return stricmp (p1, p2);
}

/* Order two headers by subject, using _compare_subject so that
 * leading "Re:" markers and letter case are ignored. */
int
compare_subject (header_p headers,
                 long elem1, long elem2)
{
  char *left = headers[elem1].subject;
  char *right = headers[elem2].subject;

  return _compare_subject (left, right);
}

/* Order two headers by their 'from' field, ignoring letter case.
 * Returns the stricmp result. */
int
compare_from (header_p headers,
              long elem1, long elem2)
{
  char *left = headers[elem1].from;
  char *right = headers[elem2].from;

  return stricmp (left, right);
}

/* Three-way comparison of two headers by their 'date' field.
 * elem1/elem2 are direct indices into 'headers'.
 * Returns -1, 0 or 1. */
int
compare_date (header_p headers,
              long elem1, long elem2)
{
  long lhs = headers[elem1].date;
  long rhs = headers[elem2].date;

  if (lhs < rhs)
    return -1;
  return (lhs == rhs) ? 0 : 1;
}

/* Shell sort of an index array (taken from shellsor.c and modified for
 * threading purposes).
 *
 *   headers   - the header array the indices refer to
 *   index     - nElements header indices, reordered in place
 *   nElements - number of entries in 'index'
 *   compare   - three-way comparison of two header indices
 *
 * When 'compare' reports a tie the entries are ordered by article
 * number, which keeps the result stable w.r.t. article number.
 *
 * The stride and the loop counters are long, like nElements, so that
 * the stride computation cannot overflow where int is 16 bits wide.
 */

void
shell_sort_index_array (header_p headers,
                        thread_array index,
                        long nElements,
                        int (*compare) (header_p headers,
                                        long elem1,
                                        long elem2))
{
#define STRIDE_FACTOR 3
  long c, d, stride;

  /* smallest stride of the series 1, 4, 13, 40, ... above nElements */
  stride = 1;
  while (stride <= nElements)
    stride = stride * STRIDE_FACTOR + 1;

  while (stride > (STRIDE_FACTOR - 1)) {
    stride = stride / STRIDE_FACTOR;

    for (c = stride; c < nElements; c++) {
      /* sink the entry at c through its stride-chain */
      for (d = c - stride; d >= 0; d -= stride) {
        long tmp;
        int comp = compare (headers, index[d + stride], index[d]);

        if (comp > 0)
          break;		/* already in order */
        if ((comp == 0) &&
            (compare_artnum (headers, index[d + stride], index[d]) >= 0))
          break;		/* tie, and article numbers are in order */

        tmp = index[d];
        index[d] = index[d + stride];
        index[d + stride] = tmp;
      }
    }
  }
}


/* Shell sort of an index array by parent.
 *
 *   index   - n article indices, reordered in place
 *   parents - parents[a] is the parent index of article a
 *   n       - number of entries in 'index'
 *
 * Entries are ordered by ascending parents[index[k]]; entries that
 * share a parent are ordered by descending article index.
 *
 * The stride and the loop counters are long, like n, so that the
 * stride computation cannot overflow where int is 16 bits wide.
 */
void
shell_sort_parent_array (thread_array index,
                         thread_array parents,
                         long n)
{
#define STRIDE_FACTOR 3
  long c, d, stride;

  /* smallest stride of the series 1, 4, 13, 40, ... above n */
  stride = 1;
  while (stride <= n)
    stride = stride * STRIDE_FACTOR + 1;

  while (stride > (STRIDE_FACTOR - 1)) {
    stride = stride / STRIDE_FACTOR;

    for (c = stride; c < n; c++) {
      /* sink the entry at c through its stride-chain */
      for (d = c - stride; d >= 0; d -= stride) {
        long tmp;
        long p1 = parents[index[d + stride]];
        long p2 = parents[index[d]];

        if (!((p1 < p2) || ((p1 == p2) && (index[d + stride] > index[d]))))
          break;		/* pair is in order */

        tmp = index[d];
        index[d] = index[d + stride];
        index[d + stride] = tmp;
      }
    }
  }
}

/* Binary search of the parent-sorted table.
 *
 *   parsort     - n article indices sorted by ascending parent index
 *   parents     - parents[a] is the parent index of article a
 *   looking_for - the parent index to look up
 *   n           - number of entries in 'parsort'
 *
 * Returns the position in 'parsort' of the first entry whose parent
 * is looking_for, or -1 when there is none (including an empty table).
 */
long
bsearch_parsort_table (thread_array parsort,
                       thread_array parents,
                       long looking_for,
                       long n)
{
  long probe;
  long high = n;
  long low = 0;

  if (n <= 0)
    return -1;			/* empty table: avoid reading parsort[-1] */

  /* narrow (low, high] until it holds a single candidate, high - 1 */
  while ((high - low) > 1) {
    probe = (high + low) / 2;
    if (looking_for <= parents[parsort[probe - 1]])
      high = probe;
    else
      low = probe;
  }

  if (looking_for == parents[parsort[high - 1]])
    return (high - 1);
  return -1;
}


/* Binary search of an index sorted by message_id.
 *
 *   headers - the header array
 *   index   - n header indices sorted by message_id (strcmp order)
 *   mid     - the message_id we're looking for
 *   n       - number of entries in 'index'
 *
 * Returns the position in 'index' of the first entry whose message_id
 * equals 'mid', or -1 when there is none (including an empty index).
 */
long
bsearch_mid_table (header_p headers,
                   thread_array index,	/* sorted index */
                   char *mid,			/* message_id we're looking for */
                   long n)
{
  long probe;
  long high = n;
  long low = 0;

  if (n <= 0)
    return -1;			/* empty index: avoid reading index[-1] */

  /* narrow (low, high] until it holds a single candidate, high - 1 */
  while ((high - low) > 1) {
    probe = (high + low) / 2;
    if (strcmp (mid, headers[index[probe - 1]].message_id) <= 0)
      high = probe;
    else
      low = probe;
  }

  if (strcmp (mid, headers[index[high - 1]].message_id) == 0)
    return (high - 1);
  return -1;
}

/*
 * All these bsearch & shell-sort routines are ugly, but I haven't
 * the time right now to generalize and debug it all (SMR 950123)
 */

/* Binary search of an index sorted by subject.
 *
 *   headers - the header array
 *   index   - n header indices sorted with compare_subject
 *   sub     - the subject we're looking for
 *   n       - number of entries in 'index'
 *
 * Subjects are matched with _compare_subject, i.e. ignoring leading
 * "Re:" markers and letter case.  Returns the position in 'index' of
 * the first matching entry, or -1 when there is none (including an
 * empty index).
 */
long
bsearch_sub_table (header_p headers,
                   thread_array index,	/* sorted index */
                   char * sub,			/* subject we're looking for */
                   long n)
{
  long probe;
  long high = n;
  long low = 0;

  if (n <= 0)
    return -1;			/* empty index: avoid reading index[-1] */

  /* narrow (low, high] until it holds a single candidate, high - 1 */
  while ((high - low) > 1) {
    probe = (high + low) / 2;
    if (_compare_subject (sub, headers[index[probe - 1]].subject) <= 0)
      high = probe;
    else
      low = probe;
  }

  if (_compare_subject (sub, headers[index[high - 1]].subject) == 0)
    return (high - 1);
  return -1;
}

/* Recursive worker of the thread sort.  Operates on the g_* globals.
 *
 *   root_index - article whose children are to be placed (-1 = roots)
 *   start, end - the free region [start, end) of g_index to work in
 *   depth      - thread depth recorded for the children of root_index
 *
 * The children of root_index are first parked at the bottom of the
 * free region, then moved one at a time to its top, each followed by
 * its own descendants.  Returns the new start of the free region.
 */
long
thread_sort (long root_index, long start, long end, int depth)
{
  long first_child;
  long n_children;
  long k;

  /* no room left to work in */
  if (start == end)
    return end;

  /* locate the first child of this node in the sorted table */
  first_child = bsearch_parsort_table (g_parsort, g_parent,
                                       root_index, g_length);
  if (first_child == -1)
    return start;		/* no children found */

  /* park every child at the bottom of the free region */
  n_children = 0;
  for (k = first_child;
       (k < g_length) && (g_parent[g_parsort[k]] == root_index);
       k++) {
    g_index[end - n_children - 1] = g_parsort[k];
    n_children++;
  }

  if (n_children == 0)
    return start;

  /* place each child at the top, then recurse for its subtree */
  for (k = n_children; k > 0; k--) {
    long child = g_index[end - k];

    g_index[start] = child;
    g_headers[child].thread_depth = (char) depth;
    start = thread_sort (child, start + 1, end - (k - 1), depth + 1);
  }

  return start;
}

#if 0
/*
 * Routine SortParents will make 2 parents with the same subject contiguous in
 * the header list without disrupting the "stableness" of the children.
 *
 * this function rearranges the 'parsort' table (see end of documentation)
 * so that threads with the same subject are contiguous.
 * Unfortunately this is an n^2 algorithm (or close), and it was slowing
 * the thread process considerably (4000 arts took about 22 seconds on a 486/33)
 * (SMR 950123)
 */

/* NOTE: compiled out (inside #if 0); kept for reference only.
 *
 *   headers - the header array
 *   parsort - article indices sorted by parent; rearranged in place
 *   parents - parents[a] is the parent index of article a (-1 = root)
 *   iSize   - number of entries in parsort
 */
void
SortParents (header_p headers,
			 thread_array parsort,
			 thread_array parents,
			 long iSize)
{
  long nlast = 0;
  long temp;
  int32 i, j, k;

  /* scan for the first entry that has a parent; nlast is left at the
   * position just before it (or at iSize-2 if the scan runs to the end) */
  for(i = 0; i < iSize; i++) {
	nlast = i-1;
	if(parents[parsort[i]] != -1) {
//	  TRACE4("SortParents: i = %02d, parsort[i] = %2d, parents[parsort[i]] = %3d, nlast = %3d\n", 
//	         i, parsort[i], parents[parsort[i]], nlast);
	  break;
	}
  }
  if (nlast) {	// at least 2 items in list
	for (i = 0; i < nlast; i++) {
	  /* j starts at i+2: an entry at i+1 is already adjacent to i */
	  for (j = i+2; j <= nlast; j++) {
		if (!compare_subject(headers, parsort[i], parsort[j])) {
		  // char *p1 = headers[parsort[i]].subject;
		  // char *p2 = headers[parsort[j]].subject;
		  // TRACE2("SortParents: headers[%02d] = %s\n", parsort[i], p1);
		  // TRACE2("             headers[%02d] = %s\n", parsort[j], p2);
		  /* same subject: swap adjacent pairs from j down to i+1,
		   * which carries entry j to position i+1 and shifts the
		   * entries in between down by one */
		  for (k = j-1; k > i; k--) {
			temp = parsort[k];
			parsort[k] = parsort[k+1];
			parsort[k+1] = temp;
		  }
		}
	  }
	}
  }
}

#endif

/* Climb parent_table from 'start' to the top of its tree and return
 * the article index found there.  The climb stops at an entry whose
 * parent is -1, and also at an entry that names itself as its parent.
 * Cycles longer than one entry are not detected. */
long
find_parent (thread_array parent_table, long start)
{
  long node = start;

  for (;;) {
    long up = parent_table[node];

    if ((up == -1) || (up == node))
      break;
    node = up;
  }
  return node;
}

/* setup for threading algorithm:
 * 1. allocate an extra thread_array for holding a sorted message-id table
 * 2. using mid_table, allocate and create another table, a map from
 *    article_index->parent_index  
 * 3. sort this table by parent_index (must be a stable sort)
 * 
 * the recursive thread_sort will use these tables to do a stable sort
 * by threads...
 */

void
sort_by_threads (HANDLE header_handle, HANDLE thread_handle, long length, BOOL bSOpt)
{
  long i;
  header_p headers;
  HANDLE mid_handle, parent_handle;
  thread_array thread_index, mid_table, parent_table;
  
  headers = lock_headers (header_handle, thread_handle);
  thread_index = *((thread_array_p) ((char_p) headers - sizeof (char_p)));
  
  if (!thread_index)
	return;
  
  mid_handle = GlobalAlloc (GMEM_MOVEABLE, (long) (sizeof (long) * length));
  if (!mid_handle)
	return;
  
  parent_handle = GlobalAlloc (GMEM_MOVEABLE, (long) (sizeof (long) * length));
  if (!parent_handle) {
	GlobalFree (mid_handle);
	return;
  }
  
  mid_table = (thread_array) GlobalLock (mid_handle);
  parent_table = (thread_array) GlobalLock (parent_handle);
  
  /* these globals are needed in various sort & search routines */
  g_headers = headers;
  g_index = thread_index;
  g_parent = parent_table;
  g_parsort = mid_table;
  g_length = length;

  /* create the message_id table */
  for (i = 0; i < length; i++)
	mid_table[i] = i;
  shell_sort_index_array (headers, mid_table, length, compare_message_id);
  
  /* create the parent table */
  
  for (i = 0; i < length; i++) {
	long p = bsearch_mid_table (headers, mid_table, headers[i].best_ref, length);
	parent_table[i] = (p == -1) ? -1 : mid_table[p];
  }
  
  if(bSOpt) {
	// this should be moved into a separate routine, called something like
	// 'reunite_orphans'
	
	// At this point, parent_table maps from article -> parent.
	// We want to identify 'orphaned' threads, created by broken newsreaders
	//  and mangled reference lines, and reattach them to the first tree with
	//  the same subject.

	// reuse mid_table temporarily as an index sorted by subject (modulo 'Re: ')
	for (i = 0; i < length; i++) {
	  mid_table[i] = i;
	} // set to identity
	shell_sort_index_array (headers, mid_table, length, compare_subject); // sort
	
	// In a single pass through parent_table, check each root 
	// subject, and attach them to the root of the first matching thread...
	
	for (i = 0; i < length; i++) {
	  if (parent_table[i] == -1) {
		long p = bsearch_sub_table (headers, mid_table, headers[i].subject, length);
		// find a matching subject for this orphan
		if (p != -1) {
		  // find the root node of this subject hit
		  p = find_parent (parent_table, mid_table[p]);
		  // if we didn't find ourself
		  if (p != i) {
			int p_re, i_re;
			// Try to identify root art from the subject line
			p_re = (_strnicmp (headers[p].subject, "re:", 3) == 0);
			i_re = (_strnicmp (headers[i].subject, "re:", 3) == 0);
			if (p_re && !i_re) {
			  parent_table[p] = i;
			} else if (i_re && !p_re) {
			  parent_table[i] = p;
			// Try to identify root art by comparing dates,
			// which doesn't work very well because winvn's
			// current date parser does not handle timezones
			} else if (headers[p].date < headers[i].date) {
			  parent_table[i] = p;
			} else {
			  parent_table[p] = i;
			}
		  }
		}
	  }
	}
  }
	
  // re-use the mid_table again, as a sorted parent table.
  for (i = 0; i < length; i++)
	mid_table[i] = i;
  
  shell_sort_parent_array (mid_table, parent_table, length);
  
  // recursively construct the thread trees
  thread_sort (-1, 0, length, 0);
  
  GlobalUnlock (parent_handle);
  GlobalFree (parent_handle);
  GlobalUnlock (mid_handle);
  GlobalFree (mid_handle);
}


/* Re-sort the thread index according to the global iSortOption.
 *
 * The two thread options go through sort_by_threads (or report
 * "Threading disabled" when threadOk is FALSE); every other option is
 * a plain shell sort of thread_index with the matching comparison
 * routine.  Unknown options sort by article number.
 */
void sort_by_option(header_p headers, thread_array thread_index, BOOL threadOk,
					unsigned int nLines, HANDLE header_handle, HANDLE thread_handle)
{
  int (*cmp) (header_p headers, long elem1, long elem2);

  /* threading is the only case that is not a simple comparison sort */
  if (iSortOption == IDM_SORT_THREADSUB || iSortOption == IDM_SORT_THREADS) {
    if (!threadOk) {
      MessageBox(NULL, "Threading disabled", "WinVN", MB_OK);
      return;
    }
    sort_by_threads(header_handle, thread_handle, nLines,
                    (iSortOption == IDM_SORT_THREADSUB) ? TRUE : FALSE);
    return;
  }

  /* pick the comparison routine for the remaining options */
  switch(iSortOption)
    {
    case IDM_SORT_DATE:
      cmp = compare_date;
      break;

    case IDM_SORT_SUBJECT:
      cmp = compare_subject;
      break;

    case IDM_SORT_LINES:
      cmp = compare_lines;
      break;

    case IDM_SORT_FROM:
      cmp = compare_from;
      break;

    case IDM_SORT_ARTNUM:
    default:
      cmp = compare_artnum;
      break;
    }

  shell_sort_index_array (headers, thread_index, nLines, cmp);
}

//   --------------------------------------------------------------------------
//
//   Thread sorting algorithm.
//
//   Here's an example, already threaded, showing the relationship between
//   parent, child, and original index.
//
//
//
//   0   5
//   1      2
//   2         3
//   3      0   
//   4   1   
//   5      4        
//
//   On the display, it may look like this:
//
//   5   Why my computer is better than yours...
//   2     Re: Why my computer is better than yours...
//   3       Why my _car_ is better than your computer (was Re: ...)
//   0     Re: Why my computer is better than yours...
//   1   What's the latest on the Amiga 6000???
//   4     What planet are you on ? (was Re: What's the latest...)
//
//   This shows that articles #2 and #0 are in response to article #5 (yes this can
//   and does happen), article #4 is in response to #1, etc...
//
//   This is the parent table.  It answers the question: "what is the index
//   of my parent".  '*' means root, or 'no' index - either there is no    
//   parent of this article, or we don't have it.  In the code, '*' == -1.
//
//     |--|
//   0 |5 |
//     |--|
//   1 |* | 
//     |--|
//   2 |5 |
//     |--|
//   3 |2 |
//     |--|
//   4 |1 |
//     |--|
//   5 |* |
//     |--|
//
//   This table was computed by
//   1) sorting by Message-ID (by creating 'mid_table') with a shell sort.
//   2) use 'mid_table' to find each article's parent (using a binary search),
//   thereby creating 'parent_table'.
//   3) mid_table's not needed any longer, so we now re-use it as a sorted
//   index into parent_table.  More on this later.
//
//   What we want from thread_sort is for the empty thread_index to hold
//   an array with these article indices in the correct order:
//
//   |--|
//   |5 |
//   |--|
//   |2 |
//   |--|
//   |3 |
//   |--|
//   |0 |
//   |--|
//   |1 |
//   |--|
//   |4 |
//   |--|
//
//   ---------------------------------------------------------------------------
//   Now, for the actual algorithm.  The work is performed by the recursive
//   function thread_sort().
//
//   thread_sort (root_index, start, end, depth) { ... }
//   (depth is used to keep track of the depth of the recursion)
//
//   1) Start with an empty index table.  start = 0 (the beginning of the
//   table), and end = length (the length of the table).
//
//   2) Find all the children of the current root, (which is '*' to start
//   with), and pack them (in order) into the bottom of the table.
//   If there are no children, return 'start'.
//   If start == end, return 'start'.
//
//   3) Now, we will recurse [call thread_sort()] for each of these
//   children, using the empty portion of thread_index to work with.
//   We do this by:
//
//   3a) Move child #1 into the top slot.
//   3b) calling thread_sort, with
//   root_index = child #1
//   start = start of the empty portion
//   end = end of the empty portion
//   depth = depth+1
//   3c) After thread_sort() does its magic, it will return a 
//   new value for 'start', indicating where the 'work area'
//   can start.  thread_sort() may have filled in an arbitrary
//   number of slots in this call, but will never overstep the
//   free space.  Don't worry, it all works out.  8^)
//
//   3d) Go to child #2, repeat 3(a-c), #3, #4, etc...
//
//   4) return 'start' (the start of the empty space).
//
//   Here's a trace of the algorithm using our example articles.
//   ---------------------------------------------------------------------------
//   The number above the stack of boxes indicates the parent that
//   we're finding the children of.
//
//      *                      
//     |--|  |--|                                                
//   0 |  |  |5 |   5                                            
//     |--|  |--|  |--|  |--|                          |--|  |--|
//   1 |  |  |  |  |  |  |2 |   2                      |2 |  |2 |
//     |--|  |--|  |--|  |--|  |--|  |--|        |--|  |--|  |--|
//   2 |  |  |  |  |  |  |  |  |  |  |3 |   3    |3 |  |3 |  |3 |    ==>
//     |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|
//   3 |  |  |  |  |2 |  |  |  |3 |  |  |  |  |  |  |  |  |  |  |
//     |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|
//   4 |5 |  |  |  |0 |  |0 |                                |0 |
//     |--|  |--|  |--|  |--|                                |--|
//   5 |1 |  |1 |                                                
//     |--|  |--|                                                
//
//
//   continued...
//
//                       |--|                    |--|
//   0                   |5 |                    |5 |
//                 |--|  |--|                    |--|
//   1             |2 |  |2 |                    |2 |
//                 |--|  |--|                    |--|
//   2             |3 |  |3 |                    |3 |
//     |--|        |--|  |--|                    |--|
//   3 |0 |   0    |0 |  |0 |                    |0 |
//     |--|  |--|  |--|  |--|  |--|              |--|
//   4 |  |  |  |  |  |  |  |  |1 |   1          |1 |
//     |--|  |--|  |--|  |--|  |--|  |--|        |--|
//   5                   |1 |  |  |  |4 |   4    |4 |
//                       |--|  |--|  |--|  |--|  |--|
//
//
//   ---------------------------------------------------------------------------
//   Now that you understand the algorithm 8^), we go back to parsort_table.  
//   At the start of each call to thread_sort(), we need to find all the
//   children of root_index.  We can do this quickly by using a sorted
//   version of parent_table, called parsort_table.  It contains a
//   convenient ordered list of children for us:
//
//   x -+
//   x  | all the children of 'x' 
//   x  |                   
//   x -+                        
//   y -+           
//   y -+ all the children of 'y'
//   z -+
//   z  |
//   z  | all the children of 'z' 
//   z  | 
//   z -+
//
//   A single call (order logn) to bsearch_parsort_table puts us at the
//   correct index for finding all the children we are looking for.
//
//   parsort_table
//     |--|
//   0 |1 |
//     |--|
//   1 |5 | 
//     |--|
//   2 |4 |
//     |--|
//   3 |3 |
//     |--|
//   4 |0 |
//     |--|
//   5 |2 |
//     |--|
//
//   ---------------------------------------------------------------------------
// (SMR 950123)
// Harvey Brydon has implemented a more sophisticated threading algorithm
// that attempts to reunite 'orphaned' articles to their parent threads by
// using the subject header.  I recoded the algorithm to make it O(nlogn).
// See the comments in the function 'sort_by_threads'.  The extra code executes
// just before the 'parsort' table is created.

/*
 * Local Variables:
 * tab-width: 4
 * end:
 */
@


1.22
log
@Fixed bug in threading code that can cause an infinite loop
in the "find_parent" function.
@
text
@d2 1
a2 1
 * headarry.c
d4 1
a4 1
 * $Id: headarry.cpp 1.21 1995/05/19 22:46:16 dumoulin Exp $
d123 3
a125 13
  for (i = 0; i < length; i++) {
	headers[i].Seen = (char) 0;
	headers[i].Selected = (char) 0;
	headers[i].number = 0;
	headers[i].thread_depth = 0;
	headers[i].lines = 0;
	headers[i].date = 0;
	headers[i].subject[0] = (char) 0;
	headers[i].from[0] = (char) 0;
	headers[i].message_id[0] = (char) 0;
	headers[i].references[0] = (char) 0;
	headers[i].ArtDoc = NULL;
  }
d563 1
a563 1
	long p = bsearch_mid_table (headers, mid_table, headers[i].references, length);
@


1.21
log
@Added new routine to find article index into header array
given only the article number.  Needed for crossposting
support.
@
text
@d4 1
a4 1
 * $Id: headarry.cpp 1.20 1995/03/29 00:36:04 dumoulin Exp $
d514 2
a515 1
  while (parent_table[climber] != -1) {
@


1.20
log
@Comment out all Trace options in preparation of WinVN 1.0 Release
@
text
@d4 1
a4 1
 * $Id: headarry.cpp 1.19 1995/03/01 19:16:21 rushing Exp $
a41 1

a49 1

a62 1

a72 1

a103 1

d139 19
d366 1
a366 1
				   char *mid,	/* message_id we're looking for */
d463 1
a463 3
 */

/*
@


1.19
log
@use presence of 'Re: ' as a hint when reconstructing a broken thread
@
text
@d4 1
a4 1
 * $Id: headarry.cpp 1.18 1995/01/25 03:26:51 rushing Exp $
d472 2
a473 2
	  TRACE4("SortParents: i = %02d, parsort[i] = %2d, parents[parsort[i]] = %3d, nlast = %3d\n", 
	         i, parsort[i], parents[parsort[i]], nlast);
@


1.18
log
@redesigned the 'orphan reunite' code in sort_by_threads() again
@
text
@d4 1
a4 1
 * $Id: headarry.cpp 1.17 1995/01/24 03:18:30 rushing Exp rushing $
d591 12
a602 2
			// connect newer to older
			if (headers[p].date < headers[i].date) {
@


1.17
log
@recoded Harvey Brydon's SortParents() algorithm to be nlogn.
@
text
@d4 1
a4 1
 * $Id: headarry.cpp 1.16 1995/01/19 02:29:59 rushing Exp rushing $
d498 10
d574 3
a576 1
	for (i = 0; i < length; i++) mid_table[i] = i; // set to identity
d580 1
a580 1
	// subject, and attach them to the first matching art...
d584 1
d586 5
a590 4
		long p = bsearch_sub_table (headers, mid_table, headers[i].subject, length);
		if (p != -1 && mid_table[p] != i) {
		  // if both are root nodes...
		  if (parent_table[mid_table[p]] == -1) {
d592 2
a593 2
			if (headers[mid_table[p]].date < headers[i].date) {
			  parent_table[i] = mid_table[p];
d595 1
a595 7
			  parent_table[mid_table[p]] = i;
			}
		  } else {
			// the other has a parent, traverse the tree to its root
			long climber = mid_table[p];
			while (parent_table[climber] != -1) {
			  climber = parent_table[climber];
a596 2
			// and attach the current orphan there
			parent_table[i] = parent_table[climber];
d602 1
a602 1
  
@


1.16
log
@.experimenting with new sort by subject algorithm.
@
text
@d4 1
a4 1
 * $Id: headarry.cpp 1.15 1995/01/03 20:26:40 brydon Exp rushing $
d202 1
a202 2
compare_subject (header_p headers,
				 long elem1, long elem2)
a203 3
  char *p1 = headers[elem1].subject;
  char *p2 = headers[elem2].subject;

d214 7
a220 1
  // return stricmp (headers[elem1].subject, headers[elem2].subject);
d372 27
d445 1
a445 1

d451 8
d461 1
a461 1
			 thread_array index,
d471 3
a473 3
	if(parents[index[i]] != -1) {
	  TRACE4("SortParents: i = %02d, index[i] = %2d, parents[index[i]] = %3d, nlast = %3d\n", 
	         i, index[i], parents[index[i]], nlast);
d480 5
a484 5
		if (!compare_subject(headers, index[i], index[j])) {
		  // char *p1 = headers[index[i]].subject;
		  // char *p2 = headers[index[j]].subject;
		  // TRACE2("SortParents: headers[%02d] = %s\n", index[i], p1);
		  // TRACE2("             headers[%02d] = %s\n", index[j], p2);
d486 3
a488 3
			temp = index[k];
			index[k] = index[k+1];
			index[k+1] = temp;
d496 2
d535 7
d554 42
a595 5
#ifdef _DEBUG
  /* clear it so we can debug */
  for (i = 0; i < length; i++)
	thread_index[i] = 0;
#endif
d597 1
a597 1
  /* re-use the mid_table as a sorted parent table */
d603 1
a603 11
  if(bSOpt)
	{
	  SortParents(headers, mid_table, parent_table, length);
	}
  
  g_headers = headers;
  g_index = thread_index;
  g_parent = parent_table;
  g_parsort = mid_table;
  g_length = length;
  
d656 187
a842 185

/*
   --------------------------------------------------------------------------
   
   Thread sorting algorithm.
   
   Here's an example, already threaded, showing the relationship between
   parent, child, and original index.
   
   
   
   0   5
   1      2
   2         3
   3      0   
   4   1   
   5      4        
   
   On the display, it may look like this:
   
   5   Why my computer is better than yours...
   2     Re: Why my computer is better than yours...
   3       Why my _car_ is better than your computer (was Re: ...)
   0     Re: Why my computer is better than yous...
   1   What's the latest on the Amiga 6000???
   4     What planet are you on ? (was Re: What's the latest...)
   
   This shows that articles #2 and #0 are in response to article #5 (yes this can
   and does happen), article #4 is in response to #1, etc...
   
   This is the parent table.  It answers the question: "what is the index
   of my parent".  '*' means root, or 'no' index - either there is no    
   parent of this article, or we don't have it.  In the code, '*' == -1.
   
   |--|
   0 |5 |
   |--|
   1 |* | 
   |--|
   2 |5 |
   |--|
   3 |2 |
   |--|
   4 |1 |
   |--|
   5 |* |
   |--|
   
   This table was computed by
   1) sorting by Message-ID (by creating 'mid_table') with a shell sort.
   2) use 'mid_table' to find each article's parent (using a binary search),
   thereby creating 'parent_table'.
   3) mid_table's not needed any longer, so we now re-use it as a sorted
   index into parent_table.  More on this later.
   
   What we want from thread_sort is for the empty thread_index to hold
   an array with these articles indices in the correct order:
   
   |--|
   |5 |
   |--|
   |2 |
   |--|
   |3 |
   |--|
   |0 |
   |--|
   |1 |
   |--|
   |4 |
   |--|
   
   ---------------------------------------------------------------------------
   Now, for the actual algorithm.  The work is performed by the recursive
   function thread_sort().
   
   thread_sort (root_index, start, end, depth) { ... }
   (depth is used to keep track of the depth of the recursion)
   
   1) Start with an empty index table.  start = 0 (the beginning of the
   table), and end = length (the length of the table).
   
   2) Find all the children of the current root, (which is '*' to start
   with), and pack them (in order) into the bottom of the table.
   If there are no children, return 'start'.
   If start == end, return 'start'.
   
   3) Now, we will recurse [call thread_root()] for each of these
   children, using the empty portion of thread_index to work with.
   We do this by:
   
   3a) Move child #1 into the top slot.
   3b) calling thread_sort, with
   root_index = child #1
   start = start of the empty portion
   end = end of the empty portion
   depth = depth+1
   3c) After thread_sort() does its magic, it will return a 
   new value for 'start', indicating where the 'work area'
   can start.  thread_sort() may have filled in an arbitrary
   number of slots in this call, but will never overstep the
   free space.  Don't worry, it all works out.  8^)
   
   3d) Go to child #2, repeat 3(a-c), #3, #4, etc...

   4) return 'start' (the start of the empty space).

   Here's a trace of the algorithm using our example articles.
   ---------------------------------------------------------------------------
   The number above the stack of boxes indicates the parent that
   we're finding the children of.

      *                      
     |--|  |--|                                                
   0 |  |  |5 |   5                                            
     |--|  |--|  |--|  |--|                          |--|  |--|
   1 |  |  |  |  |  |  |2 |   2                      |2 |  |2 |
     |--|  |--|  |--|  |--|  |--|  |--|        |--|  |--|  |--|
   2 |  |  |  |  |  |  |  |  |  |  |3 |   3    |3 |  |3 |  |3 |    ==>
     |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|
   3 |  |  |  |  |2 |  |  |  |3 |  |  |  |  |  |  |  |  |  |  |
     |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|
   4 |5 |  |  |  |0 |  |0 |                                |0 |
     |--|  |--|  |--|  |--|                                |--|
   5 |1 |  |1 |                                                
     |--|  |--|                                                


   continued...

                       |--|                    |--|
   0                   |5 |                    |5 |
                 |--|  |--|                    |--|
   1             |2 |  |2 |                    |2 |
                 |--|  |--|                    |--|
   2             |3 |  |3 |                    |3 |
     |--|        |--|  |--|                    |--|
   3 |0 |   0    |0 |  |0 |                    |0 |
     |--|  |--|  |--|  |--|  |--|              |--|
   4 |  |  |  |  |  |  |  |  |1 |   1          |1 |
     |--|  |--|  |--|  |--|  |--|  |--|        |--|
   5                   |1 |  |  |  |4 |   4    |4 |
                       |--|  |--|  |--|  |--|  |--|


   ---------------------------------------------------------------------------
   Now that you understand the algorithm 8^), we go back to parsort_table.  
   At the start of each call to thread_sort(), we need to find all the
   children of root_index.  We can do this quickly by using a sorted
   version of parent_table, called parsort_table.  It contains a
   convenient ordered list of children for us:

   x -+
   x  | all the children of 'x' 
   x  |                   
   x -+                        
   y -+           
   y -+ all the children of 'y'
   z -+
   z  |
   z  | all the children of 'z' 
   z  | 
   z -+

   A single call (order logn) to bsearch_parsort_table puts us at the
   correct index for finding all the children we are looking for.

   parsort_table
     |--|
   0 |1 |
     |--|
   1 |5 | 
     |--|
   2 |4 |
     |--|
   3 |3 |
     |--|
   4 |0 |
     |--|
   5 |2 |
     |--|

   ---------------------------------------------------------------------------

 */
@


1.15
log
@"Sort" menu: checkmark on sorting option (ie. CheckMenuItem) selected by
user.
Add Thread/Subject sort option
Support to record and remember user's sort option for each newsgroup
made an ANSI C++ file
@
text
@d4 1
a4 1
 * $Id: $
d417 10
a426 3
/* Routine SortParents will make 2 parents with the same subject contiguous in
   the header list without disrupting the "stableness" of the children. */    
void SortParents(header_p headers, thread_array index, thread_array parents, long iSize)
d432 24
a455 31
  for(i = 0; i < iSize; i++)
  {
    nlast = i-1;
    if(parents[index[i]] != -1)
    {
/*      TRACE4("SortParents: i = %02d, index[i] = %2d, parents[index[i]] = %3d, nlast = %3d\n", 
        i, index[i], parents[index[i]], nlast); */
      break;
    }
  }
  if(nlast)  /* at least 2 items in list */
  {
    for(i = 0; i < nlast; i++)
    {
      for(j = i+2; j <= nlast; j++)
      {
        if(!compare_subject(headers, index[i], index[j]))
        {
//		  char *p1 = headers[index[i]].subject;
//		  char *p2 = headers[index[j]].subject;
//		  TRACE2("SortParents: headers[%02d] = %s\n", index[i], p1);
//		  TRACE2("             headers[%02d] = %s\n", index[j], p2);
          for(k = j-1; k > i; k--)
          {
            temp = index[k];
            index[k] = index[k+1];
            index[k+1] = temp;
          }
        }
      }
    }
d476 1
a476 1

d479 1
a479 1

d482 1
a482 1

d486 1
a486 1

d492 1
a492 1

d495 1
a495 1

d500 1
a500 1

d502 1
a502 1

d507 2
a508 1

d512 2
a513 1

d517 1
a517 1

d519 1
a519 1

d521 4
a524 4
  {
    SortParents(headers, mid_table, parent_table, length);
  }

d530 1
a530 1

d532 1
a532 1

d541 1
a541 1
  unsigned int nLines, HANDLE header_handle, HANDLE thread_handle)
d544 37
a580 37
  {
  case IDM_SORT_DATE:
    shell_sort_index_array (headers, thread_index,
  						  nLines, compare_date);
    break;

  case IDM_SORT_SUBJECT:
    shell_sort_index_array (headers, thread_index,
  						  nLines, compare_subject);
    break;

  case IDM_SORT_LINES:
    shell_sort_index_array (headers, thread_index,
  						  nLines, compare_lines);
    break;

  case IDM_SORT_THREADSUB:
  case IDM_SORT_THREADS:
    if (threadOk)
      sort_by_threads(header_handle, thread_handle, nLines,
        (iSortOption == IDM_SORT_THREADSUB) ? TRUE : FALSE);
    else
      MessageBox(NULL, "Threading disabled", "WinVN", MB_OK);
    break;

  case IDM_SORT_FROM:
    shell_sort_index_array (headers, thread_index,
  						  nLines, compare_from);
    break;

  case IDM_SORT_ARTNUM:
  default:
    shell_sort_index_array (headers, thread_index,
  						  nLines, compare_artnum);

    break;
  }
d586 1
a586 1

d588 1
a588 1

d591 3
a593 3



d600 1
a600 1

d602 1
a602 1

d609 1
a609 1

d612 1
a612 1

d616 2
a617 2

     |--|
d619 1
a619 1
     |--|
d621 1
a621 1
     |--|
d623 1
a623 1
     |--|
d625 1
a625 1
     |--|
d627 1
a627 1
     |--|
d629 2
a630 2
     |--|

d637 1
a637 1

d640 1
a640 1

d654 1
a654 1

d658 1
a658 1

d661 1
a661 1

d664 1
a664 1

d669 1
a669 1

d673 1
a673 1

d685 1
a685 1

@


1.14
log
@unlock temp arrays before freeing
@
text
@d1 2
a2 2

/* headarry.c
d4 1
a4 1
 * $Id: headarry.c 1.13 1994/12/12 19:39:13 jcooper Exp jpiraino $
d9 2
d13 1
d218 1
a218 1
//return stricmp (headers[elem1].subject, headers[elem2].subject);
d417 42
d470 1
a470 1
sort_by_threads (HANDLE header_handle, HANDLE thread_handle, long length)
d518 5
d537 45
d615 1
a615 1
   |--|
d617 1
a617 1
   |--|
d619 1
a619 1
   |--|
d621 1
a621 1
   |--|
d623 1
a623 1
   |--|
d625 1
a625 1
   |--|
d627 1
a627 1
   |--|
d693 2
a694 2
   *                      
   |--|  |--|                                                
d696 1
a696 1
   |--|  |--|  |--|  |--|                          |--|  |--|
d698 1
a698 1
   |--|  |--|  |--|  |--|  |--|  |--|        |--|  |--|  |--|
d700 1
a700 1
   |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|
d702 1
a702 1
   |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|
d704 1
a704 1
   |--|  |--|  |--|  |--|                                |--|
d706 1
a706 1
   |--|  |--|                                                
d711 1
a711 1
   |--|                    |--|
d713 1
a713 1
   |--|  |--|                    |--|
d715 1
a715 1
   |--|  |--|                    |--|
d717 1
a717 1
   |--|        |--|  |--|                    |--|
d719 1
a719 1
   |--|  |--|  |--|  |--|  |--|              |--|
d721 1
a721 1
   |--|  |--|  |--|  |--|  |--|  |--|        |--|
d723 1
a723 1
   |--|  |--|  |--|  |--|  |--|
d749 1
a749 1
   |--|
d751 1
a751 1
   |--|
d753 1
a753 1
   |--|
d755 1
a755 1
   |--|
d757 1
a757 1
   |--|
d759 1
a759 1
   |--|
d761 1
a761 1
   |--|
d766 6
@


1.13
log
@0.9.99 update
@
text
@d4 1
a4 1
 * $Id: headarry.c 1.12 1994/11/10 01:57:36 rushing Exp $
d481 1
d483 1
@


1.12
log
@restart
@
text
@d12 1
d202 14
a215 1
  return stricmp (headers[elem1].subject, headers[elem2].subject);
@
