1 /* Prepare TeX index dribble output into an actual index.
2 Copyright (C) 1987 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 1, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
31 #define EXIT_SUCCESS ((1 << 28) | 1)
32 #define EXIT_FATAL ((1 << 28) | 4)
34 #define tell(fd) lseek(fd, 0L, 1)
39 #include <sys/types.h>
40 #include <sys/fcntl.h>
44 #define EXIT_SUCCESS 0
55 extern noshare int sys_nerr;
56 extern noshare char *sys_errlist[];
59 extern char *sys_errlist[];
62 /* When sorting in core, this structure describes one line
63 and the position and length of its first keyfield. */
67 char *text; /* The actual text of the line */
69 { /* The start of the key (for textual comparison) */
71 long number; /* or the numeric value (for numeric comparison) */
73 long keylen; /* Length of key field */
76 /* This structure describes a field to use as a sort key */
80 int startwords; /* # words to skip */
81 int startchars; /* and # additional chars to skip, to start of field */
82 int endwords; /* similar, from beg (or end) of line, to find end of field */
84 char ignore_blanks; /* Ignore spaces and tabs within the field */
85 char fold_case; /* Convert upper case to lower before comparing */
86 char reverse; /* Compare in reverse order */
87 char numeric; /* Parse text as an integer and compare the integers */
88 char positional; /* Sort according to position within the file */
89 char braced; /* Count balanced-braced groupings as fields */
92 /* Vector of keyfields to use */
94 struct keyfield keyfields[3];
96 /* Number of keyfields stored in that vector. */
98 int num_keyfields = 3;
100 /* Vector of input file names, terminated with a zero (null pointer) */
104 /* Vector of corresponding output file names, or zero meaning default it */
108 /* Length of `infiles' */
112 /* Pointer to the array of pointers to lines being sorted */
116 /* The allocated length of `linearray'. */
120 /* Directory to use for temporary files. On Unix, it ends with a slash. */
124 /* Start of filename to use for temporary files. */
128 /* Number of last temporary file. */
132 /* Number of last temporary file already deleted.
133 Temporary files are deleted by `flush_tempfiles' in order of creation. */
135 int last_deleted_tempcount;
137 /* During in-core sort, this points to the base of the data block
138 which contains all the lines of data. */
142 /* Additional command switches */
144 int keep_tempfiles; /* Nonzero means do not delete tempfiles -- for debugging */
146 /* Forward declarations of functions in this file */
148 void decode_command ();
149 void sort_in_core ();
150 void sort_offline ();
155 char *find_braced_pos ();
156 char *find_braced_end ();
163 char *maketempname ();
164 void flush_tempfiles ();
167 extern char *mktemp ();
169 #define MAX_IN_CORE_SORT 500000
179 last_deleted_tempcount = 0;
181 /* Describe the kind of sorting to do. */
182 /* The first keyfield uses the first braced field and folds case */
183 keyfields[0].braced = 1;
184 keyfields[0].fold_case = 1;
185 keyfields[0].endwords = -1;
186 keyfields[0].endchars = -1;
187 /* The second keyfield uses the second braced field, numerically */
188 keyfields[1].braced = 1;
189 keyfields[1].numeric = 1;
190 keyfields[1].startwords = 1;
191 keyfields[1].endwords = -1;
192 keyfields[1].endchars = -1;
193 /* The third keyfield (which is ignored while discarding duplicates)
194 compares the whole line */
195 keyfields[2].endwords = -1;
196 keyfields[2].endchars = -1;
198 decode_command (argc, argv);
200 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
202 /* Process input files completely, one by one. */
204 for (i = 0; i < num_infiles; i++)
211 desc = open (infiles[i], 0, 0);
212 if (desc < 0) pfatal_with_name (infiles[i]);
213 lseek (desc, 0, L_XTND);
217 outfile = outfiles[i];
220 outfile = concat (infiles[i], "s", "");
223 if (ptr < MAX_IN_CORE_SORT)
224 /* Sort a small amount of data */
225 sort_in_core (infiles[i], ptr, outfile);
227 sort_offline (infiles[i], ptr, outfile);
230 flush_tempfiles (tempcount);
234 /* This page decodes the command line arguments to set the parameter variables
235 and set up the vector of keyfields and the vector of input files */
238 decode_command (argc, argv)
246 /* Store default values into parameter variables */
249 tempdir = "sys$scratch:";
256 /* Allocate argc input files, which must be enough. */
258 infiles = (char **) xmalloc (argc * sizeof (char *));
259 outfiles = (char **) xmalloc (argc * sizeof (char *));
263 /* First find all switches that control the default kind-of-sort */
265 for (i = 1; i < argc; i++)
267 int tem = classify_arg (argv[i]);
280 fatal ("switch %s given with no argument following it", argv[i]);
281 else if (!strcmp (argv[i], "-T"))
282 tempdir = argv[i + 1];
283 else if (!strcmp (argv[i], "-o"))
284 *(op - 1) = argv[i + 1];
298 fatal ("invalid command switch %c", c);
303 /* Record number of input files, terminate list of filenames */
305 num_infiles = ip - infiles;
309 /* Return 0 for an argument that is not a switch;
310 for a switch, return 1 plus the number of following arguments that the switch swallows.
317 if (!strcmp (arg, "-T") || !strcmp (arg, "-o"))
324 /* Create a name for a temporary file */
331 sprintf (tempsuffix, "%d", count);
332 return concat (tempdir, tempbase, tempsuffix);
335 /* Delete all temporary files up to the specified count */
338 flush_tempfiles (to_count)
341 if (keep_tempfiles) return;
342 while (last_deleted_tempcount < to_count)
343 unlink (maketempname (++last_deleted_tempcount));
346 /* Copy an input file into a temporary file, and return the temporary file name */
354 char *outfile = maketempname (++tempcount);
356 char buffer[BUFSIZE];
358 odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
360 if (odesc < 0) pfatal_with_name (outfile);
364 int nread = read (idesc, buffer, BUFSIZE);
365 write (odesc, buffer, nread);
374 /* Compare two lines, provided as pointers to pointers to text,
375 according to the specified set of keyfields */
378 compare_full (line1, line2)
379 char **line1, **line2;
383 /* Compare using the first keyfield;
384 if that does not distinguish the lines, try the second keyfield; and so on. */
386 for (i = 0; i < num_keyfields; i++)
388 long length1, length2;
389 char *start1 = find_field (&keyfields[i], *line1, &length1);
390 char *start2 = find_field (&keyfields[i], *line2, &length2);
391 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
392 start2, length2, *line2 - text_base);
395 if (keyfields[i].reverse)
401 return 0; /* Lines match exactly */
404 /* Compare two lines described by structures
405 in which the first keyfield is identified in advance.
406 For positional sorting, assumes that the order of the lines in core
407 reflects their nominal order. */
410 compare_prepared (line1, line2)
411 struct lineinfo *line1, *line2;
417 /* Compare using the first keyfield, which has been found for us already */
418 if (keyfields->positional)
420 if (line1->text - text_base > line2->text - text_base)
425 else if (keyfields->numeric)
426 tem = line1->key.number - line2->key.number;
428 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0);
431 if (keyfields->reverse)
439 /* Compare using the second keyfield;
440 if that does not distinguish the lines, try the third keyfield; and so on. */
442 for (i = 1; i < num_keyfields; i++)
444 long length1, length2;
445 char *start1 = find_field (&keyfields[i], text1, &length1);
446 char *start2 = find_field (&keyfields[i], text2, &length2);
447 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
448 start2, length2, text2 - text_base);
451 if (keyfields[i].reverse)
457 return 0; /* Lines match exactly */
460 /* Like compare_full but more general.
461 You can pass any strings, and you can say how many keyfields to use.
462 `pos1' and `pos2' should indicate the nominal positional ordering of
463 the two lines in the input. */
466 compare_general (str1, str2, pos1, pos2, use_keyfields)
473 /* Compare using the first keyfield;
474 if that does not distinguish the lines, try the second keyfield; and so on. */
476 for (i = 0; i < use_keyfields; i++)
478 long length1, length2;
479 char *start1 = find_field (&keyfields[i], str1, &length1);
480 char *start2 = find_field (&keyfields[i], str2, &length2);
481 int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2);
484 if (keyfields[i].reverse)
490 return 0; /* Lines match exactly */
493 /* Find the start and length of a field in `str' according to `keyfield'.
494 A pointer to the starting character is returned, and the length
495 is stored into the long that `lengthptr' points to. */
498 find_field (keyfield, str, lengthptr)
499 struct keyfield *keyfield;
507 if (keyfield->braced) fun = find_braced_pos;
510 start = ( *fun )(str, keyfield->startwords, keyfield->startchars,
511 keyfield->ignore_blanks);
512 if (keyfield->endwords < 0)
514 if (keyfield->braced)
515 end = find_braced_end (start);
519 while (*end && *end != '\n') end++;
524 end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0);
525 if (end - str < start - str) end = start;
527 *lengthptr = end - start;
531 /* Find a pointer to a specified place within `str',
532 skipping (from the beginning) `words' words and then `chars' chars.
533 If `ignore_blanks' is nonzero, we skip all blanks
534 after finding the specified word. */
537 find_pos (str, words, chars, ignore_blanks)
545 for (i = 0; i < words; i++)
548 /* Find next bunch of nonblanks and skip them. */
549 while ((c = *p) == ' ' || c == '\t') p++;
550 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++;
551 if (!*p || *p == '\n') return p;
554 while (*p == ' ' || *p == '\t') p++;
556 for (i = 0; i < chars; i++)
558 if (!*p || *p == '\n') break;
564 /* Like find_pos but assumes that each field is surrounded by braces
565 and that braces within fields are balanced. */
568 find_braced_pos (str, words, chars, ignore_blanks)
578 for (i = 0; i < words; i++)
581 while ((c = *p++) != '{' && c != '\n' && c);
587 if (c == '{') bracelevel++;
588 if (c == '}') bracelevel--;
590 if (c == '\\' || c == '@') c = *p++; /* \ quotes braces and \ */
592 if (c == 0 || c == '\n') return p-1;
596 while ((c = *p++) != '{' && c != '\n' && c);
602 while ((c = *p) == ' ' || c == '\t') p++;
604 for (i = 0; i < chars; i++)
606 if (!*p || *p == '\n') break;
612 /* Find the end of the balanced-brace field which starts at `str'.
613 The position returned is just before the closing brace. */
616 find_braced_end (str)
627 if (c == '{') bracelevel++;
628 if (c == '}') bracelevel--;
630 if (c == '\\' || c == '@') c = *p++;
632 if (c == 0 || c == '\n') return p-1;
638 find_value (start, length)
642 while (length != 0L) {
651 /* Vector used to translate characters for comparison.
652 This is how we make all alphanumerics follow all else,
653 and ignore case in the first sorting. */
659 for (i = 1; i < 256; i++)
662 for (i = '0'; i <= '9'; i++)
663 char_order[i] += 512;
665 for (i = 'a'; i <= 'z'; i++) {
666 char_order[i] = 512 + i;
667 char_order[i + 'A' - 'a'] = 512 + i;
671 /* Compare two fields (each specified as a start pointer and a character count)
672 according to `keyfield'. The sign of the value reports the relation between the fields */
675 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
676 struct keyfield *keyfield;
684 if (keyfields->positional)
691 if (keyfield->numeric)
693 long value = find_value (start1, length1) - find_value (start2, length2);
694 if (value > 0) return 1;
695 if (value < 0) return -1;
702 char *e1 = start1 + length1;
703 char *e2 = start2 + length2;
705 int fold_case = keyfield->fold_case;
711 if (p1 == e1) c1 = 0;
713 if (p2 == e2) c2 = 0;
716 if (char_order[c1] != char_order[c2])
717 return char_order[c1] - char_order[c2];
721 /* Strings are equal except possibly for case. */
728 if (p1 == e1) c1 = 0;
730 if (p2 == e2) c2 = 0;
734 /* Reverse sign here so upper case comes out last. */
743 /* A `struct linebuffer' is a structure which holds a line of text.
744 `readline' reads a line from a stream into a linebuffer
745 and works regardless of the length of the line. */
753 /* Initialize a linebuffer for use */
756 initbuffer (linebuffer)
757 struct linebuffer *linebuffer;
759 linebuffer->size = 200;
760 linebuffer->buffer = (char *) xmalloc (200);
763 /* Read a line of text from `stream' into `linebuffer'.
764 Return the length of the line. */
767 readline (linebuffer, stream)
768 struct linebuffer *linebuffer;
771 char *buffer = linebuffer->buffer;
772 char *p = linebuffer->buffer;
773 char *end = p + linebuffer->size;
777 int c = getc (stream);
780 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
781 p += buffer - linebuffer->buffer;
782 end += buffer - linebuffer->buffer;
783 linebuffer->buffer = buffer;
785 if (c < 0 || c == '\n')
796 /* Sort an input file too big to sort in core. */
799 sort_offline (infile, nfiles, total, outfile)
804 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; /* More than enough */
805 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
806 FILE *istream = fopen (infile, "r");
808 struct linebuffer lb;
814 /* Read in one line of input data. */
816 linelength = readline (&lb, istream);
818 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
820 error ("%s: not a texinfo index file", infile);
824 /* Split up the input into `ntemps' temporary files, or maybe fewer,
825 and put the new files' names into `tempfiles' */
827 for (i = 0; i < ntemps; i++)
829 char *outname = maketempname (++tempcount);
830 FILE *ostream = fopen (outname, "w");
833 if (!ostream) pfatal_with_name (outname);
834 tempfiles[i] = outname;
836 /* Copy lines into this temp file as long as it does not make file "too big"
837 or until there are no more lines. */
839 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
841 tempsize += linelength + 1;
842 fputs (lb.buffer, ostream);
843 putc ('\n', ostream);
845 /* Read another line of input data. */
847 linelength = readline (&lb, istream);
848 if (!linelength && feof (istream)) break;
850 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
852 error ("%s: not a texinfo index file", infile);
858 if (feof (istream)) break;
864 /* Record number of temp files we actually needed. */
868 /* Sort each tempfile into another tempfile.
869 Delete the first set of tempfiles and put the names of the second into `tempfiles' */
871 for (i = 0; i < ntemps; i++)
873 char *newtemp = maketempname (++tempcount);
874 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
876 unlink (tempfiles[i]);
877 tempfiles[i] = newtemp;
883 /* Merge the tempfiles together and indexify */
885 merge_files (tempfiles, ntemps, outfile);
888 /* Sort `infile', whose size is `total',
889 assuming that it is small enough to be done in-core,
890 then indexify it and send the output to `outfile' (or to stdout). */
893 sort_in_core (infile, total, outfile)
899 char *data = (char *) xmalloc (total + 1);
903 FILE *ostream = stdout;
904 struct lineinfo *lineinfo;
906 /* Read the contents of the file into the moby array `data' */
908 int desc = open (infile, 0, 0);
911 fatal ("failure reopening %s", infile);
912 for (file_size = 0; ; )
914 if ((i = read (desc, data + file_size, total - file_size)) <= 0)
923 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
925 error ("%s: not a texinfo index file", infile);
931 /* Sort routines want to know this address */
935 /* Create the array of pointers to lines, with a default size that is frequently enough. */
938 if (!nlines) nlines = 2;
939 linearray = (char **) xmalloc (nlines * sizeof (char *));
941 /* `nextline' points to the next free slot in this array.
942 `nlines' is the allocated size. */
944 nextline = linearray;
946 /* Parse the input file's data, and make entries for the lines. */
948 nextline = parsefile (infile, nextline, file_data, file_size);
951 error ("%s: not a texinfo index file", infile);
957 /* If we have enough space, find the first keyfield of each line in advance.
958 Make a `struct lineinfo' for each line, which records the keyfield
959 as well as the line, and sort them. */
961 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
968 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
971 lp->key.text = find_field (keyfields, *p, &lp->keylen);
972 if (keyfields->numeric)
973 lp->key.number = find_value (lp->key.text, lp->keylen);
976 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
978 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
984 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
986 /* Open the output file */
990 ostream = fopen (outfile, "w");
992 pfatal_with_name (outfile);
995 writelines (linearray, nextline - linearray, ostream);
996 if (outfile) fclose (ostream);
1002 /* Parse an input string in core into lines.
1003 DATA is the input string, and SIZE is its length.
1004 Data goes in LINEARRAY starting at NEXTLINE.
1005 The value returned is the first entry in LINEARRAY still unused.
1006 Value 0 means input file contents are invalid. */
1009 parsefile (filename, nextline, data, size)
1016 char **line = nextline;
1024 if (p[0] != '\\' && p[0] != '@')
1028 while (*p && *p != '\n') p++;
1032 if (line == linearray + nlines)
1034 char **old = linearray;
1035 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1036 line += linearray - old;
1043 /* Indexification is a filter applied to the sorted lines
1044 as they are being written to the output file.
1045 Multiple entries for the same name, with different page numbers,
1046 get combined into a single entry with multiple page numbers.
1047 The first braced field, which is used for sorting, is discarded.
1048 However, its first character is examined, folded to lower case,
1049 and if it is different from that in the previous line fed to us
1050 a \initial line is written with one argument, the new initial.
1052 If an entry has four braced fields, then the second and third
1053 constitute primary and secondary names.
1054 In this case, each change of primary name
1055 generates a \primary line which contains only the primary name,
1056 and in between these are \secondary lines which contain
1057 just a secondary name and page numbers.
1060 /* The last primary name we wrote a \primary entry for.
1061 If only one level of indexing is being done, this is the last name seen */
1063 int lastprimarylength; /* Length of storage allocated for lastprimary */
1065 /* Similar, for the secondary name. */
1066 char *lastsecondary;
1067 int lastsecondarylength;
1069 /* Zero if we are not in the middle of writing an entry.
1070 One if we have written the beginning of an entry but have not
1071 yet written any page numbers into it.
1072 Greater than one if we have written the beginning of an entry
1073 plus at least one page number. */
1076 /* The initial (for sorting purposes) of the last primary entry written.
1077 When this changes, a \initial {c} line is written */
1081 int lastinitiallength;
1083 /* When we need a string of length 1 for the value of lastinitial,
1086 char lastinitial1[2];
1088 /* Initialize static storage for writing an index */
1094 lastinitial = lastinitial1;
1095 lastinitial1[0] = 0;
1096 lastinitial1[1] = 0;
1097 lastinitiallength = 0;
1098 lastprimarylength = 100;
1099 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1100 bzero (lastprimary, lastprimarylength + 1);
1101 lastsecondarylength = 100;
1102 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1103 bzero (lastsecondary, lastsecondarylength + 1);
1106 /* Indexify. Merge entries for the same name,
1107 insert headers for each initial character, etc. */
1109 indexify (line, ostream)
1113 char *primary, *secondary, *pagenumber;
1114 int primarylength, secondarylength, pagelength;
1115 int len = strlen (line);
1122 /* First, analyze the parts of the entry fed to us this time */
1124 p = find_braced_pos (line, 0, 0, 0);
1128 /* Get length of inner pair of braces starting at p,
1129 including that inner pair of braces. */
1130 initiallength = find_braced_end (p + 1) + 1 - p;
1139 if (initial1[0] >= 'a' && initial1[0] <= 'z')
1143 pagenumber = find_braced_pos (line, 1, 0, 0);
1144 pagelength = find_braced_end (pagenumber) - pagenumber;
1145 if (pagelength == 0)
1148 primary = find_braced_pos (line, 2, 0, 0);
1149 primarylength = find_braced_end (primary) - primary;
1151 secondary = find_braced_pos (line, 3, 0, 0);
1152 nosecondary = !*secondary;
1154 secondarylength = find_braced_end (secondary) - secondary;
1156 /* If the primary is different from before, make a new primary entry */
1157 if (strncmp (primary, lastprimary, primarylength))
1159 /* Close off current secondary entry first, if one is open */
1162 fputs ("}\n", ostream);
1166 /* If this primary has a different initial, include an entry for the initial */
1167 if (initiallength != lastinitiallength ||
1168 strncmp (initial, lastinitial, initiallength))
1170 fprintf (ostream, "\\initial {");
1171 fwrite (initial, 1, initiallength, ostream);
1172 fprintf (ostream, "}\n", initial);
1173 if (initial == initial1)
1175 lastinitial = lastinitial1;
1176 *lastinitial1 = *initial1;
1180 lastinitial = initial;
1182 lastinitiallength = initiallength;
1185 /* Make the entry for the primary. */
1187 fputs ("\\entry {", ostream);
1189 fputs ("\\primary {", ostream);
1190 fwrite (primary, primarylength, 1, ostream);
1193 fputs ("}{", ostream);
1197 fputs ("}\n", ostream);
1199 /* Record name of most recent primary */
1200 if (lastprimarylength < primarylength)
1202 lastprimarylength = primarylength + 100;
1203 lastprimary = (char *) xrealloc (lastprimary,
1204 1 + lastprimarylength);
1206 strncpy (lastprimary, primary, primarylength);
1207 lastprimary[primarylength] = 0;
1209 /* There is no current secondary within this primary, now */
1210 lastsecondary[0] = 0;
1213 /* Should not have an entry with no subtopic following one with a subtopic */
1215 if (nosecondary && *lastsecondary)
1216 error ("entry %s follows an entry with a secondary name", line);
1218 /* Start a new secondary entry if necessary */
1219 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1223 fputs ("}\n", ostream);
1227 /* Write the entry for the secondary. */
1228 fputs ("\\secondary {", ostream);
1229 fwrite (secondary, secondarylength, 1, ostream);
1230 fputs ("}{", ostream);
1233 /* Record name of most recent secondary */
1234 if (lastsecondarylength < secondarylength)
1236 lastsecondarylength = secondarylength + 100;
1237 lastsecondary = (char *) xrealloc (lastsecondary,
1238 1 + lastsecondarylength);
1240 strncpy (lastsecondary, secondary, secondarylength);
1241 lastsecondary[secondarylength] = 0;
1244 /* Here to add one more page number to the current entry */
1246 fputs (", ", ostream); /* Punctuate first, if this is not the first */
1247 fwrite (pagenumber, pagelength, 1, ostream);
1250 /* Close out any unfinished output entry */
1253 finish_index (ostream)
1257 fputs ("}\n", ostream);
1259 free (lastsecondary);
1262 /* Copy the lines in the sorted order.
1263 Each line is copied out of the input file it was found in. */
1266 writelines (linearray, nlines, ostream)
1271 char **stop_line = linearray + nlines;
1276 /* Output the text of the lines, and free the buffer space */
1278 for (next_line = linearray; next_line != stop_line; next_line++)
1280 /* If -u was specified, output the line only if distinct from previous one. */
1281 if (next_line == linearray
1282 /* Compare previous line with this one, using only the explicitly specified keyfields */
1283 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1285 char *p = *next_line;
1287 while ((c = *p++) && c != '\n');
1289 indexify (*next_line, ostream);
1293 finish_index (ostream);
1296 /* Assume (and optionally verify) that each input file is sorted;
1297 merge them and output the result.
1298 Returns nonzero if any input file fails to be sorted.
1300 This is the high-level interface that can handle an unlimited number of files. */
1302 #define MAX_DIRECT_MERGE 10
1305 merge_files (infiles, nfiles, outfile)
1314 int start_tempcount = tempcount;
1316 if (nfiles <= MAX_DIRECT_MERGE)
1317 return merge_direct (infiles, nfiles, outfile);
1319 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1320 making a temporary file to hold each group's result. */
1322 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1323 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1324 for (i = 0; i < ntemps; i++)
1326 int nf = MAX_DIRECT_MERGE;
1327 if (i + 1 == ntemps)
1328 nf = nfiles - i * MAX_DIRECT_MERGE;
1329 tempfiles[i] = maketempname (++tempcount);
1330 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1333 /* All temporary files that existed before are no longer needed
1334 since their contents have been merged into our new tempfiles.
1336 flush_tempfiles (start_tempcount);
1338 /* Now merge the temporary files we created. */
1340 merge_files (tempfiles, ntemps, outfile);
1347 /* Assume (and optionally verify) that each input file is sorted;
1348 merge them and output the result.
1349 Returns nonzero if any input file fails to be sorted.
1351 This version of merging will not work if the number of
1352 input files gets too high. Higher level functions
1353 use it only with a bounded number of input files. */
1356 merge_direct (infiles, nfiles, outfile)
1361 char **ip = infiles;
1362 struct linebuffer *lb1, *lb2;
1363 struct linebuffer **thisline, **prevline;
1369 struct linebuffer *prev_out = 0;
1370 FILE *ostream = stdout;
1374 ostream = fopen (outfile, "w");
1376 if (!ostream) pfatal_with_name (outfile);
1387 /* For each file, make two line buffers.
1388 Also, for each file, there is an element of `thisline'
1389 which points at any time to one of the file's two buffers,
1390 and an element of `prevline' which points to the other buffer.
1391 `thisline' is supposed to point to the next available line from the file,
1392 while `prevline' holds the last file line used,
1393 which is remembered so that we can verify that the file is properly sorted. */
1395 /* lb1 and lb2 contain one buffer each per file */
1396 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1397 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1399 /* thisline[i] points to the linebuffer holding the next available line in file i,
1400 or is zero if there are no lines left in that file. */
1401 thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
1402 /* prevline[i] points to the linebuffer holding the last used line from file i.
1403 This is just for verifying that file i is properly sorted. */
1404 prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
1405 /* streams[i] holds the input stream for file i. */
1406 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1407 /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */
1408 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1410 /* Allocate and initialize all that storage */
1412 for (i = 0; i < nfiles; i++)
1414 initbuffer (&lb1[i]);
1415 initbuffer (&lb2[i]);
1416 thisline[i] = &lb1[i];
1417 prevline[i] = &lb2[i];
1418 file_lossage[i] = 0;
1419 streams[i] = fopen (infiles[i], "r");
1421 pfatal_with_name (infiles[i]);
1423 readline (thisline[i], streams[i]);
1426 /* Keep count of number of files not at eof */
1431 struct linebuffer *best = 0;
1432 struct linebuffer *exch;
1436 /* Look at the next avail line of each file; choose the least one. */
1438 for (i = 0; i < nfiles; i++)
1442 0 < compare_general (best->buffer, thisline[i]->buffer,
1443 (long) bestfile, (long) i, num_keyfields)))
1450 /* Output that line, unless it matches the previous one and we don't want duplicates */
1453 !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1)))
1454 indexify (best->buffer, ostream);
1457 /* Now make the line the previous of its file, and fetch a new line from that file */
1459 exch = prevline[bestfile];
1460 prevline[bestfile] = thisline[bestfile];
1461 thisline[bestfile] = exch;
1465 /* If the file has no more, mark it empty */
1467 if (feof (streams[bestfile]))
1469 thisline[bestfile] = 0;
1470 nleft--; /* Update the number of files still not empty */
1473 readline (thisline[bestfile], streams[bestfile]);
1474 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break;
1478 finish_index (ostream);
1480 /* Free all storage and close all input streams */
1482 for (i = 0; i < nfiles; i++)
1484 fclose (streams[i]);
1485 free (lb1[i].buffer);
1486 free (lb2[i].buffer);
1488 free (file_lossage);
1501 /* Print error message and exit. */
1510 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
1515 printf ("texindex: ");
1520 perror_with_name (name)
1525 if (errno < sys_nerr)
1526 s = concat ("", sys_errlist[errno], " for %s");
1528 s = "cannot open %s";
1532 pfatal_with_name (name)
1537 if (errno < sys_nerr)
1538 s = concat ("", sys_errlist[errno], " for %s");
1540 s = "cannot open %s";
1544 /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */
1550 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1551 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1553 strcpy (result, s1);
1554 strcpy (result + len1, s2);
1555 strcpy (result + len1 + len2, s3);
1556 *(result + len1 + len2 + len3) = 0;
1561 /* Like malloc but get fatal error if memory is exhausted. */
1567 int result = malloc (size);
1569 fatal ("virtual memory exhausted", 0);
1575 xrealloc (ptr, size)
1579 int result = realloc (ptr, size);
1581 fatal ("virtual memory exhausted");
1587 register int length;
1591 long max_str = 65535;
1594 while (length > max_str)
1596 (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b);
1601 (void) LIB$MOVC5 (&zero, &zero, &zero, &len, b);
1603 while (length-- > 0)
1605 #endif /* not VMS */