/* =========================================================================== All the functions of MIFFEd are in this one module. Public functions (specified in MIFFED.H) are declared normally. Private functions not exposed to the client are declared static. TO EDIT WITH VI, :set tabstop=3 MIFFEd is Copyright (c) 1994 by David E. Cortesi. See MIFFEDOC.FM4 or OVERVIEW.TXT for license, disclaimer and conditions of use. ======================================================================== */ static char Copyright[] = "MIFFEd: COPYRIGHT(C) 1994 DAVID E.CORTESI"; #include /* [f]printf, fputc, fputs, sscanf */ #include /* should give strtol(), strtod() */ #include #include #include /* * the following are required under GCC because, as it is installed at * Netcom (and reputedly other Sun systems) its stdio.h does not declare * these functions in a C compile -- only C++. (What?) (true!) * The result is a (correct) warning about an implicit declaration. * In the case of strto{ld}, the lack of a declaration causes a bug. * * If you find the following externs causing duplicate/conflicting * declarations, your compiler's stdio.h and stdlib.h are correct, * and the following lines can be commented out. 
*/ /* as noted, these are not needed for SGI compilation extern int fprintf(FILE *f, char *fmt,...); extern int printf(char *fmt,...); extern int fputc(char c, FILE *stream); extern int fputs(char *s, FILE *stream); extern int sscanf (const char *, const char *, ...); extern double strtod(char *str, char **ptr); extern long strtol(char *str, char**ptr, int base); */ #include "mifmain.h" static int numItemSegs = 0; static int numItemsNew = 0; static int numItemsUsed = 0; static int numNewWord = 0; static int numNewString = 0; static int numNewComment = 0; static int numNewNumber = 0; static int numNewList = 0; static int numNewTree = 0; static int numStrNew = 0; static int numStrOld = 0; /* =========================================================================== When internal errors (usually out of memory) are detected this routine is called. It writes a string to stderr and returns a 0 which the caller can return to its caller, i.e. 'return noteError("what the problem is")' Trap here when debugging. ======================================================================== */ unsigned long noteError(char * wail) { fprintf(stderr,"\nMIFFEd: %s\n",wail); Copyright[0] = 'M'; /* force inclusion of Copyright text */ return 0L; } /* =========================================================================== This function allocates items of all types. It takes a type and allocates space for that item type from a larger segment. All Items are multiples of 4 bytes. The count of 4-byte units in the item is stored, along with its type, in it.type. This count is used in freeItem() which stores freed Items on one of several lists depending on their size. Thus makeItem will reuse an old Item of the correct size if it can, before allocating space. 
======================================================================== */ #define MOSTLONGS 17 /* no Item is larger than 16*sizeof(long) */ static void * currItemSeg = NULL; /* current block of raw material */ static int currItemOffset = ITEMSEGSIZE; /* offset of next free word in it */ static itemHandle usedItemLists[MOSTLONGS+1] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}; static itemHandle makeItem(enum itemTypes T) { itemHandle it, *listp; int nlongs, nbytes; switch (T) { /* there are faster ways to do this, but this is bulletproof */ case STRING: case WORD: case CMT: nbytes = sizeof(Item); break; case NUMBER: nbytes = sizeof(NumbItem); break; case TREE: nbytes = sizeof(TreeNode); break; case LIST: nbytes = sizeof(ListItem); break; case PSCAN: nbytes = sizeof(ParaScan); break; case EOL: case NA: default: return NULL; } /* make darn sure multiple of longs even if that ain't 4 */ nbytes = ((nbytes+(sizeof(long)-1))&(-sizeof(long))); nlongs = nbytes / sizeof(long); /* hope compiler uses ">>" operator */ /* assert(nlongs < MOSTLONGS && nlongs > 0); */ listp = & usedItemLists[nlongs]; if (*listp) { /* we have a used one juuuust your size, sir */ it = *listp; (*listp) = it->next; ++numItemsUsed; } else { /* nothing in the list of that size items, allocate */ if (currItemOffset + nbytes > ITEMSEGSIZE) { /* no room in this block if any */ currItemSeg = (void *)malloc(ITEMSEGSIZE); if (NULL == currItemSeg) /* allocation failure */ return (itemHandle) noteError("cannot allocate item segment"); currItemOffset = 0; /* got a new block, start at beginning */ ++numItemSegs; } it = (itemHandle)(((char*)currItemSeg)+currItemOffset); currItemOffset += nbytes; ++numItemsNew; } it -> next = NULL; it -> type = (nlongs << 4) | T; it -> cval.offset = INVALSTROFF; return it; } static void freeItem(itemHandle it) { int nlongs = (it -> type >> 4) & 0x0f; if (nlongs > 0 && nlongs <= MOSTLONGS) { it->next = usedItemLists[nlongs]; 
usedItemLists[nlongs] = it; } } /* =========================================================================== The following functions allocate and initialize various types of items using makeItem. If makeItem can't allocate, it displays a message and returns NULL, which these functions also return. These functions are not static; they are available to clients. ======================================================================== */ static void stowStr(charAddr *, char *); itemHandle newWord(char *text) { wordHandle tmp; if (NULL != (tmp = (wordHandle) makeItem(WORD)) ) { if (text) { stowStr(&tmp->it.cval,text); } } ++numNewWord; return (itemHandle)tmp; } itemHandle newString(char *text) { stringHandle tmp; if(NULL != (tmp = (stringHandle) makeItem(STRING)) ) { if (text) stowStr(&tmp->it.cval,text); } ++numNewString; return (itemHandle)tmp; } itemHandle newComment(char *text) { cmtHandle tmp; if(NULL != (tmp = (cmtHandle) makeItem(CMT)) ) { if (text) stowStr(&tmp->it.cval,text); } ++numNewComment; return (itemHandle)tmp; } itemHandle newNumber(char *text, char * unitext) { numbHandle tmp; if (NULL != (tmp = (numbHandle) makeItem(NUMBER)) ) { tmp->unit[0] = '\0'; if (text) { stowStr(&tmp->it.cval,text); if (unitext) { tmp->unit[0] = unitext[0]; tmp->unit[1] = unitext[1]; tmp->unit[2] = '\0'; } } } ++numNewNumber; return (itemHandle)tmp; } itemHandle newInt(int val, char * unitext) { char str[32]; sprintf(str,"%d",val); return newNumber(str,unitext); } itemHandle newFloat(float val, char * unitext) { char str[32]; sprintf(str,"%f",val); return newNumber(str,unitext); } itemHandle newList(char * name) { listHandle tmp; if (NULL != (tmp = (listHandle) makeItem(LIST)) ){ tmp -> foot.type = EOL; tmp -> foot.cval.offset = INVALSTROFF; tmp -> foot.next = (itemHandle)NULL; tmp -> contnr = (listHandle)NULL; tmp -> tail = (itemHandle)NULL; tmp -> it.next = &tmp->foot; if (name) stowStr(&tmp->it.cval,name); } ++numNewList; return (itemHandle)tmp; } /* newTreeNode is static, 
the client uses treePut */ static treeHandle newTreeNode() { treeHandle tmp; if (NULL != (tmp = (treeHandle)makeItem(TREE)) ) tmp -> l = tmp -> r = (treeHandle)NULL; ++numNewTree; return tmp; } /* =========================================================================== Character string storage routines. Character strings that are values of Items must be stored permanently so they can be recovered on demand. In order to avoid making many calls on malloc() to get space we allocate string storage space in segments. We store pointers to up to 256 "segments" in an array of pointers. All strings in a MIF file are small with respect to the segment size, so we don't worry about trying to fill every byte in each segment; each time a string won't fit in the current segment we allocate a new segment. ======================================================================== */ static short int strSegNum = -1; static char **strSegList = NULL; static unsigned long int curSegOff = STRSEGSIZE; /* =========================================================================== This function stores a character string, returning its charAddr. Note that it does not enforce uniqueness. If you call this routine twice with the same string, it will store two copies. Returns: 1 if successful, else 0 to indicate unable to allocate memory (and writes a message to stderr if that happens). ======================================================================== */ static int strToSeg(char * it, charAddr * rslt) { int len = strlen(it); rslt->offset = INVALSTROFF; /* set invalid or null-string address */ if (len == 0) /* don't store the null string */ return 1; /* INVALSTROFF means NULL ptr and null string */ if (STRSEGSIZE < len) { /* bulletproofing - if occurs, probably means bug */ return noteError("string too large to store, probable bug"); } /* test to see if room in current segment. 
Use ">=" to allow for \0 at end */ if (curSegOff + len >= STRSEGSIZE) { /* time to get another segment */ if (strSegNum < 0) { /* strSegList not allocated yet */ if (NULL == (strSegList = calloc(MAXSTRSEGS,sizeof(char *))) ) return noteError("cannot alloc strSegList"); } if (++strSegNum < MAXSTRSEGS) /* room for another segment */ if (NULL != (strSegList[strSegNum] = calloc(STRSEGSIZE,1)) ) curSegOff = 0; /* new segment ready to use */ else return noteError("cannot allocate a string segment"); else return noteError("maxed out on string segments"); } /* assert strSegList[strSegNum]+curSegOff -> at least len+1 bytes */ memcpy(strSegList[strSegNum]+curSegOff,it,len); rslt->segno = strSegNum; rslt->offset = curSegOff; curSegOff += 1+len; /* allowing for a \0 which is thanks to calloc */ return 1; } /* =========================================================================== Convert a charAddr into a real char *. For INVALSTROFF, return address of a static null string. ======================================================================== */ static char nullInvalidString = '\0'; static char * getCharAddr( charAddr * in ) { if ((in->segno <= strSegNum) && (in->offset < INVALSTROFF)) return in->offset + strSegList[in->segno]; else return &nullInvalidString; } /* =========================================================================== Basic navigation routines There is at present a fair amount of bulletproofing code here. Probably it should be converted to assertions so it could be conditionally omitted by the braver programmers... ======================================================================== */ /* Return the type of an item given its handle. */ enum itemTypes itemType(itemHandle ith) { unsigned char t = NA; if (ith) t = ith->type & 0x0f; return ((t > NA)&&(t <= TREE))?(t):(NA); } /* Return the handle of the physically-next item from a handle. From a LIST, returns first contents item. From last contents item of a list, returns the EOL. 
*/ itemHandle nextItem(itemHandle ith) { enum itemTypes T; if ( (ith) /* not null handle */ && (NA < (T = FASTTYPEOF(ith))) && (TREE > T) ) return ith->next; else return (itemHandle)NULL; } /* Return the handle of the logically-next item at the same nesting level as a handle. From a LIST, returns the item following the EOL. From last contents item of a list, returns NULL (no next in list). */ itemHandle nextInList(itemHandle ith) { enum itemTypes T; if ( (ith) /* not null handle */ && (NA < (T = FASTTYPEOF(ith))) && (TREE > T)) { itemHandle nxt; if (LIST != T) { /* not a list, logical next is physical next */ nxt = ith->next; } else { /* logical next of a list is next of its last item */ nxt = ((listHandle)ith) -> foot.next; } if ( (nxt) /* there is a next */ && (EOL == FASTTYPEOF(nxt)) ) { /* ith is the last item in a list, its next is EOL */ nxt = NULL; } return nxt; } else return (itemHandle)NULL; } /* Retrieve the text of an item. Caller is advised to treat the returned string as read-only. */ char * textOf(itemHandle ith) { if (ith) return getCharAddr(&ith->cval); else { noteError("text of null"); return & nullInvalidString; } } /* Change (or install) the text of an item. Permit changing to a null char string. */ void changeText(itemHandle ith, char *newtext) { if (ith) if (NA != itemType(ith)) stowStr(&ith->cval,newtext); else noteError("change text of nonItem"); else noteError("change text of null"); } /* Return the numeric text of a NUMBER item in numeric form, either int or float. Don't bother checking the type as this only queries the cval which is in every item. */ double floatOf(itemHandle ith) { return strtod(getCharAddr(&ith->cval),NULL); } long integerOf(itemHandle ith) { return strtol(getCharAddr(&ith->cval),NULL,0); } /* Return the address of the unit text of an item. Caller is advised to treat this as read-only. 
*/ char * unitTextOf(itemHandle ith){ if (ith) { if (TYPEIS(ith,NUMBER)) { return ((numbHandle)ith)->unit; } else noteError("unit of non-number"); } else noteError("unit of null"); return &nullInvalidString; } /* Change (or install) the numeric text of a NUMBER item, allowing input of either a float or an integer. (To change the text based on a char*, use changeText.) Note: no validity check for the type of NUMBER, since this is merely a text change which is safe on any item type. */ void changeFloat(itemHandle ith, float newval) { char str[32]; sprintf(str,"%f",newval); changeText(ith,str); } void changeInt(itemHandle ith, int newval) { char str[32]; sprintf(str,"%d",newval); changeText(ith,str); } /* Change (or install) the unit text of a NUMBER item. Do type-check it since the unit field exists only in a NUMBER. */ void changeUnit(itemHandle ith, char *uniText) { if (ith) if (TYPEIS(ith,NUMBER)) { ((numbHandle)ith)->unit[0] = uniText[0]; ((numbHandle)ith)->unit[1] = uniText[1]; } else noteError("change unit of nonnumber"); else noteError("change unit of null"); } /* The following functions work with numbers as unit-based measures. There is quite a bit of bullet-proofing in checking for numbers etc. Each one calls the ones above it in this list, which means there is quite a bit of redundant error checking. To be considered. measureOf() -- convert unit text to unit enumeration value millimetersOf() -- return value converted to millimeters convertUnit() -- change units retaining basic value The following table converts 2-character units to unitMeasure codes. Some extra codes are allowed based on the fact that Frame allows the user (online) to write "point," "pica," and "cicero." I don't expect these words to get into a MIF file, but if they do the parser will save the initial and final letters of the word. The table is ordered by approximate likelihood of a hit. 
*/ static struct unitDecode { char c0, c1; enum unitMeasures umc; } unitTable[] = { { '"', '\0', UM_IN }, { 'i', 'n', UM_IN }, { 'p', 't', UM_PT }, { 'p', 'c', UM_PC }, { 'c', 'm', UM_CM }, { 'm', 'm', UM_MM }, { 'c', 'c', UM_CC }, /* official Cicero */ { 'd', 'd', UM_DD }, /* official Didot */ { 'p', 'a', UM_PC }, /* PicA */ { 'i', 'h', UM_IN }, /* IncH */ { 'c', 'o', UM_CC }, /* CicerO */ { 'd', 't', UM_DD } /* DidoT */ }; enum unitMeasures measureOf(itemHandle ith){ struct unitDecode *udp; numbHandle nh; if (ith) { if (TYPEIS(ith,NUMBER)) { nh = (numbHandle)ith; for (udp = unitTable; (udp->c0); ++udp) { if (nh->unit[0] == udp->c0 && nh->unit[1] == udp->c1) { return udp->umc; } } } else noteError("measure of non-number"); } else noteError("measure of null"); return UM_NA; } double millimetersOf(itemHandle ith) { double val = 0.0; enum unitMeasures um = measureOf(ith); if (UM_NA < um) { val = strtod(getCharAddr(&ith->cval),NULL); switch (um) { case UM_IN: val *= UM_IN_MM; break; case UM_PT: val *= UM_PT_MM; break; case UM_PC: val *= UM_PC_MM; break; case UM_CM: val *= UM_CM_MM; break; case UM_CC: val *= UM_CC_MM; break; case UM_DD: val *= UM_DD_MM; case UM_MM: case UM_NA: break; } } return val; } void convertUnit(itemHandle ith, enum unitMeasures um) { enum unitMeasures oldUm = measureOf(ith); if (UM_NA < oldUm) { if (UM_NA < um && UM_DD >= um) { double val = millimetersOf(ith); numbHandle nh = (numbHandle)ith; char work[32]; switch(um) { case UM_IN: val /= UM_IN_MM; nh->unit[0] = 'i'; nh->unit[1] = 'n'; break; case UM_PT: val /= UM_PT_MM; nh->unit[0] = 'p'; nh->unit[1] = 't'; break; case UM_PC: val /= UM_PC_MM; nh->unit[0] = 'p'; nh->unit[1] = 'c'; break; case UM_CM: val /= UM_CM_MM; nh->unit[0] = 'c'; nh->unit[1] = 'm'; break; case UM_CC: val /= UM_CC_MM; nh->unit[0] = 'c'; nh->unit[1] = 'c'; break; case UM_DD: val /= UM_DD_MM; nh->unit[0] = 'd'; nh->unit[1] = 'd'; break; case UM_MM: nh->unit[0] = 'm'; nh->unit[1] = 'm'; case UM_NA: break; } 
sprintf(work,"%.9g",val); stowStr(&ith->cval,work); } } } /* Return the containing list of a list, or of the list containing any non-list. */ itemHandle containerOf(itemHandle list) { if (list) if (TYPEIS(list,LIST)) { /* container of list is easy */ return (itemHandle)((listHandle)list)->contnr; } else { /* nonlist: find containing list if any */ itemHandle scan = list; while ( (scan)&&(EOL!=FASTTYPEOF(scan)) ) { scan = (LIST==FASTTYPEOF(scan))? ((listHandle)scan)->foot.next : scan->next; } if (scan) { /* scan->EOL, return container */ scan = (itemHandle)( (char *)scan - sizeof(Item) ); return (itemHandle) (((listHandle)scan) -> contnr); } return (itemHandle)NULL; } else return (itemHandle)noteError("container of null"); } /* Append a new item to the tail of a list. Complicating factors: The appended item may be null. Upon consideration we allow this as a no-op. The appended item may be the head of a sequence, in which case the whole sequence is appended. If the (or any) appended item is a list, its container pointer has to be set. */ void append(itemHandle list, itemHandle ith) { itemHandle z = ith; if (NULL==ith) return; /* allow null append as no-op */ if (NULL==list) { /* do not allow append to null list */ noteError("append to null list"); return; } if (LIST!=FASTTYPEOF(list)) { noteError("append to nonlist"); return; } if (NA==itemType(ith)) { noteError("append junk to list"); return; } /* run z to the end of ith->sequence. note that the next of a LIST is never null, hence if ith->LIST the loop will iterate at least once. */ while(z->next) { if (LIST==FASTTYPEOF(z)) { ((listHandle)z) -> contnr = (listHandle)list; z = &((listHandle)z) -> foot; } else z = z->next; } if ( ((listHandle)list)->tail ) /* nonempty list */ ((listHandle)list)->tail->next = ith; else /* empty list */ list->next = ith; ((listHandle)list)->tail = z; z->next = & ((listHandle)list)->foot; } /* Insert a new item at the head of a list or sequence. 
This is not strictly a list operation, since there is nothing wrong with inserting at any point in a sequence. However, must keep in mind that: the inserted item could be the head of a sequence; any inserted list needs to have its container set (provided insert is into a list of course) For the time being, insertion of a null is not an error...to be reviewed... */ void insert (itemHandle list, itemHandle ith) { enum itemTypes listsType; itemHandle z = ith; if (NULL==ith) return; /* not an error for now */ if (NULL==list) { noteError("insert into null list"); return; } if (NA==(listsType=itemType(list))) { noteError("insert into junk"); return; } while (z->next) { /* run z to end of ith->sequence */ if ( LIST==FASTTYPEOF(z) ) { if (LIST==listsType) ((listHandle)z) -> contnr = (listHandle) list; z = &((listHandle)z) -> foot; } else z = z->next; } z->next = list->next; list->next = ith; if (LIST==listsType) if (NULL==((listHandle)list)->tail) /* empty list got contents*/ ((listHandle)list)->tail = z; } /* Compare two items for having equal character text contents. Since text is stored uniquely in string storage, the items have the same text IFF they have the same cvals. */ int equalText(itemHandle a,itemHandle b) { return (NULL != a) && (NULL != b) && (a->cval.offset == b->cval.offset) && (a->cval.segno == b->cval.segno); } /* Compare the text of an item to a literal string. This is simple enough that it could be a macro -- except that a macro couldn't check for valid pointers. */ int textCmp(itemHandle a, char *b) { if (NA!=itemType(a)) return strcmp(getCharAddr(&a->cval),b); else return -1; /* nonitem a < ?b */ } /* Compare two items for equal contents. For WORD/STRING/CMT the only contents are the item texts. For NUMBER the units must also be equal. However, for LIST the list names and the list contents must be equal. The latter is a recursive test. Note that EOL is treated like CMT, the text of two EOLs is compared. 
This means that two lists that differ only in their terminal comments (if comments are preserved) are different i.e. "#foo" != "#bar" */ int equalItem(itemHandle a,itemHandle b) { listHandle aList, bList; itemHandle aItem, bItem; if ( (NULL == a) || (NULL == b) || (a->type != b->type) ) return 0; /* the items are nonnull and of the same type */ switch ( FASTTYPEOF(a) ) { case NUMBER: /* compare units, fall through to test texts */ if ( (((numbHandle)a)->unit[0] != ((numbHandle)b)->unit[0]) || (((numbHandle)a)->unit[1] != ((numbHandle)b)->unit[1]) ) return 0; case WORD: case STRING: case CMT: case EOL: return (a->cval.offset == b->cval.offset) && (a->cval.segno == b->cval.segno) ; case LIST: aList = (listHandle)a; /* avoid a lot of casting */ bList = (listHandle)b; aItem = a->next; bItem = b->next; while( equalItem(aItem,bItem) ) { /* recursion */ aItem = nextInList(aItem); bItem = nextInList(bItem); if ((NULL == aItem)||(NULL == bItem)) return ((NULL==aItem)&&(NULL==bItem)); } default: return 0; } } /* copyItem does a "deep copy" of the object it is given. The copy operation uses recursion into a list to reproduce the list's contents. Although a TREE node is nominally an item, copyItem does not accept one. Client code is not supposed to know about tree nodes ("treeHandle" is not a compatible type with "itemHandle"), plus, deep copy of a tree is a dumb idea. EOL items are also rejected because the EOL is a built-in part of a LIST node and can't exist independently. 
*/
/*
   Returns the handle of the copy.  Returns NULL (after noteError) for a
   null, tree or EOL argument, and NULL when the copy itself cannot be
   allocated.  A list member that cannot be copied is left out of the
   copied list.
*/
itemHandle copyItem(itemHandle ith)
{
   enum itemTypes T;
   itemHandle ret;

   if (NULL == ith)
      return (itemHandle) noteError("copy of null, tree or eol");
   T = FASTTYPEOF(ith);
   if ( !( (NA < T)
        && (T < TREE)       /* we do NOT copy trees */
        && (EOL != T) ) )   /* we do NOT copy EOLs either */
      return (itemHandle) noteError("copy of null, tree or eol");

   switch (T) {
      case NUMBER:
         ret = (itemHandle) newNumber(NULL,NULL);
         break;
      case LIST:
         ret = (itemHandle) newList(NULL);
         break;
      default: /* word, string, comment */
         ret = makeItem(T);
         break;
   }
   if (NULL == ret)  /* could not get an item to copy into */
      return ret;

   ret->cval = ith->cval;  /* copy text by reference */
   if (NUMBER == T) {      /* copy unit text by value */
      ((numbHandle)ret)->unit[0] = ((numbHandle)ith)->unit[0];
      ((numbHandle)ret)->unit[1] = ((numbHandle)ith)->unit[1];
      ((numbHandle)ret)->unit[2] = '\0';  /* keep it a valid C string */
   }
   if (LIST == T) {
      itemHandle iscan;
      /* the terminal comment lives in the EOL foot */
      ((listHandle)ret)->foot.cval = ((listHandle)ith)->foot.cval;
      for (iscan=ith->next;
           (iscan) && (EOL != FASTTYPEOF(iscan)) ;
          ) {
         append(ret,copyItem(iscan));  /* append ignores a NULL copy */
         iscan = nextInList(iscan);
      }
   } /* end LIST==T */
   return ret;
}

/*
   Find the first list having a given name, contained within a specified
   list. Example: find first ParaLine in a Para.

   Design Note: firstOfName() and nextSameName() are very similar
   functions, and they are closely related: firstOfName is typically used
   to set up a for() loop, while nextSameName is used to continue the
   same loop.

   The comparison of character strings can be done in two ways:
      (a) stow the comparand and compare CharAddr structures
      (b) get the address of list string and use strcmp().

   Suppose there are order 1000 unique strings in the string database.
   Then stowStr will on average make 9 strcmp() calls in stowing or
   finding a string. In the typical use of firstOfName we would expect to
   "hit" in the first 1-3 comparisons. Hence in that function it makes
   more sense to use method (b).

   However in nextOfName the name string has already been stowed when the
   list was created, and it is then much faster to compare the CharAddr
   structures.
*/
itemHandle firstOfName(itemHandle cntnr, char *name)
{
   itemHandle cursor;

   if (!cntnr)
      return (itemHandle) noteError("firstOfName on null");
   if (!(TYPEIS(cntnr,LIST)))
      return (itemHandle) noteError("firstOfName on nonlist");

   cursor = cntnr->next;
   while ( (cursor) && (EOL != FASTTYPEOF(cursor)) ) {
      if (LIST != FASTTYPEOF(cursor)) {
         /* not a list, advance to next */
         cursor = cursor->next;
         continue;
      }
      if (0 == strcmp(name,getCharAddr(&cursor->cval)))
         return cursor;
      /* a list, but not the right name: step over it */
      cursor = ((listHandle)cursor)->foot.next;
   }
   /* hit EOL, or fell off end of incomplete list */
   return (itemHandle) NULL;
}

/*
   Starting from a list, find the next list at the same nesting level
   that carries the same name.  Names are matched by comparing their
   stored charAddr values, not by comparing characters.
   Returns NULL when there is none, and also (after noteError) when
   scan is null or not a list.
*/
itemHandle nextSameName(itemHandle scan)
{
   charAddr wanted;
   itemHandle cursor;

   if (!scan)
      return (itemHandle) noteError("nextSameName on null");
   if (!(TYPEIS(scan,LIST)))
      return (itemHandle) noteError("nextSameName on nonlist");

   wanted = scan->cval; /* save handle of this name */
   cursor = ((listHandle)scan)->foot.next;
   while ( (cursor) && (EOL != FASTTYPEOF(cursor)) ) {
      if (LIST != FASTTYPEOF(cursor)) {
         /* not a list, advance to next */
         cursor = cursor->next;
         continue;
      }
      if ( (wanted.offset == cursor->cval.offset)
        && (wanted.segno == cursor->cval.segno) )
         return cursor;
      /* a list, but not same name: step over it */
      cursor = ((listHandle)cursor)->foot.next;
   }
   /* hit EOL, or fell off end of incomplete list */
   return (itemHandle) NULL;
}

/*
   Look for a list containing a given logical tag within a container.

   Frame has a number of dictionaries -- lists of tables, variables,
   frames, fonts, etc. -- which are alike in that each entry has an
   identifying tag as a subordinate list member. This function is used
   to look up entries in dictionaries.
containing list list name tag name typical tag (first arg) (returned) (list in) (word/string) ColorCatalog Color ColorTag `mauve' ConditionCatalog Condition CTag `version 2' PgfCatalog Pgf PgfTag `Body' FontCatalog Font FTag `emphasis' RulingCatalog Ruling RulingTag `tagname' TblCatalog TblFormat TblTag `type A' VariableFormats VariableFormat VariableName `varname' XRefFormats XrefFormat XRefName `Heading & page' AFrames Frame ID 1 Tbls Tbl TblId 224 For example: ...> ... > return this ^ after finding ^^^^^^^^ */ itemHandle findTaggedList( itemHandle container, char * listName, char * tagName, char * tagValue) { itemHandle listScan,tagScan,tagVal; if (NULL == container) return (itemHandle) noteError("findTaggedList on null"); if (LIST != FASTTYPEOF(container)) return (itemHandle) noteError("findTaggedList on nonlist"); for ( listScan = firstOfName(container,listName); (listScan); listScan = nextSameName(listScan) ) { tagScan = firstOfName(listScan,tagName); if ((tagScan)) tagVal = tagScan->next; /* first item in list */ if ( (tagScan) /* found cval))) ) { return listScan; } } return (itemHandle) NULL; } /* Yank one item from a sequence and return it for use elsewhere. Return null when input is null, or not an item that has a next, or when there is no next. Make sure that the returned item has no "next"; it has been yanked and is no longer part of a sequence. When yanking a list, null out its container pointer. After removing the only remaining item from a list, set its tail pointer to null, which is the mark of an empty list. 
*/
itemHandle yankNext(itemHandle ith)
{
   itemHandle work = (itemHandle)NULL;

   if (ith) {
      enum itemTypes T = FASTTYPEOF(ith);
      if (TREE > T && NA < T) {
         if ((work = ith->next)) {
            switch(FASTTYPEOF(work)) {
               case NA:
                  noteError("yankNext: next is nonitem");
                  /* fall through: junk is not returned either */
               case EOL:   /* an EOL belongs to its list, never yanked */
                  work = (itemHandle)NULL;
                  break;
               case LIST:  /* unlink the whole list, contents included */
                  ((listHandle)work)->contnr = (listHandle)NULL;
                  ith->next = ((listHandle)work)->foot.next;
                  ((listHandle)work)->foot.next = (itemHandle)NULL;
                  break;
               default:
                  ith->next = work->next;
                  work->next = (itemHandle)NULL;
            } /* end switch */
            /* a list whose next is now its EOL has become empty */
            if (LIST==T && (ith->next) && EOL==FASTTYPEOF(ith->next))
               ((listHandle)ith)->tail = (itemHandle)NULL;
         }
         else
            noteError("yankNext: next is null");
      }
      else
         noteError("yankNext on nonitem");
   }
   else
      noteError("yankNext on null");
   return work; /* which is null on all errors */
}

/*
   Yank out the next item from a sequence and recycle its contents,
   including the contents of a list -- a recursive job.

   Nothing happens when there is no yankable next item (null, EOL or
   junk).  While emptying a yanked list the loop stops at the list's EOL,
   at a null next, or as soon as a pass removes nothing, so a malformed
   list cannot make it spin.
*/
void trashNext(itemHandle ith)
{
   itemHandle work = yankNext(ith);

   if (work) { /* trust yankNext to return only trashable items */
      if (LIST == FASTTYPEOF(work)) {  /* trash contents */
         while ( (work->next) && (EOL != FASTTYPEOF(work->next)) ) {
            itemHandle before = work->next;
            trashNext(work);
            if (work->next == before)  /* nothing was removed */
               break;
         }
      }
      freeItem(work);
   }/* else there is no next */
}

/*
   recycle all the items in a sequence, including its head. this is
   the function that recycles anything based on its handle -- trashNext
   will not recycle the thing pointed to, only its next.

   The sequence ends at a null next.  It also ends at the first item that
   cannot be yanked: the EOL of an enclosing list, which belongs to that
   list and is left alone, or a junk item.  Without that second test the
   loop would never finish for a sequence that is the body of a list.
*/
void trashSequence(itemHandle ith)
{
   Item work;  /* temporary head so the real head can be yanked too */

   work.type = CMT;   /* innocuous type */
   work.next = ith;
   while(work.next) {
      itemHandle before = work.next;
      trashNext(&work);
      if (work.next == before)   /* nothing was removed: stop */
         break;
   }
}

/*
   Yank out the logically-next item at the same nesting level as a handle.
   For a LIST, yanks and returns the item following the EOL.
   For a non-list, same as yankNext().
   For last contents item of a list, returns NULL (no next in list).
*/ itemHandle yankNextInList(itemHandle ith) { enum itemTypes T; if ( (ith) /* not null handle */ && (NA < (T = FASTTYPEOF(ith))) && (TREE > T)) { itemHandle nxt; if (LIST != T) { /* not a list, logical next is physical next */ return yankNext(ith); } else { /* logical next of a list is next of its last item */ nxt = ((listHandle)ith) -> foot.next; } if ( (nxt) /* there is a next */ && (EOL == FASTTYPEOF(nxt)) ) { /* ith is the last item in a list, its next is EOL */ return NULL; } return yankNext((itemHandle)&((listHandle)ith)->foot); } else return (itemHandle)NULL; } /* Replace the body of a list wholesale. For example, suppose that pPen is the handle of a list. Then you could use replaceBody(pPen,newInteger(15)) to make it . */ itemHandle replaceBody(itemHandle list,itemHandle bod) { if (list) { if (TYPEIS(list,LIST)) { trashSequence(list->next); if (bod) insert(list,bod); return(bod); } else return (itemHandle)noteError("replace body of nonlist"); } else return (itemHandle)noteError("replace body of null"); } /* Replace the body of a designated list within another list. For example, let pFrame be the handle of a list. Then changeAttr(pFrame, "Pen", newInteger(15)) finds the (first) list inside the frame and replaces its body with 15. */ itemHandle changeAttr(itemHandle lst, char * attname, itemHandle val) { if (lst) { if (TYPEIS(lst,LIST)) { itemHandle work = firstOfName(lst,attname); if (work) replaceBody(work,val); return val; } else return (itemHandle)noteError("change attr of nonlist"); } else return (itemHandle)noteError("change attr of null"); } /* =========================================================================== Diagnostic display routines dumpItem does a hex dump of one item on a FILE using fprintf. dumpSequence dumps all items in a sequence, including list contents. 
dumpStatistics dumps counts of allocations of items ======================================================================== */ static char *typeStrs[8] = { "NA ", "WORD ", "NUMBER ", "STRING ", "LIST ", "EOL ", "CMT ", "TREE "}; void dumpItem(FILE* f,itemHandle ith) { unsigned char t = itemType(ith); int cl; char *cp, tmp[32]; numbHandle nh; listHandle lh; fprintf(f,"%08lx %s",(long)ith,typeStrs[t]); if (t != NA) { cl = strlen(cp = getCharAddr(&ith->cval)); if (cl < 32) strcpy(tmp,cp); else { memcpy(tmp,cp,28); strcpy(tmp+28,"..."); } fprintf(f,"%02x %02x %04x %08lx (%d) %s", ith->type, ith->cval.segno, ith->cval.offset, (long)ith->next, cl, tmp); if (t == NUMBER) { nh = (numbHandle)ith; fprintf(f," %s",nh->unit); } if (t == LIST) { lh = (listHandle)ith; fprintf(f," %08lx %08lx",(long)lh->contnr,(long)lh->tail); } } fprintf(f,"\n"); } void dumpSequence(FILE* f,itemHandle ith) { itemHandle this, that; int depth = 0; for( that = ith; (this = that); that = nextItem(this) ) { if (itemType(this)==EOL) fprintf(f,"\t<<<< list depth %d\n",depth--); dumpItem(f,this); if (itemType(this)==LIST) fprintf(f,"\t>>>> list depth %d\n",++depth); } } void dumpStats(FILE *statf){ fprintf(statf,"%d item segments = %ld bytes\n",numItemSegs, numItemSegs*ITEMSEGSIZE); fprintf(statf,"%d new items\n",numItemsNew); fprintf(statf,"%d new items recycled\n",numItemsUsed); fprintf(statf,"\tWORD\t%d\n",numNewWord); fprintf(statf,"\tSTRING\t%d\n",numNewString); fprintf(statf,"\tCMT\t%d\n",numNewComment); fprintf(statf,"\tNUMBER\t%d\n",numNewNumber); fprintf(statf,"\tLIST\t%d\n",numNewList); fprintf(statf,"\tTREE\t%d\n",numNewTree); fprintf(statf,"%d string segments = %d bytes\n", (((unsigned)strSegNum)+1),STRSEGSIZE*(((unsigned)strSegNum)+1)); fprintf(statf,"%d strings stored\n",numStrNew); fprintf(statf,"%d strings found\n",numStrOld); } /* =========================================================================== The binary tree support is pretty simple. 
At least in this version there is no attempt to balance the tree, and no support for deleting keys. (The trashTree function is a memory-management routine for whole trees, not individual leaves or nodes.) ======================================================================== */ /* =========================================================================== This function is the guts of the binary tree search, used to implement the publically-accessible functions. It scans a treeHandle-pointer down a tree until it points to the pointer to the node being looked for. Or else it points to a NULL pointer, indicating that the wanted node does not exist, and this is the place to put it. ======================================================================== */ static treeHandle * rawTreeSearch(treeHandle *scan, char *key) { int j; for( ; NULL != (*scan) ; ) { if (0 == (j = strcmp(key, getCharAddr(&(*scan)->it.cval))) ) break; if (j < 0) /* test key is less, go left */ scan = &((*scan)->l); else /* test key is greater, go right */ scan = &((*scan)->r); } return scan; } /* =========================================================================== This function looks for a character string to see if it has been seen and stored before. If not, it stores it. Either way it returns the charAddr of the (now) stored string, or an invalid charAddr if there was no room to store the string. A binary tree is used to make sure that strings are stored only once. In the event that the string has not been noted before, and it is possible to store the string, but impossible to allocate a new binary node, the string is stored perhaps redundantly. Thus failure to allocate a tree node does not cause program failure, only redundant storage of strings. 
======================================================================== */
static treeHandle uniqueStringTree = NULL;

static void stowStr( charAddr * dest, char * it )
{
   charAddr stored;
   treeHandle *slot = rawTreeSearch( & uniqueStringTree, it );
   treeHandle known = (*slot);

   if (NULL != known)
   {  /* string already stored: bump the use count kept in it.next and
         hand back the charAddr held by the tree node */
      known->it.next = (itemHandle)(1+(long)known->it.next);
      ++numStrOld;
      (*dest) = known->it.cval;       /* assumes structure assignment ok */
      return;
   }

   /* not seen before: put the text into string storage, then try to
      remember it in the tree */
   if (strToSeg(it,&stored))
   {
      treeHandle fresh = newTreeNode();
      (*slot) = fresh;
      if (NULL != fresh)
      {
         fresh->it.cval = stored;            /* key is the stored string */
         fresh->it.next = (itemHandle)1L;    /* value is use count (1) */
         ++numStrNew;
      }
      /* no node available: the string is stored but not remembered */
   }
   /* when strToSeg fails the caller receives whatever charAddr it left in
      "stored" -- per the original note, a valid null charAddr */
   (*dest) = stored;                  /* assumes structure assignment ok */
}

/* ===========================================================================
   The published way of putting something in a tree.
======================================================================== */ void * treeEnter(treeHandle * pRoot,char *key,void *val) { treeHandle *pNode = rawTreeSearch(pRoot,key); if (NULL==(*pNode)) { (*pNode)=newTreeNode(); if ((*pNode)) stowStr(&(*pNode)->it.cval,key); } if ((*pNode)) { (*pNode)->it.next = (itemHandle)val; return (void*)((*pNode)->it.next); } else return NULL; } /* =========================================================================== The function to get something out of a tree ======================================================================== */ void * treeLook( treeHandle root, char * key) { treeHandle *pNode = rawTreeSearch(&root,key); if ((*pNode)) return (void*)((*pNode)->it.next); else return NULL; } /* =========================================================================== Internal subroutine to walk all the nodes of a tree and call a function at each node. The called functions include treeNodeTrasher, which can free "root" before the right side is inspected. ======================================================================== */ static void treeWalk(treeHandle root, void ICB(treeHandle th)) { if (root) { treeHandle r = root->r; /* save right side */ if (root->l) { treeWalk(root->l,ICB); } ICB(root); /* root->node can disappear right here */ if (r) { treeWalk(r,ICB); } } } /* =========================================================================== Scan a user function over all nodes of a tree in "in" order. ======================================================================== */ static void (*pUCB)(char *k, void *v); /* save &UCB */ static void treeCaller(treeHandle root) { pUCB(getCharAddr(&root->it.cval),(void *)root->it.next); } void treeScan(treeHandle root,void UCB(char *k, void*v)) { pUCB = UCB; treeWalk(root,treeCaller); } /* =========================================================================== Return all nodes of a tree to use by freeing them. 
======================================================================== */
/* per-node callback for trashTree: hand one node back to freeItem */
static void treeTrasher(treeHandle it)
{
   freeItem((itemHandle)it);
}

/*
   Free every node of the tree whose root pointer is at pRoot, then clear
   that root pointer so the caller is not left holding freed nodes.
   A null pRoot is ignored.
*/
void trashTree( treeHandle * pRoot )
{
   if (NULL == pRoot)
      return;
   treeWalk(*pRoot,treeTrasher);
   /* clear the caller's root, not our local copy of its address */
   *pRoot = NULL;
}

/* ===========================================================================
                              MIF PARSING

   From an O-O viewpoint we have a class whose attributes are represented
   by the parseState structure.  (Refer to mifmain.h for its declaration as
   well as the various states and classes.)  The methods of that class are
   as follows:

      parseInit      initialize state of a parse
      parseMIF       take a string of text and convert it to Items
      parseResult    return head of chain of items output by parseMIF
      parseClosed    return 1 if all lists have been closed
      parseErrors    return count of errors seen
      parseDump      recycle created Items and reinitialize parse

   These are the public methods of the class.  The key one is obviously
   parseMIF.  It takes a parseState (which would be "self" if this were
   really O-O) and a null-terminated string of ASCII.  It slurps up the whole
   string and converts what it can to Items.  When it finds errors it writes
   diagnostics to stderr (unless PARSE_NO_DIAG) and continues.  It stops on
   reaching \0 in the input, which has one of two effects:

   If PARSE_FINAL was passed with the string, \0 is classed as fsmStop, and
   terminates the parse.  fsmStop forces an end to any token.  Unclosed
   strings and lists are an error at fsmStop.

   Absent PARSE_FINAL, \0 is taken as fsmSuspend, which suspends the parse in
   the expectation of more input.  Thus you can feed in lines or larger
   blocks of text sequentially, ignoring line boundaries, and the parse
   suspends after each is processed.  All tokens (names, strings, numbers,
   etc) are allowed to span input of fsmSuspend characters.
Besides these public methods there are some "private methods": parseDiag emit error message if allowed, and count an error parseToken start a new token chained off prior token parseCollect append current input character to token text parseAdvance advance parse to next character parseClassify classify current input character parseStow stow current token as cval of current item ======================================================================== */ /* =========================================================================== Initialize a parseState structure before starting a parse. ======================================================================== */ static void parseInit(parseState *p, int opts) { p->state = fsmScanning; p->first = p->currItem = (itemHandle)NULL; p->currList = (listHandle)NULL; p->option = opts; p->incount = p->errors = 0; p->collLen = 0; p->collect[0] = '\0'; } /* =========================================================================== Return the first item found by a parse. Before doing this you should make sure the parse is complete (see parseClosed) and correct (parseErrors) ======================================================================== */ static itemHandle parseResult(parseState *p) { return p->first; } /* =========================================================================== Return the state of a parse -- whether the collected items are complete with all lists, strings, comments, etc closed. The test is, there is no current list, hence all lists are closed, and the state is fsmScanning, hence we are not currently in any kind of token. ======================================================================== */ static int parseClosed(parseState *p) { return (p->currList == NULL) && (p->state == fsmScanning); } /* =========================================================================== Return the count of errors found thus far in a parse. 
======================================================================== */
static int parseErrors(parseState *p)
{
   int seen = p->errors;

   return seen;
}

/* ===========================================================================
   After a presumably erroneous parse, discard the Items it created and
   start over with the same options.  Lists left open are closed first, as
   if their right brackets had been seen, which keeps trashSequence simple.

   Note: any strings stowed by the parse remain in the memory of the string
   storage fairies.  There's no way to forget a string.
======================================================================== */
static void parseDump(parseState *p)
{
   listHandle open;

   /* as long as some list is unclosed, pretend its right bracket arrived */
   for ( open = p->currList; open != NULL; open = p->currList )
   {
      /* chain the current item on to the EOL built into the open list */
      p->currItem->next = & open->foot;
      /* that EOL becomes the current item */
      p->currItem = & open->foot;
      /* and the container of the list just closed becomes current */
      p->currList = open->contnr;
   }
   trashSequence(p->first);
   parseInit(p,p->option);
}

/* ===========================================================================
   This array is used to classify a character for the FSM parser.
   \0 is fsmSuspend.  BS, HT, VT, FF and CR, plus space, are fsmWhite;
   LF is fsmNL.
======================================================================== */
/* One entry per 7-bit character code, eight codes to a row; the comment at
   the head of each row gives the range of codes (in hex) that it covers. */
static enum fsmClasses fsmClassArray[128] = {
/* 00-07 */ fsmSuspend,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,
/* 08-0F */ fsmWhite,fsmWhite,fsmNL ,fsmWhite,fsmWhite,fsmWhite,fsmOther,fsmOther,
/* 10-17 */ fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,
/* 18-1F */ fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,
/* 20-27 */ fsmWhite,fsmOther,fsmOneUnit,fsmSharp,fsmOther,fsmOneUnit,fsmOther,fsmRQuote,
/* 28-2F */ fsmOther,fsmOther,fsmOther,fsmOther,fsmOther,fsmMinus,fsmDecimal,fsmOther,
/* 30-37 */ fsmDigit,fsmDigit,fsmDigit,fsmDigit,fsmDigit,fsmDigit,fsmDigit,fsmDigit,
/* 38-3F */ fsmDigit,fsmDigit,fsmOther,fsmOther,fsmLBR,fsmOther,fsmRBR,fsmOther,
/* 40-47 */ fsmOther,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,
/* 48-4F */ fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,
/* 50-57 */ fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,
/* 58-5F */ fsmIdent,fsmIdent,fsmIdent,fsmOther,fsmBSL,fsmOther,fsmOther,fsmIdent,
/* 60-67 */ fsmLQuote,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,
/* 68-6F */ fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,
/* 70-77 */ fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,fsmIdent,
/* 78-7F */ fsmIdent,fsmIdent,fsmIdent,fsmOther,fsmOther,fsmOther,fsmOther,fsmOther
} ;

/* ===========================================================================
   This code classifies the current input character, leaving the class in
   p->inclass.  It is here that the PARSE_FINAL choice is implemented, in
   forcing fsmSuspend class to fsmStop.
======================================================================== */ static void parseClassify(parseState *p) { unsigned short c; if ( 0x80 > (c = *(p->inptr)) ) { p->inclass = fsmClassArray[c]; if ( (p->option & PARSE_FINAL) && (p->inclass == fsmSuspend)) p->inclass = fsmStop; } else p->inclass = fsmOther; } /* =========================================================================== This code is exercised whenever the parse recognizes the start of a new token. It records the newly-created item as the nextItem of the current item, as well as the latest item appended to the current list (if any). (However, housekeeping related to starting a new list is done in the fsm code -- in state fsmNameScan when an ident char is seen.) ======================================================================== */ static void parseToken(parseState *p, itemHandle ith) { if (p->currItem) { /* not first item */ p->currItem->next = ith; p->currItem = ith; if (p->currList) { /* this is latest item in a list, set list-tail ptr */ p->currList->tail = ith; if (itemType(ith)==LIST) p->currList->tail = &((listHandle)ith)->foot; if (p->currList->it.next == NULL) /* also the first item of the list */ p->currList->it.next = p->currItem; } } else { /* first item of parse, awwwww */ p->first = p->currItem = ith; } } /* =========================================================================== Report a parse error to stderr (if permitted) and increment the count of errors. ======================================================================== */ static void parseDiag(parseState *p, char *complaint) { if (0==(p->option & PARSE_NO_DIAG)) { fprintf(stderr,"MIF error at %d: %s\n",p->incount,complaint); } ++(p->errors); } /* =========================================================================== Collect the current input character as part of the token string. 
   If the token has reached PARSE_MAX (which should never happen) just skip
   this input (nobody will miss it anyway) ;-)
======================================================================== */
static void parseCollect(parseState *p)
{
   if (p->collLen < PARSE_MAX)
      p->collect[p->collLen++] = *(p->inptr);
}

/* ===========================================================================
   Stow current token as char value of current item.  Stuff a null at the
   end of it to make it a legit asciiz string first.  The test in
   parseCollect ensures that there is always room for the \0 at the end
   (NOTE(review): that holds only if p->collect has more than PARSE_MAX
   elements -- confirm against mifmain.h).
======================================================================== */
static void parseStow(parseState *p)
{
   p->collect[p->collLen] = '\0';
   stowStr(&(p->currItem->cval),p->collect);
}

/* ===========================================================================
   Advance the parse input pointer, and count input characters (for the
   diagnostic messages).  Note that advancing and classifying are separate
   steps.  That isn't necessary, it's just how I did it.
======================================================================== */
static void parseAdvance(parseState *p)
{
   p->inptr++;
   p->incount++;
}

/* ===========================================================================
   Parse a string of MIF, converting its tokens into Items.

   p     the parse state, previously set up with parseInit
   text  the null-terminated input to consume
   flag  nonzero sets PARSE_FINAL in p->option (the end of text stops the
         parse); zero clears it (the end of text suspends the parse)

   The value returned is the address which the parse reached in the input
   text.  This will point to \0.

   Each trip around the loop classifies the byte at p->inptr and dispatches
   on the current state; a state either consumes the byte (parseAdvance) or
   leaves it to be looked at again after a state change.
======================================================================== */
static unsigned char * parseMIF(parseState *p, unsigned char * text, int flag)
{
   p->inptr = text;
   if (flag) p->option |= PARSE_FINAL;
   else p->option &= (0xffff^PARSE_FINAL);
   for( parseClassify(p); /* get class of first input byte */
        ; /* loop ends on stop/suspend byte detected at end */
        parseClassify(p) )/* classify next (or same) input byte */
   {
      switch (p->state)
      {
      case fsmScanning: /* #include "fsmscang.c" */
         /*
            This block of code implements the "fsmScanning" state.
            This is the starting state, and the state to which every other
            state returns when a token has been completed or an error has
            been detected.
            Advance over white space.  Stop on fsmSuspend or fsmStop.
            On "<" create a list Item and go to fsmNameScan state.
            On ">" terminate the current list, making its built-in EOL Item
            the current item, and making its container the current list.
            If there is no container, the outermost list has been closed.
            On "`" create a string Item and go to fsmString state.
            On "#" initialize for collecting a comment and go to fsmComment
            state.
            On "-" initialize to collect a number and go to fsmMinusSeen
            state.
            On "." initialize to collect a number and go to fsmDotSeen state.
            On a digit, open a number Item and go to fsmInteger state.
            On an alpha, open a word Item and go to fsmGetName state.
            On any other character, write a message (unless PARSE_NO_DIAG)
            and keep the character as a one-character WORD item.
         */
         switch (p->inclass)
         {
         case fsmWhite:
         case fsmNL:
            parseAdvance(p);
            /* falls through to the break below */
         case fsmSuspend:
         case fsmStop:
            break;
         case fsmLBR:
            /* parseToken makes the list Item the current Item */
            parseToken(p,newList((char *)NULL));
            /* must also chain lists by containership: the was-current list
               (or null) becomes the container of the new list, which
               becomes the current list */
            ((listHandle)(p->currItem))->contnr = p->currList;
            p->currList = (listHandle)p->currItem;
            p->state = fsmNameScan; /* go look for the identifier */
            parseAdvance(p); /* step over "<" */
            break;
         case fsmRBR:
            if (p->currList)
            { /* we are in a list, right? */
               /* make the current item point on to the end of the ending list */
               p->currItem->next = & ((listHandle)(p->currList))->foot;
               /* make the current item the EOL that is part of the current list */
               p->currItem = & ((listHandle)(p->currList))->foot;
               /* make the current list be the container of the ending list */
               p->currList = ((listHandle)(p->currList))->contnr;
            }
            else
            { /* well, actually, no... */
               /* diagnose the stray right bracket and ignore it */
               parseDiag(p,"Found right bracket when not in list");
            }
            parseAdvance(p); /* in all cases, advance over the ">" */
            break;
         case fsmLQuote:
            /* left apostrophe: initialize the STRING item and go to string state */
            parseToken(p,newString((char *)NULL));
            p->state = fsmString;
            p->collLen = 0;
            parseAdvance(p); /* step over the quote */
            break;
         case fsmSharp:
            /* "#" means start collecting comment.  In fsmComment state, when we */
            /* see either \n or fsmStop, we decide whether to keep the text */
            /* or not, depending on p->option.  Keep it or not, we have to parse */
            /* it in order to step over it */
            p->state = fsmComment;
            p->collLen = 0;
            parseAdvance(p); /* step past "#" */
            break;
         case fsmMinus:
            /* "-" may precede only a numeric literal, however we are */
            /* permitting whitespace between the minus and the literal.  Since we */
            /* don't know if a digit will ever turn up, we don't create the */
            /* NUMBER Item yet, but do collect the hyphen and go to fsmMinusSeen state */
            p->collect[0] = *p->inptr; /* collect minus as part of num */
            p->collLen = 1; /* now have exactly 1 char of text */
            parseAdvance(p);
            p->state = fsmMinusSeen; /* go ensure dot or digit follows */
            break;
         case fsmDecimal:
            /* it isn't clear whether MIF actually permits a numeric literal to */
            /* start with a naked decimal -- if not, move fsmDecimal down with */
            /* fsmOther.  As with "-" we don't know if a digit will turn up so */
            /* do not create the Number just yet.  Go to fsmDotSeen to look, after */
            /* collecting the decimal as numeric text. */
            p->collect[0] = *p->inptr; /* collect dot as part of num */
            p->collLen = 1; /* now have exactly 1 char */
            parseAdvance(p);
            p->state = fsmDotSeen; /* go ensure digit follows */
            break;
         case fsmDigit:
            /* since we are looking at a digit we know that a NUMBER is going */
            /* to exist, so create it.  Go to fsmInteger state to collect it. */
            parseToken(p,newNumber((char *)NULL,(char *)NULL));
            p->collLen = 0; /* do not advance, leave digit for collection */
            p->state = fsmInteger;
            break;
         case fsmIdent:
            /* initialize start of a word, go to fsmGetName state */
            parseToken(p,newWord((char *)NULL));
            p->state = fsmGetName;
            p->collLen = 0; /* do not advance, leave alpha for collection */
            break;
         case fsmRQuote:
         case fsmBSL:
         case fsmOneUnit:
         case fsmOther:
         default:
            /* note unexpected character with a message to stderr.  However, do */
            /* not count it as an "error" in the parse, and do include it in the */
            /* parse output as a WORD item.  This enables the input of such */
            /* irregular constructs as the include() command -- a kludge, yes. */
            /* (p->collect doubles as the scratch buffer for the message.) */
            if (0==(p->option & PARSE_NO_DIAG))
            {
               sprintf(p->collect,"Unexpected character `%c' (0x%02x)", *p->inptr, *p->inptr);
               noteError(p->collect);
            }
            p->collect[0] = *p->inptr;
            p->collect[1] = '\0';
            parseToken(p,newWord(p->collect));
            parseAdvance(p);
            break;
         }
         break;
      case fsmNameScan: /* #include "fsmnmscn.c" */
         /*
            This switch implements fsmNameScan state, which searches for the
            identifier following the left bracket that opens a list.  If no
            identifier shows up, give the list the "identifier" of '?'.

            Note this is a different error policy than implemented with
            numbers.  When a hyphen or dot is seen, which should prefix a
            number, creation of the number Item is deferred until an actual
            digit is seen.  The alternative would be to create the number and
            then have to uncreate it if no digit showed up.  That would be
            awkward.  Here, when "<" is seen an identifier should follow, and
            creation of the LIST item could have been deferred until one was
            assured.  It seems less disruptive in this case to let the
            unnamed (and probably bogus) list continue to exist than to avoid
            creating it in the first place.

            Advance over white space.  Suspend on fsmSuspend.  Switch to
            fsmGetName state on an identifier character (fsmGetName state
            collects the identifier and stows it in currItem->cval).
            On any other event, report an error, stow a list "identifier"
            of '?' and return to fsmScanning state.
         */
         switch (p->inclass)
         {
         case fsmWhite:
         case fsmNL:
            parseAdvance(p);
            break;
         case fsmSuspend:
            break;
         case fsmIdent:
            p->state = fsmGetName; /* don't advance, leave 1st name char for collection */
            p->collLen = 0;
            break;
         default:
            parseDiag(p,"No identifier following left bracket");
            stowStr(&(p->currItem->cval),"?");
            p->state = fsmScanning;
            break;
         }
         break;
      case fsmGetName: /* #include "fsmgetnm.c"*/
         /*
            This switch implements fsmGetName state where an identifier is
            collected.  It collects identifier characters and suspends on
            suspend.  On any other event it stores the collected identifier
            as the value of the current item (which is a WORD or LIST), and
            returns to fsmScanning state.
         */
         switch (p->inclass)
         {
         case fsmDigit:
         case fsmIdent:
            parseCollect(p);
            parseAdvance(p);
            /* falls through to the break below */
         case fsmSuspend:
            break;
         default:
            parseStow(p);
            p->state = fsmScanning;
            break;
         }
         break;
      case fsmComment: /*#include "fsmcmnt.c" */
         /*
            This switch implements fsmComment state in which a comment is
            collected.  The "#" has already been skipped.  All characters
            other than \n and fsmStop are collected.  When \n or fsmStop
            appears, decide if the comment text is to be saved and if so,
            save it before returning to fsmScanning state.
         */
         switch (p->inclass)
         {
         case fsmSuspend:
            break;
         case fsmNL:
         case fsmStop:
            {
               /* true when the current item is an EOL that has not yet been
                  given any text, i.e. the comment directly follows a ">" */
               int emptyEOL;
               emptyEOL = (p->currItem) && (EOL == FASTTYPEOF(p->currItem)) && (INVALSTROFF == p->currItem->cval.offset);
               if ( (p->option & PARSE_ALL_CMT) ||((p->option & PARSE_LST_CMT) && emptyEOL) )
               {
                  if (!emptyEOL) /* not right after ">" so new token */
                     parseToken(p,newComment((char *)NULL));
                  parseStow(p);
               }
               p->state = fsmScanning;
               break;
            }
         default:
            parseCollect(p);
            parseAdvance(p);
            break;
         }
         break;
      case fsmString: /*#include "fsmstrng.c" */
         /*
            This switch implements fsmString state, entered following
            scanning of a left-apostrophe.  A MIF `string' cannot extend past
            the end of line.  This is one of two reasons fsmNL is separate
            from fsmWhite (the other is #comment).  If right-apostrophe, \n
            or fsmStop is seen, the string is forced to end and we return to
            fsmScanning state.  Everything else is fair game for the string
            contents.

            Note that MIF allows hex-value escapes using the backslash,
            however we do not process them as such.  Rather we just let the
            characters "\xa5" or whatever become part of the string we save.
            At this point we do not support C-style backslash escapes, in
            particular the input of \' is not going to put a single ' in the
            string.

            Actually Frame used to give an error if you tried to put a ">" in
            a string, you had to use the \xxx escape for it.  We allow that
            anyway.
         */
         switch (p->inclass)
         {
         case fsmSuspend:
            break;
         case fsmStop:
         case fsmNL:
            parseDiag(p,"Unclosed `string' at end of line or input");
            parseStow(p);
            p->state = fsmScanning;
            break;
         case fsmRQuote:
            parseStow(p);
            parseAdvance(p); /* step over delimiter */
            p->state = fsmScanning;
            break;
         default:
            parseCollect(p);
            parseAdvance(p);
            break;
         }
         break;
      case fsmMinusSeen: /*#include "fsmminus.c" */
         /*
            This switch implements fsmMinusSeen state.  The hyphen character
            has been collected, but the number Item has not been created.
            This transient state ensures that a number is actually being
            defined.
            Suspend on fsmSuspend.  Advance over white space (allowing spaces
            between minus and digits).  On seeing a dot, collect it and
            switch to fsmDotSeen state.  On seeing a digit, create the number
            item and switch to fsmInteger.  Any other event is an error.
         */
         switch (p->inclass)
         {
         case fsmWhite:
         case fsmNL:
            parseAdvance(p);
            /* falls through to the break below */
         case fsmSuspend:
            break;
         case fsmDecimal:
            parseCollect(p); /* collect the dot as part of the number text */
            parseAdvance(p);
            p->state = fsmDotSeen; /* go ensure a digit follows */
            break;
         case fsmDigit:
            parseToken(p,newNumber((char *)NULL,(char *)NULL));
            p->state = fsmInteger;
            break;
         default:
            parseDiag(p,"non-numeric character after minus sign.");
            p->state = fsmScanning;
            break;
         }
         break;
      case fsmDotSeen: /*#include "fsmdot.c" */
         /*
            This switch implements fsmDotSeen state.  A leading decimal, or a
            decimal after a minus sign, has been seen.  In either case, the
            dot character has been collected; however the number Item has not
            been created.  This transient state ensures that the decimal is
            followed by a digit.  If so, create a number Item (now that we
            are sure there will be a number) and switch to fsmFraction state
            to collect the fraction digits.
         */
         switch (p->inclass)
         {
         case fsmDigit:
            parseToken(p,newNumber((char *)NULL,(char *)NULL));
            p->state = fsmFraction;
            /* falls through to the break below */
         case fsmSuspend:
            break;
         default:
            parseDiag(p,"non-digit following decimal point.");
            p->state = fsmScanning;
            break;
         }
         break;
      case fsmInteger: /*#include "fsmintgr.c" */
         /*
            This switch implements fsmInteger state.  A minus sign might have
            been collected.  Here we collect digits up to a nondigit.
            Suspend on fsmSuspend.
            On a decimal point, collect the decimal and then switch to
            fsmFraction to collect the fraction digits.
            On fsmStop, stow the text and go direct to fsmScanning -- there
            will be no unit, and the parse loop should not end in fsmUnit1
            state, else parseClosed reports an apparent error.
            On seeing any other character, switch to fsmUnit1 to look for the
            unit that may follow a number.
         */
         switch (p->inclass)
         {
         case fsmSuspend:
            break;
         case fsmDigit:
            parseCollect(p);
            parseAdvance(p);
            break;
         case fsmDecimal:
            parseCollect(p);
            parseAdvance(p);
            p->state = fsmFraction;
            break;
         case fsmStop:
            parseStow(p);
            p->state = fsmScanning;
            break;
         default:
            parseStow(p); /* stow collected number text */
            p->state = fsmUnit1;
            break;
         }
         break;
      case fsmFraction: /*#include "fsmfrac.c" */
         /*
            This switch implements fsmFraction state.  Possibly a minus sign,
            and possibly some integer digits, and definitely a decimal point,
            have been collected.  Collect any further digits.  Suspend on
            fsmSuspend.  On fsmStop stow the text and return to fsmScanning.
            On any other input, stow the completed numeric text and switch to
            fsmUnit1 to allow a unit.
         */
         switch (p->inclass)
         {
         case fsmDigit:
            parseCollect(p);
            parseAdvance(p);
            /* falls through to the break below */
         case fsmSuspend:
            break;
         case fsmStop:
            parseStow(p);
            p->state = fsmScanning;
            break;
         default:
            parseStow(p);
            p->state = fsmUnit1;
            break;
         }
         break;
      case fsmUnit1: /*#include "fsmunit1.c" */
         /*
            This code, fsmUnit1, is invoked whenever a non-digit follows a
            digit.  The purpose of the switch is to find out if a unit
            follows a number, for example '1 pt' or '-2.5"'.
            Skip spaces (the unit can be set off by spaces).
            On a NL go back to scanning.  This means a unit can't be detected
            after a newline ("1\npt") but it makes readOneItem more reliable.
            Suspend if fsmSuspend shows up.
            On a one-character unit (dblquote '"', which means "inch," or
            '%'), set up the one-character unit value, advance, and go back
            to fsmScanning.
            On an alpha, store the first unit character and go to fsmUnit2 to
            look for a second.
            On anything else, go back to fsmScanning.
         */
         switch (p->inclass)
         {
         case fsmWhite:
            parseAdvance(p);
            break;
         case fsmSuspend:
            break;
         case fsmIdent:
            ((numbHandle)p->currItem)->unit[0] = *(p->inptr);
            parseAdvance(p);
            p->state = fsmUnit2;
            break;
         case fsmOneUnit: /* either " or % */
            ((numbHandle)p->currItem)->unit[0] = *(p->inptr);
            parseAdvance(p);
            p->state = fsmScanning;
            break;
         case fsmNL:
         default:
            p->state = fsmScanning;
            break;
         }
         break;
      case fsmUnit2: /*#include "fsmunit2.c" */
         /*
            First character of a numeric unit has proven to be alphabetic, as
            in "-.5cm".  The "c" has been collected in the unit, now we need
            to verify that the next is also alphabetic, and collect it.
            NOTE: with the exception of '"' meaning 'inch,' all unit codes
            are two-letter codes: pt, pc, in, cm.  This code does not verify
            that a known unit has been collected.  Nor does it check another
            fairly obvious error: that the unit has MORE than 2 characters.

            In fact, MIF has a rather obvious ambiguity in its design in that
            (a) a unit is optional but (b) a unit is effectively an
            identifier.  Hence a number that is followed by an identifier
            MUST have an explicit unit.  "-2.5 TRUE" is ambiguous -- is TRUE
            a new kind of unit or is it a WORD production?  Our position on
            this is to cover our eyes firmly and collect any identifier as a
            unit, but saving only its first and last letters.  Hope that
            suits everyone.
         */
         switch (p->inclass)
         {
         case fsmSuspend:
            break;
         case fsmIdent:
            /* every further letter overwrites unit[1], so the last one wins */
            ((numbHandle)p->currItem)->unit[1] = *(p->inptr);
            parseAdvance(p);
            break;
         default:
            p->state = fsmScanning;
            break;
         }
         break;
      } /* end switch */
      /* NOTE(review): this exit test presumes fsmSuspend and fsmStop are the
         lowest-valued members of the class enum -- confirm in mifmain.h */
      if (p->inclass <= fsmStop) break; /* this is the loop exit */
   }
   return p->inptr;
}

/* ===========================================================================
   Input/generation routines.

   Although parseMIF would support fread-ing a file of MIF in blocks, in this
   initial version we fgets by lines.  This proves to be fortunate as it
   makes it easy to find the (urk!) embedded EPSI images which are delimited
   by =FrameImage/=EndInset.
readMIF takes a file and runs the whole thing through the parser. strToMIF takes a string and passes it through the parser. patToMIF takes a pattern, varargs to merge with it, and parses the result. ======================================================================== */ itemHandle readMIF(FILE *infile, int keepCmt) { parseState ps; int popt = 0; int inFrameImage = 0; char inbuf[MAXPARSESTRING]; if (1==keepCmt) popt = PARSE_LST_CMT; if (2==keepCmt) popt = PARSE_ALL_CMT; parseInit(&ps,popt); for ( ; (fgets(inbuf,MAXPARSESTRING,infile)); ) { if (!inFrameImage) { if ( ('=' != inbuf[0])||(strncmp(inbuf,"=FrameImage",11)) ) { parseMIF(&ps,inbuf,0); /* suspend at end of line */ if (parseErrors(&ps)) break; } else inFrameImage = 1; } else inFrameImage = (0 != strncmp(inbuf,"=EndInset",9)); } if (0==parseErrors(&ps)) /* got to EOF with no error */ parseMIF(&ps," ",PARSE_FINAL); /* finish the parse */ if (parseErrors(&ps)) parseDump(&ps); return parseResult(&ps); /* return good results or NULL */ } /* readOneItem() is basically the same as readMIF() except that it uses parseClosed() to track when there are no incomplete items. Thus input stops after parsing the last (or only) line of a list. If a line has no lists, the parse results in the sequence of 0 or more items defined on that line -- possibly only a comment. If a line has a one-line list, the output is that list. And if a line opens a list, input continues through the line in which that list closes. This function can produce non-intuitive results given pathological input, but it is well-behaved when processing MIF as written by FrameMaker. 
*/ itemHandle readOneItem(FILE *infile, int keepCmt) { parseState ps; int popt; int inFrameImage = 0; char inbuf[MAXPARSESTRING]; if (1==keepCmt) popt |= PARSE_LST_CMT; if (2==keepCmt) popt |= PARSE_ALL_CMT; parseInit(&ps,popt); for ( ; (fgets(inbuf,MAXPARSESTRING,infile)); ) { if (!inFrameImage) { if ( ('=' != inbuf[0])||(strncmp(inbuf,"=FrameImage",11)) ) { parseMIF(&ps,inbuf,0); /* suspend at end of line */ if (parseErrors(&ps)) break; /* stop if at least one item & no incomplete ones */ if ((parseClosed(&ps)) && (parseResult(&ps))) break; } else inFrameImage = 1; } else inFrameImage = (0 != strncmp(inbuf,"=EndInset",9)); } if (parseErrors(&ps)) parseDump(&ps); /* recycle errors */ return parseResult(&ps); } itemHandle strToMIF(char *text) { parseState ps; parseInit(&ps,0); parseMIF(&ps,text,PARSE_FINAL); if (0==parseClosed(&ps) || parseErrors(&ps)) parseDump(&ps); return parseResult(&ps); } /* patToMIF uses varargs. I really don't want to go to the gyrations needed to handle both ANSI and non-ANSI flavors of this, so this goes strictly ANSI. */ itemHandle patToMIF(char *pat,...) 
{ va_list argptr; unsigned short outLen; char *partA, *partB, *partC, *inptr, *outptr; char wrk[32], outbuf[MAXPARSESTRING]; inptr = pat; outptr = outbuf; outLen = 0; va_start(argptr,pat); for ( ; (*inptr) && (outLen < MAXPARSESTRING) ; ) { if ('%' != *inptr) { /* non-command, copy it */ *outptr++ = *inptr++; outLen++; continue; } /* inptr->'%' so decode it */ partA = partB = partC = (char *)NULL; switch( *++inptr ) { case 's': /* insert just a string */ partA = va_arg(argptr,char *); break; case 'Q': /* insert string in apostrophes */ partA = "`"; partB = va_arg(argptr,char *); partC = "'"; break; case 'S': /* insert string in a */ partA = " outLen ) { for(;((*outptr)=(*partA));++outptr,++partA) ; } if ((partB)) { outLen += strlen(partB); if (MAXPARSESTRING > outLen) { for(;((*outptr)=(*partB));++outptr,++partB) ; } } if ((partC)) { outLen += strlen(partC); if (MAXPARSESTRING > outLen) { for(;((*outptr)=(*partC));++outptr,++partC) ; } } if (MAXPARSESTRING <= outLen) { /* overran buffer */ noteError("exceeded patToMIF buffer area"); break; /* the for loop */ } } /* end of if(partA) anything to copy */ } /* end for */ va_end(argptr); if (MAXPARSESTRING > outLen) { /* no space problem */ *outptr = '\0'; /* terminate built string */ return strToMIF(outbuf); } else return (itemHandle)NULL; } /* The primary output function is outItem. writeMIF mostly iterates calls on outItem, plus dealing with OUT_INDENT (which requires knowing the global status, where outItem knows only one item at a time). The use of partA, partB and part3 is for fans of NPR's Car Talk. 
*/

/*
	outItem: format one item as MIF text into buffer.
	  buffer  receives the text, always NUL-terminated on a non-negative return
	  max     size of buffer in bytes, including room for the NUL
	  ith     the item to format
	  format  option bits; only OUT_CMT is examined here
	Returns the number of characters written (not counting the NUL).
	Returns 0 for a null item or an unknown type (after noteError), and
	also for a CMT item when OUT_CMT is not set. Returns -1, writing
	nothing, when the text plus its NUL would not fit in max bytes.
*/
int outItem(char *buffer, int max, itemHandle ith, int format)
{
	size_t copyLen = 0;	/* size_t: a short could wrap on very long text */
	enum itemTypes ithType;
	char *partA, *partB, *part3;
	numbHandle nh;
	char wrk[4];

	if (NULL == ith) {
		noteError("outItem of null");
		return 0;
	}
	ithType = FASTTYPEOF(ith);
	if ((NA == ithType)||(ithType > CMT)) {
		noteError("outItem of unknown type");
		return 0;
	}
	partA = partB = part3 = (char *)NULL;
	wrk[0] = wrk[1] = wrk[2] = wrk[3] = '\0';
	switch(ithType) {
		case WORD:
			partA = " ";
			partB = getCharAddr(&ith->cval);
			copyLen = 1+strlen(partB);
			break;
		case STRING:
			partA = " `";
			partB = getCharAddr(&ith->cval);
			part3 = "'";
			/* blank + backquote + text + apostrophe: 3 extra chars, not 2 */
			copyLen = 3+strlen(partB);
			break;
		case LIST:
			partA = "<";
			partB = getCharAddr(&ith->cval);
			copyLen = 1+strlen(partB);
			break;
		case NUMBER:
			partA = " ";
			partB = getCharAddr(&ith->cval);
			copyLen = 1+strlen(partB);
			nh = (numbHandle)ith;
			if ( (nh->unit[0]) ) { /* any unit */
				if (('\"' == nh->unit[0])||('%' == nh->unit[0])) {
					wrk[0] = nh->unit[0]; /* 1-char unit " or % */
					copyLen += 1;
				}
				else { /* 2-char unit, pt/cm/pc etc */
					wrk[0] = ' ';
					wrk[1] = nh->unit[0];
					wrk[2] = nh->unit[1];
					copyLen += 3;
				}
				part3 = wrk;
			}
			break;
		case EOL:
			partA = ">";
			copyLen = 1;
			if ( (INVALSTROFF != ith->cval.offset) /* some cmt text */
			   &&(format & OUT_CMT) ) { /* and wanted */
				partB = "#";
				part3 = getCharAddr(&ith->cval);
				copyLen += (1+strlen(part3));
			}
			break;
		case CMT:
			if (format & OUT_CMT) {
				partB = "#";
				part3 = getCharAddr(&ith->cval);
				copyLen += (1+strlen(part3));
			}
			break;
		default: /* cannot occur: avoid warning from gcc */
			break;
	} /* end switch */

	if ((max <= 0) || (copyLen >= (size_t)max))
		return -1; /* signal overrun */

	/* each loop copies through the part's NUL and leaves buffer on it */
	if (partA) { for(;((*buffer)=(*partA));++buffer,++partA) ; }
	if (partB) { for(;((*buffer)=(*partB));++buffer,++partB) ; }
	if (part3) { for(;((*buffer)=(*part3));++buffer,++part3) ; }
	*buffer = '\0'; /* terminate even when no part was copied */
	return (int)copyLen;
}

/*
	The rule for newlines that reproduces Frame output is this:
	(a) put a newline before any LIST not at the left margin, (b) put
a newline before any EOL that follows another EOL, (c) put a newline before any EOL that follows a LIST (a) but not (b) (c) opens up null lists > # end of ParaLine (b) > # end of Para (b) In any case, always put a newline AFTER any #comment, and after any production that pushes the line size past MAXOUTLINE */ void writeMIF(FILE *outfile, itemHandle ith, int format) { short listDepth, itemLen, column; enum itemTypes thisType, lastType; itemHandle scan; char *indents = " "; /* 1-10 indent spaces */ char wrk[MAXPARSESTRING]; for ( scan = ith, listDepth = 0, lastType = NA, column = 0 ; (scan) ; lastType = thisType, scan = scan->next ) { thisType = FASTTYPEOF(scan); if (EOL != thisType || listDepth) itemLen = outItem(wrk, MAXPARSESTRING, scan, format); else break; if (0 < itemLen) { /* valid item type */ if (LIST == thisType) ++listDepth; /* decide whether to do a NL (and ergo indent) *before* item */ if ( ( format & OUT_EOL ) && ( ( /* (a) */ (LIST==thisType)&&(column) ) || ( /* (b) */ (EOL==thisType)&&(EOL==lastType)&&(column) ) || ( /* (c) */ (EOL==thisType)&&(LIST==lastType)&&(column) ) ) ) { fputc('\n',outfile); column = 0; } if ((format & OUT_INDENT)&&(0==column)) {/* at margin, do indent */ short ld = listDepth-1; column = (ld>0)?ld:0; for(;ld>10;ld-=10) fputs(&indents[0],outfile); if (ld>0) fputs(&indents[10-ld],outfile); } fputs(wrk,outfile); column += itemLen; if ( ( CMT == thisType ) /* must have NL after comment */ || ( (EOL == thisType) && (INVALSTROFF != scan->cval.offset) ) || ( column > MAXOUTLINE ) ) { fputc('\n',outfile); column = 0; } if (EOL == thisType) --listDepth; } else { /* unwanted comment, or bad item type? 
*/ if (CMT != thisType) { fputs("?",outfile); column +=1; } } } /* end for */ if ( (NA != lastType) && (format & OUT_EOL) ) fputc('\n',outfile); /* end final line */ } /*============================================================================ * * Paragraph Scanning functions: * paraScanInit() (creates and) initializes a ParaScan structure * paraScanLine() moves a paragraph scan to a particular * paraScanCount() returns the number of s in a paragraph * paraScan() steps the "iterator" over a paragraph, returning next char * paraScanGets() simple get-string-from-para function * paraScanItem() returns the handle of a non-character item * * Although the purpose of the Frame document format is to store and present * text, it proves to be quite difficult to extract text from it. The reason * is the extraordinary number of things that can appear embedded in text. * * The purpose of these functions is to make it easier to extract the text * of a paragraph in ASCII form. The extracted text can be written to a file, * or it can be used in other ways, for example as a sort key. * * The basic unit of text is the list. Within a the text is * stored as a series of lists. The lists represent * how Frame divided this paragraph into lines at the time the document was * stored. This division into lines is of course ephemeral -- it will change * with any change of format or content of the paragraph. Depending on the * use to which the text will be put, you might or might not want to * preserve the Frame line breaks when converting text to ASCII. * * * # text of first line * # text of next line * ... > # end of Para * * The design approach used here is (using C++ terminology) to treat the * as a container for s. We provide a new kind of object, * the ParaScan, whose function is to be an "iterator" over the and * its s. In old-fashioned terms, the ParaScan is a "cursor" * that you advance one character at a time through the paragraph. 
*
 * Most of the text in a <ParaLine> is held in the form of <String>
 * items. All ordinary and many special characters are stored in <String>s
 * using these rules:
 *    * ordinary ASCII is stored as itself: <String `abc'>
 *    * apostrophe, double quote, and slash are escaped: <String `\q \Q \\'>
 *    * most non-ASCII characters are in hex: <String `\xd2 '>
 * Some special characters are stored as <Char> items instead of <String>,
 * for example <Char HardSpace> is used instead of <String `\x11 '>.
 * User-specified format characters are all in <Char> form: <Char Tab>,
 * <Char HardReturn>, and so on.
 *
 * Besides <String>s and <Char>s, a <ParaLine> contains all the kinds of
 * non-text items you can insert between characters in Frame, for example:
 *    * frame anchors: <AFrame n> where "n" is an integer
 *    * table anchors: <ATbl n>
 *    * variables: <Variable ...>
 *    * markers: <Marker ...>
 * and so forth. These items, and the implications for converting each kind
 * to text, are covered in the MIFFEd user guide. The ParaScan functions
 * give you the option of ignoring them, or retrieving them as they appear
 * and handling them separately.
 * ============================================================================ */

/*============================================================================
 *
 * paraScanInit takes a large set of option flags and a paragraph handle, and
 * initializes the scan over the paragraph. The final argument can be NULL
 * to cause the allocation of a new ParaScan, or if not NULL it is an old
 * ParaScan structure to be reinitialized.
 *
 * The allocation of ParaScan is somewhat uneasily integrated with the
 * machinery for allocating and deallocating other MIFFed items. There is
 * no "trashParaScan" function. The reason is you are expected to use only
 * a very small number of ParaScans (typically, just one). So there is no
 * significant memory leak if ParaScans are not recycled.
* ============================================================================ */
itemHandle paraScanInit(
	itemHandle para,                      /* handle of <Para> to scan */
	const unsigned short int textOpts,    /* text conversion options */
	const unsigned short int nonTextOpts, /* non-text skip options */
	const unsigned short int lineOpts,    /* line-end handling options */
	itemHandle oldScan                    /* old scan to re-initialize, or NULL */
	)
{
	ParaScan * ps = (ParaScan *)oldScan;

	if ( (ps) && (PSCAN != FASTTYPEOF(ps)) )
		return (itemHandle)noteError("paraScanInit: last arg not a ParaScan");

	/* Validate the paragraph BEFORE allocating anything, so that a failed */
	/* call cannot leave a freshly made ParaScan with no owner. */
	if ( (NULL==para)
	  || (LIST != FASTTYPEOF(para))
	  || (0!=textCmp(para,"Para")) )
		return (itemHandle)noteError("paraScanInit:first arg not a Para list");

	if (NULL == ps) { /* no old scan supplied, make a new one */
		if (NULL == (ps = (ParaScan *) makeItem(PSCAN)) )
			return (itemHandle)noteError("Unable to allocate ParaScan structure");
	}

	ps->it.next = NULL; /* no last item */
	ps->thePara = para;
	ps->textOpts = textOpts;
	ps->nonTextOpts = nonTextOpts;
	ps->lineOpts = lineOpts;
	ps->flags.discHyphen = ps->flags.spaceOut = 0;
	paraScanLine((itemHandle)ps,1); /* position on the first line, if any */
	return (itemHandle)ps;
}

/*============================================================================
 *
 * paraScanLine takes a ParaScan and a line number, and positions the
 * scan to the head of the specified line. If the specified line does not
 * exist, the scan is positioned on the final line of the paragraph.
 * The returned value is the specified line number to indicate success, or
 * 0 to indicate that the line does not exist.
 *
 * Lines are numbered from 1. It is possible for a paragraph to have zero
 * lines, that is, there is no <ParaLine> in the <Para>.
* ============================================================================ */ int paraScanLine(itemHandle parascan, int lnum) { ParaScan * ps = (ParaScan *)parascan; int n = 0; itemHandle i; if (NULL==ps || PSCAN != FASTTYPEOF(ps) ) return (int)noteError("paraScanLine: first arg null or not a ParaScan"); if (0 >= lnum) return (int)noteError("paraScanLine: invalid line number requested"); ps->theItem = (itemHandle)NULL; /* no current item */ ps->strChar = (char *)NULL; /* not currently scanning a string */ /* if ps->thePara is not a , or if it contains no s, the following call returns NULL and we return 0. */ if(NULL == (i = ps->theLine = firstOfName(ps->thePara,"ParaLine")) ) return 0; /* ps->theLine is the first line. Count up to the desired one */ do { ps->theLine = i; ps->theItem = i->next; n++; i = nextSameName(i); } while( (ntheItem)) /* null "" -- it happens */ ps->theItem = NULL; return n; } /*============================================================================ * * paraScanCount takes a ParaScan and returns the count of lines in it. * This is a nondestructive operation which does not change the state of * the ParaScan object. * ============================================================================ */ int paraScanCount(const itemHandle parascan) { ParaScan * ps = (ParaScan *)parascan; int n; itemHandle i; if (NULL==ps || PSCAN != FASTTYPEOF(ps)) return (int)noteError("paraScanCount: first arg null or not a ParaScan"); for (n=0, i=firstOfName(ps->thePara,"ParaLine"); (i); i=nextSameName(i),++n ) ; return n; } /*============================================================================ * * paraScanItem takes a ParaScan and returns the current item it is scanning. * This is intended for use after paraScan has returned a constant such as * PS_FONT or PS_XREF, to retrieve the item that was seen. However it can * be called at any time. 
* ============================================================================ */ itemHandle paraScanItem(const itemHandle parascan) { ParaScan * ps = (ParaScan *)parascan; if (NULL==ps || PSCAN != FASTTYPEOF(ps)) return (itemHandle)noteError("paraScanItem: first arg null or not a ParaScan"); return ps->it.next; } /*============================================================================ * * paraScan takes a ParaScan and returns the next character it represents. * The user can request that many kinds of things in a paragraph be ignored. * Therefore this function has the general form: * do { * extract the current character from the current item in the line * evaluate the character in the light of the options * note if it should be returned or ignored * position the ParaScan to the next character * } while (the current character should be ignored) * * The "current" character is represented by ps->theLine and ps->theItem. * When ps->theItem is a , ps->strChar-> the current character in * the text value of a string literal. When ps->theItem is a special * that is being retrieved as hex (\xnn), ps->strChar-> the current * character in ps->specialXnn. Thus (strChar) is taken as a flag * that a string is being processed. * * When NULL==ps->theLine, end of paragraph has been reached, or the paragraph * had no lines in it. * * When NULL==ps->theItem, end of the line has been reached. Some special * rules have to be applied at this time, after which ps->theLine advances. * * The functions paraScanInit and paraScanLine leave the ParaScan set to * the first item of a line, if there is a line. ============================================================================ */ #define IGNORED -1 /* return flag used to condition the loop */ /*============================================================================ * * The following two arrays encode the translation of special characters * under control of the text options. 
The first element of each structure * contains the options flags that can apply to the character. The second * is the converted character under a PS_CVT* rule. The third is the * ISO-8859-1 value of the character if any. * ============================================================================ */ static struct cvtInfo { unsigned short rules; /* applicable PS_* rules */ unsigned char cvt; /* misc. conversion value */ unsigned char iso; /* iso-8859-1 conversion */ } cvtLowChars[0x20] = { { 0, 0, 0}, /* x00 not used */ { 0, 0, 0}, /* x01 not used */ { 0, 0, 0}, /* x02 not used */ { 0, 0, 0}, /* x03 not used */ {PS_CVT_DASHES, '-', 0}, /* x04 discretionary hyphen */ { 0, 0, 0}, /* x05 suppress-hyphen not cvt'd */ {PS_CVT_DASHES, '-', 0}, /* x06 soft hyphen */ { 0, 0, 0}, /* x07 not used */ { 0, 0, 0}, /* x08 tab translated in code */ { 0, 0, 0}, /* x09 hard cr translated in code */ { 0, 0, 0}, /* x0a not used */ { 0, 0, 0}, /* x0b not used */ { 0, 0, 0}, /* x0c not used */ { 0, 0, 0}, /* x0d not used */ { 0, 0, 0}, /* x0e not used */ { 0, 0, 0}, /* x0f not used */ {PS_CVT_SPACES, ' ', 0}, /* x10 numberspace */ {PS_CVT_SPACES, ' ', 0}, /* x11 hardspace */ {PS_CVT_SPACES, ' ', 0}, /* x12 thinspace */ {PS_CVT_SPACES, ' ', 0}, /* x13 enspace */ {PS_CVT_SPACES, ' ', 0}, /* x14 emspace */ {PS_CVT_DASHES, '-', 0}, /* x15 hard hyphen */ { 0, 0, 0}, /* x16 not used */ { 0, 0, 0}, /* x17 not used */ { 0, 0, 0}, /* x18 not used */ { 0, 0, 0}, /* x19 not used */ { 0, 0, 0}, /* x1a not used */ { 0, 0, 0}, /* x1b not used */ { 0, 0, 0}, /* x1c not used */ { 0, 0, 0}, /* x1d not used */ { 0, 0, 0}, /* x1e not used */ { 0, 0, 0} /* x1f not used */ }, cvtHighChars[0x80] = { {PS_CVT_ACCENTS+PS_8859_1, 'A', 0xc4}, /* x80 Adieresis */ {PS_CVT_ACCENTS+PS_8859_1, 'A', 0xc5}, /* x81 Aring */ {PS_CVT_ACCENTS+PS_8859_1, 'C', 0xc7}, /* x82 Ccedilla */ {PS_CVT_ACCENTS+PS_8859_1, 'E', 0xc9}, /* x83 Eacute */ {PS_CVT_ACCENTS+PS_8859_1, 'N', 0xd1}, /* x84 Ntilde */ {PS_CVT_ACCENTS+PS_8859_1, 
'O', 0xd6}, /* x85 Odieresis */ {PS_CVT_ACCENTS+PS_8859_1, 'U', 0xdc}, /* x86 Udieresis */ {PS_CVT_ACCENTS+PS_8859_1, 'a', 0xe1}, /* x87 aacute */ {PS_CVT_ACCENTS+PS_8859_1, 'a', 0xe0}, /* x88 agrave */ {PS_CVT_ACCENTS+PS_8859_1, 'a', 0xe2}, /* x89 acircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'a', 0xe4}, /* x8a adieresis */ {PS_CVT_ACCENTS+PS_8859_1, 'a', 0xe3}, /* x8b atilde */ {PS_CVT_ACCENTS+PS_8859_1, 'a', 0xe5}, /* x8c aring */ {PS_CVT_ACCENTS+PS_8859_1, 'c', 0xe7}, /* x8d ccedilla */ {PS_CVT_ACCENTS+PS_8859_1, 'e', 0xe9}, /* x8e eacute */ {PS_CVT_ACCENTS+PS_8859_1, 'e', 0xe8}, /* x8f egrave */ {PS_CVT_ACCENTS+PS_8859_1, 'e', 0xea}, /* x90 ecircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'e', 0xeb}, /* x91 edieresis */ {PS_CVT_ACCENTS+PS_8859_1, 'i', 0xed}, /* x92 iacute */ {PS_CVT_ACCENTS+PS_8859_1, 'i', 0xec}, /* x93 igrave */ {PS_CVT_ACCENTS+PS_8859_1, 'i', 0xee}, /* x94 icircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'i', 0xef}, /* x95 idieresis */ {PS_CVT_ACCENTS+PS_8859_1, 'n', 0xf1}, /* x96 ntilde */ {PS_CVT_ACCENTS+PS_8859_1, 'o', 0xf3}, /* x97 oacute */ {PS_CVT_ACCENTS+PS_8859_1, 'o', 0xf2}, /* x98 ograve */ {PS_CVT_ACCENTS+PS_8859_1, 'o', 0xf4}, /* x99 ocircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'o', 0xf6}, /* x9a odiuretic */ {PS_CVT_ACCENTS+PS_8859_1, 'o', 0xf5}, /* x9b otilde */ {PS_CVT_ACCENTS+PS_8859_1, 'u', 0xfa}, /* x9c uacute */ {PS_CVT_ACCENTS+PS_8859_1, 'u', 0xf9}, /* x9d ugrave */ {PS_CVT_ACCENTS+PS_8859_1, 'u', 0xfb}, /* x9e ucircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'u', 0xfc}, /* x9f udieresis */ { 0, 0, 0}, /* xa0 dagger not cvt'd */ { 0, 0, 0}, /* xa1 degree not cvt'd */ { PS_8859_1, 0, 0xa2}, /* xa2 cent */ { PS_8859_1, 0, 0xa3}, /* xa3 sterling */ { PS_8859_1, 0, 0xa7}, /* xa4 section */ { PS_8859_1, 0, 0xb7}, /* xa5 bullet */ { PS_8859_1, 0, 0xb6}, /* xa6 paragraph */ { PS_8859_1, 0, 0xdf}, /* xa7 german double s */ { PS_8859_1, 0, 0xae}, /* xa8 (R) ?mac only? */ { PS_8859_1, 0, 0xa9}, /* xa9 (C) ?mac only? 
*/ { 0, 0, 0}, /* xaa TM not converted */ { PS_CVT_LIKES+PS_8859_1, '\'', 0xb4}, /* xab acute accent */ { PS_8859_1, 0, 0xa8}, /* xac dieresis */ { 0, 0, 0}, /* xad not-equal not cvtd */ { PS_8859_1, 0, 0xc6}, /* xae AE */ { PS_8859_1, 0, 0xd8}, /* xaf Oslash */ { 0, 0, 0}, /* xb0 infinity not cvtd */ { PS_8859_1, 0, 0xb1}, /* xb1 +/- ?mac only? */ { 0, 0, 0}, /* xb2 <= not cvtd */ { 0, 0, 0}, /* xb3 >= not cvtd */ { PS_8859_1, 0, 0xa5}, /* xb4 yen */ { PS_8859_1, 0, 0xb5}, /* xb5 mu ?mac only ? */ { 0, 0, 0}, /* xb6 partial diff ?mac only ? */ { 0, 0, 0}, /* xb7 summation ?mac only ? */ { 0, 0, 0}, /* xb8 product ?mac only ? */ { 0, 0, 0}, /* xb9 pi ?mac only ? */ { 0, 0, 0}, /* xba integral ?mac only ? */ { PS_8859_1, 0, 0xaa}, /* xbb ordfeminine */ { PS_8859_1, 0, 0xba}, /* xbc ordmasculine */ { 0, 0, 0}, /* xbd omega ?mac only ? */ { PS_8859_1, 0, 0xe6}, /* xbe ae */ { PS_8859_1, 0, 0xf7}, /* xbf oslash */ { PS_8859_1, 0, 0xbf}, /* xc0 question down */ { PS_8859_1, 0, 0xa1}, /* xc1 exclaim down */ { PS_8859_1, 0, 0xac}, /* xc2 logical not ?mac only? */ { 0, 0, 0}, /* xc3 radical ?mac only ? */ { 0, 0, 0}, /* xc4 florin */ { 0, 0, 0}, /* xc5 wavy equal ?mac only ? */ { 0, 0, 0}, /* xc6 delta ?mac only ? */ { PS_8859_1, 0, 0xab}, /* xc7 guillemetleft */ { PS_8859_1, 0, 0xbb}, /* xc8 guillemetright */ { 0, 0, 0}, /* xc9 ellipsis */ { 0, 0, 0}, /* xca reserved */ {PS_CVT_ACCENTS+PS_8859_1, 'A', 0xc0}, /* xcb Agrave */ {PS_CVT_ACCENTS+PS_8859_1, 'A', 0xc3}, /* xcc Atilde */ {PS_CVT_ACCENTS+PS_8859_1, 'O', 0xd5}, /* xcd Otilde */ { 0, 0, 0}, /* xce OE */ { 0, 0, 0}, /* xcf oe */ {PS_CVT_DASHES, '-', 0}, /* xd0 endash */ {PS_CVT_DASHES, '-', 0}, /* xd1 emdash */ {PS_CVT_DQUOTES, '"', 0}, /* xd2 quotedblleft*/ {PS_CVT_DQUOTES, '"', 0}, /* xd3 quoteDblRight */ {PS_CVT_SQUOTES, '\'', 0}, /* xd4 quoteleft */ {PS_CVT_SQUOTES, '\'', 0}, /* xd5 quoteright */ { 0, 0, 0}, /* xd6 divide ?mac only ? */ { 0, 0, 0}, /* xd7 lozenge ?mac only ? 
*/ {PS_CVT_ACCENTS+PS_8859_1, 'y', 0xff}, /* xd8 ydieresis */ {PS_CVT_ACCENTS, 'Y', 0}, /* xd9 Ydieresis */ {PS_CVT_LIKES, '/', 0}, /* xda fraction */ { PS_8859_1, 0, 0xa4}, /* xdb currency */ { 0, 0, 0}, /* xdc guilsinglleft */ { 0, 0, 0}, /* xdd guilsinglright */ { 0, 0, 0}, /* xde fi */ { 0, 0, 0}, /* xdf fl */ { 0, 0, 0}, /* xe0 daggerdbl */ { 0, 0, 0}, /* xe1 periodcentered */ { 0, 0, 0}, /* xe2 quotesinglbase */ { 0, 0, 0}, /* xe3 quotedblbase */ { 0, 0, 0}, /* xe4 perthousand */ {PS_CVT_ACCENTS+PS_8859_1, 'A', 0xc2}, /* xe5 Acircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'E', 0xca}, /* xe6 Ecircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'A', 0xc1}, /* xe7 Aacute */ {PS_CVT_ACCENTS+PS_8859_1, 'E', 0xcb}, /* xe8 Edieresis */ {PS_CVT_ACCENTS+PS_8859_1, 'E', 0xc8}, /* xe9 Egrave */ {PS_CVT_ACCENTS+PS_8859_1, 'I', 0xcd}, /* xea Iacute */ {PS_CVT_ACCENTS+PS_8859_1, 'I', 0xce}, /* xeb Icircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'I', 0xcf}, /* xec Idieresis */ {PS_CVT_ACCENTS+PS_8859_1, 'I', 0xcc}, /* xed Igrave */ {PS_CVT_ACCENTS+PS_8859_1, 'O', 0xd3}, /* xee Oacute */ {PS_CVT_ACCENTS+PS_8859_1, 'O', 0xd4}, /* xef Ocircumflex */ { 0, 0, 0}, /* xf0 reserved */ {PS_CVT_ACCENTS+PS_8859_1, 'O', 0xd2}, /* xf1 Ograve */ {PS_CVT_ACCENTS+PS_8859_1, 'U', 0xda}, /* xf2 Uacute */ {PS_CVT_ACCENTS+PS_8859_1, 'U', 0xdb}, /* xf3 Ucircumflex */ {PS_CVT_ACCENTS+PS_8859_1, 'U', 0xd9}, /* xf4 Ugrave */ { 0, 0, 0}, /* xf5 dotlessi */ {PS_CVT_LIKES, '^', 0}, /* xf6 circumflex */ {PS_CVT_LIKES, '~', 0}, /* xf7 tilde */ { PS_8859_1, 0, 0xaf}, /* xf8 macron */ { 0, 0, 0}, /* xf9 breve */ { 0, 0, 0}, /* xfa dotaccent */ { PS_8859_1, 0, 0xb0}, /* xfb ring */ { PS_8859_1, 0, 0xb8}, /* xfc cedilla */ { 0, 0, 0}, /* xfd cedilla */ { 0, 0, 0}, /* xfe hungarumlaut */ { 0, 0, 0} /* xff not used */ }; /* * convert the character c according to the options in ParaScan *ps. * Return 1 if the character is translated, 0 if it is unchanged. 
* Note that a few (5) characters are "unchanged" when they are * "translated" into their ISO-8859-1 values. Return 1 for these, * also, since the meaning of a 1 return is, the caller wants this * value. The meaning of a 0 return is, this is an unchanged Frame * special, which can be converted by PS_SPECIALS_AS_XNN or _SPACES. */ static int psHandleChar(const ParaScan * ps, const int c, int *cv) { if ((0x20 > c) && (cvtLowChars[c].rules & ps->textOpts)) { *cv = cvtLowChars[c].cvt; return 1; } else if (0x7f < c) { unsigned int rule = cvtHighChars[c-0x80].rules & ps->textOpts; if (rule) { if (PS_8859_1 & rule) *cv = cvtHighChars[c-0x80].iso; else *cv = cvtHighChars[c-0x80].cvt; return 1; } } *cv = c; return 0; } int paraScan(const itemHandle parascan) { int ret = IGNORED; ParaScan * ps = (ParaScan *)parascan; if (NULL==ps || PSCAN != FASTTYPEOF(ps)) return noteError("paraScan: arg null or not a ParaScan"); if (NULL == ps->thePara) return 0; /* uninitialized ParaScan */ do /* while the current character should be ignored */ { if (NULL == ps->theLine) return 0; /* end of paragraph (or empty paragraph) */ if (NULL == ps->theItem) { /* end of line reached */ if (ps->lineOpts & PS_KEEP_LINE_END) { /* returning line-ends */ ret = '\n'; } else /* not returning line ends, so if the prior */ { /* ..char was not a discretionary hyphen, return a space */ if (!ps->flags.discHyphen) ret = ' '; } ps->flags.discHyphen = 0; /* advance to the next after the current one */ /* if there is no next line, this leaves theLine null */ ps->theLine = nextSameName(ps->theLine); if (ps->theLine) /* there is one, point to its first item */ { ps->theItem = nextItem(ps->theLine); if (EOL == FASTTYPEOF(ps->theItem)) /* */ ps->theItem = NULL; /* it happens in some tables */ } continue; /* go apply the termination test */ } /* Not at end of line but rather at one item in the line. */ /* The most likely item is a . If we are just starting */ /* a , set up strChar to its first character. 
*/ if (!ps->strChar) /* not currently in a string of any kind.. */ { if (0==textCmp(ps->theItem,"String")) /* ..but starting one */ /* Here we assume that all String lists are alike: */ /* A mutant or such would still work. Set the text of */ /* the nextItem() of the as a string to process. */ ps->strChar = textOf(nextItem(ps->theItem)); } if (ps->strChar) /* now in a string of some kind */ { /* The next character in the string could be the terminal null */ /* (even if we just started, since is allowed). */ /* If at the end, move to the next item and iterate the loop. */ if ( ! (*ps->strChar) ) { ps->strChar = NULL; /* signal we are not in a string */ ps->theItem = nextInList(ps->theItem); /* could yield null */ continue; /* iterate the loop */ } /* Not end of string. Is next character normal ascii, or \\ ? */ if ('\\' != *ps->strChar) { /* string contains a normal ascii character, convert as nec.*/ ret = *ps->strChar; } else { /* *strChar is a backslash, decode the escaped char */ /* leave strChar pointing to the last char handled */ switch (*(ps->strChar+1)) { case '\\': /* string contains '\\' */ { ret = '\\'; ++ps->strChar; break; } case 'q': /* string contains '\q' meaning '\'' */ { ret = '\''; ++ps->strChar; break; } case 'Q': /* string contains '\Q' meaning '\"' */ { ret = '\"'; ++ps->strChar; break; } case 'x': /* string contains "\xnn " */ { /* MIF uses for most specials >0x7f */ /* Treatment depends on the class of the special char */ /* No test is made for invalid \xnn format. 
*/ int xvalue; sscanf(ps->strChar,"\\x%2x ", &xvalue); if (psHandleChar(ps,xvalue,&ret)) { /* special converted to something else, skip \xnn */ ps->strChar += 4; } else { /* special char is not converted to something */ if (ps->textOpts & PS_SPECIALS_AS_XNN) { /* wanted as \xnn, set up to return the \ now */ ret = '\\'; /* leave strChar->\, next char is 'x' */ } else { /* special wanted some other way, advance in string */ ps->strChar += 4; } } break; } /* end case 'x' */ } /* end switch */ } /* end '\\' == *strChar */ ++ps->strChar; /* advance over processed char */ } else /* 0==ps->strChar, so not in a string of any kind */ { if (0==textCmp(ps->theItem,"Char")) { /* theItem is one of the specials encoded as */ /* convert it to a byte value and apply psHandleChar() */ char *keyword = textOf(nextItem(ps->theItem)); int xlt = 0; /* char not translated yet */ if (0==strcmp(keyword,"Tab")) { if (ps->textOpts & PS_TAB_AS_SPACE) ret = ' '; else ret = '\t'; /* Frame tab is 8, cvt to ASCII */ xlt = 1; /* translated, don't convert to xnn */ } else if (0==strcmp(keyword,"HardReturn")) { if (ps->lineOpts & PS_HARD_CR_AS_CR) ret = '\r'; if (ps->lineOpts & PS_HARD_CR_AS_SPACE) ret = ' '; xlt = 1; /* translated, don't convert to xnn */ } else if (0==strcmp(keyword,"DiscHyphen")) { /* if line ends ignored, skip entirely */ if (ps->lineOpts & PS_KEEP_LINE_END) { ps->flags.discHyphen = 1; /*seen discretionary hyphen */ xlt = psHandleChar(ps,0x04,&ret); /* disc hyphen hex value */ } } else if (0==strcmp(keyword,"NoHyphen")) { if (!(ps->textOpts & PS_SKIP_NOHYPHEN)) ret = 0x05; /* suppress hyphen hex value */ } else if (0==strcmp(keyword,"SoftHyphen")) xlt = psHandleChar(ps,0x06,&ret); /* number space hex value */ else if (0==strcmp(keyword,"NumberSpace")) xlt = psHandleChar(ps,0x10,&ret); /* number space hex value */ else if (0==strcmp(keyword,"HardSpace")) xlt = psHandleChar(ps,0x11,&ret); /* hard space hex value */ else if (0==strcmp(keyword,"ThinSpace")) xlt = 
psHandleChar(ps,0x12,&ret); /* thin space hex value */ else if (0==strcmp(keyword,"EnSpace")) xlt = psHandleChar(ps,0x13,&ret); /* en space hex value */ else if (0==strcmp(keyword,"EmSpace")) xlt = psHandleChar(ps,0x14,&ret); /* em space hex value */ else if (0==strcmp(keyword,"HardHyphen")) xlt = psHandleChar(ps,0x15,&ret); /* hard hyphen hex value */ else if (0==strcmp(keyword,"Dagger")) xlt = psHandleChar(ps,0xa0,&ret); /* Dagger hex value */ else if (0==strcmp(keyword,"Cent")) xlt = psHandleChar(ps,0xa2,&ret); /* Cent hex value */ else if (0==strcmp(keyword,"Pound")) xlt = psHandleChar(ps,0xa3,&ret); /* Sterling hex value */ else if (0==strcmp(keyword,"Bullet")) xlt = psHandleChar(ps,0xa5,&ret); /* bullet hex value */ else if (0==strcmp(keyword,"Yen")) xlt = psHandleChar(ps,0xb4,&ret); /* Yen hex value */ else if (0==strcmp(keyword,"EnDash")) xlt = psHandleChar(ps,0xd0,&ret); /* en dash hex value */ else if (0==strcmp(keyword,"EmDash")) xlt = psHandleChar(ps,0xd1,&ret); /* em dash hex value */ else if (0==strcmp(keyword,"DoubleDagger")) xlt = psHandleChar(ps,0xe0,&ret); /* dbl dagger hex value */ else noteError("paraScan: unexpected Char keyword"); /* if psHandleChar did not change the value, a special is to be */ /* returned. if user wants specials as \xnn, set up that as a */ /* string and point strChar at it. */ if ((ret != IGNORED) && (!xlt) && (ps->textOpts & PS_SPECIALS_AS_XNN)) { sprintf(ps->specialXnn,"x%02x ",ret); ps->strChar = &ps->specialXnn[0]; ret = '\\'; } } else { /* neither nor -- not text at all! 
*/ /* see if this is a list type the user wants returned */ if (0==textCmp(ps->theItem,"Font")) { if (0==(ps->nonTextOpts & PS_SKIP_FONT)) ret = PS_FONT; } else if (0==textCmp(ps->theItem,"Marker")) { if (0==(ps->nonTextOpts & PS_SKIP_MARKER)) ret = PS_MARKER; } else if (0==textCmp(ps->theItem,"XRef")) { if (0==(ps->nonTextOpts & PS_SKIP_XREF)) ret = PS_XREF; } else if (0==textCmp(ps->theItem,"XRefEnd")) { if (0==(ps->nonTextOpts & PS_SKIP_XREF)) ret = PS_XREFEND; } else if (0==textCmp(ps->theItem,"AFrame")) { if (0==(ps->nonTextOpts & PS_SKIP_AFRAME)) ret = PS_AFRAME; } else if (0==textCmp(ps->theItem,"Variable")) { if (0==(ps->nonTextOpts & PS_SKIP_VAR)) ret = PS_VAR; } else if (0==textCmp(ps->theItem,"ATbl")) { if (0==(ps->nonTextOpts & PS_SKIP_ATBL)) ret = PS_ATBL; } else if (0==textCmp(ps->theItem,"Conditional")) { if (0==(ps->nonTextOpts & PS_SKIP_COND)) ret = PS_COND; } else if (0==textCmp(ps->theItem,"Unconditional")) { if (0==(ps->nonTextOpts & PS_SKIP_UNCOND)) ret = PS_UNCOND; } else if (0==textCmp(ps->theItem,"TextRectID")) { if (0==(ps->nonTextOpts & PS_SKIP_TRID)) ret = PS_TRID; } else if (0==textCmp(ps->theItem,"SpecialHyphenation")) { if (0==(ps->nonTextOpts & PS_SKIP_SPCH)) ret = PS_SPCH; } else if (0==textCmp(ps->theItem,"FNote")) { if (0==(ps->nonTextOpts & PS_SKIP_FNOTE)) ret = PS_FNOTE; } else if (0==textCmp(ps->theItem,"ElementBegin")) { if (0==(ps->nonTextOpts & PS_SKIP_ELBEG)) ret = PS_ELBEG; } else if (0==textCmp(ps->theItem,"ElementEnd")) { if (0==(ps->nonTextOpts & PS_SKIP_ELEND)) ret = PS_ELEND; } else if (0==textCmp(ps->theItem,"Math")) { if (0==(ps->nonTextOpts & PS_SKIP_MATH)) ret = PS_MATH; } else /* ??? */ { if (0==(ps->nonTextOpts & PS_SKIP_UNKNOWN)) ret = PS_UNKNOWN; } } /* end of else-neither-char-nor-string */ } /* end of not-a-string-of-any-kind */ if (0==ps->strChar) /* not working a string */ { /* finished with this item, move to next. 
This could */ /* leave theItem null, signalling end of line */ ps->it.next = ps->theItem; /* save item just examined */ ps->theItem = nextInList(ps->theItem); } /* if user wants spaces compressed, and if we are returning */ /* a space, note it and possibly skip it. */ if (ps->textOpts & PS_COMPRESS_SPACES) { if (' ' == ret) { if (ps->flags.spaceOut) ret = IGNORED; ps->flags.spaceOut = 1; } else ps->flags.spaceOut = 0; } } while (IGNORED == ret); return ret; } /*============================================================================ * * paraScan() is highly flexible & quite fast, too, but it is too much * trouble for the simple case where you just want to grab the leading * text string from a para and test it or use it. Hence paraScanGets, * modelled on fgets(), with no options. We get a new ParaScan item * each time but then free it. * ============================================================================ */ char * paraScanGets(char *bfr, const int n, const itemHandle hPara) { itemHandle ps = NULL; int j; char *s; if ((2 > n) || (!bfr) || (!hPara)) return NULL; ps = paraScanInit(hPara ,PS_FLAT_ISO, PS_SKIP_ALL_NONTEXT, PS_SKIP_LINE_END, NULL); if (!ps) return NULL; /* not a paragraph, apparently */ for(j=n-1, s=bfr; (j); --j) { if (! (*s++ = paraScan(ps)) ) break; } if (!j) /* quit because exhausted n */ *s = '\0'; /* ensure final null */ freeItem(ps); return bfr; }