#define MIFFEDMAIN enum itemTypes { NA,WORD,NUMBER,STRING,LIST,EOL,CMT,TREE,PSCAN }; /* Character string storage is centralized and some peculiar gyrations gone through to one, eliminate redundant storage of strings such as "Para" that appear with great frequency in MIF, and two, to permit the address of a string to be stored in 3 bytes, which enables an Item to store its type and its char-value in 4 bytes. First, all strings are stored in a repository (function stowStr), which stores only one copy of each string value. Second, the storage is segmented into at most MAXSTRSEGS blocks of STRSEGSIZE each. The address of a string can be stored as a byte of 0..MAXSTRSEGS and a short int offset. It is necessary to have an equivalent of NULL; this is stored as any segment number and an offset of INVALSTROFF, which is STRSEGSIZE-1 (since any string at the last byte of its segment would be a null string). */ /* 10/94: reducing STRSEGSIZE to 16K-64, in hopes of not fragmenting */ /* DOS memory allocation and so getting bigger files in DOS. This also */ /* reduces the maximum possible string set to 6 megs or so--probably ok */ #define STRSEGSIZE 16320 #define MAXSTRSEGS 255 #define INVALSTROFF STRSEGSIZE-1 typedef struct { unsigned char segno; unsigned short int offset; } charAddr; /* The Item is the parent class of all items and nodes. An Item contains a type byte (which contains one of ItemTypes and also the size of the object in longs) and a character value as a charAddr. */ typedef struct ItemStruct { unsigned char type; /* itemType() source plus object size in long units */ charAddr cval; /* text of WORD/NUMBER/STRING/CMT, LIST name, EOL cmt */ struct ItemStruct * next;/* pointer to next Item in sequence (nextItem) */ } Item; typedef Item * itemHandle; /* Word and String are instances of Item. The extra layer of structure is so that fields of all types of Item can be named using "it.field". */ typedef struct { Item it; } CharItem; typedef CharItem * wordHandle; typedef CharItem * stringHandle; typedef CharItem * cmtHandle; /* Number is an Item plus a unit */ typedef struct { Item it; char unit[4]; } NumbItem; typedef NumbItem * numbHandle; /* List structure comprises both the List Item and the EOL Item, plus links to the container, next-of-same-name, and last contained item. */ typedef struct ListStruct { Item it; /* represents "< listName"; it.next->1st contents item */ Item foot; /* represents "> #comment"; it.next->item after list */ struct ListStruct * contnr;/* -> head of containing list */ itemHandle tail; /* last item in this list, for append */ } ListItem; typedef ListItem * listHandle; /* Tree node is also an instance of Item. it.next is the node value, while it.cval is the node key. */ typedef struct TreeNodeStruct { Item it; struct TreeNodeStruct *l,*r; } TreeNode; typedef TreeNode * treeHandle; /* Item type is the low-order nybble of the type (the size in longs is stored in the upper nybble). The itemType() function has validity checks that slow it down for internal use, so this macro is used internally. Must be applied to an itemHandle that is known not to be null. */ #define FASTTYPEOF(i) (enum itemTypes)(0x0f & ((Item *)i)->type) #define TYPEIS(i,t) (t == FASTTYPEOF(i)) /* Items are allocated from larger segments. */ #define ITEMSEGSIZE 8192 * sizeof(long) /* 32768 most places */ /* The state of a parse is maintained in an instance of the following structure. At present I don't imagine we'll have more than one parse going at any time. Nevertheless by using an allocated structure instead of static globals, we keep the possibility open. */ /* The parser has the following options that can be set when a parseState is first initialized. FSMCFGFILE has not been implemented as yet. */ #define PARSE_FINAL 1 /* one string of input, \0 means fsmStop not fsmSuspend */ #define PARSE_LST_CMT 2 /* keep only comments following ">" as cval of EOL */ #define PARSE_ALL_CMT 4 /* keep non-">" comments as CMT items as well as EOL */ #define PARSE_CFG_MIF 8 /* special syntax rules for maker.cfg and similar files */ #define PARSE_NO_DIAG 16 /* write no errors to stderr */ #define PARSE_MAX 900 /* more or less arbitrary, but generous, token limit */ #define MAXPARSESTRING 4096 /* more or less arbitrary, but generous, input buffer */ #define MAXOUTLINE MAXPARSESTRING/2 /* equally arbitrary output size limit */ /* The parser is a finite state machine with the following input classes and states. */ enum fsmClasses { fsmSuspend, /* character ending one line of multiline input */ fsmStop, /* character ending line and MIF input */ fsmWhite, /* blank, tab, etc but not \n */ fsmNL, /* newline as a special case */ fsmLBR, /* "<" */ fsmRBR, /* ">" */ fsmLQuote, /* "`" MIF left-apostrophe opens a string */ fsmRQuote, /* "'" MIF string ending */ fsmSharp, /* "#" opens a comment */ fsmBSL, /* backslash, used in cfg-file parse only */ fsmOneUnit, /* """" and "%" are allowed as 1-char number units */ fsmDecimal, /* decimal point */ fsmMinus, /* dash */ fsmDigit, /* [0-9] */ fsmIdent, /* [A-Za-z] */ fsmOther /* nothing that opens or delimits a token */ }; enum fsmStates { fsmScanning, /* looking for a new token */ fsmNameScan, /* seek ident after "<" */ fsmGetName, /* picking up a word or name of a list */ fsmComment, /* collecting comment */ fsmString, /* accumulating contents of `string' */ fsmMinusSeen, /* expecting a digit or dot after a minus sign */ fsmDotSeen, /* expecting a digit after a dot */ fsmInteger, /* gathering digits of a number before decimal */ fsmFraction, /* gathering digits after the decimal */ fsmUnit1, /* looking for start of unit after number */ fsmUnit2 /* absorbing 2nd char of a unit */ }; typedef struct { unsigned char * inptr; /* -> current input character */ itemHandle first; /* first-allocated item in parse, all others chain from it */ itemHandle currItem; /* current item being created */ listHandle currList; /* current list being created, if any */ int incount; /* number of current input, for diagnostics */ enum fsmClasses inclass; /* class of input character */ enum fsmStates state; /* current state of FSM */ int errors; /* number of errors seen since initializing */ int option; /* parse options (see PARSE_XXX bits) */ short collLen; /* length of token collected so far */ char collect[PARSE_MAX+1]; /* space for #cmt, `string', number or ident */ } parseState; /* The ParaScan structure represents the state of a scan over the text of a paragraph. */ typedef struct { Item it; /* it.next saves the last-processed item */ itemHandle thePara; /* the being scanned */ itemHandle theLine; /* the being scanned */ itemHandle theItem; /* the or */ char * strChar; /* ->current byte of a string */ unsigned short textOpts; /* text-conversion options */ unsigned short nonTextOpts; /* skip non-text item options */ unsigned char lineOpts; /* line-end handling options */ struct { /* status memory */ unsigned char discHyphen:1; /* saw a discretionary Hyphen */ unsigned char spaceOut:1; /* returned a space */ } flags; char specialXnn[6]; /* space for converted special */ } ParaScan; #include "miffed.h"