/* ========================================================================== * * DOCCOUNT: Count the use of tags in a Frame (MIF) Document * * doccount mif-file-name ... * * This program reads MIF files named on the command line. It counts * the definitions (catalog entries) and also the uses of all unique: * * Color tags * Paragraph tags * Character tags * Variable formats * XRef formats * Table formats * Conditional text tags * Reference frame names * * within the specified files. * * One table is produced for each type of tag. The columns are: * * tagname | catalog | hidden | count | replacement * * tagname the name of the paragraph, font, table, etc. * * catalog contains "Y" or "N" depending on whether the tagname is * defined in its respective catalog. * * hidden contains "Y" only if the tagname is used in text that * has been hidden by conditional text. This does NOT * properly reflect the hidden usage of paragraphs in tables, or * of paragraphs or textlines in anchored frames, when the * table/frame anchors are in hidden text. Such usages are * counted and not shown as hidden. * * count contains the number of times the tagname is used * in any text flow. * * replacement this column is empty. * * The output is MIF and goes to standard output. It can be imported * into any Frame document. Typically it is imported into a template * document, and the "replacement" column filled in manually, creating * input for the docwash program. * ==========================================================================*/ #include #include "miffed.h" /* ========================================================================== * This union is used to encode three items of information in a single * "void*" item. The "void*" is the value stored in a tree node whose * key is a tag name. 
* ==========================================================================*/ typedef union { struct { unsigned short flags; #define FLAGINCAT 0x0001 #define FLAGHIDDEN 0x0002 unsigned short uses; } d; void * v; } UseInfo; /* ========================================================================== * These globals are the roots of binary trees in which the names * and usage counts of tags are stored -- see noteUse() and * noteEntries(). * ==========================================================================*/ static treeHandle paraTree = NULL; static treeHandle fontTree = NULL; static treeHandle variTree = NULL; static treeHandle xrefTree = NULL; static treeHandle tablTree = NULL; static treeHandle condTree = NULL; static treeHandle coloTree = NULL; static treeHandle refeTree = NULL; /* ========================================================================== * oneLinePara() creates a MIF paragraph containing a single * string. The paragraph tag is supplied, with a default of "Body." * >> * * For a simpler, but slower, way to accomplish the same thing, see the * oneLinePara() in DOCALYZE.C. * ==========================================================================*/ itemHandle oneLinePara(char *text, char *tag) { itemHandle hPara = newList("Para"); itemHandle hPtag = newList("PgfTag"); itemHandle hPline = newList("ParaLine"); itemHandle hStr = newList("String"); append( hPtag, newString( (tag)?tag:"Body" ) ); append( hStr, newString( text ) ); append( hPline, hStr ); append( hPara, hPtag ); append( hPara, hPline ); return hPara; } /* ========================================================================== * oneLineCell() creates a MIF table cell containing a one-line * paragraph as its contents. The paragraph tag is supplied, with * a default of "CellBody". 
* > > * ==========================================================================*/ itemHandle oneLineCell(char*text, char *tag) { itemHandle hCell = newList("Cell"); itemHandle hContent = newList("CellContent"); append( hContent, oneLinePara( text, (tag)?tag:"CellBody" ) ); append( hCell, hContent ); return hCell; } /* ========================================================================== * Note one use of a tag. The tag may or may not be in the tree already. * Typically this function is called first from noteEntries() scanning a * catalog, and later when a tag is found in e.g. a . However, the * use of a reference frame is seen first scanning the para catalog, and * then later the definition (flag == FLAGINCAT) is seen in a . * ==========================================================================*/ void noteUse( treeHandle *root, char *tag, unsigned short flag) { UseInfo u; u.v = treeLook(*root, tag); u.d.flags |= flag; if (!(flag&FLAGINCAT)) /* not the defining entry */ ++u.d.uses; /* count a use */ treeEnter(root, tag, u.v); } /* ========================================================================== * Given a catalog, the tagname of the entry in it, and the * tree to file it in, file all the entries in the tree. * * Note: this is a pretty poor use of the MIFFEd tree support, * because Frame catalogs are in lexical sequence, usually, * and the tree support does not do balancing. So this probably * comes down to a slow way to do sequential-list insertion and * lookup. If lookup proves to be a problem, we can rethink. 
* (in an Indy it does not seem to be, even for large input files) * ==========================================================================*/ void noteEntries( treeHandle *root, itemHandle hCat, char * entryTag, char * tagTag ) { itemHandle hEntry, hTag, hTxt; for ( hEntry = firstOfName(hCat, entryTag); (hEntry); hEntry = nextSameName(hEntry) ) { hTag = firstOfName(hEntry,tagTag); hTxt = nextItem(hTag); if (hTxt) noteUse(root,textOf(hTxt),FLAGINCAT); } } /* ========================================================================== * Special for the PgfCatalog only, scan all s for the use of * reference frames. * ==========================================================================*/ void noteRefFrames(itemHandle hPCat) { itemHandle hPgf, hX; char *pTag; for (hPgf = firstOfName(hPCat, "Pgf"); (hPgf); hPgf = nextSameName(hPgf)) { hX = firstOfName(hPgf,"PgfTopSeparator"); if (hX) /* frame above? */ { pTag = textOf(nextItem(hX)); if (*pTag) /* not just */ noteUse( &refeTree, pTag, 0 ); } hX = firstOfName(hPgf,"PgfBotSeparator"); if (hX) /* frame below? */ { pTag = textOf(nextItem(hX)); if (*pTag) noteUse( &refeTree, pTag, 0 ); } } } /* ========================================================================== * Handle noting the tag name of one . When a represents a * physical change, e.g. physical bold, there is no in it. When it * represents a return to default font, it has . Otherwise it * has a non-empty FTag value which we note the use of. Here we also note * the use of a color tag in an list -- which can be * independent of the use of a named font. 
* ==========================================================================*/ void noteFont(itemHandle hFont, unsigned short flag) { itemHandle hFTag = firstOfName(hFont,"FTag"); itemHandle hFCol = firstOfName(hFont,"FColor"); if ((hFTag) && (0 < strlen(textOf(nextItem(hFTag)))) ) noteUse(&fontTree, textOf(nextItem(hFTag)), flag); if ((hFCol)) noteUse(&coloTree, textOf(nextItem(hFCol)), flag); } /* ========================================================================== * Note the uses of tags in a . Usage occurs at two levels. * At the outer level is the -- which can be wrapped in a * when the tag is uncatalogued. * * Then, within the Para is a sequence of s and these contain * , , , and items which we count. * Note: at this point we ignore any and items in a ParaLine. * The use of these tags is counted by scanning the and . * * This function is called in two contexts: from noteText() for each * Para in a textflow, and from noteTblParas() for each paragraph in a * cell of a table. * ==========================================================================*/ void notePara(itemHandle hPara, unsigned short flag) { itemHandle hParaLine, hX, hY; hX = firstOfName(hPara,"PgfTag"); if (!hX) /* not > */ { hX = firstOfName(hPara,"Pgf"); if (hX) /* if > look for >> */ hX = firstOfName(hX,"PgfTag"); } if (hX) /* found PgfTag at some level */ { hY = nextItem(hX); noteUse( ¶Tree, textOf(hY), flag ); } for (hParaLine = firstOfName(hPara,"ParaLine"); (hParaLine); hParaLine = nextSameName(hParaLine) ) { /* scan one for fonts, variables, xrefs and conds */ for (hX = nextItem(hParaLine); (hX); hX = nextInList(hX) ) { if (0 == textCmp(hX, "Font") ) { noteFont(hX, flag); } else if (0 == textCmp(hX, "Variable")) { hY = firstOfName(hX,"VariableName"); if ((hY)) noteUse(&variTree, textOf(nextItem(hY)), flag); } else if (0 == textCmp(hX, "XRef")) { hY = firstOfName(hX,"XRefName"); if ((hY)) noteUse(&xrefTree, textOf(nextItem(hY)), flag); } else if (0 == textCmp(hX, "Conditional")) { 
hY = firstOfName(hX,"InCondition"); if ((hY)) noteUse(&condTree, textOf(nextItem(hY)), flag); } } /* end of scanning hX over ParaLine contents */ } /* end scanning hParaLine over Para contents */ } /* ========================================================================== * Search a section (from a textflow or a CellContent) for any * s. Most are empty. However, an contains * s which need to be passed to notePara(). * ==========================================================================*/ void noteNotes(itemHandle hNotes, unsigned short flag) { itemHandle hFNote, hPara; for( hFNote = firstOfName(hNotes,"FNote"); (hFNote); hFNote = nextSameName(hFNote) ) { for ( hPara = firstOfName(hFNote, "Para"); (hPara); hPara = nextSameName(hPara) ) { notePara(hPara,flag); } } } /* ========================================================================== * Find all the lists in a . From each, send the * section to noteNotes() and the s to notePara(). * Note that we have no info no hidden text flow usage here, so pass * a flag value of 0 to noteNotes() and notePara(). * ==========================================================================*/ void noteRow(itemHandle hRow) { itemHandle hC, hCContent, hx; for ( hC = firstOfName(hRow,"Cell"); (hC); hC = nextSameName(hC) ) { hCContent = firstOfName(hC,"CellContent"); if (hCContent) /* when would it ever not exist? */ { hx = firstOfName(hCContent,"Notes"); if (hx) noteNotes(hx,0); for (hx = firstOfName(hCContent,"Para"); (hx); hx = nextSameName(hx) ) { notePara(hx,0); } /* end of finding all s in a */ } /* end of parsing in a */ } /* end of finding all s in a */ } /* ========================================================================== * Note all uses of tags in paragraphs in the cells of one table. * The structure of a list is as follows: * ...> * ...> (heading rows in ) * ... (body rows) * ...> (footing rows in ) * In this function we find all the lists and pass them to * noteRow() for analysis. 
* ==========================================================================*/ void noteOneTbl(itemHandle hTbl) { itemHandle hTTC = firstOfName(hTbl,"TblTitleContent"); itemHandle hTblH = firstOfName(hTbl,"TblH"); itemHandle hTblB = firstOfName(hTbl,"TblBody"); itemHandle hTblF = firstOfName(hTbl,"TblF"); itemHandle hRow; if (hTTC) { itemHandle hx = firstOfName(hTTC,"Notes"); if (hx) noteNotes(hx,0); for (hx = firstOfName(hTTC,"Para"); (hx); hx = nextSameName(hx) ) { notePara(hx,0); } /* end of finding all s in a */ } if (hTblH) { for (hRow = firstOfName(hTblH,"Row"); (hRow); hRow = nextSameName(hRow) ) { noteRow(hRow); } /* end of finding all s in a */ } if (hTblB) { for (hRow = firstOfName(hTblB,"Row"); (hRow); hRow = nextSameName(hRow) ) { noteRow(hRow); } /* end of finding all s in the body of a */ } if (hTblF) { for (hRow = firstOfName(hTblF,"Row"); (hRow); hRow = nextSameName(hRow) ) { noteRow(hRow); } /* end of finding all s in a */ } } /* ========================================================================== * Note all uses of all tags within the section. This occurs at * two levels: each table has a TblTag whose use we count; and each * table has a lot of content that at some level contains s that * need to be analyzed -- in preceding function noteOneTbl(). * * Note that counting uses in the section gets the usage in all * existing tables, but does not distinguish between tables whose anchors * are visible, and tables with anchors that are in hidden text. * ==========================================================================*/ void noteTbls(itemHandle hTbls) { itemHandle hTbl, hTag; for( hTbl = firstOfName(hTbls, "Tbl"); (hTbl); hTbl = nextSameName(hTbl) ) { hTag = firstOfName(hTbl, "TblTag"); if (hTag) noteUse( &tablTree, textOf(nextItem(hTag)), 0 ); noteOneTbl(hTbl); } } /* ========================================================================== * Scan one looking for uses of colors and fonts. 
Alone among MIF * constructs, the can be nested, so this is a recursive function! * * In a , font usage is found only in s. However, the * itself, and anything shape object in it such as a , * can contain a color in an list. * * When the frame has a item, it is a reference frame and we enter * it into the refeTree as being catalogued. * * Since we need to look at all items in the we can't use the * firstOfName/nextSameName loop (you must be bored with that by now) * but rather scan all items within the frame using nextInList(). * ==========================================================================*/ void noteFrame(itemHandle hFrame) { itemHandle hx; for ( hx = nextItem(hFrame); (hx); hx = nextInList(hx) ) { if (0 == textCmp(hx,"TextLine")) { itemHandle hFont; for (hFont = firstOfName(hx,"Font"); (hFont); hFont = nextSameName(hFont) ) { noteFont(hFont,0); } /* end of scanning all s in a */ } /* end of one in a */ else if (0 == textCmp(hx,"ObColor")) noteUse(&coloTree, textOf(nextItem(hx)), 0); else if (0 == textCmp(hx,"Tag")) noteUse(&refeTree, textOf(nextItem(hx)), FLAGINCAT); else if (0 == textCmp(hx,"Frame")) { noteFrame(hx); /* recurse to handle nested */ } else /* not those things ... is it a colored shape? */ { itemHandle hObC = firstOfName(hx,"ObColor"); if (hObC) noteUse(&coloTree, textOf(nextItem(hObC)), 0); } } /* end of loop over all items in a */ } /* ========================================================================== * Scan the section for constructs and pass each to * function noteFrame for scanning. Actually the only thing in * is s, but you never know. Someday they may start sticking * some other kind of list in there. So use firstOfName/nextSameName. 
* ==========================================================================*/ void noteAFrames(itemHandle hAFrames) { itemHandle hFrame; for( hFrame = firstOfName(hAFrames,"Frame"); (hFrame); hFrame = nextSameName(hFrame) ) { noteFrame(hFrame); } } /* ========================================================================== * Scan a looking for s which can sit loose on a page. * If one appears, pass it to noteFrame for scanning. Not all pages * have frames on them, in fact typically only reference pages do, but * you can put a fixed frame onto any page manually. * In addition, check any other items on the page for usages. * ==========================================================================*/ void notePageFrames(itemHandle hPage) { itemHandle hThing; for( hThing = nextItem(hPage); (hThing); hThing = nextInList(hThing) ) { if (0 == textCmp(hThing,"Frame")) noteFrame(hThing); else { itemHandle hObC = firstOfName(hThing,"ObColor"); if (hObC) noteUse(&coloTree, textOf(nextItem(hObC)), 0); } } } /* ========================================================================== * Note all uses of tags in a textflow. This consists of finding each * in the flow and scanning it in notePara(), and passing any * list to noteNotes(). * ==========================================================================*/ void noteText(itemHandle hTFlow) { itemHandle hPara; itemHandle hTFTag = firstOfName(hTFlow,"TFTag"); itemHandle hNotes = firstOfName(hTFlow,"Notes"); unsigned short flag = 0; if ( (hTFTag) && (0==textCmp(hTFTag,"HIDDEN")) ) flag = FLAGHIDDEN; if (hNotes) noteNotes(hNotes,flag); for( hPara = firstOfName(hTFlow,"Para"); (hPara); hPara = nextSameName(hPara) ) { notePara(hPara,flag); } /* end scanning hPara over all paras in hTFlow */ } /* ========================================================================== * Scan 1 MIF file: loop over the top-level lists in the input. Pass * catalogs to noteEntries. 
Pass tables section to noteTables, and * textflows to noteUsage. * ==========================================================================*/ void scanFile(FILE *f) { itemHandle oneList; /* current input list */ for ( oneList = readOneItem(f,0); (oneList); oneList = readOneItem(f,0) ) { if (0 == textCmp(oneList,"ColorCatalog")) noteEntries(&coloTree, oneList, "Color", "ColorTag"); else if (0 == textCmp(oneList,"PgfCatalog")) { noteEntries(¶Tree, oneList, "Pgf", "PgfTag"); noteRefFrames(oneList); } else if (0 == textCmp(oneList,"FontCatalog")) noteEntries(&fontTree, oneList, "Font", "FTag"); else if (0 == textCmp(oneList,"VariableFormats")) noteEntries(&variTree, oneList, "VariableFormat", "VariableName"); else if (0 == textCmp(oneList,"XRefFormats")) noteEntries(&xrefTree, oneList, "XRefFormat", "XRefName"); else if (0 == textCmp(oneList,"TblCatalog")) noteEntries(&tablTree, oneList, "TblFormat", "TblTag"); else if (0 == textCmp(oneList,"ConditionCatalog") ) noteEntries(&condTree, oneList, "Condition", "CTag"); else if (0 == textCmp(oneList,"Tbls") ) noteTbls(oneList); /* note table tag usage */ else if (0 == textCmp(oneList,"AFrames") ) noteAFrames(oneList); else if (0 == textCmp(oneList,"Page") ) notePageFrames(oneList); else if (0 == textCmp(oneList,"TextFlow")) noteText(oneList); else ; trashSequence(oneList); } } /* ========================================================================== * This global is an input parameter to the tree-scanning function. * It is the handle of the in which the scanner should append * each of data. * ==========================================================================*/ static itemHandle currentTblBody = NULL; /* ========================================================================== * The address of this function is passed to treeScan, which automatically * invokes it once for each entry in the tree. 
Its job is to generate * a of data based on the tree entry, and to append that row to the * table whose handle is in currentTblBody, above. * ==========================================================================*/ void scanEntry(char *tag, void*v) { itemHandle hRow = newList("Row"); UseInfo u; char cat[2], hid[2], num[8]; u.v = v; cat[0] = (u.d.flags&FLAGINCAT)?'Y':'N'; cat[1] = '\0'; hid[0] = (u.d.flags&FLAGHIDDEN)?'Y':' '; hid[1] = '\0'; sprintf(num,"%d",u.d.uses); append(hRow, oneLineCell(tag,NULL)); /* tagname column */ append(hRow, oneLineCell(cat,NULL)); /* catalog column */ append(hRow, oneLineCell(hid,NULL)); /* hidden column */ append(hRow, oneLineCell(num,NULL)); /* count column */ append(hRow, oneLineCell(" ",NULL)); /* replacment column */ append(currentTblBody,hRow); } /* ========================================================================== * tableStart: initialize a for the format used with all * tables in this program: 5 columns of fixed widths. The title * and the table id # are given. 
 *
 *   ttitle  text of the table title paragraph (tagged "TableTitle")
 *   tblID   value handed to the first patToMIF() call
 *
 * Returns the new table list with the title content and one heading
 * row ("Tag Name", "Catalog", "Hidden", "Count", "Replacement") already
 * appended; no body is attached here.
 * ==========================================================================*/
itemHandle tableStart(char *ttitle, int tblID)
{
    int j;  /* not referenced anywhere in this function */
    /* NOTE(review): the pattern strings passed to patToMIF() below are
     * empty or nearly empty in this copy of the file. They presumably
     * held MIF markup with a format specifier for the second argument
     * (table id, format name, column count, column widths) -- confirm
     * against a clean copy of the source before building. */
    itemHandle hTbl = patToMIF(">",tblID);
    itemHandle hTitle = newList("TblTitleContent");
    itemHandle hTblH = newList("TblH");
    itemHandle hRowH = newList("Row");
    append(hTbl,patToMIF("","Format A"));
    append(hTbl,patToMIF("",5));
    append(hTbl,patToMIF("",144)); /* tagname */
    append(hTbl,patToMIF("",60)); /* catalog */
    append(hTbl,patToMIF("",60)); /* hidden */
    append(hTbl,patToMIF("",60)); /* count */
    append(hTbl,patToMIF("",144)); /* replacement */
    /* title paragraph */
    append(hTitle,oneLinePara(ttitle,"TableTitle"));
    append(hTbl,hTitle);
    /* one heading row, one cell per column */
    append(hRowH, oneLineCell("Tag Name","CellHeading"));
    append(hRowH, oneLineCell("Catalog","CellHeading"));
    append(hRowH, oneLineCell("Hidden","CellHeading"));
    append(hRowH, oneLineCell("Count","CellHeading"));
    append(hRowH, oneLineCell("Replacement","CellHeading"));
    append(hTblH, hRowH);
    append(hTbl, hTblH);
    return hTbl;
}

/* ==========================================================================
 * Given one of the trees of collected counts, and a descriptive
 * title, and a table #, make a table list to describe that data.
 *
 *   root    address of the tree root to report on
 *   ttitle  table title, passed to tableStart()
 *   tblID   table id, passed to tableStart()
 *
 * The table from tableStart() gets a new TblBody list appended; the
 * body is filled by treeScan() calling scanEntry() for the tree's
 * entries, with the body handle passed through the global
 * currentTblBody.
 * ==========================================================================*/
itemHandle treeToTable(treeHandle *root, char *ttitle, int tblID)
{
    itemHandle hTbl = tableStart(ttitle,tblID);
    itemHandle hBody = newList("TblBody");
    currentTblBody = hBody; /* tell scanEntry() where to append rows */
    treeScan(*root, scanEntry);
    append(hTbl,hBody);
    return hTbl;
}

/* ==========================================================================
 * Main: open each argument as a file and pass to scanFile.
 * Then build tables containing the census info.
 * ==========================================================================*/
/* NOTE(review): the text of this function is damaged in this copy of the
 * file. The for-loop header breaks off after its initialiser and the text
 * resumes at the tail of a string literal ending in "# from DOCCOUNT\n";
 * the loop body and whatever followed it are missing, and the parapat
 * pattern has lost all but its closing brackets. The code is left exactly
 * as found -- restore it from a clean copy of the source before building. */
int main(int argc, char*argv[]) { itemHandle hTbls = newList("Tbls"); /* Tbls list, written to stdout below */ itemHandle hTFlow = newList("TextFlow"); /* TextFlow list, written to stdout below */ int tblID = 0; int j; FILE *f; char *parapat = ">>"; for( j = 1; j# from DOCCOUNT\n"); writeMIF(stdout,hTbls,OUT_EOL+OUT_INDENT); writeMIF(stdout,hTFlow,OUT_EOL+OUT_INDENT); }