/* ========================================================================== * * DOCWASH: convert a document to use a new template. * * docwash template-file * * This program reads a MIF file from stdin and writes a revised MIF file * to stdout. The output file contains the content of the input, but uses * the formats of the template-file. The conversion is at two levels: * * The following items are stripped from the input file and replaced * with the same items from the template-file: * paragraph catalog * color catalog * font catalog * variable formats * xref formats * conditional catalog * reference pages * * The master pages of the input file are retained, as are its current page * definitions, including any fixed frames and disconnected/frozen pages, and * also the uses of paragraph or font styles that are not in catalogs. * * The tag names for paragraphs, fonts, variables, XRefs, tables, colors, * and conditions that are used in paragraphs, textlines, and graphic objects * are replaced according to data you put in a table in the template-file. * * The expected use is as follows. You have one or more "foreign" Frame * documents that you need to convert to use your "house" templates. * * 1) Use the DOCALYZE program to get a detailed report of the contents * of your house template, for reference. * * 2) Use DOCALYZE to get a detailed report on the contents of the * foreign files, analyze the foreign template, and decide which of * its tags you can support using tags that exist in the house template. * * 3) Open each foreign file and clean it up by: * making all conditional text visible (or deleting the conditions); * deleting all variable and xref formats you do not plan to support * (Frame lets you convert the existing uses to plain text); * find all paragraph or font styles you do not want to support and * change them to plain styles or to styles with replacements; * and finally, save the file as MIF. 
* * 4) Use the DOCCOUNT program to take a census of all tag usage in all * the foreign MIF files. * * 5) Import the DOCCOUNT output into an empty, house template. Call this * the census template. The DOCCOUNT data reveals use of hidden or * unwanted tags, which must be corrected (return to step 3). It also * reveals tags that are never used (typically many of them), which you * need not try to convert. * * 6) Looking at the census template under Frame, and manually comparing * the DOCALYZE reports, decide on replacement tag names for all used * tags. For example, the foreign files use "Body" for normal text, * but your house template uses "Text." The foreign numbered-list * tags are "1List" and "List" but your house version is "nlist" and * "nlist+". The foreign files use a custom color "Taupe" for comments * and you use plain "Red." And so forth. * * 7) Record corresponding tags in the DOCCOUNT tables in the census * template by writing the names of the replacements in the table * column headed "Replacement". You must supply a replacement tag * name for every tag that is used in a foreign file and appears in * the foreign catalog. (You don't need to give a replacement for a * paragraph or font that is used but not catalogued.) * * In the event that you want to retain a foreign tag as-is (for example, * because the foreign name is the same as the house template uses), * you can supply a replacement tag name of the single character "=". * * 8) Save the census template as MIF. This file is specified as the * template-file argument of DOCWASH. * * 9) Process each foreign MIF file through DOCWASH, specifying the * census template MIF file. Save each output as a new MIF file. * * 10) Open the new MIF files from Frame. They should reflect * the formatting of your house template. 
The master pages and the * original pagination are retained; you can use Import Formats to * take care of these after checking for disconnected pages and * the existence of fixed (nonanchored) frames on pages. * * Here are the main steps executed by DOCWASH: * * a) Open the template file. If it can't be opened, stop with an error. * * b) Read the template file completely into memory with readMIF(). If the * returned handle is NULL, the file is not MIF; stop with an error. * * c) Search the template file noting the addresses of the various * catalog lists. If any cannot be found, the template file is not * a Frame-produced MIF file; stop with an error. * * d) Scan each table in the <Tbls> section of the template file: * 1. Find the <TblH <Row>> heading row. * 2. Search for a column heading "Tag Name" and note the column number. * 3. Search for a column heading "Replacement" and note the col number. * 4. If both columns were found, then scan each <Row> in the table: * a. Get the first string from the first para in the tag name col * b. Get the first string from the first para in the replacement col * c. If both are non-null, file a tree entry with the key of the * tag name and the value of the replacement name * * Note no effort is made to segregate tags by type. Hence if there * are foreign tags that have duplicate names between one catalog * and another, e.g. a font and a condition with the same names, * mistakes will be made. * * e) Begin reading stdin with readOneItem(). If it returns NULL on the * first call, stdin is not MIF; stop with an error. * * f) Read the input file one list at a time using readOneItem(). * Modify particular lists as follows: * * all catalogs: write the corresponding template catalog instead. * * <AFrames> and <Page>s: scan each looking for <TextLine>s * containing <Font>s and convert the FTags. * * <Tbls>: scan the contents looking for <TblTag>s to convert. * Scan the contents of all tables, looking for <Para>s to convert. 
* * : If this is the HIDDEN textflow, discard it (hidden text * cannot be converted because the replacement condition tag * should be visible, and it is too complicated to make hidden text * visible). For normal text flows, scan the contents * looking for s to convert. * * Write the converted, or replacement, or original list to stdout * using writeMIF. * * ==========================================================================*/ #include #include "miffed.h" itemHandle template = NULL; itemHandle tplPcat, tplFcat, tplTcat, tplVcat, tplXcat, tplCcat, tplHcat; itemHandle tplRefPages; /* list of multiple ref s */ treeHandle symbolTree = NULL; #define MAXTAG 128 /* longest string to fetch from a table */ /* ========================================================================== * The following table controls the processing of the input file. The first * column contains the name of a top-level list item, e.g. "PgfCatalog", * and the second column contains the address of a transforming function * to be applied to that type of item, e.g. xformPCat. The prototypes of * the available functions are written first, then the initialized table. * * Note: the main() routine will apply all applicable transformers, so it is * possible either to break the transformation of an item into sequential steps * by applying multiple functions to it, or to apply the same function to * multiple items. 
* ==========================================================================*/ typedef struct { char * listName; itemHandle (*xformer) (itemHandle hItem); } workOrder; itemHandle xformColorCat(itemHandle hItem); itemHandle xformCondCat(itemHandle hItem); itemHandle xformParaCat(itemHandle hItem); itemHandle xformFontCat(itemHandle hItem); itemHandle xformTblCat(itemHandle hItem); itemHandle xformVarCat(itemHandle hItem); itemHandle xformXrefCat(itemHandle hItem); itemHandle xformAFrames(itemHandle hItem); itemHandle xformPage(itemHandle hItem); itemHandle xformTFlow(itemHandle hItem); itemHandle xformTbls(itemHandle hItem); workOrder doList[] = { {"ColorCatalog", xformColorCat}, {"ConditionCatalog", xformCondCat}, {"PgfCatalog", xformParaCat}, {"FontCatalog", xformFontCat}, {"TblCatalog", xformTblCat}, {"VariableFormats", xformVarCat}, {"XRefFormats", xformXrefCat}, {"AFrames", xformAFrames}, {"Page", xformPage}, {"TextFlow", xformTFlow}, {"Tbls", xformTbls}, {NULL,NULL} /* terminates work list */ }; /* ========================================================================== * we transform the color catalog (and others likewise) by replacing it * totally with the template version. 
* ==========================================================================*/ itemHandle xformColorCat(itemHandle hItem) { trashSequence(hItem); return tplHcat; /* "H" is for "Hue" by the way */ } itemHandle xformCondCat(itemHandle hItem) { trashSequence(hItem); return tplCcat; } itemHandle xformParaCat(itemHandle hItem) { trashSequence(hItem); return tplPcat; } itemHandle xformFontCat(itemHandle hItem) { trashSequence(hItem); return tplFcat; } itemHandle xformTblCat(itemHandle hItem) { trashSequence(hItem); return tplTcat; } itemHandle xformVarCat(itemHandle hItem) { trashSequence(hItem); return tplVcat; } itemHandle xformXrefCat(itemHandle hItem) { trashSequence(hItem); return tplXcat; } /* ========================================================================== * fixTag() is called with a list whose only item is a string that is a * tagname: or or , whatever. * Look up the tag in the symbolTree and replace it with the result, if any. * However, if the tree value is '=', return that char and do nothing. * If the tree value is '/' and the caller accepts slash (meaning delete * the tag rather than changing it), again return it and do nothing. 
* ==========================================================================*/
/*
 * hTag    : the tag list, e.g. the <PgfTag ...> or <FColor ...> list.
 * okSlash : nonzero if the caller accepts a '/' replacement.
 * Returns : ' ' when the tag is a null string, when no replacement is
 *           found, or after the (damaged, see below) replacement branch;
 *           otherwise the single replacement character ('=' or '/').
 *
 * NOTE(review): part of this function's text is missing from this copy of
 * the file (everything between "(1" and " changed to" below, apparently
 * including the length test, the call that stores the new tag text, an
 * "#ifdef DEBUG" and the start of an fprintf).  The code is reproduced as
 * found; restore it from a good copy before building.
 */
char fixTag(itemHandle hTag, int okSlash)
{
    itemHandle hStr, hRep;
    char *pTag;
    char *pRep;

    /* per the header comment, the list's only item is the tag-name string */
    hStr = nextItem(hTag);
    pTag = textOf(hStr);
    if (*pTag) /* not a null-string tagname */
    {
        /* the tree data was stored by saveSymbolPair() via newWord() */
        /* NOTE(review): result is cast to (char *) but kept in an itemHandle */
        hRep = (char *)treeLook(symbolTree,pTag);
        pRep = textOf(hRep);
        if ( (pRep) && (*pRep) ) /* some useful value in tree */
        {
            /* NOTE(review): the next line is the damaged span described above */
            if ( (1 changed to <%s %s>\n", textOf(hTag),pTag,textOf(hTag),pRep);
#endif
                return ' ';
            }
            else /* 1==strlen & value is = or / */
            {
                if ( ('/' == *pRep) && !okSlash)
                {
                    /* NOTE(review): this message has no trailing newline */
                    fprintf(stderr, "Warning: tag <%s %s> unchanged, / not supported", textOf(hTag), pTag);
                }
#ifdef DEBUG
                else
                {
                    fprintf(stderr,"<%s %s> unchanged on =\n", textOf(hTag), pTag);
                }
#endif
                return *pRep;
            }
        }
        else /* nothing in the tree to match the tag */
        {
            fprintf(stderr,"Warning: no replacement for <%s %s>\n", textOf(hTag), pTag);
        }
    }
    return ' ';
}

/* ==========================================================================
 * A <Font> list from a <ParaLine> or <TextLine> can contain an <FTag>
 * and/or an <FColor>, both of which need to be fixed.  A <PgfFont> is
 * similar, and is also passed to this function.
 * ==========================================================================*/
void fixFont(itemHandle hFont)
{
    itemHandle hFTag, hFCol;

    /* font tag name, if the font list carries one */
    hFTag = firstOfName(hFont,"FTag");
    if (hFTag) { (void) fixTag(hFTag,0); }

    /* font color name, if the font list carries one */
    hFCol = firstOfName(hFont,"FColor");
    if (hFCol) { (void) fixTag(hFCol,0); }
}

/* ==========================================================================
 * The <Frame> can contain a number of types of objects.  Most of these can
 * contain an <ObColor> item which needs to be fixed.  Also the <Frame> can
 * contain a <TextLine> which, besides an <ObColor>, can contain <Font>s.
 * Finally, a <Frame> can contain another <Frame>, it is the only recursive
 * list in MIF.
 *
 * A <Page> is like a <Frame> in that it can contain all the same things
 * including <Frame>s.  This function checks over a <Frame> or a <Page>.
* ==========================================================================*/
void frameOrPage(itemHandle hFrame)
{
    itemHandle hObject = nextItem(hFrame);

    while (hObject)
    {
        /* any object may carry its own color tag */
        itemHandle hColor = firstOfName(hObject, "ObColor");

        if (hColor)
        {
            fixTag(hColor, 0);
        }

        if (textCmp(hObject, "Frame") == 0)
        {
            /* nested frame: recurse */
            frameOrPage(hObject);
        }
        else if (textCmp(hObject, "TextLine") == 0)
        {
            /* fix every <Font> inside the text line */
            itemHandle hFont = firstOfName(hObject, "Font");

            while (hFont)
            {
                fixFont(hFont);
                hFont = nextSameName(hFont);
            }
        }

        hObject = nextInList(hObject);
    }
}

/* ==========================================================================
 * To transform the AFrames section we scan each <Frame> in it.
 * The section itself is returned unchanged so that main() writes it out.
 * ==========================================================================*/
itemHandle xformAFrames(itemHandle hItem)
{
    itemHandle hAnchored = firstOfName(hItem, "Frame");

    while (hAnchored)
    {
        frameOrPage(hAnchored);
        hAnchored = nextSameName(hAnchored);
    }
    return hItem;
}

/* ==========================================================================
 * To transform a <Page> we do one of two things.  If it is not a reference
 * page, we run it through frameOrPage() to fix fonts & colors.  If it is
 * the first reference page, we return the sequence of reference pages
 * saved from the template file.  If it is a later reference page, we lose it.
 * Note: in either case, the handle to the template reference pages and
 * the handle to the input reference page are simply forgotten.  This is
 * messy but not a real problem.
* ==========================================================================*/
/*
 * hItem   : a top-level <Page> list from the input file.
 * Returns : the item main() should write: the page itself (master/body
 *           page), the template's reference-page sequence (first reference
 *           page seen), or NULL (any later reference page).
 *
 * A page with no <PageType>, or whose type string is unavailable, is
 * treated as an ordinary page, matching getTplCats() which treats a
 * missing <PageType> as a body page.  This avoids handing an unchecked
 * pointer to strcmp().
 */
itemHandle xformPage(itemHandle hItem)
{
    itemHandle hPageType = firstOfName(hItem,"PageType");
    char * pType = NULL;

    if (hPageType)
    {
        pType = textOf(nextItem(hPageType));
    }

    if ( (pType) && (0 == strcmp(pType,"ReferencePage")) )
    {
        if (tplRefPages)
        {   /* first reference page: substitute the template's sequence */
            hItem = nextItem(tplRefPages);
            tplRefPages = (itemHandle)NULL; /* memory leak, we don't care */
        }
        else /* not first reference page */
        {
            hItem = (itemHandle)NULL; /* forgotten, but not gone */
        }
    }
    else /* MasterPage or BodyPage */
    {
        frameOrPage(hItem);
    }
    return hItem;
}

/* ==========================================================================
 * This function transforms the contents of a <ParaLine> including <Font>s,
 * <Variable>s, <XRef>s, and <Conditional>s.
 *
 * This is the point where we would like to support a replacement string '/'
 * meaning, delete the sucker rather than change it.  However, this is
 * too complicated to do at this time.  To delete unwanted variables,
 * fonts, and conditions, use FrameMaker before saving the foreign MIF file.
 *
 * Note: by the time we get here we can be nested 6 deep in functions and
 * 7 deep in for-loops.
The longest path is:
 *    xformTbls:        each <Tbl>
 *     fixTbl:          <TblH>, <TblBody>, <TblF>
 *      fixRows:        each <Row>, each <Cell>
 *       fixText:       each note in <Notes>, each <Para>
 *        fixPara:      each <ParaLine>
 *         fixParaLine: each item, via nextInList
 * ==========================================================================*/

/* Fix the tag held in the first child of hParent named childName, if any. */
static void fixChildTag(itemHandle hParent, char *childName)
{
    itemHandle hChild = firstOfName(hParent, childName);

    if (hChild)
    {
        (void) fixTag(hChild, 0);
    }
}

void fixParaLine(itemHandle hPL)
{
    /* ParaLine items whose tag lives in a named child list */
    static struct {
        char *itemName;     /* name of the ParaLine item       */
        char *tagName;      /* name of the child holding a tag */
    } tagged[] = {
        { "Variable",    "VariableName" },
        { "XRef",        "XRefName"     },
        { "Conditional", "InCondition"  },
        { NULL, NULL }
    };
    itemHandle hPiece;
    int k;

    for (hPiece = nextItem(hPL); hPiece; hPiece = nextInList(hPiece))
    {
        if (textCmp(hPiece, "Font") == 0)
        {
            fixFont(hPiece);
            continue;
        }
        /* stop at the first matching item name, as an else-if chain would */
        for (k = 0; tagged[k].itemName; ++k)
        {
            if (textCmp(hPiece, tagged[k].itemName) == 0)
            {
                fixChildTag(hPiece, tagged[k].tagName);
                break;
            }
        }
    }
}

/* ==========================================================================
 * This function transforms a <Para> from any source -- textflow, Notes,
 * or CellContent.  When a para is based on the catalog, it contains a
 * <PgfTag> to change.  If it has a unique format, it contains a <Pgf>
 * that can contain a <PgfTag> and a <PgfFont>.  After fixing these
 * issues, we fix every <ParaLine>.
 * ==========================================================================*/
void fixPara(itemHandle hPara)
{
    itemHandle hLine;
    itemHandle hEmbedded = firstOfName(hPara, "Pgf");

    if (!hEmbedded)
    {
        /* para style from catalog, just fix its tag */
        fixChildTag(hPara, "PgfTag");
    }
    else
    {
        /* para has embedded format: fix its tag, then its font */
        itemHandle hPgfFont;

        fixChildTag(hEmbedded, "PgfTag");
        hPgfFont = firstOfName(hEmbedded, "PgfFont");
        if (hPgfFont)
        {
            fixFont(hPgfFont);
        }
    }

    hLine = firstOfName(hPara, "ParaLine");
    while (hLine)
    {
        fixParaLine(hLine);
        hLine = nextSameName(hLine);
    }
}

/* ==========================================================================
 * This function fixes any container of text: a <TextFlow>, a <CellContent>,
 * a <TblTitleContent>.  All of these are the same in that they start with
 * a <Notes> which is usually empty but which may contain a sequence of
 * <FNote> paragraphs, followed by a sequence of <Para> paragraphs.
* * Note: the following code *assumes* that is always the first item * in a text container, and also *assumes* that the first item within any * is either end of list (empty ) or an . Strictly * speaking neither of these has to be true, but I've never seen a Frame * MIF file for which it wasn't true. * ==========================================================================*/ void fixText(itemHandle hText) { itemHandle hP; itemHandle hN = nextItem(hText); itemHandle hF = nextItem(hN); if ( (0==textCmp(hN,"Notes")) && (EOL != itemType(hF)) ) { /* there is a non-empty , scan it for s */ for ( ; (hF); hF = nextSameName(hF)) { for( hP = firstOfName(hF,"Para"); (hP); hP = nextSameName(hP)) { fixPara(hP); } /* end all within an */ } /* end all within */ } for (hP = firstOfName(hText,"Para"); (hP); hP = nextSameName(hP)) { fixPara(hP); } /* end all within this textflow/cellcontent/whatever */ } /* ========================================================================== * There really isn't anything special to be done to a except * to fix all its paragraphs. * ==========================================================================*/ itemHandle xformTFlow(itemHandle hItem) { fixText(hItem); return hItem; } /* ========================================================================== * In each of the 3 major sections of a there is a sequence of * s, each containing a sequence of s, each of which contains * a which is a text container. * ==========================================================================*/ void fixRows(itemHandle hSection) { itemHandle hRow, hC, hCC; for (hRow = firstOfName(hSection,"Row"); (hRow); hRow = nextSameName(hRow)) { for (hC = firstOfName(hRow,"Cell"); (hC); hC = nextSameName(hC)) { hCC = firstOfName(hC,"CellContent"); fixText(hCC); } /* end scanning all Cells in a Row */ } /* end scanning all rows in a TblH/Body/F */ } /* ========================================================================== * Fixing a table is complicated. 
Besides the TblTag itself, we need to * fix the title content and every cell in every header, body, and foot row. * NOTE: a table can have an embedded containing embedded s * to define the style of head/body/foot columns. This structure is not * being fixed up, but we issue a warning if one is found. * ==========================================================================*/ void fixTbl(itemHandle hTbl) { itemHandle hTag = firstOfName(hTbl,"TblTag"); itemHandle hFormat = firstOfName(hTbl,"TblFormat"); itemHandle hTCont = firstOfName(hTbl,"TblTitleContent"); itemHandle hTblH = firstOfName(hTbl,"TblH"); itemHandle hTblB = firstOfName(hTbl,"TblBody"); itemHandle hTblF = firstOfName(hTbl,"TblF"); /* fix the table tag itself */ (void) fixTag(hTag,0); /* if the table has an embedded format issue a warning */ if (hFormat) { itemHandle hID = firstOfName(hTbl,"TblID"); char *pID = textOf(nextItem(hID)); fprintf(stderr, "Warning: table id %s contains embedded TblFormat\n",pID); } /* The is a text container, fix it. */ if (hTCont) { fixText(hTCont); } /* Fix all the rows of the heading, body, and footing */ if (hTblH) fixRows(hTblH); if (hTblB) fixRows(hTblB); if (hTblF) fixRows(hTblF); } /* ========================================================================== * To transform the section, we have to fix each table. * ==========================================================================*/ itemHandle xformTbls(itemHandle hItem) { itemHandle hTbl; for (hTbl = firstOfName(hItem,"Tbl"); (hTbl); hTbl = nextSameName(hTbl)) fixTbl(hTbl); return hItem; } /* ========================================================================== * The template MIF has been read. Search it for the catalogs and save their * addresses. We have to extract ("yank") each catalog out of the templates * list. If we just save a pointer to it as returned by firstOfName(), when * we go to write that catalog, writeMIF() writes the catalog and everything * that follows it in sequence. 
However, the extract function is yankNext() * which requires the handle of the item that points to the item you want. * In order to get that, we have to find the list item that precedes the * item of the specified name, and then find its EOL, and pass that to * the yankNext() function. Clearly this was not in the original objectives. * ==========================================================================*/ itemHandle yankByName(itemHandle contnr, char *name) { /* n.b. this can't find name as the first item of the container */ itemHandle scan = nextItem(contnr); while ( (scan) && (textCmp(nextInList(scan),name)) ) scan = nextInList(scan); if (scan) /* nextInList(scan) is the list we want to yank */ { return yankNextInList(scan); } else return scan; } int getTplCats() { int j; itemHandle hPage, hX; if (!(tplHcat = yankByName(template,"ColorCatalog")) ) { fputs("Template file lacks a ColorCatalog\n",stderr); return 0; } if (!(tplCcat = yankByName(template,"ConditionCatalog")) ) { fputs("Template file lacks a ConditionCatalog\n",stderr); return 0; } if (!(tplPcat = yankByName(template,"PgfCatalog")) ) { fputs("Template file lacks a PgfCatalog\n",stderr); return 0; } if (!(tplFcat = yankByName(template,"FontCatalog")) ) { fputs("Template file lacks a FontCatalog\n",stderr); return 0; } if (!(tplTcat = yankByName(template,"TblCatalog")) ) { fputs("Template file lacks a TblCatalog\n",stderr); return 0; } if (!(tplVcat = yankByName(template,"VariableFormats")) ) { fputs("Template file lacks VariableFormats\n",stderr); return 0; } if (!(tplXcat = yankByName(template,"XRefFormats")) ) { fputs("Template file lacks XRefFormats\n",stderr); return 0; } tplRefPages = newList("foo"); /* name is irrelevant */ /* the following rips out all lists in the template */ /* the reference s are saved, others are just lost */ for (hPage = yankByName(template,"Page"); (hPage); hPage = yankByName(template,"Page") ) { hX = firstOfName(hPage,"PageType"); if (!hX) continue; /* no page type 
== body page: skip */ hX = nextItem(hX); if (textCmp(hX,"ReferencePage")) continue; /* master page, not reference page: skip */ /* ref page: build a sequence in dummy list */ append(tplRefPages,hPage); } if (!nextItem(tplRefPages)) fputs("Warning: template has no reference pages\n",stderr); return 1; } /* ========================================================================== * Get the string value from a >> nest. * ==========================================================================*/ void cellGets(itemHandle hCell, char *bfr, int n) { itemHandle hCC = firstOfName(hCell,"CellContent"); itemHandle hPara = firstOfName(hCC,"Para"); *bfr = '\0'; /* ensure null string in case !hPara */ (void) paraScanGets(bfr,n,hPara); } /* ========================================================================== * saveSymbolPair receives a and 2 column numbers (usually 1 & 4). * It gets the string value of the two columns in this row, and saves them * in the symbolTree, with the first as key and the second as data. Note * we assume the columns specified, exist. * Special: strip off leading & trailing spaces. This is because DOCCOUNT puts * a single space in the "Replacement" column and it is very easy to leave * it there accidentally. 
* ==========================================================================*/

/*
 * Strip leading and trailing blanks (' ' only) from the string in bfr.
 * Trailing blanks are overwritten with NULs in place; the return value
 * points at the first nonblank character inside bfr (or at the terminating
 * NUL when the string is empty or all blanks).
 */
char * stripEm(char *bfr)
{
    char *pA, *pZ;

    /* run pA to the first nonblank (which can be the null) */
    for (pA = bfr; ' ' == *pA; ++pA)
        ;
    /* run pZ to the end of the string */
    for (pZ = pA; *pZ; ++pZ)
        ;
    /*
     * Null any trailing blanks.  pZ always stays within [pA, end], so no
     * pointer is ever formed before the start of the buffer, even when the
     * stripped string is empty.  *pA is nonblank whenever pZ > pA, so the
     * loop stops at pA at the latest.
     */
    while ( (pZ > pA) && (' ' == pZ[-1]) )
    {
        *--pZ = '\0';
    }
    return pA;
}

/*
 * hRow   : one table <Row>.
 * tagCol : 1-based number of the "Tag Name" column.
 * repCol : 1-based number of the "Replacement" column.
 * Enters (tag name -> replacement) in symbolTree when both stripped strings
 * are non-empty.  Each string is limited by the MAXTAG-sized buffer.
 */
void saveSymbolPair(itemHandle hRow, int tagCol, int repCol)
{
    char oldTag[MAXTAG], repTag[MAXTAG];
    char *pOld, *pRep;
    itemHandle anyCell, oldCell, repCell;
    int j;

    /* walk the cells, counting from 1, to pick out the two wanted columns */
    oldCell = repCell = NULL;
    for (j = 1, anyCell = firstOfName(hRow,"Cell");
         (anyCell);
         ++j, anyCell = nextSameName(anyCell))
    {
        if (j == tagCol)
            oldCell = anyCell;
        else if (j == repCol)
            repCell = anyCell;
    }

    cellGets(oldCell, oldTag, sizeof(oldTag));
    pOld = stripEm(&oldTag[0]);
    cellGets(repCell, repTag, sizeof(repTag));
    pRep = stripEm(&repTag[0]);

    /* NOTE(review): pOld points into the local oldTag buffer; this relies  */
    /* on treeEnter() copying the key -- confirm against miffed.h.          */
    if ( (*pOld) && (*pRep) ) /* 2 non-null strings (after strip) */
        treeEnter(&symbolTree, pOld, (void *)newWord(pRep));
}

/* ==========================================================================
 * tblSymbols gets a <Tbl> and checks it to see if it has tag-replacement
 * info in it.  The test is to find two columns headed "Tag Name" and
 * "Replacement".  The column numbers of those columns are noted.
 *
 * If both columns exist (in any order) then we pass each row of the table
 * to saveSymbolPair().
* ==========================================================================*/ void tblSymbols(itemHandle hTbl) { int tagCol=0; int repCol=0; itemHandle hTblH = firstOfName(hTbl,"TblH"); itemHandle hTblB = firstOfName(hTbl,"TblBody"); itemHandle hRow = firstOfName(hTblH,"Row"); itemHandle hCell; char cHead[MAXTAG], *pC; if (hRow) /* there is a heading with a row */ { int j; for( j=1, hCell=firstOfName(hRow,"Cell"); (hCell); ++j, hCell = nextSameName(hCell)) { cellGets(hCell,cHead,sizeof(cHead)); pC = stripEm(cHead); if (0==strcmp(pC,"Tag Name")) tagCol = j; else if (0==strcmp(pC,"Replacement")) repCol = j; } /* end scanning all heading cells in first heading row */ } if ((tagCol) && (repCol) && (hTblB)) { for( hRow = firstOfName(hTblB,"Row"); (hRow); hRow = nextSameName(hRow)) { saveSymbolPair(hRow,tagCol,repCol); } } } /* ========================================================================== * Apply tblSymbols() to every in the template section. * ==========================================================================*/ int scanTablesForSymbols() { itemHandle hTbls = firstOfName(template,"Tbls"); if (hTbls) { itemHandle hTbl; for(hTbl = firstOfName(hTbls,"Tbl"); (hTbl); hTbl=nextSameName(hTbl)) tblSymbols(hTbl); return 1; } else { fputs("Template has no section\n",stderr); return 0; } } /* ========================================================================== * Main: orchestrate all the above. 
* ==========================================================================*/
/*
 * Usage: docwash template-mif-file-name   (input on stdin, output on stdout)
 *
 * Exit status: 0 on success; -1 bad usage; -2 template cannot be opened;
 * -3 template is not MIF; -4 template lacks a required catalog;
 * -5 template has no tables section; -6 input is empty or not MIF.
 */
int main(int argc, char **argv)
{
    FILE *tmplate;
    FILE *inf = stdin;  /* set up to support file arguments */
    FILE *ouf = stdout;
    itemHandle oneItem, tplSeq;

    if (argc != 2)
    {
        fputs("usage: docwash template-mif-file-name\n",stderr);
        return -1;
    }

    tmplate = fopen(argv[1],"r");
    if (!tmplate)
    {
        fprintf(stderr,"unable to open template file %s\n",argv[1]);
        return -2;
    }

    tplSeq = readMIF(tmplate,0); /* get all parts of file as sequence */
    /* the header comment says readMIF() reads the whole file into memory, */
    /* so the stream is no longer needed whether or not the read succeeded */
    (void) fclose(tmplate);
    if (!tplSeq)
    {
        fprintf(stderr,"apparently not a MIF file: %s\n",argv[1]);
        return -3;
    }
    template = newList("bar"); /* name is irrelevant */
    insert(template,tplSeq);   /* put whole file into dummy list */

    if (!getTplCats())
    {
        return -4;
    }
    if (!scanTablesForSymbols())
    {
        return -5;
    }

    oneItem = readOneItem(inf,0);
    if (!oneItem)
    {
        fputs("Input empty, or not a MIF file\n",stderr);
        return -6;
    }

    for ( ; (oneItem) ; oneItem = readOneItem(inf,0) )
    {
        int j;

        /*
         * Apply every transformer whose list name matches.  Stop as soon as
         * a transformer drops the item (returns NULL, as xformPage() does
         * for surplus reference pages) so textOf() is never asked for the
         * name of a missing item.
         */
        for (j = 0; (oneItem) && (doList[j].listName); ++j)
        {
            if (0 == strcmp(doList[j].listName, textOf(oneItem)))
                oneItem = doList[j].xformer(oneItem);
        }
        if (oneItem) /* something left from transformation */
            writeMIF(ouf,oneItem,OUT_EOL+OUT_INDENT);
    }
    return 0;
}