/* * dumpmif.c Dump the ascii text and table text of a MIF file. * * This demonstrates the use of paraScan() to extract text from a textflow or * from a table. The program is structured so that you can snip out functions * for use elsewhere. * * For use with ISO-Latin-1 output, see "NOTE ISO" below. * * No command line arguments are taken. * * Input is standard input, and output is standard output. However, the file * descriptors are passed to subfunctions as arguments, so you can easily * extract these functions and use them with other stream files. * * The program structure is upside down, as usual. The innermost functions * appear first, and main() is down at the very end. */ #include #include "miffed.h" /* * Dump text representing a non-text item such as a . Some types * of items are dumped in greater detail than others. Feel free to add more * detail to this function! Arguments: * itemType the flag returned by paraScan() * nti handle returned by paraScanItem() * ofd output file descriptor * Assumes that ofd is at the left margin and leaves it at the left margin. */ void dumpNonText(int itemType, itemHandle nti, FILE *ofd) { itemHandle wrk1, wrk2; switch(itemType) { case PS_ATBL: /* */ case PS_AFRAME: /* */ { fprintf(ofd,"<%s %s>\n", textOf(nti), textOf(nextItem(nti)) ); break; } case PS_FONT: /* > */ { wrk1 = firstOfName(nti,"FTag"); fprintf(ofd,"\n", (wrk1) ? textOf(nextItem(wrk1)) : "(no tag)" ); break; } case PS_COND: /* > */ { wrk1 = firstOfName(nti,"InCondition"); fprintf(ofd,"\n", (wrk1) ? textOf(nextItem(wrk1)) : "(no tag)" ); break; } case PS_MARKER: /* > */ { wrk1 = firstOfName(nti,"MType"); wrk2 = firstOfName(nti,"MText"); fprintf(ofd,"\n", (wrk1) ? textOf(nextItem(wrk1)) : "(no type)", (wrk2) ? textOf(nextItem(wrk2)) : "(no text)" ); break; } default: /* feel free to add more cases above this! 
*/ { fprintf(ofd,"<%s ...>\n",textOf(nti)); break; } } /* end switch */ } /* * Dump the text of one paragraph to the output FD, preceded by the line * * * This the function that actually calls paraScanInit() and paraScan(). * * Text is dumped until a nontext item is seen. Then a newline is inserted * and the nontext item is dumped using dumpNonText(), above, after which * text display continues. * * Frame line-ends are ignored. One good reason for this is that special * characters are all returned, as \xnn strings. This can easily make a * line much longer than Frame had it. * * The line length is set by a constant LINE_LEN, which you could easily * convert to a variable or an argument. * * The line-break logic is implemented by collecting blank-delimited tokens. * Token text is collected until one of 3 things happens: the token itself * reaches the length of the line (unusual); a space is received, starting * a new token; or a nontext item is seen. At that time the current token * is dumped, possibly preceded by a newline or a space. */ static itemHandle ps = NULL; /* parascan object handle for re-use */ #define MAX_LINE 80 /* maximum possible line length */ #define LINE_LEN 79 /* length of current line, could be var */ #define ASCII_OPTS PS_TAB_AS_SPACE+PS_COMPRESS_SPACES+PS_SPECIALS_AS_XNN #define ISO_OPTS ASCII_OPTS+PS_8859_1 /* * ISO NOTE: If the device used as "ofd" supports the full ISO Latin-1 * character set, you can set TEXT_OPTS to ISO_OPTS, and this function * should then dump Latin-1 characters in ISO-8859 coding without other * changes. When TEXT_OPTS is ASCII_OPTS, all non-ascii characters are * dumped in \xnn form. 
*/ #define TEXT_OPTS ASCII_OPTS void dumpPara(itemHandle para, FILE *ofd) { int inc; /* latest return from paraScan() */ char *pc; /* ->next character of token */ short ltok; /* length of token collected so far */ short llin; /* length of line since last \n */ itemHandle ptag; /* handle of list */ char token[MAX_LINE+1]; /* token being collected */ ptag = firstOfName(para,"PgfTag"); fprintf(ofd,"\n", (ptag) ? textOf(nextItem(ptag)) : "(no tag)" ); ps = paraScanInit(para, TEXT_OPTS, /* see above */ 0, /* return all nontext */ PS_SKIP_LINE_END+PS_HARD_CR_AS_SPACE, ps); if (!ps) { fprintf(ofd,"!null return from paraScanInit\n"); return; } pc = token; ltok = llin = 0; /* * loop over the characters in a paragraph. Don't bother using a * profiling tool on this program -- the hot spot is the next 20 lines! */ while ( (inc = paraScan(ps)) ) { if ( (0x20 <= inc) /* not a space, and */ && (0x100 > inc) /* not a nontext item, and */ && (LINE_LEN > ltok) ) /* token not too long yet */ { /* ...so there is no reason to terminate this token */ *pc++ = inc; /* append the char to the token */ ++ltok; /* count it in token */ ++llin; /* count it in line */ if (0x20 < inc) continue; /* iterate the loop */ } /* one of three causes for ending a token has occurred */ *pc = '\0'; /* terminate the token string */ if ( (LINE_LEN < llin) /* token will not fit on this line, */ && (llin != ltok) ) /* and token is not 1st in line, */ { fputc('\n',ofd); /* terminate current line to make room */ llin = ltok; /* token is only thing on new line */ } if ( (llin != ltok)&&(' ' != inc) ) /* insert space */ { fputc(' ',ofd); ++llin; /* include space in length of line */ } fputs(token,ofd); /* write the token */ pc = token; /* start over in buffer */ ltok = 0; /* no token at this time */ if ( (LINE_LEN <= llin) /* only when single token > LINE_LEN */ || ((256 < inc)&&(llin)) )/* or about to dump nontext item on line*/ { fputc('\n',ofd); /* start new line */ llin = 0; } if (256 < inc) /* token ended for 
nontext item */ dumpNonText(inc, paraScanItem(ps),ofd); } /* end while, end of paragraph text */ if ((llin)||(ltok)) /* some token collected at end of para */ { *pc = '\0'; fputs(token,ofd); fputc('\n',ofd); } } /* * A footnote is encoded as ...> * This function dumps one as followed by dumpPara() output. */ void dumpFNote(itemHandle fnote, FILE *ofd) { itemHandle id = firstOfName(fnote,"ID"); itemHandle para; fprintf(ofd, "\n", (id) ? textOf(nextItem(id)) : "(no id?)" ); for(para = firstOfName(fnote,"Para"); (para); para = nextSameName(para)) { dumpPara(para,ofd); } } /* * Table Titles, Table Cells, and TextFlows all have the same set of items * to define text, namely * * * * This function is called from dumpTable, dumpCell, and dumpTextFlow to * handle this sequence, including dumping notes and all paras. * Argument ctnr is one of , , or . */ void dumpContainer(itemHandle ctnr, FILE * ofd) { itemHandle fnote, para; /* scans notes and paras */ itemHandle notes = firstOfName(ctnr,"Notes"); /* * Most lists are empty. A nonempty one has 1 or more * lists in it. Find these and dump them. */ if (notes) { fnote = firstOfName(notes,"FNote"); if (fnote) { fputs("\n",ofd); while (fnote) { dumpFNote(fnote,ofd); fnote = nextSameName(fnote); } fputs("\n",ofd); } } for (para = firstOfName(ctnr,"Para"); (para); para = nextSameName(para) ) { dumpPara(para,ofd); } } /* * Dump one table cell. Most of the work is done in dumpContainer(), above. * -- number 'n' is an argument from dumpRow(), below * -- output of dumpContainer() */ void dumpCell(itemHandle cell, int cellnum, FILE * ofd) { itemHandle content = firstOfName(cell,"CellContent"); /* * put code here to pick out and display such cell attributes * as , , etc. */ fprintf(ofd,"\n",cellnum); if (content) dumpContainer(content,ofd); else fputs("?no \n",ofd); } /* * Dump one row of a table. This function is called to dump rows from the * Heading and Footing of the table as well as from the body. 
In all cases * the output has the format: * -- number 'n' is an argument to the function * -- cell dump by dumpCell(), above * ... * and so on to the end of the row. * */ void dumpRow(itemHandle row, int rownum, FILE * ofd) { int colnum; itemHandle cell; /* * put code here to pick out and display row attributes such as * , , etc. */ fprintf(ofd,"\n",rownum); cell = firstOfName(row,"Cell"); for (colnum = 1; (cell); ++colnum, cell = nextSameName(cell) ) { dumpCell(cell, colnum, ofd); } } /* * Dump a table: first dump the table title if any, then the heading rows * if any, then the body rows (there is at least one), then the footing * rows if any. The table dump is decorated with these lines: * -- table id # and table format tag * -- table column widths * * -- output of dumpContainer() * <heading> * -- rows dumped by dumpRow() * <body> * -- rows dumped by dumpRow() * <footing> * -- rows dumped by dumpRow() * * Let's see, at this point we will have: * main-->dumpTable-->dumpRow-->dumpCell-->dumpContainer -->dumpPara */ void dumpTable(itemHandle tab, FILE *ofd) { itemHandle wrk1, wrk2; int rownum; wrk1 = firstOfName(tab,"TblID"); wrk2 = firstOfName(tab,"TblTag"); fprintf(ofd,"<table %s `%s\'>\n", (wrk1) ? textOf(nextItem(wrk1)) : "(no id?)", (wrk2) ? 
textOf(nextItem(wrk2)) : "(no tag)") ; if ( (wrk1 = firstOfName(tab,"TblColumnWidth")) ) { fputs("<columns",ofd); do { fprintf(ofd," %s %s", textOf(nextItem(wrk1)),unitTextOf(nextItem(wrk1))); wrk1 = nextSameName(wrk1); } while(wrk1); fputs(" >\n",ofd); } if ( (wrk1 = firstOfName(tab,"TableTitleContent")) ) { fputs("<title>\n",ofd); dumpContainer(wrk1,ofd); } if ( (wrk1 = firstOfName(tab,"TblH")) ) { fputs("<heading>",ofd); for(wrk2 = firstOfName(wrk1,"Row"), rownum = 1; (wrk2); wrk2=nextSameName(wrk2),++rownum ) { dumpRow(wrk2,rownum,ofd); } } if ( (wrk1 = firstOfName(tab,"TblBody")) ) { fputs("<body>\n",ofd); for(wrk2 = firstOfName(wrk1,"Row"), rownum = 1; (wrk2); wrk2=nextSameName(wrk2),++rownum ) { dumpRow(wrk2,rownum,ofd); } } if ( (wrk1 = firstOfName(tab,"TblF")) ) { fputs("<footing>",ofd); for(wrk2 = firstOfName(wrk1,"Row"), rownum = 1; (wrk2); wrk2=nextSameName(wrk2),++rownum ) { dumpRow(wrk2,rownum,ofd); } } } /* * Dump a text flow: Display the text flow tag letter, then dump the * contents using dumpContainer(). Do not dump anything if the * textflow lacks a TFTag. This still dumps an irritating * little textflow, TFTag `a', with a null paragraph, that shows up * in most (all?) files and whose purpose I don't know. */ void dumpTextFlow(itemHandle tflow, FILE *ofd) { itemHandle tftag = firstOfName(tflow,"TFTag"); if (tftag) { fprintf(ofd,"<TextFlow `%c\'>\n", *(textOf(nextItem(tftag))) ); /* * here add code to dump such things as <TFAutoConnect> or * <TFSideHeadPlacement>. */ dumpContainer(tflow,ofd); } } /* * Dump the MIF file on standard input. Feel free to add code to pick up the * name of input and/or output files from the command line. 
*/
int main (int argc, char **argv)
{
    FILE *ifd = stdin;
    FILE *ofd = stdout;
    itemHandle mainItem;

    /* the command line is not consulted */
    (void)argc;
    (void)argv;

    /*
     * Read top-level items one at a time until readOneItem() returns a
     * null handle. Only Tbls and TextFlow items produce output; every
     * item is released with trashSequence() once it has been examined.
     */
    while ( (mainItem = readOneItem(ifd, 0)) )
    {
        if (0 == textCmp(mainItem, "TextFlow"))
        {
            dumpTextFlow(mainItem, ofd);
            trashSequence(mainItem);
            continue;
        }

        if (0 == textCmp(mainItem, "Tbls"))
        {
            /* dump each Tbl item held in the Tbls list */
            itemHandle tbl = firstOfName(mainItem, "Tbl");

            while (tbl)
            {
                dumpTable(tbl, ofd);
                tbl = nextSameName(tbl);
            }
        }

        trashSequence(mainItem);
    }

    return 0;
}