/*============================================================================
 *
 * atom: ASCII - to - MIF translation
 *
 * This program is a demonstration application using the MIFFEd package.
 * Like MIFFEd, it is Copyright (c) 1994 by David E. Cortesi. See the MIFFEd
 * documentation or README for conditions of use.
 *
 * The input to the program is any ASCII data (for example, a program source)
 * on the stdin handle. The output is MIF on stdout. That MIF can be
 * imported into Frame to display the input text in Courier, with tab
 * alignment preserved.
 *
 * Command syntax: atom -s ptsize -t tabcols -m margin
 *
 * ptsize: the size of the Courier font to be used, in points. Any
 * fractional number from 1.0 to 72.0 is allowed. The default is 9.0.
 *
 * tabcols: the fixed spacing assumed for input tabs. The default is 8.
 * For example use -t 4 to simulate the effect of :set tabstop=4 in vi.
 *
 * margin: the left margin for all paragraphs, in points. The default
 * is 0. Any decimal number is allowed.
 *
 * In order to get predictable spacing, the program assumes the use of
 * Courier. If you want to use a different font, you may need to change
 * the constant FONT_CHAR_WIDTH.
 *
 * The MIF text describes each line of input as a complete paragraph.
 * ASCII tabs and ASCII spaces in the input are converted to Frame tabs so as
 * to produce the same visual alignment on the page. In order to preserve
 * spaces under Frame "smart spaces," all but the first space in a run of
 * spaces is converted to a Frame "hard space."
 *
 * In order to get predictable, regular tab stops, the program defines a
 * paragraph format named "atom_S_T" where S and T are the values from
 * the -s and -t command parameters (thus "atom_9_8" by default). Within
 * this paragraph, a tabstop is defined at intervals of T*S*FONT_CHAR_WIDTH
 * points, starting at the left margin set by -m.
* * The output MIF file contains the following productions: * (note: use :set tabstop=3 to view this file, or use atom -t3 ) * * # required * # -s S and -t T values * > * # -m M value * * > * ... repeated as required to define regular tabstops * >#end of Pgf * >#end of PgfCatalog * * * and items to describe one line... * >#end of ParaLine * >#end of Para * ...More para units as required... * >#end of flow * ==========================================================================*/ #include #include #include "miffed.h" extern double strtod(char *str, char **ptr); /*============================================================================ * The command line values for S, T and M are stored here because they are * needed in several functions. * ==========================================================================*/ static double theMargin = 0.0; static double theFontSize = 9.0; static int theTabWidth = 8; /*============================================================================ * This constant specifies the width of a fixed character as a fraction of the * nominal point size of the font. It was determined for (Adobe) Courier by * experiment. For example, assume 8-column tabstops in an 9pt Courier font. * The tabstops should be set every 8*9*FONT_CHAR_WIDTH points from the left * margin. NOTE: Frame does not portray the width of fixed-width Courier * accurately on the screen. No, not even under high zoom ratios. Even at * 1600% zoom, a tab stop that looks wrong can still print correctly on a * PostScript printer. Or vice versa, tabstops that look right on the screen * can print incorrectly. This is especially true of even-numbered point * sizes for some reason. Anyway, don't assume this program got the tabs * in the wrong place until you actually print the file. 
* ==========================================================================*/ #define FONT_CHAR_WIDTH 0.6 /* for Courier, Letter Gothic might differ */ /*============================================================================ * This constant sets the limit beyond which tabs should not be set. It is * not critical, since Frame will simply ignore tabs that fall beyond the * edge of the text column. This value for 8-inch paper is also good for A4. * For long lines in landscape mode it needs to be larger. * ==========================================================================*/ #define MAX_TABX 8.5*72.0 /*============================================================================ * This constant specifies the longest input string that will be treated as * a single line and output as a single paragraph. If the input contains * longer lines they will be broken into multiple paragraphs (as a natural * result of the operation of fgets). * ==========================================================================*/ #define MAX_LINE 255 /*============================================================================ * This function generates one list given the TABX value. * ==========================================================================*/ itemHandle tabStop(double tx) { return patToMIF(">",tx); } /*============================================================================ * This function uses the one above to generate a series of tabstops and * to append them to a given list, which is presumably the list. * Input of the -m, -s and -t values from the command line comes from the * globals above. 
* ==========================================================================*/ void tabList(itemHandle pgf) { double tp = theMargin; double ti; ti = (theTabWidth)*(theFontSize)*(FONT_CHAR_WIDTH); for (tp +=ti; tp < MAX_TABX; tp += ti) { append(pgf,tabStop(tp)); } } /*============================================================================ * This function generates or reproduces the list each * time it is called. To save time it only generates it once, then copies * the original on subsequent calls. Copy is a bit faster than cranking up * the whole parser through patToMIF(). Globals for -s and -t are input. * ==========================================================================*/ static itemHandle pgfTagHandle = NULL; itemHandle pgfTag() { if (!pgfTagHandle) pgfTagHandle=patToMIF("",theFontSize,theTabWidth); return copyItem(pgfTagHandle); } /* * same business for some other oft-needed lists */ static itemHandle masterCT = NULL; itemHandle charTab() { if (NULL==masterCT) masterCT = patToMIF("%T"); return copyItem(masterCT); } static itemHandle masterCS = NULL; itemHandle charHS() { if (NULL==masterCS) masterCS = patToMIF("%B"); return copyItem(masterCS); } /*============================================================================ * This function uses the ones above to generate the whole list. * ==========================================================================*/ itemHandle pgfCat() { itemHandle pc, pa; pc = newList("PgfCatalog"); pa = newList("Pgf"); append(pa,pgfTag()); append(pa, patToMIF(">",theFontSize) ); append(pa,patToMIF("",theMargin) ); append(pa,patToMIF("",theMargin) ); append(pa,patToMIF("") ); tabList(pa); append(pc,pa); return pc; } /*============================================================================ * This function takes a line of ascii text and builds a with * appropriate and lists to reproduce the ascii in * Frame. 
Some notes: * * When the input contains a tab, one or more items are produced * to get the same alignment in Frame. In order to do this, we maintain a * counter, fmTcol, that represents the Frame tab position. The Frame tab * model differs from the Ascii model in that fmTcol can only advance by * a tab, so it can fall behind fmCcol, which counts the output position * produced by all output characters. * * The logic of this rather complicated function uses 2 exclusive states: * marking: collecting nonspace characters * spacing: collecting white space * When changing from one state to the other, the material collected * is output to the paragraph as or items. In pseudo-code: * * for pC a pointer to the next input character, not end of input: * if *pC is printable (in @..~) * if marking * collect *pC * advance pC * else spacing * generate collected white space as tabs, hardspaces * initialize for marking * (do not advance pC) * endif * else *pC is control char * if marking * generate collected chars as String * initialize for spacing * (do not advance pC) * else already spacing * if *pC is space, count 1 * if *pC is tab, count effective motion * ignore other ctrl chars, incl nl which precedes \0 * advance pC * endif * endif * endfor * ==========================================================================*/ itemHandle paraLine(char *inp) { itemHandle hPL = newList("ParaLine"); itemHandle hMisc; short ascol, fmCcol, fmTcol, marking; char *pC, *pO; char wrk[MAX_LINE]; for (pC = inp, pO = wrk, marking = 1, ascol=fmCcol=fmTcol= 0; (*pC); ) { if (('!'<=*pC)&&('~'>=*pC)) { if (marking) { if (('<'==*pC)||('>'==*pC)||('`'==*pC)||('\''==*pC)||('\\'==*pC)) *pO++ = '\\'; /* escape these in Strings */ *pO++ = *pC++; ++ascol; } else /* spacing, dump white columns */ { if (1 < (ascol-fmCcol)) /* deficit > 1 */ { if (theTabWidth <= (ascol-fmCcol)) /* could use a tab */ { while (theTabWidth <= (ascol-fmTcol)) { append(hPL,charTab()); fmTcol += theTabWidth; } fmCcol = fmTcol; } 
while (1 < (ascol-fmCcol)) { append(hPL,charHS()); ++fmCcol; } } /* deficit is 0 or 1 */ pO = wrk; marking = 1; if (fmCcol < ascol) /* deficit is 1, use normal space */ *pO++ = ' '; } } else /* not printable */ { if (marking) { if (pO > wrk) /* anything collected? */ { *pO = '\0'; hMisc = newList("String"); append(hMisc,newString(wrk)); append(hPL,hMisc); fmCcol = ascol; } marking = 0; } else /* spacing already */ { switch(*pC) { case ' ': ++ascol; break; case '\t': ascol += theTabWidth - ascol%theTabWidth; break; default: break; /* ignore BS, NL, etc */ } ++pC; } } } return hPL; } /*============================================================================ * This function takes a line of ascii text and builds a list to * describe it. The preceding function does the real work. * ==========================================================================*/ itemHandle onePara(char *work) { itemHandle hPara = newList("Para"); append(hPara, pgfTag()); append(hPara, paraLine(work)); return hPara; } /*============================================================================ * This function reads stdin (or any stream) and drives the generation of * a list for each line. Output is the handle. * ==========================================================================*/ itemHandle textFlow(FILE *infile) { itemHandle hTF = patToMIF(">"); char work[MAX_LINE+1]; while ( fgets(work,MAX_LINE,infile) ) append(hTF, onePara(work) ); return hTF; } /*============================================================================ * Feel free to change this to '/' for DOS. * ==========================================================================*/ #define SWITCHAR '-' /*============================================================================ * Explain our usage. 
* ==========================================================================*/ void usage() { fprintf(stderr,"Usage: atom [ %cm ] [ %cs ] [ %ct ]\n",SWITCHAR,SWITCHAR,SWITCHAR); fprintf(stderr,"Converts std input ascii text to MIF on std output.\n"); fprintf(stderr," -m\tLeft margin of all paragraphs, decimal number of points\n"); fprintf(stderr," -s\tSize of Courier font to use, decimal number of points\n"); fprintf(stderr," -t\tWidth of a tab column in characters\n"); fprintf(stderr,"Defaults are -m 0.0 -s 9.0 -t 8\n"); } /*============================================================================ * The main() function gets the arguments and drives the output process. * ==========================================================================*/ int main(int argc, char**argv) { int j, woopsies; char *p; itemHandle mif; for (j = 1, woopsies = 0; j < argc; ++j) { p = argv[j]; if (SWITCHAR != p[0]) { fprintf(stderr,"Not an option: %s\n",p); ++woopsies; } else { switch(p[1]) { case 'm': { if (p[2]) p+=2; else p = argv[++j]; theMargin = strtod(p,NULL); break; } case 's': { if (p[2]) p+=2; else p = argv[++j]; theFontSize = strtod(p,NULL); break; } case 't': { if (p[2]) p+=2; else p = argv[++j]; theTabWidth = atoi(p); break; } default: { fprintf(stderr,"unknown options: %s\n",p); ++woopsies; } } } } if (woopsies) { usage(); return 1; } /* print the required list -- no need to use MIFFEd */ printf("# by atom using MIFFEd\n"); /* Generate the , print it, and recycle it */ mif = pgfCat(); writeMIF(stdout,mif,OUT_EOL+OUT_INDENT); /* no comments in that stuff */ trashSequence(mif); /* Generate the and print it. */ mif = textFlow(stdin); writeMIF(stdout,mif,OUT_EOL+OUT_INDENT); trashSequence(mif); /* just in case we add code below this later */ return 0; }