Plan 9 from User Space's /usr/local/plan9/src/cmd/sed.c

/*
 * sed -- stream  editor
 *
 *
 */
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>

/*
 * Compile-time limits.  Buffer sizes are counted in Runes, not bytes.
 */
enum {
	DEPTH		= 20,		/* max nesting depth of {} */
	MAXCMDS		= 512,		/* max sed commands (entries in pspace) */
	ADDSIZE		= 10000,	/* Runes in addspace: a/c/i text, r file names, s right-hand sides */
	MAXADDS		= 20,		/* max pending adds and reads */
	LBSIZE		= 8192,		/* Runes per line in linebuf, holdsp and genbuf */
	LABSIZE		= 50,		/* entries in the label table ltab (not the length of a name) */
	MAXSUB		= 10,		/* max number of sub reg exp */
	MAXFILES	= 120		/* max output files, counting standard output in slot 0 */
};
	/* An address is a line #, a R.E., "$", a reference to the last
	 * R.E., or nothing.
	 *
	 * A_LAST is only a compile-time marker produced for an empty
	 * R.E. ("//"); executable() treats any type other than
	 * A_NONE, A_DOL, A_LINE and A_RE as an internal error.
	 */
typedef struct	{
	enum {
		A_NONE,			/* no address given */
		A_DOL,			/* "$" */
		A_LINE,			/* line number, in u.line */
		A_RE,			/* regular expression, in u.rp */
		A_LAST			/* empty R.E.: reuse the last one compiled */
	}type;
	union {
		long line;		/* Line # */
		Reprog *rp;		/* Compiled R.E. */
	} u;
} Addr;

/*
 * One compiled sed command.  The member of u that is valid depends on
 * the command: re1 for s, text for a/c/i/r (and the translation table
 * built by ycomp for y), lb1 for branches and {.
 */
typedef struct	SEDCOM {
	Addr	ad1;			/* optional start address */
	Addr	ad2;			/* optional end address */
	union {
		Reprog	*re1;		/* compiled R.E. */
		Rune	*text;		/* added text or file name */
		struct	SEDCOM	*lb1;	/* destination command of branch */
	} u;
	Rune	*rhs;			/* Right-hand side of substitution */
	Biobuf*	fcode;			/* Output file for w and for the w flag of s */
	char	command;		/* command code -see below */
	char	gfl;			/* 'Global' flag for substitutions */
	char	pfl;			/* 'print' flag for substitutions: 1 for p, 2 for P */
	char	active;			/* nonzero => inside an address range: 1 on the line that matched ad1, 2 afterwards */
	char	negfl;			/* negation flag */
} SedCom;

	/* Command Codes for field SedCom.command (octal).
	 * No code is 0: execute() stops at the first entry of pspace
	 * whose command field is 0.  The letters noted below are the
	 * script commands for which fcomp() stores each code.
	 */
#define ACOM	01	/* a */
#define BCOM	020	/* b, and { (stored with the negation flag inverted) */
#define CCOM	02	/* c */
#define	CDCOM	025	/* D */
#define	CNCOM	022	/* N */
#define COCOM	017	/* not stored by fcomp() in this file */
#define	CPCOM	023	/* P */
#define DCOM	03	/* d */
#define ECOM	015	/* not stored by fcomp() in this file */
#define EQCOM	013	/* = */
#define FCOM	016	/* not stored by fcomp() in this file */
#define GCOM	027	/* g */
#define CGCOM	030	/* G */
#define HCOM	031	/* h */
#define CHCOM	032	/* H */
#define ICOM	04	/* i */
#define LCOM	05	/* l */
#define NCOM	012	/* n */
#define PCOM	010	/* p */
#define QCOM	011	/* q */
#define RCOM	06	/* r */
#define SCOM	07	/* s */
#define TCOM	021	/* t */
#define WCOM	014	/* w */
#define	CWCOM	024	/* not stored by fcomp() in this file */
#define	YCOM	026	/* y */
#define XCOM	033	/* x */

	
/*
 * A label is entered either when it is defined (":name", which sets
 * address) or when it is first referenced by b or t (which starts
 * chain).  dechain() later points every chained branch at address.
 */
typedef struct label {			/* Label symbol table */
	Rune	asc[9];			/* Label name, NUL terminated */
	SedCom	*chain;			/* Branches waiting for this label, linked through u.lb1 */
	SedCom	*address;		/* Command associated with label; 0 until defined */
} Label;

/*
 * One queued input file.  enroll() appends entries and opendata()
 * consumes them from the head of the list.
 */
typedef	struct	FILE_CACHE {		/* Data file control block */
	struct FILE_CACHE *next;	/* Forward Link */
	char 		  *name;	/* Name of file; 0 means standard input */
} FileCache;

SedCom pspace[MAXCMDS];			/* Command storage */
SedCom *pend = pspace+MAXCMDS;		/* End of command storage (one past the last entry) */
SedCom *rep = pspace;			/* Current fill point */

Reprog	*lastre = 0;			/* Last regular expression compiled; 0 if none yet */
Resub	subexp[MAXSUB];			/* sub-patterns of pattern match; entry 0 is the whole match */

Rune	addspace[ADDSIZE];		/* Text of a, c, i; file names of r; right-hand sides of s */
Rune	*addend = addspace+ADDSIZE;	/* One past the end of addspace */

SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads, 0 terminated */
SedCom	**aptr = abuf;			/* Next free slot in abuf */

/*
 * Where the script currently being compiled comes from.  newfile()
 * fills this in and getrune() reads from it.
 */
struct {				/* Sed program input control block */
	enum PTYPE 			/* Either on command line or in file */
		{ P_ARG,
		  P_FILE
		} type;
	union PCTL {			/* Pointer to data */
		Biobuf	*bp;		/* P_FILE: open script file */
		char	*curr;		/* P_ARG: next unread byte; 0 once exhausted */
	} pctl;
} prog;

Rune	genbuf[LBSIZE];			/* Miscellaneous buffer: scratch for dosub() and the x command */

FileCache	*fhead = 0;		/* Head of File Cache Chain */
FileCache	*ftail = 0;		/* Tail of File Cache Chain */

Rune	*loc1;				/* Start of pattern match */
Rune	*loc2;				/* End of pattern match */
Rune	seof;				/* Pattern delimiter char */

Rune	linebuf[LBSIZE+1];		/* Input data buffer; also holds each script line while compiling */
Rune	*lbend = linebuf+LBSIZE;	/* Last element of linebuf, kept for the terminating NUL */
Rune	*spend = linebuf;			/* End of input data */
Rune	*cp;				/* Current scan point in linebuf */

Rune	holdsp[LBSIZE+1];		/* Hold buffer */
Rune	*hend = holdsp+LBSIZE;		/* Last element of the hold buffer */
Rune	*hspend = holdsp;		/* End of hold data */

int	nflag;				/* Command line flags */
int	gflag;
int	lflag;

int	dolflag;			/* Set when at true EOF */
int	sflag;				/* Set when substitution done */
int	jflag;				/* Set when jump required */
int	delflag;			/* Delete current line when set */

long	lnum = 0;			/* Input line count */

char	fname[MAXFILES][40];		/* File name cache; each name is limited to 40 bytes with its NUL */
Biobuf	*fcode[MAXFILES];		/* File ID cache, parallel to fname */
int	nfiles = 0;			/* Cache fill point */

Biobuf	fout;				/* Output stream */
Biobuf	bstdin;				/* Default input */
Biobuf*	f = 0;				/* Input data */

Label	ltab[LABSIZE];			/* Label name symbol table; entry 0 stands for the end of the script */
Label	*labend = ltab+LABSIZE;		/* End of label table */
Label	*lab = ltab+1;			/* Current Fill point */

int	depth = 0;			/* {} stack pointer */

Rune	bad;				/* Dummy err ptr reference */
Rune	*badp = &bad;


/*
 * Format strings handed to quit().  %S prints a Rune string, so the
 * argument passed with them must be a Rune* (cast to char* to fit
 * quit's prototype).
 */
char	CGMES[]	 = 	"Command garbled: %S";
char	TMMES[]	 = 	"Too much text: %S";
char	LTL[]	 = 	"Label too long: %S";
char	AD0MES[] =	"No addresses allowed: %S";
char	AD1MES[] =	"Only one address allowed: %S";

/*
 * Forward declarations.  The script compiler is fcomp() and its
 * helpers (address, compile, compsub, ycomp, text, stext, rline,
 * getrune); the interpreter is execute() with executable(), command(),
 * substitute(), dosub() and gline().
 */
void	address(Addr *);
void	arout(void);
int	cmp(char *, char *);
int	rcmp(Rune *, Rune *);
void	command(SedCom *);
Reprog	*compile(void);
Rune	*compsub(Rune *, Rune *);
void	dechain(void);
void	dosub(Rune *);
int	ecmp(Rune *, Rune *, int);	/* no caller is visible in this file */
void	enroll(char *);
void	errexit(void);
int	executable(SedCom *);
void	execute(void);
void	fcomp(void);
long	getrune(void);
Rune	*gline(Rune *);
int	match(Reprog *, Rune *);
void	newfile(enum PTYPE, char *);
int 	opendata(void);
Biobuf	*open_file(char *);
Rune	*place(Rune *, Rune *, Rune *);
void	quit(char *, char *);
int	rline(Rune *, Rune *);
Label	*search(Label *);
int	substitute(SedCom *);
char	*text(char *);
Rune	*stext(Rune *, Rune *);
int	ycomp(SedCom *);
char *	trans(int c);
void	putline(Biobuf *bp, Rune *buf, int n);

/*
 * Program entry.  Compiles the script from -e / -f options, or from
 * the first non-option argument when neither was given, resolves the
 * branch labels, queues the remaining arguments as input files
 * (standard input when there are none) and runs the script.
 * With no arguments at all the program exits silently.
 */
void
main(int argc, char **argv)
{
	int	havescript;		/* set once a -e or -f script has been compiled */

	havescript = 0;
	lnum = 0;
	Binit(&fout, 1, OWRITE);
	fcode[nfiles++] = &fout;	/* slot 0 of the output file cache is standard output */

	if(argc == 1)
		exits(0);

	ARGBEGIN{
		case 'n':
			nflag++;
			continue;
		case 'g':
			gflag++;
			continue;
		case 'l':
			lflag++;
			continue;
		case 'e':
			if(argc <= 1)
				quit("missing pattern", 0);
			newfile(P_ARG, ARGF());
			fcomp();
			havescript = 1;
			continue;
		case 'f':
			if(argc <= 1)
				quit("no pattern-file", 0);
			newfile(P_FILE, ARGF());
			fcomp();
			havescript = 1;
			continue;
		default:
			fprint(2, "sed: Unknown flag: %c\n", ARGC());
			continue;
	} ARGEND

	if(!havescript) {
		/* the script is the first remaining argument */
		argc--;
		if(argc < 0)
			quit("missing pattern", 0);
		newfile(P_ARG, *argv++);
		fcomp();
	}

	if(depth != 0)
		quit("Too many {'s", 0);

	/* label 0 is the end of the script: target of b and t without a label */
	ltab[0].address = rep;
	dechain();

	if(argc <= 0)
		enroll(0);		/* Add stdin to cache */
	else
		for(; --argc >= 0; argv++)
			enroll(*argv);

	execute();
	exits(0);
}
void
fcomp(void)
{
	Rune	*tp;
	SedCom	*pt, *pt1;
	int	i;
	Label	*lpt;

	static Rune	*p = addspace;
	static SedCom	**cmpend[DEPTH];	/* stack of {} operations */

	while (rline(linebuf, lbend) >= 0) {
		cp = linebuf;
comploop:
		while(*cp == ' ' || *cp == '\t')
			cp++;
		if(*cp == '\0' || *cp == '#')
			continue;
		if(*cp == ';') {
			cp++;
			goto comploop;
		}

		address(&rep->ad1);
		if (rep->ad1.type != A_NONE) {
			if (rep->ad1.type == A_LAST) {
				if (!lastre)
					quit("First RE may not be null", 0);
				rep->ad1.type = A_RE;
				rep->ad1.u.rp = lastre;
			}
			if(*cp == ',' || *cp == ';') {
				cp++;
				address(&rep->ad2);
				if (rep->ad2.type == A_LAST) {
					rep->ad1.type = A_RE;
					rep->ad2.u.rp = lastre;
				}
			} else
				rep->ad2.type = A_NONE;
		}
		while(*cp == ' ' || *cp == '\t')
			cp++;

swit:
		switch(*cp++) {

			default:
				quit("Unrecognized command: %S", (char *)linebuf);

			case '!':
				rep->negfl = 1;
				goto swit;

			case '{':
				rep->command = BCOM;
				rep->negfl = !(rep->negfl);
				cmpend[depth++] = &rep->u.lb1;
				if(++rep >= pend)
					quit("Too many commands: %S", (char *) linebuf);
				if(*cp == '\0')	continue;
				goto comploop;

			case '}':
				if(rep->ad1.type != A_NONE)
					quit(AD0MES, (char *) linebuf);
				if(--depth < 0)
					quit("Too many }'s", 0);
				*cmpend[depth] = rep;
				if(*cp == 0)	continue;
				goto comploop;

			case '=':
				rep->command = EQCOM;
				if(rep->ad2.type != A_NONE)
					quit(AD1MES, (char *) linebuf);
				break;

			case ':':
				if(rep->ad1.type != A_NONE)
					quit(AD0MES, (char *) linebuf);

				while(*cp == ' ')
					cp++;
				tp = lab->asc;
				while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
					*tp++ = *cp++;
					if(tp >= &(lab->asc[8]))
						quit(LTL, (char *) linebuf);
				}
				*tp = '\0';

				if(lpt = search(lab)) {
					if(lpt->address)
						quit("Duplicate labels: %S", (char *) linebuf);
				} else {
					lab->chain = 0;
					lpt = lab;
					if(++lab >= labend)
						quit("Too many labels: %S", (char *) linebuf);
				}
				lpt->address = rep;
				if (*cp == '#')
					continue;
				rep--;			/* reuse this slot */
				break;

			case 'a':
				rep->command = ACOM;
				if(rep->ad2.type != A_NONE)
					quit(AD1MES, (char *) linebuf);
				if(*cp == '\\')	cp++;
				if(*cp++ != '\n')
					quit(CGMES, (char *) linebuf);
				rep->u.text = p;
				p = stext(p, addend);
				break;
			case 'c':
				rep->command = CCOM;
				if(*cp == '\\')	cp++;
				if(*cp++ != '\n')
					quit(CGMES, (char *) linebuf);
				rep->u.text = p;
				p = stext(p, addend);
				break;
			case 'i':
				rep->command = ICOM;
				if(rep->ad2.type != A_NONE)
					quit(AD1MES, (char *) linebuf);
				if(*cp == '\\')	cp++;
				if(*cp++ != '\n')
					quit(CGMES, (char *) linebuf);
				rep->u.text = p;
				p = stext(p, addend);
				break;

			case 'g':
				rep->command = GCOM;
				break;

			case 'G':
				rep->command = CGCOM;
				break;

			case 'h':
				rep->command = HCOM;
				break;

			case 'H':
				rep->command = CHCOM;
				break;

			case 't':
				rep->command = TCOM;
				goto jtcommon;

			case 'b':
				rep->command = BCOM;
jtcommon:
				while(*cp == ' ')cp++;
				if(*cp == '\0') {
					if(pt = ltab[0].chain) {
						while(pt1 = pt->u.lb1)
							pt = pt1;
						pt->u.lb1 = rep;
					} else
						ltab[0].chain = rep;
					break;
				}
				tp = lab->asc;
				while((*tp++ = *cp++))
					if(tp >= &(lab->asc[8]))
						quit(LTL, (char *) linebuf);
				cp--;
				tp[-1] = '\0';

				if(lpt = search(lab)) {
					if(lpt->address) {
						rep->u.lb1 = lpt->address;
					} else {
						pt = lpt->chain;
						while(pt1 = pt->u.lb1)
							pt = pt1;
						pt->u.lb1 = rep;
					}
				} else {
					lab->chain = rep;
					lab->address = 0;
					if(++lab >= labend)
						quit("Too many labels: %S",
							(char *) linebuf);
				}
				break;

			case 'n':
				rep->command = NCOM;
				break;

			case 'N':
				rep->command = CNCOM;
				break;

			case 'p':
				rep->command = PCOM;
				break;

			case 'P':
				rep->command = CPCOM;
				break;

			case 'r':
				rep->command = RCOM;
				if(rep->ad2.type != A_NONE)
					quit(AD1MES, (char *) linebuf);
				if(*cp++ != ' ')
					quit(CGMES, (char *) linebuf);
				rep->u.text = p;
				p = stext(p, addend);
				break;

			case 'd':
				rep->command = DCOM;
				break;

			case 'D':
				rep->command = CDCOM;
				rep->u.lb1 = pspace;
				break;

			case 'q':
				rep->command = QCOM;
				if(rep->ad2.type != A_NONE)
					quit(AD1MES, (char *) linebuf);
				break;

			case 'l':
				rep->command = LCOM;
				break;

			case 's':
				rep->command = SCOM;
				seof = *cp++;
				if ((rep->u.re1 = compile()) == 0) {
					if(!lastre)
						quit("First RE may not be null.", 0);
					rep->u.re1 = lastre;
				}
				rep->rhs = p;
				if((p = compsub(p, addend)) == 0)
					quit(CGMES, (char *) linebuf);
				if(*cp == 'g') {
					cp++;
					rep->gfl++;
				} else if(gflag)
					rep->gfl++;

				if(*cp == 'p') {
					cp++;
					rep->pfl = 1;
				}

				if(*cp == 'P') {
					cp++;
					rep->pfl = 2;
				}

				if(*cp == 'w') {
					cp++;
					if(*cp++ !=  ' ')
						quit(CGMES, (char *) linebuf);
					text(fname[nfiles]);
					for(i = nfiles - 1; i >= 0; i--)
						if(cmp(fname[nfiles],fname[i]) == 0) {
							rep->fcode = fcode[i];
							goto done;
						}
					if(nfiles >= MAXFILES)
						quit("Too many files in w commands 1", 0);
					rep->fcode = open_file(fname[nfiles]);
				}
				break;

			case 'w':
				rep->command = WCOM;
				if(*cp++ != ' ')
					quit(CGMES, (char *) linebuf);
				text(fname[nfiles]);
				for(i = nfiles - 1; i >= 0; i--)
					if(cmp(fname[nfiles], fname[i]) == 0) {
						rep->fcode = fcode[i];
						goto done;
					}
				if(nfiles >= MAXFILES){
					fprint(2, "sed: Too many files in w commands 2 \n");
					fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
					errexit();
				}
				rep->fcode = open_file(fname[nfiles]);
				break;

			case 'x':
				rep->command = XCOM;
				break;

			case 'y':
				rep->command = YCOM;
				seof = *cp++;
				if (ycomp(rep) == 0)
					quit(CGMES, (char *) linebuf);
				break;

		}
done:
		if(++rep >= pend)
			quit("Too many commands, last: %S", (char *) linebuf);

		if(*cp++ != '\0') {
			if(cp[-1] == ';')
				goto comploop;
			quit(CGMES, (char *) linebuf);
		}

	}
}

/*
 * Open name for writing, creating it with mode 0666 if it cannot be
 * opened, position at its end, and record the new Biobuf in the next
 * slot of fcode (advancing nfiles).  Quits on failure.
 */
Biobuf *
open_file(char *name)
{
	Biobuf *out;
	int fd;

	out = malloc(sizeof(Biobuf));
	if(out == 0)
		quit("Out of memory", 0);

	fd = open(name, OWRITE);
	if(fd < 0)
		fd = create(name, OWRITE, 0666);
	if(fd < 0)
		quit("Cannot create %s", name);

	Binit(out, fd, OWRITE);
	Bseek(out, 0, 2);		/* whence 2: relative to end of file */
	fcode[nfiles] = out;
	nfiles++;
	return out;
}

/*
 * Copy the right-hand side of an s command from cp into rhs, stopping
 * at the delimiter seof.  A backslash is stored as the marker Runemax
 * followed by the escaped rune ("\n" becomes a newline); dosub() uses
 * the marker to tell back references and escaped characters apart.
 *
 * Returns the position after the stored terminating NUL, or 0 when
 * the text does not fit before end, the delimiter is missing, or the
 * line ends right after a backslash.
 */
Rune	*
compsub(Rune *rhs, Rune *end)
{
	Rune	r;

	while ((r = *cp++) != '\0') {
		if(r == '\\') {
			if (rhs < end)
				*rhs++ = Runemax;
			else
				return 0;
			r = *cp++;
			if(r == '\0')	/* nothing follows the backslash: do not scan past the line */
				return 0;
			if(r == 'n')
				r = '\n';
		} else {
			if(r == seof) {
				if (rhs < end)
					*rhs++ = '\0';
				else
					return 0;
				return rhs;
			}
		}
		if (rhs < end)
			*rhs++ = r;
		else	
			return 0;

	}
	return 0;
}

/*
 * Compile the regular expression that starts at cp and ends at the
 * delimiter seof, leaving cp after the delimiter.  Returns 0 for an
 * empty expression ("//"); otherwise the compiled program, which is
 * also remembered in lastre.  Quits with TMMES when the expression is
 * unterminated or does not fit in the conversion buffer.
 */
Reprog *
compile(void)
{
	enum { Maxutf = 4 };		/* no rune needs more UTF-8 bytes than this */
	Rune c;
	char *ep;
	char expbuf[512];

	if((c = *cp++) == seof)		/* '//' */
		return 0;
	ep = expbuf;
	do {
		if (c == 0 || c == '\n')
			quit(TMMES, (char *) linebuf);
		if (c == '\\') {
			/* keep room for this rune and the final NUL */
			if (ep + Maxutf >= expbuf+sizeof(expbuf))
				quit(TMMES, (char *) linebuf);
			ep += runetochar(ep, &c);
			if ((c = *cp++) == 'n')
				c = '\n';
			else if (c == 0)	/* line ends after the backslash */
				quit(TMMES, (char *) linebuf);
		}
		if (ep + Maxutf >= expbuf+sizeof(expbuf))
			quit(TMMES, (char *) linebuf);
		ep += runetochar(ep, &c);
	} while ((c = *cp++) != seof);
	*ep = 0;
	return lastre = regcomp(expbuf);
}

void
regerror(char *s)
{
	USED(s);
	quit(CGMES, (char *) linebuf);
}

/*
 * Select the source of the next script to compile: the string name
 * itself for P_ARG, or the file called name for any other type.
 * Quits if the file cannot be opened.
 */
void
newfile(enum PTYPE type, char *name)
{
	switch(type) {
	case P_ARG:
		prog.pctl.curr = name;
		break;
	default:
		prog.pctl.bp = Bopen(name, OREAD);
		if(prog.pctl.bp == 0)
			quit("Cannot open pattern-file: %s\n", name);
		break;
	}
	prog.type = type;
}

/*
 * Read one line of the script into buf.  A backslash and the rune
 * after it are both stored, so an escaped newline does not end the
 * line.  end is the last writable element of the buffer and is
 * reserved for the terminating NUL; runes that do not fit are dropped.
 *
 * Returns 1 when a line was read, -1 at end of script.
 */
int
rline(Rune *buf, Rune *end)
{
	long c;
	Rune r;

	while ((c = getrune()) >= 0) {
		r = c;
		if (r == '\\') {
			/* strictly below end, so the NUL stored later stays in bounds */
			if (buf < end)
				*buf++ = r;
			if ((c = getrune()) < 0)
				break;
			r = c;
		} else if (r == '\n') {
			*buf = '\0';
			return(1);
		}
		if (buf < end)
			*buf++ = r;
	}
	*buf = '\0';
	return(-1);
}

/*
 * Return the next rune of the script, or a negative value at its end.
 * For a command-line script a newline is returned once after the last
 * rune, and -1 from then on.  For a script file the Biobuf is closed
 * as soon as the read fails.
 */
long
getrune(void)
{
	char *s;
	long c;
	Rune r;

	if(prog.type != P_ARG) {
		c = Bgetrune(prog.pctl.bp);
		if(c < 0)
			Bterm(prog.pctl.bp);
		return c;
	}

	s = prog.pctl.curr;
	if(s == 0)
		return -1;
	if(*s == '\0') {
		prog.pctl.curr = 0;
		return '\n';		/* fake an end-of-line */
	}
	prog.pctl.curr += chartorune(&r, s);
	return r;
}

/*
 * Parse one address at cp into *ap: "$", a /regular expression/ or a
 * decimal line number.  An empty expression yields A_LAST.  When no
 * address is present cp is left unchanged and the type is A_NONE.
 * Line number 0 is rejected.
 */
void
address(Addr *ap)
{
	int c;
	long	lno;

	c = *cp++;
	if(c == '$') {
		ap->type = A_DOL;
		return;
	}
	if(c == '/') {
		seof = c;
		ap->u.rp = compile();
		if(ap->u.rp)
			ap->type = A_RE;
		else
			ap->type = A_LAST;
		return;
	}
	if(c < '0' || c > '9') {
		cp--;			/* not an address: put the rune back */
		ap->type = A_NONE;
		return;
	}

	lno = c - '0';
	for(;;) {
		c = *cp;
		if(c < '0' || c > '9')
			break;
		lno = lno*10 + c - '0';
		cp++;
	}
	if(lno == 0)
		quit("line number 0 is illegal",0);
	ap->type = A_LINE;
	ap->u.line = lno;
}

/*
 * Compare two NUL-terminated byte strings.
 * Returns 0 when they are equal and 1 otherwise.
 */
int
cmp(char *a, char *b)
{
	for(; *a == *b; a++, b++)
		if(*a == '\0')
			return 0;
	return 1;
}

/*
 * Compare two NUL-terminated Rune strings.
 * Returns 0 when they are equal and 1 otherwise.
 */
int
rcmp(Rune *a, Rune *b)
{
	for(; *a == *b; a++, b++)
		if(*a == '\0')
			return 0;
	return 1;
}

/*
 * Convert the rest of the script line at cp to UTF-8 in p, skipping
 * leading blanks, dropping each backslash in favour of the rune that
 * follows it, and skipping blanks after a newline.  Returns the
 * position after the stored NUL.  The destination size is not
 * checked here; the caller must provide enough room.
 */
char *
text(char *p)
{
	Rune	r;

	for(; *cp == '\t' || *cp == ' '; cp++)
		;
	while(*cp != 0) {
		r = *cp++;
		if(r == '\\') {
			r = *cp++;
			if(r == 0)
				break;
		}
		if(r == '\n')
			for(; *cp == '\t' || *cp == ' '; cp++)
				;
		p += runetochar(p, &r);
	}
	*p = '\0';
	p++;
	return p;
}

/*
 * Copy the rest of the script line at cp into the Rune buffer p,
 * skipping leading blanks, dropping each backslash in favour of the
 * rune that follows it, and skipping blanks after a newline.
 * Quits with TMMES when fewer than two runes remain before end.
 * Returns the position after the stored NUL.
 */
Rune *
stext(Rune *p, Rune *end)
{
	Rune	r;

	for(; *cp == '\t' || *cp == ' '; cp++)
		;
	for(;;) {
		if(*cp == 0)
			break;
		if(*cp == '\\') {
			cp++;
			if(*cp == 0)
				break;
		}
		if(p >= end-1)
			quit(TMMES, (char *) linebuf);
		r = *cp++;
		*p++ = r;
		if(r == '\n')
			for(; *cp == '\t' || *cp == ' '; cp++)
				;
	}
	*p = 0;
	p++;
	return p;
}


/*
 * Look among the label table entries that precede ptr for one with
 * the same name as ptr.  Returns that entry, or 0 if there is none.
 */
Label *
search (Label *ptr)
{
	Label	*lp;

	lp = ltab;
	while(lp < ptr) {
		if(rcmp(lp->asc, ptr->asc) == 0)
			return lp;
		lp++;
	}
	return 0;
}

void
dechain(void)
{
	Label	*lptr;
	SedCom	*rptr, *trptr;

	for(lptr = ltab; lptr < lab; lptr++) {

		if(lptr->address == 0)
			quit("Undefined label: %S", (char *) lptr->asc);

		if(lptr->chain) {
			rptr = lptr->chain;
			while(trptr = rptr->u.lb1) {
				rptr->u.lb1 = lptr->address;
				rptr = trptr;
			}
			rptr->u.lb1 = lptr->address;
		}
	}
}

/*
 * Compile the two strings of a y command, which start at cp and are
 * delimited by seof, into a translation table stored in r->u.text.
 * Element 0 of the table is the highest rune it covers; element c+1
 * is the replacement for rune c.
 *
 * In either string "\n" stands for a newline and a backslash followed
 * by any other rune stands for that rune, so the delimiter and the
 * backslash itself can be given as "\<delimiter>" and "\\".  The first
 * pass already skips the rune after a backslash when it sizes the
 * table; the second pass must decode escapes the same way so that
 * every index stays within the table.
 *
 * Returns 1 on success with cp moved past the closing delimiter, and
 * 0 if the command is malformed or the strings differ in length.
 */
int
ycomp(SedCom *r)
{
	int 	i;
	Rune	*rp;
	Rune	c, d, *tsp, highc;
	Rune	*sp;

	/* pass 1: find the end of the first string and its highest rune */
	highc = 0;
	for(tsp = cp; *tsp != seof; tsp++) {
		if(*tsp == '\\')
			tsp++;
		if(*tsp == '\n' || *tsp == '\0')
			return(0);
		if (*tsp > highc) highc = *tsp;
	}
	tsp++;
	if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
		quit("Out of memory", 0);
	*rp++ = highc;				/* save upper bound */
	for (i = 0; i <= highc; i++)
		rp[i] = i;

	/* pass 2: pair each rune of the first string with one of the second */
	sp = cp;
	while((c = *sp++) != seof) {
		if(c == '\\') {
			/* pass 1 guarantees a rune follows and that it is <= highc */
			c = *sp++;
			if(c == 'n')
				c = '\n';
		}
		d = *tsp++;
		if(d == seof || d == '\0')	/* second string is shorter */
			goto bad;
		if(d == '\\' && *tsp != '\0') {
			d = *tsp++;
			if(d == 'n')
				d = '\n';
		}
		rp[c] = d;
	}
	if(*tsp != seof)			/* second string is longer */
		goto bad;
	cp = tsp+1;
	return(1);

bad:
	free(r->u.text);
	r->u.text = 0;
	return(0);
}

/*
 * Main loop of the interpreter: read each input line into linebuf and
 * run the compiled commands over it.  A command that sets jflag
 * continues at its branch target (a null target ends the cycle); one
 * that sets delflag ends the cycle without the automatic print.
 * Queued appends and reads are written after the line.
 */
void
execute(void)
{
	SedCom	*ipc;

	for(;;) {
		spend = gline(linebuf);
		if(spend == 0)
			break;

		ipc = pspace;
		while(ipc->command != 0) {
			if(executable(ipc)) {
				command(ipc);
				if(delflag)
					break;
				if(jflag) {
					jflag = 0;
					ipc = ipc->u.lb1;
					if(ipc == 0)
						break;
					continue;
				}
			}
			ipc++;
		}

		if(!nflag && !delflag)
			putline(&fout, linebuf, spend-linebuf);
		if(aptr > abuf)
			arout();
		delflag = 0;
	}
}
	/* determine if a statement should be applied to an input line */
int
executable(SedCom *ipc)
{
	if (ipc->active) {	/* Addr1 satisfied - accept until Addr2 */
		if (ipc->active == 1)		/* Second line */
			ipc->active = 2;
		switch(ipc->ad2.type) {
			case A_NONE:	/* No second addr; use first */
				ipc->active = 0;
				break;
			case A_DOL:	/* Accept everything */
				return !ipc->negfl;
			case A_LINE:	/* Line at end of range? */
				if (lnum <= ipc->ad2.u.line) {
					if (ipc->ad2.u.line == lnum)
						ipc->active = 0;
					return !ipc->negfl;
				}
				ipc->active = 0;	/* out of range */
				return ipc->negfl;
			case A_RE:	/* Check for matching R.E. */
				if (match(ipc->ad2.u.rp, linebuf))
					ipc->active = 0;
				return !ipc->negfl;
			default:		/* internal error */
				quit("Internal error", 0);
		}
	}
	switch (ipc->ad1.type) {	/* Check first address */
		case A_NONE:			/* Everything matches */
			return !ipc->negfl;
		case A_DOL:			/* Only last line */
			if (dolflag)
				return !ipc->negfl;
			break;
		case A_LINE:			/* Check line number */
			if (ipc->ad1.u.line == lnum) {
				ipc->active = 1;	/* In range */
				return !ipc->negfl;
			}
			break;
		case A_RE:			/* Check R.E. */
			if (match(ipc->ad1.u.rp, linebuf)) {
				ipc->active = 1;	/* In range */
				return !ipc->negfl;
			}
			break;
		default:
			quit("Internal error", 0);
	}
	return ipc->negfl;
}

/*
 * Match pattern against the current line.  subexp[0] is preset so
 * that its start is buf, while linebuf is passed as the string; the
 * intent appears to be to start the search at buf but keep the line
 * start at linebuf (NOTE(review): confirm against regexp(3)).
 *
 * On success sets loc1/loc2 to the bounds of the match and returns 1.
 * On failure clears both and returns 0.  A null pattern returns 0
 * without touching loc1/loc2.
 */
int
match(Reprog *pattern, Rune *buf)
{
	if(pattern == 0)
		return 0;

	subexp[0].s.rsp = buf; 
	subexp[0].e.rep = 0;
	if(rregexec(pattern, linebuf, subexp, MAXSUB) <= 0) {
		loc1 = 0;
		loc2 = 0;
		return 0;
	}
	loc1 = subexp[0].s.rsp;
	loc2 = subexp[0].e.rep;
	return 1;
}

/*
 * Apply the s command ipc to the current line.  Returns 0 if the
 * pattern does not match; otherwise performs one replacement, or all
 * of them when the global flag is set, sets sflag and returns 1.
 */
int
substitute(SedCom *ipc)
{
	int empty;

	if(!match(ipc->u.re1, linebuf))
		return 0;

	sflag = 1;
	if(ipc->gfl == 0) {		/* single substitution */
		dosub(ipc->rhs);
		return 1;
	}

	/*
	 * global substitution.  patterns such as '$' or '^' can match
	 * the empty string; after replacing such a match, step over one
	 * rune so the same spot is not matched forever.
	 */
	for(;;) {
		empty = (loc2 - loc1) == 0;
		dosub(ipc->rhs);	/* dosub moves loc2 */
		if(*loc2 == 0)		/* end of string */
			break;
		if(empty) {
			loc2++;		/* bump over zero-length match */
			if(*loc2 == 0)	/* end of string */
				break;
		}
		if(!match(ipc->u.re1, loc2))
			break;
	}
	return 1;
}

/*
 * Helper for dosub: the rebuilt line no longer fits in genbuf.
 * Report it and exit rather than write past the buffer.
 */
static void
linetoolong(void)
{
	fprint(2, "sed: Output line too long.\n");
	errexit();
}

/*
 * Replace the text between loc1 and loc2 in linebuf with the compiled
 * right-hand side rhsbuf.  The new line is assembled in genbuf and
 * copied back; '&' inserts the matched text and the marker Runemax
 * followed by a digit 1-9 inserts that sub-expression.  Any other
 * rune after the marker is copied literally.
 *
 * On return loc2 points just after the replacement in linebuf and
 * spend at the terminating NUL.  Exits if the result does not fit in
 * genbuf or a back reference has no match.
 */
void
dosub(Rune *rhsbuf)
{
	Rune *lp, *sp;
	Rune *rp;
	Rune *ep;
	int c, n;

	lp = linebuf;
	sp = genbuf;
	rp = rhsbuf;
	ep = &genbuf[LBSIZE];		/* one past the end of genbuf */
	while (lp < loc1) {
		*sp++ = *lp++;
		if (sp >= ep)
			linetoolong();
	}
	while(c = *rp++) {
		if (c == '&') {
			/* check first so place() is never asked to write past genbuf */
			if (loc2 - loc1 >= ep - sp)
				linetoolong();
			sp = place(sp, loc1, loc2);
			continue;
		}
		if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB+'0') {
			n = c-'0';
			if (subexp[n].s.rsp && subexp[n].e.rep) {
				if (subexp[n].e.rep - subexp[n].s.rsp >= ep - sp)
					linetoolong();
				sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
				continue;
			}
			else {
				fprint(2, "sed: Invalid back reference \\%d\n",n);
				errexit();
			}
		}
		*sp++ = c;
		if (sp >= ep)
			linetoolong();
	}
	lp = loc2;
	loc2 = sp - genbuf + linebuf;
	while (*sp++ = *lp++)
		if (sp >= ep)
			linetoolong();
	lp = linebuf;
	sp = genbuf;
	while (*lp++ = *sp++)
		;
	spend = lp-1;
}

/*
 * Copy the runes from l1 up to (not including) l2 to sp, which points
 * into genbuf.  Copying stops at the end of genbuf instead of running
 * past it; a warning is printed when the buffer has been filled.
 * Returns the position after the last rune stored.
 */
Rune *
place(Rune *sp, Rune *l1, Rune *l2)
{
	Rune *end;

	end = &genbuf[LBSIZE];		/* one past the end of genbuf */
	while (l1 < l2 && sp < end)
		*sp++ = *l1++;
	if (sp >= end)
		fprint(2, "sed: Output line too long.\n");
	return(sp);
}

/*
 * Return the printable escape used by the l command for rune c:
 * \b, \n, \r, \t or \\ for those characters, otherwise \x followed by
 * four hexadecimal digits taken from the low 16 bits of c.
 * The hexadecimal form is built in a static buffer that is
 * overwritten by the next call.
 */
char *
trans(int c)
{
	static char buf[] = "\\x0000";
	static char hex[] = "0123456789abcdef";
	int i;

	if(c == '\b')
		return "\\b";
	if(c == '\n')
		return "\\n";
	if(c == '\r')
		return "\\r";
	if(c == '\t')
		return "\\t";
	if(c == '\\')
		return "\\\\";

	/* most significant nibble first, into buf[2..5] */
	for(i = 0; i < 4; i++)
		buf[2+i] = hex[(c >> (12 - 4*i)) & 0xF];
	return buf;
}

/*
 * Execute one compiled command on the current line (pattern space in
 * linebuf, ending at spend).  Results are passed back to execute()
 * through globals: delflag ends the cycle without printing, jflag
 * asks for a branch to ipc->u.lb1, and aptr queues a and r commands
 * for arout().
 *
 * The G, H and N commands stop at the end of their buffer and always
 * leave it NUL terminated; text that does not fit is dropped.
 */
void
command(SedCom *ipc)
{
	int	i, c;
	Rune	*p1, *p2;
	char	*ucp;
	Rune	*rp;
	Rune	*execp;

	switch(ipc->command) {

		case ACOM:
			*aptr++ = ipc;
			if(aptr >= abuf+MAXADDS) {
				/* quit() adds the "sed: " prefix and the newline itself */
				quit("Too many appends after line %ld",
					(char *) lnum);
			}
			*aptr = 0;
			break;
		case CCOM:
			delflag = 1;
			if(ipc->active == 1) {
				for(rp = ipc->u.text; *rp; rp++)
					Bputrune(&fout, *rp);
				Bputc(&fout, '\n');
			}
			break;
		case DCOM:
			delflag++;
			break;
		case CDCOM:
			/* delete through the first newline; with no newline behave like d */
			p1 = p2 = linebuf;
			while(*p1 != '\n') {
				if(*p1++ == 0) {
					delflag++;
					return;
				}
			}
			p1++;
			while(*p2++ = *p1++)
				;
			spend = p2-1;
			jflag++;
			break;
		case EQCOM:
			Bprint(&fout, "%ld\n", lnum);
			break;
		case GCOM:
			p1 = linebuf;
			p2 = holdsp;
			while(*p1++ = *p2++)
				;
			spend = p1-1;
			break;
		case CGCOM:
			/* lbend is the last element of linebuf and is kept for the NUL */
			if(spend < lbend)
				*spend++ = '\n';
			p1 = spend;
			p2 = holdsp;
			while(p1 < lbend && (*p1 = *p2++) != 0)
				p1++;
			*p1 = 0;
			spend = p1;
			break;
		case HCOM:
			p1 = holdsp;
			p2 = linebuf;
			while(*p1++ = *p2++);
			hspend = p1-1;
			break;
		case CHCOM:
			/* hend is the last element of holdsp and is kept for the NUL */
			if(hspend < hend)
				*hspend++ = '\n';
			p1 = hspend;
			p2 = linebuf;
			while(p1 < hend && (*p1 = *p2++) != 0)
				p1++;
			*p1 = 0;
			hspend = p1;
			break;
		case ICOM:
			for(rp = ipc->u.text; *rp; rp++)
				Bputrune(&fout, *rp);
			Bputc(&fout, '\n');
			break;
		case BCOM:
			jflag = 1;
			break;
		case LCOM:
			/* printable ASCII as is, everything else through trans(); fold long output */
			c = 0;
			for (i = 0, rp = linebuf; *rp; rp++) {
				c = *rp;
				if(c >= 0x20 && c < 0x7F && c != '\\') {
					Bputc(&fout, c);
					if(i++ > 71) {
						Bprint(&fout, "\\\n");
						i = 0;
					}
				} else {
					for (ucp = trans(*rp); *ucp; ucp++){
						c = *ucp;
						Bputc(&fout, c);
						if(i++ > 71) {
							Bprint(&fout, "\\\n");
							i = 0;
						}
					}
				}
			}
			if(c == ' ')
				Bprint(&fout, "\\n");
			Bputc(&fout, '\n');
			break;
		case NCOM:
			if(!nflag)
				putline(&fout, linebuf, spend-linebuf);

			if(aptr > abuf)
				arout();
			if((execp = gline(linebuf)) == 0) {
				delflag = 1;
				break;
			}
			spend = execp;
			break;
		case CNCOM:
			if(aptr > abuf)
				arout();
			/* with a full buffer there is no room for the separator */
			if(spend < lbend)
				*spend++ = '\n';
			if((execp = gline(spend)) == 0) {
				delflag = 1;
				break;
			}
			spend = execp;
			break;
		case PCOM:
			putline(&fout, linebuf, spend-linebuf);
			break;
		case CPCOM:
	cpcom:
			/* linebuf holds Runes: Bputc would write only the low byte of each */
			for(rp = linebuf; *rp && *rp != '\n'; rp++)
				Bputrune(&fout, *rp);
			Bputc(&fout, '\n');
			break;
		case QCOM:
			if(!nflag)
				putline(&fout, linebuf, spend-linebuf);
			if(aptr > abuf)
				arout();
			exits(0);
		case RCOM:
			*aptr++ = ipc;
			if(aptr >= &abuf[MAXADDS])
				quit("Too many reads after line %ld",
					(char *) lnum);
			*aptr = 0;
			break;
		case SCOM:
			i = substitute(ipc);
			if(i && ipc->pfl)
				if(ipc->pfl == 1)
					putline(&fout, linebuf, spend-linebuf);
				else
					goto cpcom;
			if(i && ipc->fcode)
				goto wcom;
			break;

		case TCOM:
			if(sflag == 0)	break;
			sflag = 0;
			jflag = 1;
			break;

		wcom:
		case WCOM:
			putline(ipc->fcode,linebuf, spend-linebuf);
			break;
		case XCOM:
			/* swap pattern and hold space through genbuf */
			p1 = linebuf;
			p2 = genbuf;
			while(*p2++ = *p1++);
			p1 = holdsp;
			p2 = linebuf;
			while(*p2++ = *p1++);
			spend = p2 - 1;
			p1 = genbuf;
			p2 = holdsp;
			while(*p2++ = *p1++);
			hspend = p2 - 1;
			break;
		case YCOM:
			/* table from ycomp(): element 0 is the highest rune covered */
			p1 = linebuf;
			p2 = ipc->u.text;
			for (i = *p2++;	*p1; p1++){
				if (*p1 <= i) *p1 = p2[*p1];
			}
			break;
	}

}

/*
 * Write the n runes at buf to bp followed by a newline, and flush bp
 * when the l flag was given on the command line.
 */
void
putline(Biobuf *bp, Rune *buf, int n)
{
	for(; n != 0; n--) {
		Bputrune(bp, *buf);
		buf++;
	}
	Bputc(bp, '\n');
	if(lflag)
		Bflush(bp);
}

/*
 * Compare the first count runes of a and b.
 * Returns 1 when they are all equal and 0 otherwise.
 */
int
ecmp(Rune *a, Rune *b, int count)
{
	for(; count != 0; count--, a++, b++)
		if(*a != *b)
			return 0;
	return 1;
}

/*
 * Write out everything queued in abuf by the a and r commands, in
 * order, then empty the queue.  Text of an a command is written
 * followed by a newline.  For an r command the named file is copied
 * to the output; a file that cannot be opened, or whose name does not
 * fit in the local name buffer, is skipped silently.
 */
void
arout(void)
{
	enum { Maxutf = 4 };		/* no rune needs more UTF-8 bytes than this */
	Rune	*p1;
	Biobuf	*fi;
	int	c;
	char	*s;
	char	buf[128];

	for (aptr = abuf; *aptr; aptr++) {
		if((*aptr)->command == ACOM) {
			for(p1 = (*aptr)->u.text; *p1; p1++ )
				Bputrune(&fout, *p1);
			Bputc(&fout, '\n');
		} else {
			/* convert the file name to UTF-8, keeping room for the NUL */
			s = buf;
			for(p1 = (*aptr)->u.text; *p1; p1++) {
				if(s + Maxutf >= buf + sizeof(buf))
					break;
				s += runetochar(s, p1);
			}
			if(*p1 != 0)		/* name too long to open */
				continue;
			*s = '\0';
			if((fi = Bopen(buf, OREAD)) == 0)
				continue;
			while((c = Bgetc(fi)) >= 0)
				Bputc(&fout, c);
			Bterm(fi);
		}
	}
	aptr = abuf;
	*aptr = 0;
}

/*
 * Terminate the program with the exit status "error".
 */
void
errexit(void)
{
	char	*status;

	status = "error";
	exits(status);
}

/*
 * Print "sed: ", then msg formatted with the single argument arg,
 * then a newline, all on file descriptor 2, and exit through
 * errexit().  msg is used as a format string, so callers pass only
 * fixed formats.
 */
void
quit (char *msg, char *arg)
{
	int	errfd;

	errfd = 2;
	fprint(errfd, "sed: ");
	fprint(errfd, msg, arg);
	fprint(errfd, "\n");
	errexit();
}

/*
 * Read the next input line into the buffer at addr (a position inside
 * linebuf), without its newline, and NUL terminate it.  Returns the
 * position of the NUL, or 0 when all input streams are exhausted.
 *
 * Each call clears sflag and increments lnum.  NUL runes in the input
 * are dropped, and runes that would go past lbend are discarded.
 */
Rune *
gline(Rune *addr)
{
	long	c;
	Rune *p;

	/*
	 * one rune of lookahead read after each newline, kept between
	 * calls: 0 means none is pending, a negative value means the
	 * current stream has ended
	 */
	static long peekc = 0;

	if (f == 0 && opendata() < 0)
		return 0;
	sflag = 0;
	lnum++;
/*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
	do {
		p = addr;
		for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
			if (c == '\n') {
				/* peek past the newline so the last line is known while it is processed */
				if ((peekc = Bgetrune(f)) < 0) {
					if (fhead == 0)	/* no more files queued */
						dolflag = 1;
				}
				*p = '\0';
				return p;
			}
			if (c && p < lbend)
				*p++ = c;
		}
		/* return partial final line, adding implicit newline */
		if(p != addr) {
			*p = '\0';
			peekc = -1;	/* the next call goes straight to the next stream */
			if (fhead == 0)
				dolflag = 1;
			return p;
		}
		peekc = 0;
		Bterm(f);
	} while (opendata() > 0);	/* Switch to next stream */
	f = 0;
	return 0;
}

	/* Data file input section - the intent is to transparently
	 *	catenate all data input streams.
	 */

/*
 * Append filename to the list of input files; a null name stands for
 * standard input.  The string is not copied.  Quits when out of
 * memory.
 */
void
enroll(char *filename)
{
	FileCache *node;

	node = (FileCache *) malloc(sizeof (FileCache));
	if(node == 0)
		quit("Out of memory", 0);
	node->name = filename;	/* 0 => stdin */
	node->next = 0;

	if(ftail != 0)
		ftail->next = node;
	else
		fhead = node;
	ftail = node;
}

int
opendata(void)
{
	if (fhead == 0)
		return -1;
	if (fhead->name) {
		if ((f = Bopen(fhead->name, OREAD)) == 0)
			quit("Can't open %s", fhead->name);
	} else {
		Binit(&bstdin, 0, OREAD);
		f = &bstdin;
	}
	fhead = fhead->next;
	return 1;
}

Space Glenda

Copyright © 2005 Lucent Technologies, Russ Cox, MIT.
See license for details.