#!/usr/local/bin/perl
#
# midas516.pl assembler for the Honeywell DDP-516/316 computers
#
#	Copyright  2013  James A. Markevitch
#		ALL RIGHTS RESERVED
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
#
#
# MIDAS style assembler for the Honeywell DDP-516/316 computers intended
# to be compatible with the syntax used by BBN for the ARPANET IMP/TIP
# assembly language software.
#
# This was created based on a document describing the PDP-6 MIDAS assembler
# syntax and an output listing of a version of the IMP software.  Much of
# the syntax and semantics were inferred from them and may or may not be
# correct.  It is also highly likely that the original assembler supported
# a richer syntax, as did the PDP-6 MIDAS assembler, but without any examples
# or documentation of the syntax, there was no attempt to speculatively
# enhance this program.
#
# Subsequent examination of the PDP-1 MIDAS assembler document provided
# additional information, including identifying features of this assembler
# that are probably not implemented properly (e.g. nested macro defintions).
# The PDP-1 MIDAS document still doesn't completely describe some of the
# features found in the IMP listing.
#
# This program also implements code to generate an interrupt "concordance"
# listing that was, presumably, created by a post-processor in the original
# tool chain.
#
#
# General desription
#
# The program is a traditional two-pass assembler, where the intent of the
# first pass is to assign values to symbols and identify memory regions,
# and the second pass determines all final memory values and generates
# most output.
#
# The assembler has no built-in symbols for the target processor and only
# a small number of keywords.  The input is free format, allowing multiple
# words (or instructions) per line.  A single space character and multiple
# consecutive space characters have different treatment, as does an
# end-of-line.
#
# Except for implementing the syntax for directives, the assembler loops,
# evaluating tokens to determine and compute the value of a "word," then
# moves on to the next word.  A word consists of one or more "fields"
# separated by a *single* space character and terminated by a non-field
# character, including end-of-line and multiple spaces.  Fields consist
# of "syllables" (symbols and numbers), and numeric operators, such as
# plus, minus, etc.  The detailed syntax is described below.
#
# The assembler maintains a "location counter" that is used as the address
# associated with the word.  The location counter is set by following a
# word with the "/" character (with no spaces between the last field and
# the "/" character).  Normally, the location counter is incremented after
# each word.
#
# Addresses in the range 0 - 077777 (octal) represent memory locations that
# can contain the values of words.  Address greater than or equal to 0100000
# are special and represent a virtual "string space" instead of memory
# locations.  These strings can be output to a special output file intended
# for just that purpose.
#
# Words in the range 0 - 177777 (octal) represent normal data that can be
# stored in memory locations.  The word 0200000 indicates that the memory
# location is a blank storage segment.  Words in the range 0300000 - 0307777
# represent an interrupt level specification.
# 

#
# Unsupported syntax
#	NULL
#	OPSYN
#	STOP
#	DIMENSION
#	OFFSET
#	START
#	VARIABLES
#	Variable names using #
#	Address tag of the form A+3:
#	[01]IF N syntax
#	Arbitrary sequences of addition characters
#	Second symbol in IRP for remainder of list
#
# Incorrect behavior
#	.ASCII should read a character at a time for listing improvement
#	177777 appears to be treated differently from -1 for constant region
#
#	Symbol distinction is not limited to 6 characters
#	EQUALS is a syntactic operation, rather than symbol table operation
#	Separate symbol tables for names, synonyms, macros, pseudo-instructions
#	Support DEFINE and macro name on separate lines
#
# Questionable behavior
#	Is UNCON implemented properly?
#	Is "Z" handled correctly?
#	Should .RPTCNT be supported?
#	Should CHARACTER, FLEXO, TEXT be supported?
#	Should EQUALS substitution be remembered across passes?
#	Are 0IF and 1IF allowed to be redefined?
#
#	How should overflow be handled for + and -?
#	Should "T" fold the result?
#	Should "Q" and "R" treat numbers as signed or unsigned?
#	Which operators should preserve $V_ADDR?
#	What if [01]IF P is terminated by a slash?
#	Verify correct truncation of results by 2^16
#	What if macro argument matches a different dummy argument name?
#
# Todo
#	-longnames to differentiate names longer than 6 characters
#	-nowarn to suppress warnings
#	-compat or -strict for strict compabibility w/original version
#	-pgtitle for title lines at all page breaks
#	-nomulti to suppress multiple space to tab conversion
#	-nconst to limit number of constant regions
#	-lc for initial location counter value
#	-noinctl to not expand ^X into control
#	-noinquote to not expand "X" into control
#	-outctl to output controls as ^X
#	-outquote to output controls as "X"
#
#	token_bracketed can infinite loop if no closing bracket exists
#	token_definebody can infinite loop if no TERMINATE exists
#	token_irpbody can infinite loop if no ENDIRP exists
#	Prevent runaway input from consuming all of memory
#
#	Verify that unget_token(), tabs, spaces, and $g_atspace are consistent
#	Remove hack for "START" in parse_funcs
#	Create issymbol() to pattern match symbol
#	Ensure that subst_arg is not fooled by a period within a word
#	Figure out top of page and blank lines
#	Clarify space and tab
#	Ensure that single-quote is handled properly
#	Fix concordance order to match original
#	More general purpose mapping of control characters within .ASCII
#	Add general method for pruning symbols from concordance listing
#	Is slash character allowed in multiple positions in DEFINE statement?
#	How should tab and newline be handled in IRP?
#	Generate error message for incorrect name after TERMINATE
#	Ensure proper handling of macros with no args and too many args
#	Proper skipping of first line of defsfile and asmfile
#
#	Generate DDP-516 binary format files (what is the format?)
#	Display symbol table
#	Display constant/variable sections
#	MIDAS-style error messages
#	6-character names for all keywords
#	Special case -0
#	Identify re-definitions of tags, etc.
#	Ignore illegal characters
#	Handle indefinite location counter
#	Illegal expression with relocatable symbols
#	Print constant and variable regions on pass 1 and 2
#	Add check for current page reference in field 2 of a word
#	Generate error for octal numbers containing the digit 8 or 9
#	Generate error when using an undefined symbol on second pass
#	Add pdp1 mode
#	Test multiple expressions in WORD pseudo-instructions
#

#
# $g_lnum	Current line number
# $g_line	Current line
# $g_inbuf	Current input buffer
# @g_inbufstack	Stack of input buffers (for macro/repeat/etc. substitution)
# @g_filler	Stack of input fill functions
# @g_irp	Stack of IRP descriptors
#
# %g_synonym	Synonyms
# %g_macro	Macros
# @g_const	Constants
#
# $g_lc		Location counter
# $g_lasttag	Most recent address tag encountered
# $g_lastop	Most recent pseudo-instruction or macro encountered
#

$utilname = "midas516.pl";
$utilvers = "1.1 9/1/2013";

$V_UNDEF	= 004000000000;		# Value is undefined
$V_ADDR		= 002000000000;		# Value is an address
$V_SPECIAL	= 000000200000;		# Special value (bss or string)
$V_SIGN		= 000000100000;		# Sign bit of numeric value
$V_VALUE	= 000000177777;		# Actual numeric value

$V_STATUS = $V_UNDEF | $V_ADDR;		# Mask of status bits
$V_STICKY = $V_STATUS | $V_SPECIAL;	# Bits to save after binary operation
$V_WORD = $V_UNDEF | $V_SPECIAL | $V_VALUE;	# Bits after parse_word

$PREC_NEG	= 0;
$PREC_AND	= 1;
$PREC_OR	= 1;
$PREC_XOR	= 1;
$PREC_TIMES	= 1;
$PREC_DIVIDE	= 1;
$PREC_REMAINDER	= 1;
$PREC_PLUS	= 3;
$PREC_MINUS	= 3;
$PREC_NONE	= 4;

$pat_decimal = "[0-9]+\.";
$pat_octal = "[0-7]+";
$pat_symbol = "[0-9A-Z\.]+";
$pat_separator = "[\:\/\,\+\-\[\]\(\)]";
$pat_special = "\"[AQRTUXZ]\"";
$pat_newline = "[\n\r\f]";
$pat_space = " ";
$pat_optspace = "[ \t]";
$pat_whitespace = "[ \t\n]";

%parse_funcs = (
	".ASCII",	\&parse_ascii,
	"CONSTANTS",	\&parse_constants,
	"DECIMAL",	\&parse_decimal,
	"DEFINE",	\&parse_define,
	"EQUALS",	\&parse_equals,
	"EXPUNGE",	\&parse_expunge,
	"IRP",		\&parse_irp,
	"IRPC",		\&parse_irpc,
	"OCTAL",	\&parse_octal,
	"PNTNUM",	\&parse_pntnum,
	"PRINT",	\&parse_print,
	"PRINTX",	\&parse_printx,
	"REPEAT",	\&parse_repeat,
	"WORD",		\&parse_word_op,

	" ",		\&parse_no_operation,
	"\t",		\&parse_no_operation,
	"\n",		\&parse_no_operation,

	"START",	\&parse_no_operation,
);

@skipconc= (	"ADD", "ANA", "AOA", "ALR", "ALS", "ARR", "ARS", "BSS", "CAR",
		"CAL", "CAS", "CHS", "CMA", "CRA",
		"ERA", "I", "IAB", "ICA", "ICL", "ICR", "IMA", "IRS", "JMP",
		"JST", "K", "LDA", "LDX", "LGL", "LGR", "LLL", "LLR", "LLS",
		"LRL", "LRR", "LRS", "NOP", "SKP", "SLN", "SLZ",
		"SMI", "SNZ", "SPL", "SSM", "SSP", "STA", "STX", "SUB", "SZE",
		"TCA", "X", "XI" );

$g_lpp = 65;
$g_lead = 4;
$g_clead = 2;

while (@ARGV > 0 && $ARGV[0] =~ /^\-/) {
	$arg = shift(@ARGV);
	&prusage(0)					if ($arg eq "-help");
	$f_noconc = 1, next				if ($arg eq "-noconc");
	$f_noprint = 1, next				if ($arg eq "-noprint");
	$f_notitle = 1, next				if ($arg eq "-notitle");
	$f_quiet = 1, next				if ($arg eq "-q");
	$f_showff = 1, next				if ($arg eq "-showff");
	&prversion()					if ($arg eq "-version");
	&prusage(1) if (@ARGV < 1);
	push(@g_banners, shift(@ARGV)), next		if ($arg eq "-banner");
	push(@g_cmds, shift(@ARGV)), next		if ($arg eq "-c");
	$g_concfile = shift(@ARGV), next		if ($arg eq "-concfile");
	$g_clead = shift(@ARGV), next			if ($arg eq "-clead");
	$g_datestr = shift(@ARGV), next			if ($arg eq "-date");
	push(@g_defsfiles, shift(@ARGV)), next		if ($arg eq "-defs");
	$g_lead = shift(@ARGV), next			if ($arg eq "-lead");
	$g_lpp = shift(@ARGV), next			if ($arg eq "-lpp");
	$g_lstfile = shift(@ARGV), next			if ($arg eq "-lstfile");
	$g_memfile = shift(@ARGV), next			if ($arg eq "-memfile");
	$g_skiplines = shift(@ARGV), next		if ($arg eq "-skip");
	$g_strfile = shift(@ARGV), next			if ($arg eq "-strfile");
	$g_title = shift(@ARGV), next			if ($arg eq "-title");
	&prusage(1);
}

if (@ARGV != 1) {
	print "Error: missing assembly file name; -help for help\n";
	exit(1);
}
$g_asmfile = shift(@ARGV);

if (!defined($g_datestr)) {
	@timeinfo = localtime;
	$ampm = ($timeinfo[2] >= 12) ? "PM" : "AM";
	$hour = $timeinfo[2] % 12;
	$hour = 12 if ($hour == 0);
	$g_datestr = sprintf("%d:%02d %s %d/%d/%d",
			     $hour, $timeinfo[1], $ampm,
			     $timeinfo[4]+1, $timeinfo[3], $timeinfo[5]+1900);
}

if (defined($g_lstfile)) {
	open(LSTFILE, ">$g_lstfile") || die "Error: cannot create $g_lstfile\n";
}
if (defined($g_lstfile)) {
	open(MEMFILE, ">$g_memfile") || die "Error: cannot create $g_memfile\n";
}

while (@g_cmds) {
	$cmd = shift(@g_cmds);
	if ($cmd !~ /^([\.A-Z0-9]+)\=([0-7]+)$/) {
		die "Error: invalid command: $cmd\n";
	}
	($sym, $value) = ($1, $2);
	$g_symtab{$sym} = oct($value);
}

&pass(1);
#&dump_undefined();
&pass(2);
&dump_undefined();
&print_strings();
&print_concordance();

close(MEMFILE) if (defined($g_memfile));
close(LSTFILE) if (defined($g_lstfile));

sub prusage {
	my($code) = @_;

	print "Usage: $utilname  [ options ... ] asmfile\n";
	print "    -banner string   Add banner line at top of each page\n";
	print "    -c sym=value     Assign value to symbol\n";
	print "    -concfile file   Concordance output file\n";
	print "    -date string     Use string for date in listing\n";
	print "    -defs file       File containing definitions\n";
	print "    -help            Display this message\n";
	print "    -lstfile file    Listing output file\n";
	print "    -memfile file    Memory output file\n";
	print "    -noconc          Do now not show concordance in listing\n";
	print "    -noprint         Suppress PRINT, PRINTX, and PNTNUM output\n";
	print "    -notitle         Do not parse a title line\n";
	print "    -q               Do not output messages (quiet)\n";
	print "    -showff          Show form-feed in listing\n";
	print "    -skip nlines     Skip first nlines of input (default 1)\n";
	print "    -strfile file    String output file\n";
	print "    -title string    Title of program in listing\n";
	print "    -version         Print version number\n";

	exit($code);
}

sub prversion {
	print "$utilname version $utilvers\n";
	exit(0);
}

sub pass {
	my($pass) = @_;

	$g_lnum = 0;
	$g_line = undef;
	$g_inbuf = "";
	$g_atspace = 1;
	$g_havechar = 0;
	@g_inbufstack = ();
	@g_filler = ();
	@g_irp = ();

	%g_synonym = ();
	@g_const = ();

	$g_radix = 8;
	$g_lc = 0;
	$g_lasttag = "";
	$g_lastop = "";

	$g_lev = "";
	$g_conclev = "";

	$g_pass = $pass;

	@saved_defsfiles = @g_defsfiles;
	push(@g_filler, \&midas_readdefs);

	open(ASMFILE, $g_asmfile) || die "Error: cannot open $g_asmfile\n";
	&midas_parse();
	close(ASMFILE);

	@g_defsfiles = @saved_defsfiles;
}

#
# Syntax:
#	EQUALS symbol,symbol
#	.ASCII quoted
#	PRINT quoted
#	PRINTX quoted
#	PNTNUM word
#	REPEAT expr,[text]
#	IRP [sym1a,sym2a,list,...],[sym1b,sym2b,list,...],...
#	IRPC [symbol,,text]
#	ENDIRP
#	DEFINE symbol symbol,symbol,...
#	TERMINATE rest of line
#	macro arg,arg,[arg]
#	symbol=word
#	symbol:
#	word/
#	word
#
sub midas_parse {
	my($i, $token, $token2, $value, $func);

	for ($i = 0; $i < $g_skiplines; ++$i) {
		for (;;) {
			$token = &midas_token();
			last if (!defined($token));
			last if ($token eq "\n");
		}
	}

	#
	# Skip blank lines, then read title line
	#
	if (!$f_notitle) {
		while (($token = &midas_token()) eq "\n") {
			last if ($g_havechar);
		}
		if ($token ne "\n") {
			return if (!defined($token));
			&unget_token($token);
			$token = &token_rest_of_line();
			$token =~ s/\n//s;
			$g_title = $token if (!defined($g_title));
		}
	}

	for (;;) {
		$token = &midas_token();
		last if (!defined($token));

		$token = $g_synonym{$token} if (defined($g_synonym{$token}));

		# Parse pseudo-instruction
		$func = $parse_funcs{$token};
		if (defined($func)) {
			$g_lastop = $token if ($token =~ /^[A-Z]/);
			&$func($token);
			next;
		}

		# Parse symbol= and symbol:
		if ($token =~ /^[0-9A-Z\.\']*[A-Z\.\'][0-9A-Z\.\']*$/) {
			$token2 = &midas_token();
			last if (!defined($token2));
			if ($token2 eq "=") {
				$token =~ s/^(......)(.*)$/$1/;
				$g_symtab{$token} = $value if (($value = &parse_word()) ne "");
				next;
			} elsif ($token2 eq ":") {
				$token =~ s/^(......)(.*)$/$1/;
				$g_symtab{$token} = $g_lc | $V_ADDR;
				$g_lasttag = $token;
				next;
			}
			&unget_token($token2);
		}

		&unget_token($token);

		# The only choice left is a word, possibly followed by /
		if (($value = &parse_word()) ne "") {
			$token2 = &midas_token();
			last if (!defined($token2));
			if ($token2 eq "/") {
				$g_lc = $value;
				next;
			}
			&unget_token($token2);
			&emit_word($g_lc, $value);
			++$g_lc if ($g_lc <= 077777 && ($value < 0300000 || $value > 0377777));
			next;
		}

		$token = &midas_token();
		&error_parse("unexpected token: $token");
	}
}

sub parse_no_operation {
}

sub parse_octal {
	$g_radix = 8;
}

sub parse_decimal {
	$g_radix = 10;
}

sub parse_word_op {
	my($value, $token);

	return if !&parse_space();
	for (;;) {
		return if (($value = &parse_word()) eq "");
		&emit_word($g_lc, $value);
		$token = &midas_token();
		return if (!defined($token));
		last unless ($token eq ",");
	}
	&unget_token($token);
}

sub parse_equals {
	my($synonym, $token);

	return if !&parse_separator();
	return if (($synonym = &parse_symbol()) eq "");
	return if !&parse_separator();
	return if (($token = &parse_symbol()) eq "");
	$g_synonym{$synonym} = $token;
}

sub parse_expunge {
	my($token, $symbol);

	return if !&parse_separator();
	for (;;) {
		return if (($symbol = &parse_symbol()) eq "");
		$g_symtab{$symbol} = undef;
		last if (($token = &midas_token()) ne ",");
	}
	&unget_token($token);
}

sub parse_ascii {
	my($token);
	my($c1, $c2, $value);

	$token = &token_quoted_string();
	return if (!defined($token));
	$token =~ s/\'//g;
	if ($g_lc >= 0100000) {
		$g_strings{sprintf("%06o", $g_lc>>6)} .= $token if ($g_pass == 2);
	} else {
		$token =~ s/\n/\r\n/gs;
		while ($token ne "") {
			if ($token =~ /^(.)(.)(.*)$/s) {
				($c1, $c2, $token) = ($1, $2, $3);
				$value = (unpack("C", $c1) << 8) |
					 unpack("C", $c2) | 0100200;
				$value &= ~0177400 if ($c1 eq "#");
				$value &= ~0000377 if ($c2 eq "#");
			} else {
				$token =~ /^(.)(.*)$/s;
				($c1, $token) = ($1, $2);
				$value = (unpack("C", $c1) << 8) | 0100000;
				$value &= ~0177400 if ($c1 eq "#");
			}
			&emit_word($g_lc, $value);
			++$g_lc;
		}
	}
}

sub parse_print {
	my($token);

	$token = &token_quoted_string();
	return if (!defined($token));
	return if ($f_noprint);
	&midas_print("PNT", $token);
}

sub parse_printx {
	my($token);

	$token = &token_quoted_string();
	return if (!defined($token));
	return if ($f_noprint);
	print "$token";
}

sub parse_pntnum {
	my($value);

	return if !&parse_space();
	return if (($value = &parse_word()) eq "");
	return if ($f_noprint);
	printf "%o ", $value;
}

sub parse_constants {
	my($const);
	my($nconst, $base, $size);

	$nconst = @g_const;

	if ($g_pass == 1) {
		push(@g_constregions, [ $g_lc, $nconst ]);
		$g_lc += $nconst;
		@g_const = ();
	} elsif ($g_pass == 2) {
		if (@g_constregions == 0) {
			&error_parse("no constant region from pass 1");
			return;
		}
		$base = $g_constregions[0]->[0];
		$size = $g_constregions[0]->[1];
		shift(@g_constregions);
		if ($nconst > $size) {
			&error_parse("constant region size $size, but need $nconst");
		} elsif ($g_lc != $base) {
			&error_parse("constant region moved from $base to $g_lc");
		}
		while (@g_const) {
			$const = shift(@g_const);
			&emit_word($g_lc, $const);
			++$g_lc;
		}
		$g_symtab{"UNCON"} = $g_lc | $V_ADDR;
		$g_lc = $base + $size;
	}
}

sub parse_define {
	my($token, $macro, $body, $gen);
	my(@args);

	return if !&parse_space();
	return if ($g_inbuf !~ /^([\.0-9A-Z]+)(.*)$/s);
	($macro, $g_inbuf) = ($1, $2);

	$token = &midas_token();
	return if (!defined($token));
	$gen = -1;
	if ($token eq " ") {
		$token = &midas_token();
		return if (!defined($token));
		if ($token eq "/") {
			$gen = 0;
		} else {
			&unget_token($token);
		}
		for (;;) {
			return if (($token = &parse_symbol()) eq "");
			push(@args, $token);
			$token = &midas_token();
			return if (!defined($token));
			if ($token eq "/") {
				$gen = scalar @args;
			} elsif ($token ne ",") {
				last;
			}
		}
	}
	$gen = scalar @args if ($gen == -1);

	if ($token ne "\n") {
		&error_parse("bad DEFINE syntax");
		return;
	}
	$body = &token_definebody();
	$g_macro{$macro} = [ $body, $gen, @args ];
}

#
# Indefinite repeat.  This loops across list(s) and assigns values
# from the list into the symbol.  This function parses the lists,
# capturing the symbols and items, and saves the text for the body
# of the IRP.  The list information and body are stored for repeated
# insertion by insert_irp() and next_irp().
#
# IRP [sym1a,sym2a,list,...],[sym1b,sym2b,list,...],...
# body text
# ENDIRP
#
sub parse_irp {
	my(@argnames, @args, @list);
	my($token, $arg, $body);

	return if !&parse_space();
	for (;;) {
		return if !&parse_token("[");
		return if (($token = &parse_symbol()) eq "");
		push(@argnames, $token);
		&parse_whitespace();
		return if !&parse_token(",");
		return if !&parse_token(",");
		$arg = "";
		for (;;) {
			$token = &midas_unexpanded_token();
			if (!defined($token)) {
				&error_parse("bad IRP syntax");
				return;
			}
			if ($token eq ",") {
				push(@list, $arg);
				$arg = "";
			} elsif ($token eq "]") {
				push(@list, $arg);
				last;
			} elsif ($token eq "\t" || $token eq "\n") {
			} else {
				$arg .= $token;
			}
		}
		push(@args, [ @list ]);
		@list = ();
		&parse_optspace();
		$token = &midas_token();
		last if ($token ne ",");
		&parse_optspace();
	}
	&unget_token($token);
	$body = &token_irpbody();
	&insert_irp(\@argnames, \@args, $body);
}

#
# Indefinite repeat characters.  The body is repeated with each of the
# symbol replaced by each character in the text.  This list and body are
# stored for repeated insertion by insert_irp() and next_irp().
#
# IRPC [symbol,,text]
# body text
# ENDIRP
#
sub parse_irpc {
	my(@argnames, @args, @list);
	my($token, $string, $c, $body);

	return if !&parse_space();
	return if !&parse_token("[");
	return if (($token = &parse_symbol()) eq "");
	push(@argnames, $token);
	&parse_whitespace();
	return if !&parse_token(",");
	&parse_whitespace();
	return if !&parse_token(",");

	$string = &token_bracketed();
	while ($string =~ /^(.)(.*)$/s) {
		($c, $string) = ($1, $2);
		push(@list, $c);
	}
	push(@args, [ @list ]);

	$body = &token_irpbody();
	&insert_irp(\@argnames, \@args, $body);
}

#
# Repeat text as many times as specified by the word, which may
# even be zero.  The text can be enclosed in brackets, in which case
# it may span multiple lines, or it can just be the remainder of
# the line.
#
# REPEAT word,[text]
# REPEAT word,rest_of_line
#
sub parse_repeat {
	my($token, $token2, $token3, $value);
	my($count, $i);

	&parse_optspace();
	return if (($count = &parse_word()) eq "");
	&parse_optspace();
	return if !&parse_token(",");

	&parse_optspace();
	$token = &midas_token();
	if ($token eq "[") {
		$body = &token_bracketed();
	} else {
		&unget_token($token);
		$body = &token_rest_of_line();
	}

	for ($i = 0; $i < $count; ++$i) {
		$g_inbuf = $body . $g_inbuf;
	}
}

sub parse_optspace {
	my($token);

	while (($token = &midas_token()) =~ /^[ \t]$/) {
	}
	&unget_token($token);
}

sub parse_whitespace {
	my($token);

	while (($token = &midas_token()) =~ /^[ \t\n]$/) {
	}
	&unget_token($token);
}

sub parse_word {
	my($value);

	return "" if (($value = &parse_raw_word()) eq "");
	return $value & $V_WORD;
}

#
# A word consists of one or more fields separated by a single space.
# The fields are added together with special handling for the second
# field.  If the second field is an address, then the value of that
# field is masked to 10 bits with the tenth bit being 0 if the pre-masked
# value is in the range 0...777, else the tenth bit being 1.  This
# corresponds to "page zero" and "current page" access to an address.
#
# Returns "" if a valid word is not found.
#
sub parse_raw_word {
	my($value, $fieldnum, $token, $field);

	return "" if (($value = &parse_field($PREC_NONE)) eq "");
	$fieldnum = 2;
	for (;;) {
		last if (($token = &midas_token()) ne " ");
		return $value if (($field = &parse_field($PREC_NONE)) eq "");
		if ($fieldnum == 2 && ($field & $V_ADDR)) {
			$field |= 01000 if (($field & $V_VALUE & ~$V_SIGN) > 0777);
			$field &= $V_SIGN | 01777;
		}
		$value = (($value + $field) & $V_VALUE) | (($value | $field) & $V_STICKY);
		++$fieldnum;
	}
	&unget_token($token);
	return $value;
}

#
# Parse a field.  Returns the value of the field, including flags for
# undefined and address tags.  The valid field syntax is:
#
#	number		Numeric value (octal or decimal)
#	symbol		Value of a symbol
#	.		Value of the current location counter
#	[word]		Value of the word within the brackets
#	(word)		Address of the value stored in constant region
#	0IF ...		Conditional 0
#	1IF ...		Conditional 1
#	-field		Negate
#	field+field	Add
#	field-field	Subtract
#	field"A"field	Logical AND
#	field"T"field	Multiply
#	field"Q"field	Divide
#	field"R"field	Remainder
#	field"U"field	Inclusive-OR
#	field"X"field	Exclusive-OR
#
# Note that a missing right parenthesis is allowed for the (word) syntax.
#
sub parse_field {
	my($prec) = @_;
	my($token, $value, $field, $result);

	$token = &midas_token();
	return "" if (!defined($token));
	if ($token eq "[") {
		$value = &parse_raw_word();
		return "" if !&parse_token("]");
	} elsif ($token eq "(") {
		$value = &parse_raw_word();
		$value = &save_constant($value);
		$token = &midas_token();
		if ($token eq "\n") {
			&unget_token($token);
		} elsif ($token eq "\t") {
			&unget_token($token);
		} elsif ($token ne ")") {
			&error_parse("missing right paren");
			return "";
		}
	} elsif ($token eq "-") {
		return if (($value = &parse_field($PREC_NEG)) eq "");
		$value = (-$value & $V_VALUE) | ($value & $V_STICKY);
	} elsif ($token =~ /^[0-9]+\.$/) {
		$value = $token;
	} elsif ($token =~ /^[0-7]+$/) {
		$value = ($g_radix == 10) ? $token : oct($token);
	} elsif ($token =~ /^[0-9]+$/) {
		$value = $token;
	} elsif ($token eq ".") {
		$value = $g_lc | $V_ADDR;
	} elsif ($token =~ /^[01]IF$/) {
		return "" if (($value = &parse_cond($token)) eq "");
	} elsif ($token =~ /^[0-9A-Z\.\']+$/) {
		$token =~ s/^(......)(.*)$/$1/;
		$value = &sym_lookup($token);
		$value = 0 if (!defined($value));
	} else {
		&unget_token($token);
		&error_parse("unknown symbol in field: $token") if ($prec != $PREC_NONE);
		return "";
	}

	for (;;) {
		$token = &midas_token();
		return $value if (!defined($token));
		if ($token eq "+") {
			&unget_token($token), last if ($prec <= $PREC_PLUS);
			return "" if (($field = &parse_field($PREC_PLUS)) eq "");
			$result = $value + $field;
		} elsif ($token eq "-") {
			&unget_token($token), last if ($prec <= $PREC_MINUS);
			return "" if (($field = &parse_field($PREC_MINUS)) eq "");
			$field &= ~$V_ADDR if (!($value & $V_ADDR));
			$result = $value - $field;
		} elsif ($token eq "\"A\"") {
			&unget_token($token), last if ($prec <= $PREC_AND);
			return "" if (($field = &parse_field($PREC_AND)) eq "");
			$result = $value & $field;
		} elsif ($token eq "\"T\"") {
			&unget_token($token), last if ($prec <= $PREC_TIMES);
			return "" if (($field = &parse_field($PREC_TIMES)) eq "");
			$result = ($value & $V_VALUE) * ($field & $V_VALUE);
		} elsif ($token eq "\"Q\"") {
			&unget_token($token), last if ($prec <= $PREC_DIVIDE);
			return "" if (($field = &parse_field($PREC_DIVIDE)) eq "");
			if (($field & $V_VALUE) == 0) {
				&error_parse("divide by zero");
				last;
			}
			if ($value & $V_SIGN) {
				$result = (0200000 - int((0200000 - ($value & $V_VALUE)) / ($field & $V_VALUE))) & $V_VALUE;
			} else {
				$result = int(($value & $V_VALUE) / ($field & $V_VALUE));
			}
		} elsif ($token eq "\"R\"") {
			&unget_token($token), last if ($prec <= $PREC_REMAINDER);
			return "" if (($field = &parse_field($PREC_REMAINDER)) eq "");
			if (($field & $V_VALUE) == 0) {
				&error_parse("divide by zero");
				last;
			}
			$result = ($value & $V_VALUE) % ($field & $V_VALUE);
		} elsif ($token eq "\"U\"") {
			&unget_token($token), last if ($prec <= $PREC_OR);
			return "" if (($field = &parse_field($PREC_OR)) eq "");
			$result = $value | $field;
		} elsif ($token eq "\"X\"") {
			&unget_token($token), last if ($prec <= $PREC_XOR);
			return "" if (($field = &parse_field($PREC_XOR)) eq "");
			$result = $value ^ $field;
		} else {
			&unget_token($token);
			return $value;
		}
		$value = ($result & $V_VALUE) | (($value | $field) & $V_STICKY);
	}
	return $value;
}

#
# Parse a conditional expression.  A conditional expression beginning
# with 0IF evaluates to 0 if the expression is true and 1, otherwise.
# 1IF has the opposite result.  The syntax is:
#
#	nIF P		true on second pass
#	nIF D symbol	true if symbol is defined
#	nIF VP word	true if word is greater than or equal to zero
#	nIF VZ word	true if word is equal to zero
#
# A / can be used to terminate the word, and the slash character will
# be consumed by the parser here so that it is not later treated as
# an assignment to the location counter.
#
sub parse_cond {
	my($iftoken) = @_;
	my($rval, $token, $value);

	$rval = ($iftoken eq "1IF") ? 1 : 0;

	return "" if (!&parse_space());
	$token = &midas_token();
	return "" if (!defined($token));
	if ($token eq "P") {
		$rval ^= 1 if ($g_pass == 1);
	} elsif ($token eq "D") {
		return "" if (!&parse_space());
		return if (($token = &parse_symbol()) eq "");
		$rval ^= 1 if (!defined($g_symtab{$token}));
	} elsif ($token eq "VP") {
		return "" if (!&parse_space());
		return "" if (($value = &parse_raw_word()) eq "");
		$rval ^= 1 if ($value & $V_SIGN);
		&unget_token($token) if (($token = &midas_token()) ne "/");
	} elsif ($token eq "VZ") {
		return "" if (!&parse_space());
		return "" if (($value = &parse_raw_word()) eq "");
		$rval ^= 1 if (($value & $V_VALUE) != 0);
		&unget_token($token) if (($token = &midas_token()) ne "/");
	} else {
		&error_parse("unexpected token: $token");
		&unget_token($token);
		return "";
	}

	return $rval;
}

sub parse_space {
	my($token);

	$token = &midas_token();
	return 1 if ($token =~ /^[ \t]$/);
	&error_token("space", $token);
}

sub parse_separator {
	my($token);

	$token = &midas_token();
	return 1 if ($token =~ /^[ \t,]$/);
	&error_token("separator", $token);
}

sub parse_symbol {
	my($token);

	$token = &midas_token();
	return $token if ($token =~ /^[0-9A-Z\.\']+$/ && $token !~ /^\d+\.?$/);
	&error_token("symbol", $token);
}

sub parse_token {
	my($expect) = @_;
	my($token);

	$token = &midas_token();
	return 1 if ($token eq $expect);
	&error_token($expect, $token);
}

sub error_token {
	my($expected, $token) = @_;

	$token = "end of line" if ($token eq "\n");
	$token = "space" if ($token eq " ");
	$token = "tab" if ($token eq "\t");
	print "Error: line $g_lnum: wanted $expected, got $token\n";
	return "";
}

sub error_parse {
	my($msg) = @_;

	print "Error: line $g_lnum: $msg\n";
	return "";
}

#
# Display a message in MIDAS error format.  The format is
#
# eee aaaaa sym+o p string
#
# eee is the error code passed to this function
# aaaaa is the current location counter
# sym+o is the current location counter relative the most recent address tag
# p is the most recent pseudo-instruction or macro
# string is passed to this function
#
#
sub midas_print {
	my($code, $string) = @_;
	my($lc, $taglc);

	$lc = $g_lc & $V_VALUE;
	printf "%3.3s %05o", $code, $lc;
	if ($g_lasttag ne "") {
		$taglc = $g_symtab{$g_lasttag} & $V_VALUE;
		if ($lc < $taglc) {
			printf " %.6s-%o", $g_lasttag, $taglc - $lc;
		} else {
			printf " %.6s+%o", $g_lasttag, $lc - $taglc;
		}
	}
	if ($g_lastop ne "") {
		printf " %-6.6s", $g_lastop;
	}
	print " $string\n";
}

#
# Return a string representing the next token from the input stream.
# If the end of the input stream has been reached, then this will
# return undef.  Types of tokens that can be returned are:
#
#	symbol
#	number
#	\n			\n, \r, or \f
#	space
#	\t			\t or multiple spaces
#	: / + - = , [ ] ( )
#	"A" "Q" "R" "T" "U" "X"
#	"Z"
#
sub midas_token {
	my($token);

	$token = &midas_unexpanded_token();
	if (defined($g_macro{$token})) {
		$g_lastop = $token;
		&expand_macro($token);
		return &midas_token();
	}
	return $token;
}

sub midas_unexpanded_token {
	my($token, $quote, $c);

	if ($g_inbuf eq "") {
		&fill_inbuf();
		return undef if ($g_inbuf eq "");
	}

	# Comment begins with / after white-space and continues to end of line
	while ($g_atspace && $g_inbuf =~ /^\/[^\n]*(.*)/s) {
		$g_inbuf = $1;
	}

	$g_atspace = 0;
	if ($g_inbuf =~ /^[\n\r\f](.*)$/s) {
		$g_inbuf = $1;
		$token = "\n";
		$g_atspace = 1;
	} elsif ($g_inbuf =~ /^\t(.*)$/s) {
		$g_inbuf = $1;
		$token = "\t";
		$g_atspace = 1;
	} elsif ($g_inbuf =~ /^  +(.*)$/s) {
		$g_inbuf = $1;
		$token = "\t";
		$g_atspace = 1;
	} elsif ($g_inbuf =~ /^ (.*)$/s) {
		$g_inbuf = $1;
		$token = " ";
		$g_atspace = 1;
	} elsif ($g_inbuf =~ /^([0-9\']+\.)([^0-9A-Z\.\'].*)$/s) {
		($token, $g_inbuf) = ($1, $2);
		$token =~ s/\'//g;
	} elsif ($g_inbuf =~ /^([0-7\']+)([^0-9A-Z\.\'].*)$/s) {
		($token, $g_inbuf) = ($1, $2);
		$token =~ s/\'//g;
	} elsif ($g_inbuf =~ /^([0-9A-Z\.\']+)(.*)$/s) {
		($token, $g_inbuf) = ($1, $2);
		$token =~ s/\'//g;
	} elsif ($g_inbuf =~ /^([\:\/\+\-\=\,\[\]\(\)])(.*)$/s) {
		($token, $g_inbuf) = ($1, $2);
	} elsif ($g_inbuf =~ /^(\"[AQRTUX]\")(.*)$/s) {
		($token, $g_inbuf) = ($1, $2);
	} elsif ($g_inbuf =~ /^(\"[Z]\")(.*)$/s) {
		($token, $g_inbuf) = ($1, $2);
	} elsif ($g_inbuf =~ /^\^([AQRTUX])(.*)$/s) {
		($token, $g_inbuf) = ($1, $2);
		$token = "\"$token\"";
	} else {
		 print "Error: line $g_lnum: cannot tokenize: $g_inbuf\n";
		 if ($g_inbuf =~ /\n/) {
			 $g_inbuf =~ s/^(.*?)\n//;
		 } else {
			$g_inbuf = "";
		 }
		 return &midas_token();
	}
	return $token;
}

sub unget_token {
	my($token) = @_;

	$token = "  " if ($token eq "\t");
	$g_inbuf = $token . $g_inbuf;
}

sub midas_getc {
	my($token);

	if ($g_inbuf eq "") {
		&fill_inbuf();
		return undef if ($g_inbuf eq "");
	}

	$g_atspace = 0;
	if ($g_inbuf =~ /^  +(.*)$/s) {
		$g_inbuf = $1;
		$token = "\t";
		$g_atspace = 1;
	} else {
		$g_inbuf =~ /^(.)(.*)$/s;
		($token, $g_inbuf) = ($1, $2);
		$g_atspace = 1 if ($token =~ /[ \t\n]/s);
	}
	return $token;
}

#
# Special case: return a token that represents a quoted string.
# A quoted string begins with one character (the delimiter) followed by
# any amount of characters until the next occurrence of the delimiter.
#
sub token_quoted_string {
	my($quote, $c, $string);

	$g_inbuf =~ s/^[ \t]+//;
	if ($g_inbuf eq "") {
		&error_parse("missing start char for quoted string");
		return undef;
	}
	$g_inbuf =~ /^(.)(.*)$/s;
	($quote, $g_inbuf) = ($1, $2);
	$string = "";
	for (;;) {
		if ($g_inbuf eq "") {
			&fill_inbuf();
			return undef if ($g_inbuf eq "");
		}
		$g_inbuf =~ /^(.)(.*)$/s;
		($c, $g_inbuf) = ($1, $2);
		if ($c eq $quote) {
			$string =~ s/\^A/\001/gs;
			$string =~ s/\^B/\002/gs;
			$string =~ s/\^C/\003/gs;
			return $string;
		}
		$string .= $c;
	}
	return $string;
}

#
# Special case: return a token that represents the rest of
# of the line.
#
sub token_rest_of_line {
	my($string);

	$g_inbuf =~ /^([^\n]*\n)(.*)$/s;
	($string, $g_inbuf) = ($1, $2);
	return $string;
}

#
# Special case: return a token that matches the contents of
# bracketed text.  The first opening bracket has already been
# consumed.  This allows nested brackets.
#
sub token_bracketed {
	my($string, $pre, $c, $depth);

	$depth = 1;
	for (;;) {
		if ($g_inbuf =~ /^([^\[\]]*)([\[\]])(.*)$/s) {
			($pre, $c, $g_inbuf) = ($1, $2, $3);
			$string .= $pre;
			if ($c eq "[") {
				$string .= $c;
				++$depth;
			} elsif ($depth == 1) {
				return $string;
			} else {
				$string .= $c;
				--$depth;
			}
		} else {
			&fill_inbuf();
		}
	}
}

#
# Special case: return a token that represents the body of
# a macro definition.  This is all text up to a TERMINATE
# pseudo-instruction.
#
sub token_definebody {
	my($body, $depth, $token, $body1);

	for (;;) {
		($body1, $token) = &find_next_symbol();
		$body .= $body1;
		if ($token eq "TERMINATE" ||
		    $g_synonym{$token} eq "TERMINATE") {
			if ($depth == 0) {
				$g_inbuf =~ s/^[^\n]*\n//s;
				return $body;
			}
			--$depth;
		} elsif ($token eq "DEFINE" ||
			 $g_synonym{$token} eq "DEFINE") {
			++$depth;
		}
		$body .= $token;
	}
}

sub find_next_symbol {
	my($body, $token);

	$body = "";
	for (;;) {
		while ($g_inbuf =~ /^(.*?)([\.A-Z0-9]+)(.*)$/s) {
			($token, $g_inbuf) = ($2, $3);
			$body .= $1;
			if ($body =~ /\^$/ && $token =~ /^[A-Z]/) {
				$token =~ /^(.)(.*)$/;
				$body .= $1;
				$g_inbuf = $2 . $g_inbuf;
				next;
			}
			if ($body =~ /\"$/ && $token =~ /^[A-Z]$/ && $g_inbuf =~ /^\"/) {
				$body .= $token;
				next;
			}
			return ($body, $token);
		}
		&fill_inbuf();
	}
}

#
# Special case: return a token that represents the body of
# an IRP.  This is all text up to an ENDIRP pseudo-instruction.
#
sub token_irpbody {
	my($body, $depth, $token, $body1);

	for (;;) {
		($body1, $token) = &find_next_symbol();
		$body .= $body1;
		if ($token eq "ENDIRP" ||
		    $g_synonym{$token} eq "ENDIRP") {
			if ($depth == 0) {
				return $body;
			}
			--$depth;
		} elsif ($token eq "IRP" ||
			 $g_synonym{$token} eq "IRP" ||
			 $token eq "IRPC" ||
			 $g_synonym{$token} eq "IRPC") {
			++$depth;
		}
		$body .= $token;
	}
}

#
# Fill $g_inbuf with more characters.  This will come from
# IRP iteration (via @g_filler) or from the input file.
#
sub fill_inbuf {
	my($filler);

	while (@g_filler > 0) {
		$filler = pop(@g_filler);
		&$filler();
		$g_havechar = 1 if ($g_inbuf =~ /^[^\n]/s);
		return if ($g_inbuf ne "");
	}
	&midas_readline();
	$g_havechar = 1 if ($g_inbuf =~ /^[^\n]/s);
}

#
# Read the next line from the input file into $g_inbuf.
# As a side-effect, this will call &emit_line() if the
# previously read line has not yet been output to the
# listing.
#
sub midas_readline {
	&emit_line() if (defined($g_line));
	while (<ASMFILE>) {
		++$g_lnum;
		$g_line = $_;
		$g_inbuf .= $g_line;
		last;
	}
}

sub midas_readdefs {
	my($file);

	for (;;) {
		if (!$g_reading_defs) {
			return if (@g_defsfiles == 0);
			$file = shift(@g_defsfiles);
			open(DEFSFILE, $file) || die "Error: cannot open $file\n";
			$g_reading_defs = 1;
		}
		while (<DEFSFILE>) {
			$g_line = $_;
			$g_inbuf .= $g_line;
			push(@g_filler, \&midas_readdefs);
			return;
		}
		close(DEFSFILE);
		$g_line = undef;
		$g_reading_defs = 0;
	}
}

#
# Inserts an indefinite repeat into the input buffer.  This
# function saves the information about the repeat in @g_irp.
# The next_irp() function is called once by this
# function and will be called from fill_inbuf() each time the
# input buffer is emptied; this will continue until the entire
# indefinite repeat has been achieved.  In order to identify
# the end of the repeated text, any existing input is stored
# in @g_inbufstack, which will be restored after the indefinite
# repeat completes.
#
sub insert_irp {
	my($argnameref, $argsref, $body) = @_;
	my($irpdesc);

	push(@g_inbufstack, $g_inbuf);
	$irpdesc = [ [ @$argnameref ], [ @$argsref ], $body, 0 ];
	push(@g_irp, $irpdesc);
	&next_irp();
}

sub next_irp {
	my($irpdesc, $n, $i, $argname, $arg, $body);

	$irpdesc = pop(@g_irp);
	$n = $irpdesc->[3];
	++$irpdesc->[3];
	if (@{$irpdesc->[1]->[0]} > $n) {
		$body = $irpdesc->[2];
		for ($i = 0; $i < @{$irpdesc->[0]}; ++$i) {
			$argname = $irpdesc->[0]->[$i];
			$arg = $irpdesc->[1]->[$i]->[$n];
			$arg = "" if (!defined($arg));
			$body = &subst_arg($body, $argname, $arg);
		}
		$g_inbuf = $body;
		push(@g_irp, $irpdesc);
		push(@g_filler, \&next_irp);
	} else {
		$g_inbuf = pop(@g_inbufstack);
	}
}

sub subst_arg {
	my($body, $arg, $value) = @_;
	my($out, $pre);

	# Map caret-char to quoted char for ease of substitution
	$body =~ s/\^([A-Z])/\"$1\"/gs;

	while ($body =~ /^(.*?)\b$arg\b(.*)$/s) {
		($pre, $body) = ($1, $2);
		$out .= $pre;
		if ($out =~ /\"\z/ && $body =~ /^\"/ && length($arg) == 1) {
			$out .= $arg;
		} elsif ($out =~ /\.\z/ || $body =~ /^\./) {
			$out .= $arg;
		} else {
			$out .= $value;
		}
	}
	return $out . $body;
}

#
# Expand a macro.  The name has already been read.  This function reads
# the argument values, substitutes the values for the dummy arguments,
# and inserts the resulting text into $g_inbuf.
#
# If there is no value for an argument, then an empty string is
# substituted for it, instead.
#
sub expand_macro {
	my($macro) = @_;
	my($body, $argname, $value, $gen, $tmpsym);
	my(@args);

	($body, $gen, @args) = @{$g_macro{$macro}};
	if ($g_inbuf =~ /^ /) {
	  while (@args) {
		$argname = shift(@args);
		--$gen;
		if ($g_inbuf =~ /^\s*\[/) {
			$g_inbuf =~ s/^\s*\[//;
			$value = &token_bracketed();
		} elsif ($g_inbuf =~ /^\s*(([^\s\n\,\/]+ )*[^\s\n\,\/]+)(.*)$/s) {
			($value, $g_inbuf) = ($1, $3);
			$value =~ s/\'//g;
		} elsif ($g_inbuf =~ /^\s*,/s) {
			$value = "";
		} else {
			&error_parse("invalid macro invocation syntax");
			return;
		}
		$body = &subst_arg($body, $argname, $value);
		if (@args) {
			if ($g_inbuf !~ /^\s*,(.*)/s) {
				last;
			}
			$g_inbuf = $1;
		}
	  }
	}
	while (@args) {
		if ($gen <= 0) {
			++$g_tmpnum;
			$tmpsym = "..A$g_tmpnum";
			$body = &subst_arg($body, shift(@args), $tmpsym);
		} else {
			$body = &subst_arg($body, shift(@args), "");
			--$gen;
		}
	}
	$g_inbuf = $body . $g_inbuf;
}

sub sym_lookup {
	my($symbol) = @_;

	$g_symtab{$symbol} = $V_UNDEF if (!defined($g_symtab{$symbol}));
	print "Undefined symbol on line $g_lnum: $symbol\n" if ($g_pass == 2 && ($g_symtab{$symbol} & $V_UNDEF));
	return $g_symtab{$symbol};
}

sub dump_undefined {
	my($symbol);

	foreach $symbol (sort keys %g_symtab) {
		next if ($g_symtab{$symbol} <= 0177777);
		next if ($g_symtab{$symbol} & $V_ADDR);
		print "Symbol $symbol is undefined after pass $g_pass\n";
	}
}

sub save_constant {
	my($value) = @_;
	my($i);

	for ($i = 0; $i < @g_const; ++$i) {
		if (!($value & $V_UNDEF) && !($g_const[$i] & $V_UNDEF) &&
		    $g_const[$i] == ($value & $V_VALUE)) {
			if ($g_pass == 2 && @g_constregions > 0) {
				return ($g_constregions[0]->[0] + $i) | $V_ADDR;
			} else {
				return $V_UNDEF;
			}
		}
	}
	push(@g_const, $value & ($V_VALUE | $V_UNDEF));
	if ($g_pass == 2 && @g_constregions > 0) {
		return ($g_constregions[0]->[0] + scalar(@g_const) - 1) | $V_ADDR;
	}
	return $V_UNDEF;
}

#
# Emits a word to both the listing file and the memory file on pass 2.
# The general format for a line in the listing is:
#
#	aaaaa dddddd i i input_line
#
# where aaaaa is the address, dddddd is the data word, i are the interrupt
# level characters.
#
# There are some special cases that are handled here:
#
# If the address is greater than or equal to 0100000 then the word will
# be stored in string storage, rather than being put into memory.
#
# If the data value is between 0200000 and 0277777, then this represents
# the start of a blank storage segment, rather than a data word.
#
# If the data value is between 0300000 and 0377777, then this represents
# a description of the current interrupt level, rather than a data word.
#
sub emit_word {
	my($addr, $word) = @_;

	return if ($g_pass != 2);

	$word &= $V_VALUE if ($word & $V_UNDEF);

	if ($word >= 0300000 && $word <= 0377777) {
		my($xlev, $ylev);

		$xlev = ($word >> 6) & 077;
		$ylev = $word & 077;
		if ($xlev == 076) {
			$g_lev = "C";
		} elsif ($xlev == 075) {
			$g_lev = "V";
		} else {
			$g_lev = "$xlev";
		}
		if ($ylev != $xlev) {
			$g_lev .= " $ylev";
		}
		return;
	}

	if ($addr >= 0100000) {
		$g_strings{sprintf("%06o", $addr>>6)} .= sprintf("%6o  ", $word);
		return;
	}

	if (defined($g_memfile) && $word <= $V_VALUE) {
		if ($f_noconc) {
			printf MEMFILE "%05o  %06o\n", $addr, $word;
		} else {
			printf MEMFILE "%05o %06o\n", $addr, $word;
		}
	}

	if (defined($g_lstfile)) {
		if ($g_oline >= $g_lpp) {
			print LSTFILE "\f";
			print LSTFILE "\n" x $g_clead if ($g_clead > 0);
			$g_oline = $g_clead;
		}

		if ($word >= 0200000 && $word <= 0277777) {
			printf LSTFILE "%05o       ", $addr;
			printf LSTFILE " " if ($f_noconc);
		} else {
			if ($f_noconc) {
				printf LSTFILE "%05o  %06o", $addr, $word;
			} else {
				printf LSTFILE "%05o %06o", $addr, $word;
			}
		}

		if (defined($g_line) && $g_line ne "\n") {
			if ($f_noconc) {
				print LSTFILE "  ", &format_line($g_line);
			} else {
				printf LSTFILE " %-3.3s %s", $g_lev, &format_line($g_line);
			}
			++$g_oline;
			&add_concordance($g_line, $g_lev, $g_pagenum);
		} elsif ($g_lev ne "") {
			print LSTFILE " $g_lev\n";
			++$g_oline;
		} else {
			print LSTFILE "\n";
			++$g_oline;
		}
	}

	$g_conclev = $g_lev;
	$g_line = undef;
}

sub emit_line {
	return if ($g_pass != 2);
	return if (!defined($g_line));

	if (defined($g_lstfile)) {
		if ($g_oline >= $g_lpp && $g_line !~ /^\f/) {
			print LSTFILE "\f";
			print LSTFILE "\n" x $g_clead if ($g_clead > 0);
			$g_oline = $g_clead;
		}

		print LSTFILE "      ^L\n" if ($g_line =~ /^\f/ && $f_showff && $g_oline <= $g_lpp);
		$g_oline = 0 if ($g_line =~ /^\f/);
		if ($g_line =~ /^\f*\n$/) {
			print LSTFILE &format_line($g_line);
			++$g_oline;
		} else {
			if ($f_noconc) {
				print LSTFILE " "x15, &format_line($g_line);
			} else {
				print LSTFILE " "x17, &format_line($g_line);
			}
			++$g_oline;
		}
	}
	&add_concordance($g_line, $g_conclev, $g_pagenum);
	&print_header();
	$g_line = undef;
}

sub format_line {
	my($line) = @_;
	my($out);

	while ($line =~ /^([^\t]+)\t(.*)$/s) {
		$out .= $1;
		$line = $2;
		$out .= " ";
		while (length($out) % 8 != 0) {
			$out .= " ";
		}
	}
	return $out . $line;
}

sub print_strings {
	my($key, $string, $pagenum, $prevaddr);

	return if (!defined($g_strfile));
	open(STRFILE, ">$g_strfile") || die "Error: cannot create $g_strfile\n";
	foreach $key (sort keys %g_strings) {
		if (oct($key) != $prevaddr) {
			$prevaddr = oct($key);
			++$pagenum;
			print STRFILE "\f" if ($pagenum != 1);
			print STRFILE "\n";
			print STRFILE "\n";
			print STRFILE "\n";
			printf STRFILE "PAGE   %d               %s  %s\n", $pagenum, $g_title, $g_datestr;
			print STRFILE "\n";
		}
		$string = $g_strings{$key};
		$string =~ s/\"Z\"/\n/g;
		$string =~ s/[ \t]+\n/\n/g;
		print STRFILE $string;
	}
	close(STRFILE);
}

#
# Add the words in a line to the interrupt concordance list.
# Words within comments are not included.  The page number and
# interrupt level are passed in as arguments.
#
sub add_concordance {
	my($line, $lev, $pagenum) = @_;
	my($word, $conc);
	my(@words);

	return if ($g_pass != 2);
	return if ($line =~ /^\//);

	$pagenum = 1 if ($pagenum eq "");
	$pageref = $pagenum if ($lev eq "");
	$pageref = $pagenum . "($lev)" if ($lev ne "");

	# Remove comment
	$line =~ s/\s+\/.*$//s;

	# Convert "control" characters
	$line =~ s/\^([A-Z])/\"$1\"/gs;

	if ($line =~ /^([0-9A-Z\.]+):(.*)$/s) {
		($word, $line) = ($1, $2);
		$word =~ s/^(......).*$/$1/;
		if (defined($g_concdef{$word})) {
			$g_concdef{$word} .= ",$pageref";
		} else {
			$g_concdef{$word} = $pageref;
		}
		++$g_concword{$word};
	} elsif ($line =~ /^\s*([0-9A-Z\.]+)\=(.*)$/s) {
		($word, $line) = ($1, $2);
		$word =~ s/^(......).*$/$1/;
		if (defined($g_concdef{$word})) {
			$g_concdef{$word} .= ",$pageref";
		} else {
			$g_concdef{$word} = $pageref;
		}
		++$g_concword{$word};
	}
	$line =~ s/\"[AQRTUXZ]\"/ /g;
	$line =~ s/[^0-9A-Z\.]+/ /g;
	$line =~ s/\s\s+/ /g;
	$line =~ s/^\s+//;
	@words = split(/\s+/, $line);
	foreach $word (@words) {
		$word =~ s/^(......).*$/$1/;
		next if ($word eq ".");
		next if ($word =~ /^[0-9]+\.?$/);
		push(@{$g_concused{$word}}, $pageref);
		++$g_concword{$word};
	}
}

sub print_concordance {
	my($word, $def, $used, $col, $first);
	my(@locs);
	my(%skips);

	return if (!defined($g_concfile));
	open(CONCFILE, ">$g_concfile") || die "Error: cannot create $g_concfile\n";
	foreach $word (@skipconc) {
		$skips{$word} = 1;
	}

	foreach $word (sort keys %g_concword) {
		next if ($skips{$word});

		printf CONCFILE "%-6.6s  ", $word;
		$col = 9;

		$def = $g_concdef{$word};
		if (defined($def)) {
			print CONCFILE $def;
			$col += length($def);
		} else {
			print CONCFILE "-";
			++$col;
		}
		if (!defined($g_concused{$word})) {
			print CONCFILE "\n";
			next;
		}

		if ($col < 17) {
			print CONCFILE " "x(19-$col);
			$col = 19;
		} else {
			print CONCFILE "  ";
			$col += 2;
		}
		@locs = sort bypri @{$g_concused{$word}};
		$first = 1;
		foreach $used (@locs) {
			++$col, print CONCFILE "," if (!$first);
			$first = 0;
			$col = 20, print CONCFILE "\n" . " "x19 if ($col > 66);
			print CONCFILE $used;
			$col += length($used);
		}
		print CONCFILE "\n";
	}
	close(CONCFILE);
}

sub bypri {
	my($pg1, $hp1, $sp1);
	my($pg2, $hp2, $sp2);

	$a =~ /^(\d+)(\(((\d+) )?(\w+)\))?$/;
	($pg1, $hp1, $sp1) = ($1, $4, $5);
	$b =~ /^(\d+)(\(((\d+) )?(\w+)\))?$/;
	($pg2, $hp2, $sp2) = ($1, $4, $5);
	return -1 if ($pg1 < $pg2);
	return  1 if ($pg1 > $pg2);
	return -1 if ($sp1 lt $sp2);
	return  1 if ($sp1 gt $sp2);
	return -1 if ($hp1 ne "" && $hp2 ne "" && $hp1 < $hp2);
	return  1 if ($hp1 ne "" && $hp2 ne "" && $hp1 > $hp2);
	return 0;
}

sub print_header {
	my($string);

	return if ($g_pass != 2);
	return if (!defined($g_line));
	return if ($g_pagenum != 0 && $g_line !~ /\f/);
	return if (!defined($g_lstfile));
	++$g_pagenum;
	if ($g_lead > $g_oline) {
		print LSTFILE "\n" x ($g_lead - $g_oline);
		$g_oline = $g_lead;
	}
	foreach $string (@g_banners) {
		print LSTFILE "$string\n";
		++$g_oline;
	}
	printf LSTFILE "PAGE  %d   %s   %s\n", $g_pagenum, $g_title, $g_datestr;
	print LSTFILE "\n";
	$g_oline += 2;
}