From 99169df2ff4d92f67c7936ba6982d33670ea9a21 Mon Sep 17 00:00:00 2001
From: Michael Forney <mforney@mforney.org>
Date: Sat, 13 Apr 2024 03:28:04 -0700
Subject: [PATCH] parse: use dynamically sized hashtable for temporaries

This significantly improves parsing performance for massive functions
with a huge number of temporaries. Parsing the 86MiB IL produced
by cproc during zig bootstrap drops from 17m15s to 2.5s (over 400x
speedup).

The speedup is much smaller for IL produced from normal non-autogenerated
C code. Parsing the sqlite3 amalgamation drops from 0.40s to 0.33s.
---
 parse.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/parse.c b/parse.c
index a909b7d..6dee38f 100644
--- a/parse.c
+++ b/parse.c
@@ -152,7 +152,8 @@ static struct {
 static int lnum;
 
 static Fn *curf;
-static int tmph[TMask+1];
+static int *tmph;
+static int tmphcap;
 static Phi **plink;
 static Blk *curb;
 static Blk **blink;
@@ -384,19 +385,27 @@ expect(int t)
 static Ref
 tmpref(char *v)
 {
-	int t, *h;
+	int t, i;
 
-	h = &tmph[hash(v) & TMask];
-	t = *h;
-	if (t) {
+	if (tmphcap/2 <= curf->ntmp-Tmp0) {
+		free(tmph);
+		tmphcap = tmphcap ? tmphcap*2 : TMask+1;
+		tmph = emalloc(tmphcap * sizeof tmph[0]);
+		for (t=Tmp0; t<curf->ntmp; t++) {
+			i = hash(curf->tmp[t].name) & (tmphcap-1);
+			for (; tmph[i]; i=(i+1) & (tmphcap-1))
+				;
+			tmph[i] = t;
+		}
+	}
+	i = hash(v) & (tmphcap-1);
+	for (; tmph[i]; i=(i+1) & (tmphcap-1)) {
+		t = tmph[i];
 		if (strcmp(curf->tmp[t].name, v) == 0)
 			return TMP(t);
-		for (t=curf->ntmp-1; t>=Tmp0; t--)
-			if (strcmp(curf->tmp[t].name, v) == 0)
-				return TMP(t);
 	}
 	t = curf->ntmp;
-	*h = t;
+	tmph[i] = t;
 	newtmp(0, Kx, curf);
 	strcpy(curf->tmp[t].name, v);
 	return TMP(t);
@@ -926,7 +935,7 @@ parsefn(Lnk *lnk)
 		b->dlink = 0; /* was trashed by findblk() */
 	for (i=0; i<BMask+1; ++i)
 		blkh[i] = 0;
-	memset(tmph, 0, sizeof tmph);
+	memset(tmph, 0, tmphcap * sizeof tmph[0]);
 	typecheck(curf);
 	return curf;
 }