From fcbe2db0389a8bc98a8d09c6dead86b3f886a180 Mon Sep 17 00:00:00 2001
From: sam <sam@localhost.home>
Date: Sun, 10 Mar 2024 02:37:07 +1300
Subject: [PATCH] Add page text column to crawler and search index

---
 crawl.js | 10 ++++++----
 db.js    | 12 ++++++------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/crawl.js b/crawl.js
index 7600436..b45970d 100644
--- a/crawl.js
+++ b/crawl.js
@@ -23,13 +23,15 @@ async function crawl(url, ignoreExisting = false) {
                 console.log(`[${res.status} ${res.statusText}] ${url}`); 
                 
                 if(res.status == 200) {
-                        const $         = cheerio.load(await res.text());
+			const html 	= await res.text();
+			const $         = cheerio.load(html);
                         const title     = $("title").text();
-        
+			const text 	= $.text().replace(/\s+/g, " ").trim();
+
                         console.log(`[TITLE] ${title} (${url})`);
                         
-                        db.run(`INSERT INTO pages(url, title) 
-                                VALUES(?, ?)`, url, title, (err) => {
+                        db.run(`INSERT INTO pages(url, title, text) 
+                                VALUES(?, ?, ?)`, url, title, text, (err) => {
                                 if(err) {
                                         console.log(`[INSERT FAIL: ${err}] ${url}|${title}`);
                                 } else {
diff --git a/db.js b/db.js
index f560088..0bce643 100644
--- a/db.js
+++ b/db.js
@@ -3,20 +3,20 @@ const db        = new sqlite3.Database('index.db');
 
 db.createTables = function() {
         this.exec(`
-CREATE TABLE IF NOT EXISTS pages(id INTEGER PRIMARY KEY, url TEXT NOT NULL UNIQUE, title TEXT);
-CREATE VIRTUAL TABLE IF NOT EXISTS page_search USING fts5(url, title, content=pages, content_rowid=id);
+CREATE TABLE IF NOT EXISTS pages(id INTEGER PRIMARY KEY, url TEXT NOT NULL UNIQUE, title TEXT, text TEXT);
+CREATE VIRTUAL TABLE IF NOT EXISTS page_search USING fts5(url, title, text, content=pages, content_rowid=id);
 
 CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
-  INSERT INTO page_search(rowid, url, title) VALUES (new.id, new.url, new.title);
+  INSERT INTO page_search(rowid, url, title, text) VALUES (new.id, new.url, new.title, new.text);
 END;
 
 CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
-  INSERT INTO page_search(page_search, rowid, url, title) VALUES ('delete', old.id, old.url, old.title);
+  INSERT INTO page_search(page_search, rowid, url, title, text) VALUES ('delete', old.id, old.url, old.title, old.text);
 END;
 
 CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
-  INSERT INTO page_search(page_search, rowid, url, title) VALUES ('delete', old.id, old.url, old.title);
-  INSERT INTO page_search(rowid, url, title) VALUES (new.id, new.url, new.title);
+  INSERT INTO page_search(page_search, rowid, url, title, text) VALUES ('delete', old.id, old.url, old.title, old.text);
+  INSERT INTO page_search(rowid, url, title, text) VALUES (new.id, new.url, new.title, new.text);
 END;
         `);
         console.log("[SQL] Tables created.");