From 4629ccd7247b8c29bdf0e59bfc11bc93949ef429 Mon Sep 17 00:00:00 2001 From: User Date: Sun, 26 Oct 2025 10:11:56 +0100 Subject: [PATCH] feat: splitted into files and made some fixes --- README.md | 2 +- wayback-machine-downloader/cli.js | 88 ++++ wayback-machine-downloader/dockerfile | 5 +- wayback-machine-downloader/downloader.js | 493 ------------------ wayback-machine-downloader/index.js | 39 ++ .../lib/asset-manager.js | 392 ++++++++++++++ wayback-machine-downloader/lib/downloader.js | 222 ++++++++ wayback-machine-downloader/lib/logger.js | 21 + .../lib/snapshot-index.js | 138 +++++ wayback-machine-downloader/lib/utils.js | 117 +++++ wayback-machine-downloader/package.json | 34 +- 11 files changed, 1043 insertions(+), 508 deletions(-) create mode 100644 wayback-machine-downloader/cli.js delete mode 100644 wayback-machine-downloader/downloader.js create mode 100644 wayback-machine-downloader/index.js create mode 100644 wayback-machine-downloader/lib/asset-manager.js create mode 100644 wayback-machine-downloader/lib/downloader.js create mode 100644 wayback-machine-downloader/lib/logger.js create mode 100644 wayback-machine-downloader/lib/snapshot-index.js create mode 100644 wayback-machine-downloader/lib/utils.js diff --git a/README.md b/README.md index 8f88f12..568dfb1 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ Got ideas or suggestions? Feel free to open an issue! ## Run ```bash -node downloader.js +node index.js ``` After launching, an interactive menu will appear with the following questions: diff --git a/wayback-machine-downloader/cli.js b/wayback-machine-downloader/cli.js new file mode 100644 index 0000000..65a78aa --- /dev/null +++ b/wayback-machine-downloader/cli.js @@ -0,0 +1,88 @@ +#!/usr/bin/env node + +import path from "path"; +import readline from "readline"; + +import { WaybackMachineDownloader } from "./lib/downloader.js"; +import { normalizeBaseUrlInput } from "./lib/utils.js"; + +function ask(rl, question) { + return new Promise((resolve) => rl.question(question, (answer) => resolve(answer.trim()))); +} + +async function interactiveMain() { + const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); + + let normalizedBase; + while (true) { + const baseInput = await ask(rl, "Enter domain or URL to archive (e.g., example.com): "); + if (!baseInput) continue; + try { + normalizedBase = normalizeBaseUrlInput(baseInput); + break; + } catch { + console.log("Please enter a valid domain or URL.\n"); + } + } + + const base_url = normalizedBase.canonicalUrl; + + const from_timestamp = await ask(rl, "From timestamp (YYYYMMDDhhmmss) or leave blank: "); + const to_timestamp = await ask(rl, "To timestamp (YYYYMMDDhhmmss) or leave blank: "); + + let rewrite_mode = "as-is"; + const m = await ask(rl, "Rewrite links? (yes=relative / no=as-is, default no): "); + if (/^y(es)?$/i.test(m)) rewrite_mode = "relative"; + + let canonical_action = "keep"; + if (rewrite_mode === "relative") { + const c = await ask(rl, 'Canonical: "keep" (default) or "remove": '); + if ((c || "").toLowerCase() === "remove") canonical_action = "remove"; + } + + let threads_count = await ask(rl, "How many download threads? (default 3): "); + threads_count = parseInt(threads_count || "3", 10); + if (!Number.isFinite(threads_count) || threads_count <= 0) threads_count = 3; + + const exact_url = /^y(es)?$/i.test(await ask(rl, "Only exact URL (no wildcard /*)? 
(yes/no, default no): ")); + const directory = await ask(rl, "Target directory (leave blank for default websites//): "); + + const ext = await ask(rl, "Download external assets? (yes/no, default no): "); + const download_external_assets = /^y(es)?$/i.test(ext); + + rl.close(); + + const dl = new WaybackMachineDownloader({ + base_url, + normalized_base: normalizedBase, + exact_url, + directory: directory || null, + from_timestamp: from_timestamp || 0, + to_timestamp: to_timestamp || 0, + threads_count, + rewrite_mode, + canonical_action, + download_external_assets, + }); + + await dl.download_files(); +} + +const isDirectCliRun = (() => { + const entryArg = process.argv && process.argv.length > 1 ? process.argv[1] : null; + if (!entryArg) return false; + try { + return import.meta.url === `file://${path.resolve(entryArg)}`; + } catch { + return false; + } +})(); + +if (isDirectCliRun) { + interactiveMain().catch((err) => { + console.error(`FATAL: ${err?.stack || err}`); + process.exit(1); + }); +} + +export { interactiveMain }; \ No newline at end of file diff --git a/wayback-machine-downloader/dockerfile b/wayback-machine-downloader/dockerfile index 3a681f6..05d1b91 100644 --- a/wayback-machine-downloader/dockerfile +++ b/wayback-machine-downloader/dockerfile @@ -7,7 +7,4 @@ RUN npm install --production COPY . . -CMD ["node", "downloader.js"] - -ENTRYPOINT ["node", "downloader.js"] - +ENTRYPOINT ["node", "index.js"] diff --git a/wayback-machine-downloader/downloader.js b/wayback-machine-downloader/downloader.js deleted file mode 100644 index ca81597..0000000 --- a/wayback-machine-downloader/downloader.js +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Wayback Machine Downloader 0.2.1 by WhitelightSEO — Interactive (Node.js, ESM) - * Run: node downloader.js - */ - -import fs from "fs"; -import path from "path"; -import { fileURLToPath, pathToFileURL, domainToUnicode } from "url"; -import { mkdir } from "fs/promises"; -import pLimit from "p-limit"; -import { load } from "cheerio"; -import { Readable } from "stream"; -import readline from "readline"; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); - -// ----------------------------- PROGRESS BAR ----------------------------- -function renderProgress(current, total) { - const width = 40; - const ratio = total > 0 ? current / total : 0; - const filled = Math.round(ratio * width); - const bar = "█".repeat(filled) + "-".repeat(width - filled); - process.stdout.write(`\r[${bar}] ${Math.round(ratio * 100)}% (${current}/${total})`); - if (current === total) process.stdout.write("\n"); -} - -// ----------------------------- HELPERS ----------------------------- -function toPosix(p) { - return p.split(path.sep).join("/"); -} -function relativeLink(fromDir, toFile) { - const rel = path.relative(fromDir, toFile); - return toPosix(rel || path.basename(toFile)); -} -function ensureLocalTargetForPath(pathname) { - return pathname.endsWith("/") || !path.posix.basename(pathname).includes(".") - ? 
path.posix.join(pathname, "index.html") - : pathname; -} - -// ----------------------------- HTML CHECK ----------------------------- -function isHtmlFile(filePath, contentType, firstBytes) { - if (contentType && /text\/html/i.test(String(contentType))) return true; - const ext = path.extname(filePath).toLowerCase(); - if ([".html", ".htm", ".php", ".asp", ".aspx"].includes(ext)) return true; - const head = (firstBytes || "").toString("utf8", 0, 512); - return /]/i.test(head); -} - - -// ----------------------------- Archive API ----------------------------- -async function getRawListFromApi({ baseUrl, pageIndex, all, fromTimestamp, toTimestamp }) { - const cdx = new URL("https://web.archive.org/cdx/search/xd"); - const params = new URLSearchParams(); - params.set("output", "json"); - params.set("url", baseUrl); - params.set("fl", "timestamp,original"); - params.set("collapse", "digest"); - params.set("gzip", "false"); - if (!all) params.append("filter", "statuscode:200"); - if (fromTimestamp && Number(fromTimestamp) !== 0) params.set("from", String(fromTimestamp)); - if (toTimestamp && Number(toTimestamp) !== 0) params.set("to", String(toTimestamp)); - if (pageIndex != null) params.set("page", String(pageIndex)); - cdx.search = params.toString(); - - try { - const res = await fetch(cdx.toString(), { method: "GET", redirect: "follow" }); - const text = await res.text(); - let json = []; - try { - json = JSON.parse(text); - } catch { - // silent: treat as empty page - return []; - } - if (Array.isArray(json) && Array.isArray(json[0]) && json[0].join(",") === "timestamp,original") { - json.shift(); - } - return json || []; - } catch { - // silent: skip broken page - return []; - } -} - - -// ----------------------------- DOWNLOADER CLASS ----------------------------- -class WaybackMachineDownloader { - constructor(params) { - this.base_url = params.base_url; - this.exact_url = !!params.exact_url; - this.directory = params.directory || null; - this.from_timestamp = params.from_timestamp ? Number(params.from_timestamp) : 0; - this.to_timestamp = params.to_timestamp ? Number(params.to_timestamp) : 0; - this.threads_count = params.threads_count != null ? Number(params.threads_count) : 3; - - this.download_external_assets = params.download_external_assets || false; - - this.rewrite_mode = params.rewrite_mode || "as-is"; - this.rewrite_links = this.rewrite_mode === "relative"; - this.canonical_action = params.canonical_action || "keep"; - - this._processed = 0; - } - - // Create a human-readable backup folder name, preserving IDNs - backup_name() { - try { - if (this.base_url.includes("//")) { - const u = new URL(this.base_url); - return domainToUnicode(u.host); - } - } catch {} - return this.base_url; - } - - // Resolve output directory - backup_path() { - if (this.directory) { - return this.directory.endsWith(path.sep) ? this.directory : this.directory + path.sep; - } - return path.join("websites", this.backup_name(), path.sep); - } - - // Fetch and merge snapshot lists - async get_all_snapshots_to_consider() { - console.log("Getting snapshot pages"); - const httpOpts = { all: true, fromTimestamp: this.from_timestamp, toTimestamp: this.to_timestamp }; - let list = []; - - list = list.concat(await getRawListFromApi({ baseUrl: this.base_url, pageIndex: null, ...httpOpts })); - process.stdout.write("."); - - if (!this.exact_url) { - const wildcard = this.base_url.endsWith("/*") ? 
this.base_url : this.base_url.replace(/\/*$/, "") + "/*"; - for (let i = 0; i < 100; i++) { - const batch = await getRawListFromApi({ baseUrl: wildcard, pageIndex: i, ...httpOpts }); - if (!batch || batch.length === 0) break; - list = list.concat(batch); - process.stdout.write("."); - } - } - console.log(` found ${list.length} snapshots to consider.\n`); - return list; - } - - - // Choose the latest timestamp per unique pathname - async get_file_list_by_timestamp() { - const curated = new Map(); - const all = await this.get_all_snapshots_to_consider(); - for (const pair of all) { - const ts = pair && pair[0]; - const url = pair && pair[1]; - if (!ts || !url) continue; - try { - const u = new URL(url); - const file_id = decodeURIComponent(u.pathname); // decode Cyrillic paths - const prev = curated.get(file_id); - if (!prev || prev.timestamp <= ts) { - curated.set(file_id, { file_url: url, timestamp: ts, file_id }); - } - } catch {} - } - const arr = Array.from(curated, ([file_id, v]) => ({ ...v, file_id })); - arr.sort((a, b) => String(b.timestamp).localeCompare(String(a.timestamp))); - return arr; - } - - // Replace Windows-hostile characters when running on Windows - _windowsSanitize(p) { - if (process.platform !== "win32") return p; - return p.replace(/[:*?&=<>\\|]/g, (s) => "%" + s.charCodeAt(0).toString(16)); - } - - // Ensure directory exists - async _structure_dir_path(dir_path) { - try { - await mkdir(dir_path, { recursive: true }); - } catch (e) { - if (!e || e.code !== "EEXIST") throw e; - } - } - - // Compute local file paths for a given archived URL - _determine_paths(file_url, file_id) { - if (!file_url || !file_id) return null; - if (file_url.startsWith("data:") || file_url.startsWith("javascript:")) return null; - if (file_id.length > 200) return null; - - const backup = this.backup_path(); - const parts = file_id.split("/").filter(Boolean); - let dir_path, file_path; - - if (file_id === "") { - dir_path = backup; - file_path = path.join(backup, "index.html"); - } else { - const lastPart = parts[parts.length - 1] || ""; - if (file_url.endsWith("/") || !lastPart.includes(".")) { - dir_path = path.join(backup, ...parts); - file_path = path.join(dir_path, "index.html"); - } else { - dir_path = path.join(backup, ...parts.slice(0, -1)); - file_path = path.join(backup, ...parts); - } - } - - dir_path = this._windowsSanitize(dir_path); - file_path = this._windowsSanitize(file_path); - - return { dir_path, file_path }; - } - - - // Download a single asset (img/css/js/etc.) 
referenced from an HTML page - async _download_asset(assetUrl, pageTimestamp, file_path, dir_path) { - try { - if (fs.existsSync(file_path)) return file_path; - - await this._structure_dir_path(dir_path); - const snapshotUrl = `https://web.archive.org/web/${pageTimestamp}id_/${assetUrl}`; - let res; - try { - res = await fetch(snapshotUrl, { method: "GET", redirect: "follow" }); - } catch (e) { - console.log(`Skipping asset ${assetUrl}, fetch failed: ${e}`); - return null; - } - if (!res.ok || !res.body) { - console.log(`Skipping asset ${assetUrl}, bad response ${res.status}`); - return null; - } - - await new Promise((resolve, reject) => { - const ws = fs.createWriteStream(file_path); - Readable.fromWeb(res.body).pipe(ws); - ws.on("finish", resolve); - ws.on("error", reject); - }); - - return file_path; - } catch (e) { - console.log(`Asset download failed: ${assetUrl} → ${e}`); - return null; - } - } - - // Parse saved HTML, optionally rewrite internal links to relative and fetch assets - async _process_html_assets(htmlPath, pageUrl, pageTimestamp) { - try { - const backupRoot = this.backup_path(); - let html = fs.readFileSync(htmlPath, "utf8"); - const $ = load(html, { decodeEntities: false }); // keep emojis & non-ASCII as-is - const site = new URL(this.base_url); - const siteHost = domainToUnicode(site.hostname.replace(/^www\\./, "")); - const baseDir = path.dirname(htmlPath); - - const downloadTasks = []; - - // ----------- ASSETS ----------- - $("img[src], script[src], link[href], source[src], video[src], audio[src], iframe[src]").each((_, el) => { - const attr = el.tagName === "link" ? "href" : "src"; - const val = $(el).attr(attr); - if (!val) return; - - try { - const abs = new URL(val, pageUrl).toString(); - const u = new URL(abs); - const isInternal = domainToUnicode(u.hostname.replace(/^www\\./, "")) === siteHost; - - if (isInternal || this.download_external_assets) { - const file_id = decodeURIComponent(u.pathname); - let paths; - try { - paths = this._determine_paths(abs, file_id); - } catch (e) { - console.log(`Invalid path for asset ${abs}: ${e}`); - return; - } - if (!paths) return; - const { dir_path, file_path } = paths; - - if (this.rewrite_links) { - const normPath = decodeURIComponent(u.pathname) + (u.hash || ""); - const localTarget = ensureLocalTargetForPath(normPath); - const localAbsPath = path.join(backupRoot, localTarget); - $(el).attr(attr, relativeLink(baseDir, localAbsPath)); - } - - if (!fs.existsSync(file_path)) { - downloadTasks.push(this._download_asset(abs, pageTimestamp, file_path, dir_path)); - } - } - } catch {} - }); - - // ----------- INTERNAL LINKS (pages/forms) ----------- - if (this.rewrite_links) { - $("a[href], form[action]").each((_, el) => { - const attr = el.tagName === "a" ? 
"href" : "action"; - const val = $(el).attr(attr); - if (!val) return; - - try { - const abs = new URL(val, pageUrl).toString(); - const u = new URL(abs); - const isInternal = domainToUnicode(u.hostname.replace(/^www\\./, "")) === siteHost; - - if (isInternal) { - const normPath = decodeURIComponent(u.pathname) + (u.hash || ""); - const localTarget = ensureLocalTargetForPath(normPath); - const localAbsPath = path.join(backupRoot, localTarget); - $(el).attr(attr, relativeLink(baseDir, localAbsPath)); - } - } catch {} - }); - } - - await Promise.all(downloadTasks); - - if (this.canonical_action === "remove") { - $("link[rel=\"canonical\"]").remove(); - } - - fs.writeFileSync(htmlPath, $.html(), "utf8"); - } catch (e) { - console.log(`HTML processing error: ${e}`); - } - } - - - // Download one file from the snapshot list (page or asset saved by CDX) - async _download_single(file_remote_info, total) { - const file_url = String(file_remote_info.file_url); - const file_id = file_remote_info.file_id; - const file_timestamp = file_remote_info.timestamp; - - let paths; - try { - paths = this._determine_paths(file_url, file_id); - } catch (e) { - console.log(`Invalid path for ${file_url}: ${e}`); - this._processed++; - renderProgress(this._processed, total); - return; - } - - if (!paths) { - console.log(`Skipping invalid URL: ${file_url}`); - this._processed++; - renderProgress(this._processed, total); - return; - } - - const { dir_path, file_path } = paths; - - if (fs.existsSync(file_path)) { - this._processed++; - renderProgress(this._processed, total); - return; - } - - try { - await this._structure_dir_path(dir_path); - const snapshotUrl = `https://web.archive.org/web/${file_timestamp}id_/${file_url}`; - let res; - try { - res = await fetch(snapshotUrl, { method: "GET", redirect: "follow" }); - } catch (e) { - console.log(`Skipping ${file_url}, fetch failed: ${e}`); - return; - } - - if (!res.ok || !res.body) { - console.log(`Skipping ${file_url}, bad response ${res.status}`); - return; - } - - await new Promise((resolve, reject) => { - const ws = fs.createWriteStream(file_path); - Readable.fromWeb(res.body).pipe(ws); - ws.on("finish", resolve); - ws.on("error", reject); - }); - - const contentType = res.headers.get("content-type"); - const ext = path.extname(file_path).toLowerCase(); - const looksHtml = isHtmlFile(file_path, contentType, null) || ext === "" || ext === ".html" || ext === ".htm"; - if (looksHtml) { - await this._process_html_assets(file_path, file_url, file_timestamp); - } - } catch (e) { - console.log(`Download failed for ${file_url}: ${e}`); - } finally { - this._processed++; - renderProgress(this._processed, total); - } - } - - // Orchestrate downloads with concurrency - async download_files() { - const startTime = Date.now(); - console.log(`Downloading ${this.base_url} to ${this.backup_path()} from Wayback Machine archives.`); - const list = await this.get_file_list_by_timestamp(); - if (list.length === 0) { - console.log("No files to download."); - return; - } - - const concurrency = this.threads_count && this.threads_count > 0 ? 
this.threads_count : 1; - const limit = pLimit(concurrency); - this._processed = 0; - await Promise.all(list.map((info) => limit(() => this._download_single(info, list.length)))); - const endTime = Date.now(); - console.log(`\nDownload completed in ${((endTime - startTime) / 1000).toFixed(2)}s, saved in ${this.backup_path()} (${list.length} files)`); - } -} - - -// ============================= INTERACTIVE RUN ============================= -function ask(rl, question) { - return new Promise((resolve) => rl.question(question, (answer) => resolve(answer.trim()))); -} - -async function interactiveMain() { - const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); - - let base_url; - while (true) { - base_url = await ask(rl, "Enter base URL to archive (e.g., https://example.com): "); - if (!base_url) continue; - try { - new URL(base_url); - break; - } catch { - console.log("Please enter a valid URL.\n"); - } - } - - const from_timestamp = await ask(rl, "From timestamp (YYYYMMDDhhmmss) or leave blank: "); - const to_timestamp = await ask(rl, "To timestamp (YYYYMMDDhhmmss) or leave blank: "); - - let rewrite_mode = "as-is"; - const m = await ask(rl, "Rewrite links? (yes=relative / no=as-is, default no): "); - if (/^y(es)?$/i.test(m)) rewrite_mode = "relative"; - - let canonical_action = "keep"; - if (rewrite_mode === "relative") { - const c = await ask(rl, 'Canonical: "keep" (default) or "remove": '); - if ((c || '').toLowerCase() === "remove") canonical_action = "remove"; - } - - let threads_count = await ask(rl, "How many download threads? (default 3): "); - threads_count = parseInt(threads_count || "3", 10); - if (!Number.isFinite(threads_count) || threads_count <= 0) threads_count = 3; - - const exact_url = /^y(es)?$/i.test(await ask(rl, "Only exact URL (no wildcard /*)? (yes/no, default no): ")); - const directory = await ask(rl, "Target directory (leave blank for default websites//): "); - - const ext = await ask(rl, "Download external assets? (yes/no, default no): "); - const download_external_assets = /^y(es)?$/i.test(ext); - - rl.close(); - - const dl = new WaybackMachineDownloader({ - base_url, - exact_url, - directory: directory || null, - from_timestamp: from_timestamp || 0, - to_timestamp: to_timestamp || 0, - threads_count, - rewrite_mode, - canonical_action, - download_external_assets, - }); - - await dl.download_files(); -} - -const isDirectRun = - import.meta.url === `file://${process.argv[1]}` || - import.meta.url === pathToFileURL(process.argv[1]).href; - -if (isDirectRun) { - interactiveMain().catch((err) => { - console.error(`FATAL: ${err?.stack || err}`); - process.exit(1); - }); -} - -export { WaybackMachineDownloader }; diff --git a/wayback-machine-downloader/index.js b/wayback-machine-downloader/index.js new file mode 100644 index 0000000..db65769 --- /dev/null +++ b/wayback-machine-downloader/index.js @@ -0,0 +1,39 @@ +/* + * Wayback Machine Downloader 0.3.0 by WhitelightSEO + * Run: node index.js + */ + +import { pathToFileURL } from "url"; + +import { setDebugMode, getDebugMode, debugLog } from "./lib/logger.js"; +import { WaybackMachineDownloader } from "./lib/downloader.js"; + +const DEBUG_MODE = false; +setDebugMode(DEBUG_MODE); + +const isDirectRun = (() => { + const entryArg = process.argv && process.argv.length > 1 ? 
process.argv[1] : null; + if (!entryArg) return false; + + if (import.meta.url === `file://${entryArg}`) { + return true; + } + + try { + return import.meta.url === pathToFileURL(entryArg).href; + } catch (e) { + debugLog(`Failed to resolve entry script URL: ${e}`); + return false; + } +})(); + +if (isDirectRun) { + import("./cli.js") + .then(({ interactiveMain }) => interactiveMain()) + .catch((err) => { + console.error(`FATAL: ${err?.stack || err}`); + process.exit(1); + }); +} + +export { WaybackMachineDownloader, DEBUG_MODE, setDebugMode, getDebugMode }; \ No newline at end of file diff --git a/wayback-machine-downloader/lib/asset-manager.js b/wayback-machine-downloader/lib/asset-manager.js new file mode 100644 index 0000000..7d931ee --- /dev/null +++ b/wayback-machine-downloader/lib/asset-manager.js @@ -0,0 +1,392 @@ +import fs from "fs"; +import path from "path"; +import { mkdir } from "fs/promises"; +import { load } from "cheerio"; +import { Readable } from "stream"; +import { domainToUnicode } from "url"; + +import { debugLog } from "./logger.js"; +import { + relativeLink, + ensureLocalTargetForPath, + isCssResource, +} from "./utils.js"; + +class AssetManager { + constructor({ + backupPathResolver, + rewriteLinks, + canonicalAction, + downloadExternalAssets, + baseHostUnicode, + snapshotIndex, + }) { + this.backupPathResolver = backupPathResolver; + this.rewriteLinks = !!rewriteLinks; + this.canonicalAction = canonicalAction || "keep"; + this.downloadExternalAssets = !!downloadExternalAssets; + this.baseHostUnicode = (baseHostUnicode || "").toLowerCase(); + this.snapshotIndex = snapshotIndex || null; + } + + setSnapshotIndex(index) { + this.snapshotIndex = index; + } + + get backupPath() { + const resolver = this.backupPathResolver; + return typeof resolver === "function" ? 
resolver() : resolver; + } + + windowsSanitize(p) { + if (process.platform !== "win32") return p; + return p.replace(/[:*?&=<>\\|]/g, (s) => "%" + s.charCodeAt(0).toString(16)); + } + + async ensureDir(dirPath) { + try { + await mkdir(dirPath, { recursive: true }); + } catch (e) { + if (!e || e.code !== "EEXIST") throw e; + } + } + + determinePaths(fileUrl, fileId) { + if (!fileUrl || !fileId) return null; + if (fileUrl.startsWith("data:") || fileUrl.startsWith("javascript:")) return null; + if (fileId.length > 200) return null; + + const backup = this.backupPath; + const parts = fileId.split("/").filter(Boolean); + let dirPath; + let filePath; + + if (fileId === "") { + dirPath = backup; + filePath = path.join(backup, "index.html"); + } else { + const lastPart = parts[parts.length - 1] || ""; + if (fileUrl.endsWith("/") || !lastPart.includes(".")) { + dirPath = path.join(backup, ...parts); + filePath = path.join(dirPath, "index.html"); + } else { + dirPath = path.join(backup, ...parts.slice(0, -1)); + filePath = path.join(backup, ...parts); + } + } + + dirPath = this.windowsSanitize(dirPath); + filePath = this.windowsSanitize(filePath); + + return { dirPath, filePath }; + } + + resolveAssetTimestamp(assetUrl, fallbackTimestamp) { + if (!this.snapshotIndex) return fallbackTimestamp || 0; + return this.snapshotIndex.resolve(assetUrl, fallbackTimestamp); + } + + async downloadAsset(assetUrl, pageTimestamp, filePath, dirPath) { + try { + if (fs.existsSync(filePath)) return filePath; + + await this.ensureDir(dirPath); + const assetTimestamp = this.resolveAssetTimestamp(assetUrl, pageTimestamp); + if (!assetTimestamp) { + debugLog(`Skipping asset ${assetUrl}, no timestamp available in range.`); + return null; + } + const snapshotUrl = `https://web.archive.org/web/${assetTimestamp}id_/${assetUrl}`; + let res; + try { + res = await fetch(snapshotUrl, { method: "GET", redirect: "follow" }); + } catch (e) { + debugLog(`Skipping asset ${assetUrl}, fetch failed: ${e}`); + return null; + } + if (!res.ok || !res.body) { + debugLog(`Skipping asset ${assetUrl}, bad response ${res.status}`); + return null; + } + + const contentType = res.headers.get("content-type") || ""; + + await new Promise((resolve, reject) => { + const ws = fs.createWriteStream(filePath); + Readable.fromWeb(res.body).pipe(ws); + ws.on("finish", resolve); + ws.on("error", reject); + }); + + if (this.rewriteLinks && isCssResource(filePath, assetUrl, contentType)) { + await this.rewriteCssFile(filePath, assetUrl, assetTimestamp); + } + + return filePath; + } catch (e) { + debugLog(`Asset download failed: ${assetUrl} → ${e}`); + return null; + } + } + + async rewriteCssContent(cssContent, cssSourceUrl, pageTimestamp, { baseDir, excludePath } = {}) { + if (!this.rewriteLinks) { + return { css: cssContent, downloads: [] }; + } + + if (!cssContent || !cssContent.trim()) { + return { css: cssContent, downloads: [] }; + } + + const siteHost = this.baseHostUnicode; + const downloads = []; + const seenPaths = new Set(); + let updatedContent = cssContent; + let cssChanged = false; + + const processReference = (rawValue) => { + if (!rawValue) return null; + const trimmed = rawValue.trim(); + if (!trimmed) return null; + if (/^(data:|javascript:|#)/i.test(trimmed)) return null; + + let absoluteUrl; + try { + absoluteUrl = new URL(trimmed, cssSourceUrl).toString(); + } catch { + return null; + } + + let parsed; + try { + parsed = new URL(absoluteUrl); + } catch { + return null; + } + if (!/^https?:$/i.test(parsed.protocol)) return null; + + const 
normalizedHost = domainToUnicode(parsed.hostname.replace(/^www\./, "")).toLowerCase(); + const isInternal = normalizedHost === siteHost; + if (!isInternal && !this.downloadExternalAssets) return null; + + let fileId; + try { + fileId = decodeURIComponent(parsed.pathname); + } catch { + fileId = parsed.pathname; + } + let paths; + try { + paths = this.determinePaths(absoluteUrl, fileId); + } catch { + return null; + } + if (!paths) return null; + + const { dirPath, filePath } = paths; + const assetTimestamp = this.resolveAssetTimestamp(absoluteUrl, pageTimestamp); + + if ( + filePath && + (!excludePath || path.resolve(filePath) !== path.resolve(excludePath)) + ) { + const key = path.resolve(filePath); + if (!fs.existsSync(filePath) && !seenPaths.has(key)) { + seenPaths.add(key); + downloads.push(this.downloadAsset(absoluteUrl, assetTimestamp, filePath, dirPath)); + } + } + + const relativeBase = baseDir || path.dirname(filePath); + const relativePath = relativeLink(relativeBase, filePath) + (parsed.hash || ""); + + return { + original: trimmed, + replacement: relativePath, + }; + }; + + const urlPattern = /url\(\s*(['"]?)([^'")]+)\1\s*\)/gi; + updatedContent = updatedContent.replace(urlPattern, (match, quote, value) => { + const info = processReference(value); + if (!info) return match; + if (info.replacement === info.original) return match; + cssChanged = true; + const q = quote || ""; + return `url(${q}${info.replacement}${q})`; + }); + + const importPattern = /@import\s+(?!url\()\s*(['"])([^'"]+)\1/gi; + updatedContent = updatedContent.replace(importPattern, (match, quote, value) => { + const info = processReference(value); + if (!info) return match; + if (info.replacement === info.original) return match; + cssChanged = true; + return match.replace(value, info.replacement); + }); + + return { + css: cssChanged && updatedContent !== cssContent ? updatedContent : cssContent, + downloads, + }; + } + + async rewriteCssFile(cssPath, cssSourceUrl, pageTimestamp) { + if (!this.rewriteLinks) return; + + let cssContent; + try { + cssContent = fs.readFileSync(cssPath, "utf8"); + } catch { + return; + } + + const cssDir = path.dirname(cssPath); + const { css: updatedContent, downloads } = await this.rewriteCssContent( + cssContent, + cssSourceUrl, + pageTimestamp, + { + baseDir: cssDir, + excludePath: cssPath, + } + ); + + if (downloads.length > 0) { + await Promise.all(downloads); + } + + if (updatedContent !== cssContent) { + fs.writeFileSync(cssPath, updatedContent, "utf8"); + } + } + + async processHtml(htmlPath, pageUrl, pageTimestamp) { + try { + let html = fs.readFileSync(htmlPath, "utf8"); + const $ = load(html, { decodeEntities: false }); + const siteHost = this.baseHostUnicode; + const baseDir = path.dirname(htmlPath); + const backupRoot = this.backupPath; + + const downloadTasks = []; + + const handleCssFragment = async (cssText) => { + const { css: updatedCss, downloads } = await this.rewriteCssContent( + cssText, + pageUrl, + pageTimestamp, + { baseDir } + ); + if (downloads.length > 0) { + downloadTasks.push(...downloads); + } + return updatedCss; + }; + + $("img[src], script[src], link[href], source[src], video[src], audio[src], iframe[src]").each((_, el) => { + const attr = el.tagName === "link" ? 
"href" : "src"; + const val = $(el).attr(attr); + if (!val) return; + + try { + const abs = new URL(val, pageUrl).toString(); + const u = new URL(abs); + const normalizedHost = domainToUnicode(u.hostname.replace(/^www\./, "")).toLowerCase(); + const isInternal = normalizedHost === siteHost; + + if (isInternal || this.downloadExternalAssets) { + let fileId; + try { + fileId = decodeURIComponent(u.pathname); + } catch { + fileId = u.pathname; + } + let paths; + try { + paths = this.determinePaths(abs, fileId); + } catch (e) { + console.log(`Invalid path for asset ${abs}: ${e}`); + return; + } + if (!paths) return; + const { dirPath, filePath } = paths; + + if (this.rewriteLinks) { + const normPath = fileId + (u.hash || ""); + const localTarget = ensureLocalTargetForPath(normPath); + const localAbsPath = path.join(backupRoot, localTarget); + $(el).attr(attr, relativeLink(baseDir, localAbsPath)); + } + + if (!fs.existsSync(filePath)) { + downloadTasks.push( + this.downloadAsset(abs, pageTimestamp, filePath, dirPath) + ); + } + } + } catch {} + }); + + const styleNodes = $("style").toArray(); + for (const node of styleNodes) { + const cssText = $(node).html(); + if (!cssText) continue; + const updated = await handleCssFragment(cssText); + if (updated !== cssText) { + $(node).text(updated); + } + } + + const inlineStyled = $("[style]").toArray(); + for (const node of inlineStyled) { + const styleAttr = $(node).attr("style"); + if (!styleAttr) continue; + const updated = await handleCssFragment(styleAttr); + if (updated !== styleAttr) { + $(node).attr("style", updated); + } + } + + if (this.rewriteLinks) { + $("a[href], form[action]").each((_, el) => { + const attr = el.tagName === "a" ? "href" : "action"; + const val = $(el).attr(attr); + if (!val) return; + + try { + const abs = new URL(val, pageUrl).toString(); + const u = new URL(abs); + const normalizedHost = domainToUnicode(u.hostname.replace(/^www\./, "")).toLowerCase(); + const isInternal = normalizedHost === siteHost; + + if (isInternal) { + let normPath; + try { + normPath = decodeURIComponent(u.pathname); + } catch { + normPath = u.pathname; + } + normPath += u.hash || ""; + const localTarget = ensureLocalTargetForPath(normPath); + const localAbsPath = path.join(backupRoot, localTarget); + $(el).attr(attr, relativeLink(baseDir, localAbsPath)); + } + } catch {} + }); + } + + await Promise.all(downloadTasks); + + if (this.canonicalAction === "remove") { + $("link[rel=\"canonical\"]").remove(); + } + + fs.writeFileSync(htmlPath, $.html(), "utf8"); + } catch (e) { + console.log(`HTML processing error: ${e}`); + } + } +} + +export { AssetManager }; \ No newline at end of file diff --git a/wayback-machine-downloader/lib/downloader.js b/wayback-machine-downloader/lib/downloader.js new file mode 100644 index 0000000..c6d2ce6 --- /dev/null +++ b/wayback-machine-downloader/lib/downloader.js @@ -0,0 +1,222 @@ +import fs from "fs"; +import path from "path"; +import { domainToUnicode } from "url"; +import pLimit from "p-limit"; +import { Readable } from "stream"; + +import { debugLog } from "./logger.js"; +import { renderProgress, normalizeBaseUrlInput, isHtmlFile, isCssResource } from "./utils.js"; +import { SnapshotIndex } from "./snapshot-index.js"; +import { AssetManager } from "./asset-manager.js"; + +async function getRawListFromApi({ baseUrl, pageIndex, all, fromTimestamp, toTimestamp }) { + const cdx = new URL("https://web.archive.org/cdx/search/xd"); + const params = new URLSearchParams(); + params.set("output", "json"); + 
params.set("url", baseUrl); + params.set("fl", "timestamp,original"); + params.set("collapse", "digest"); + params.set("gzip", "false"); + if (!all) params.append("filter", "statuscode:200"); + if (fromTimestamp && Number(fromTimestamp) !== 0) params.set("from", String(fromTimestamp)); + if (toTimestamp && Number(toTimestamp) !== 0) params.set("to", String(toTimestamp)); + if (pageIndex != null) params.set("page", String(pageIndex)); + cdx.search = params.toString(); + + try { + const res = await fetch(cdx.toString(), { method: "GET", redirect: "follow" }); + const text = await res.text(); + let json = []; + try { + json = JSON.parse(text); + } catch { + return []; + } + if (Array.isArray(json) && Array.isArray(json[0]) && json[0].join(",") === "timestamp,original") { + json.shift(); + } + return json || []; + } catch { + return []; + } +} + +class WaybackMachineDownloader { + constructor(params) { + const normalized = params.normalized_base || normalizeBaseUrlInput(params.base_url); + + this.base_url = normalized.canonicalUrl; + this.base_variants = normalized.variants; + this.base_host_unicode = (normalized.unicodeHost || normalized.bareHost).toLowerCase(); + + this.exact_url = !!params.exact_url; + this.directory = params.directory || null; + this.from_timestamp = params.from_timestamp ? Number(params.from_timestamp) : 0; + this.to_timestamp = params.to_timestamp ? Number(params.to_timestamp) : 0; + this.threads_count = params.threads_count != null ? Number(params.threads_count) : 3; + + this.download_external_assets = params.download_external_assets || false; + + this.rewrite_mode = params.rewrite_mode || "as-is"; + this.rewrite_links = this.rewrite_mode === "relative"; + this.canonical_action = params.canonical_action || "keep"; + + this._processed = 0; + this.snapshotIndex = null; + + this.assetManager = new AssetManager({ + backupPathResolver: () => this.backup_path(), + rewriteLinks: this.rewrite_links, + canonicalAction: this.canonical_action, + downloadExternalAssets: this.download_external_assets, + baseHostUnicode: this.base_host_unicode, + snapshotIndex: null, + }); + } + + backup_name() { + try { + if (this.base_url.includes("//")) { + const u = new URL(this.base_url); + return domainToUnicode(u.host); + } + } catch {} + return this.base_url; + } + + backup_path() { + if (this.directory) { + return this.directory.endsWith(path.sep) ? this.directory : this.directory + path.sep; + } + return path.join("websites", this.backup_name(), path.sep); + } + + async get_all_snapshots_to_consider() { + console.log("Getting snapshot pages"); + const httpOpts = { all: true, fromTimestamp: this.from_timestamp, toTimestamp: this.to_timestamp }; + let list = []; + const bases = this.base_variants && this.base_variants.length > 0 ? this.base_variants : [this.base_url]; + + for (const base of bases) { + list = list.concat(await getRawListFromApi({ baseUrl: base, pageIndex: null, ...httpOpts })); + process.stdout.write("."); + + if (!this.exact_url) { + const wildcard = base.endsWith("/*") ? 
base : base.replace(/\/*$/, "") + "/*"; + for (let i = 0; i < 100; i++) { + const batch = await getRawListFromApi({ baseUrl: wildcard, pageIndex: i, ...httpOpts }); + if (!batch || batch.length === 0) break; + list = list.concat(batch); + process.stdout.write("."); + } + } + } + console.log(` found ${list.length} snapshots to consider.\n`); + return list; + } + + async get_file_list_by_timestamp() { + const index = new SnapshotIndex(); + const all = await this.get_all_snapshots_to_consider(); + for (const pair of all) { + const ts = pair && pair[0]; + const url = pair && pair[1]; + if (!ts || !url) continue; + index.register(url, ts); + } + + const manifest = index.getManifest(); + this.snapshotIndex = index; + this.assetManager.setSnapshotIndex(index); + return manifest; + } + + async _download_single(file_remote_info, total) { + const file_url = String(file_remote_info.file_url); + const file_id = file_remote_info.file_id; + const file_timestamp = file_remote_info.timestamp; + + let paths; + try { + paths = this.assetManager.determinePaths(file_url, file_id); + } catch (e) { + console.log(`Invalid path for ${file_url}: ${e}`); + this._processed++; + renderProgress(this._processed, total); + return; + } + + if (!paths) { + console.log(`Skipping invalid URL: ${file_url}`); + this._processed++; + renderProgress(this._processed, total); + return; + } + + const { dirPath, filePath } = paths; + + if (fs.existsSync(filePath)) { + this._processed++; + renderProgress(this._processed, total); + return; + } + + try { + await this.assetManager.ensureDir(dirPath); + const snapshotUrl = `https://web.archive.org/web/${file_timestamp}id_/${file_url}`; + let res; + try { + res = await fetch(snapshotUrl, { method: "GET", redirect: "follow" }); + } catch (e) { + debugLog(`Skipping ${file_url}, fetch failed: ${e}`); + return; + } + + if (!res.ok || !res.body) { + debugLog(`Skipping ${file_url}, bad response ${res.status}`); + return; + } + + await new Promise((resolve, reject) => { + const ws = fs.createWriteStream(filePath); + Readable.fromWeb(res.body).pipe(ws); + ws.on("finish", resolve); + ws.on("error", reject); + }); + + const contentType = res.headers.get("content-type") || ""; + const ext = path.extname(filePath).toLowerCase(); + const looksHtml = isHtmlFile(filePath, contentType, null) || ext === "" || ext === ".html" || ext === ".htm"; + if (this.rewrite_links && isCssResource(filePath, file_url, contentType)) { + await this.assetManager.rewriteCssFile(filePath, file_url, file_timestamp); + } + if (this.rewrite_links && looksHtml) { + await this.assetManager.processHtml(filePath, file_url, file_timestamp); + } + } catch (e) { + debugLog(`Download failed for ${file_url}: ${e}`); + } finally { + this._processed++; + renderProgress(this._processed, total); + } + } + + async download_files() { + const startTime = Date.now(); + console.log(`Downloading ${this.base_url} to ${this.backup_path()} from Wayback Machine archives.`); + const list = await this.get_file_list_by_timestamp(); + if (list.length === 0) { + console.log("No files to download."); + return; + } + + const concurrency = this.threads_count && this.threads_count > 0 ? 
this.threads_count : 1; + const limit = pLimit(concurrency); + this._processed = 0; + await Promise.all(list.map((info) => limit(() => this._download_single(info, list.length)))); + const endTime = Date.now(); + console.log(`\nDownload completed in ${((endTime - startTime) / 1000).toFixed(2)}s, saved in ${this.backup_path()} (${list.length} files)`); + } +} + +export { WaybackMachineDownloader }; \ No newline at end of file diff --git a/wayback-machine-downloader/lib/logger.js b/wayback-machine-downloader/lib/logger.js new file mode 100644 index 0000000..31c2869 --- /dev/null +++ b/wayback-machine-downloader/lib/logger.js @@ -0,0 +1,21 @@ +let debugMode = false; + +function setDebugMode(value) { + debugMode = !!value; +} + +function getDebugMode() { + return debugMode; +} + +function debugLog(...args) { + if (debugMode) { + console.log(...args); + } +} + +function infoLog(...args) { + console.log(...args); +} + +export { setDebugMode, getDebugMode, debugLog, infoLog }; \ No newline at end of file diff --git a/wayback-machine-downloader/lib/snapshot-index.js b/wayback-machine-downloader/lib/snapshot-index.js new file mode 100644 index 0000000..c247530 --- /dev/null +++ b/wayback-machine-downloader/lib/snapshot-index.js @@ -0,0 +1,138 @@ +class SnapshotIndex { + constructor() { + this.byPath = new Map(); + this.byPathAndQuery = new Map(); + this.lookupByPath = null; + this.lookupByPathAndQuery = null; + this.manifestCache = null; + } + + register(url, timestamp) { + if (!url || !timestamp) return; + + let parsed; + try { + parsed = new URL(url); + } catch { + return; + } + + let filePath; + try { + filePath = decodeURIComponent(parsed.pathname); + } catch { + filePath = parsed.pathname; + } + const search = parsed.search || ""; + const queryKey = `${filePath}${search}`; + + const normalizedTimestamp = String(timestamp); + + const currentByPath = this.byPath.get(filePath); + if (!currentByPath || String(currentByPath.timestamp) <= normalizedTimestamp) { + this.byPath.set(filePath, { + file_url: url, + timestamp: normalizedTimestamp, + file_id: filePath, + }); + } + + const currentByQuery = this.byPathAndQuery.get(queryKey); + if (!currentByQuery || String(currentByQuery.timestamp) <= normalizedTimestamp) { + this.byPathAndQuery.set(queryKey, { + file_url: url, + timestamp: normalizedTimestamp, + file_id: filePath, + }); + } + + this.lookupByPath = null; + this.lookupByPathAndQuery = null; + this.manifestCache = null; + } + + buildCaches() { + if (this.manifestCache) { + return; + } + + const manifest = Array.from(this.byPath.entries()).map(([file_id, value]) => ({ + ...value, + file_id, + })); + + manifest.sort((a, b) => String(b.timestamp).localeCompare(String(a.timestamp))); + + const byPath = new Map(); + const byQuery = new Map(); + + for (const entry of manifest) { + const { file_url, file_id, timestamp } = entry; + if (file_id && timestamp && !byPath.has(file_id)) { + byPath.set(file_id, timestamp); + } + if (file_url) { + try { + const u = new URL(file_url); + let decodedPath; + try { + decodedPath = decodeURIComponent(u.pathname); + } catch { + decodedPath = u.pathname; + } + const pathKey = `${decodedPath}${u.search || ""}`; + if (pathKey && timestamp && !byQuery.has(pathKey)) { + byQuery.set(pathKey, timestamp); + } + } catch {} + } + } + + for (const [queryKey, entry] of this.byPathAndQuery.entries()) { + const ts = entry && entry.timestamp; + if (!queryKey || !ts) continue; + if (!byQuery.has(queryKey)) { + byQuery.set(queryKey, ts); + } + const basePath = 
queryKey.replace(/\?.*$/, ""); + if (basePath && !byPath.has(basePath)) { + byPath.set(basePath, ts); + } + } + + this.manifestCache = manifest; + this.lookupByPath = byPath; + this.lookupByPathAndQuery = byQuery; + } + + getManifest() { + this.buildCaches(); + return this.manifestCache || []; + } + + resolve(assetUrl, fallbackTimestamp) { + this.buildCaches(); + let resolved = fallbackTimestamp || 0; + if (!assetUrl) return resolved; + + try { + const u = new URL(assetUrl); + let decodedPath; + try { + decodedPath = decodeURIComponent(u.pathname); + } catch { + decodedPath = u.pathname; + } + const queryKey = `${decodedPath}${u.search || ""}`; + if (this.lookupByPathAndQuery && this.lookupByPathAndQuery.has(queryKey)) { + resolved = this.lookupByPathAndQuery.get(queryKey); + } else if (this.lookupByPath && this.lookupByPath.has(decodedPath)) { + resolved = this.lookupByPath.get(decodedPath); + } + } catch {} + + return resolved; + } +} + +export { SnapshotIndex }; \ No newline at end of file diff --git a/wayback-machine-downloader/lib/utils.js b/wayback-machine-downloader/lib/utils.js new file mode 100644 index 0000000..283b48d --- /dev/null +++ b/wayback-machine-downloader/lib/utils.js @@ -0,0 +1,117 @@ +import path from "path"; +import { domainToUnicode } from "url"; + +function renderProgress(current, total) { + const width = 40; + const ratio = total > 0 ? current / total : 0; + const filled = Math.round(ratio * width); + const bar = "█".repeat(filled) + "-".repeat(width - filled); + process.stdout.write(`\r[${bar}] ${Math.round(ratio * 100)}% (${current}/${total})`); + if (current === total) process.stdout.write("\n"); +} + +function toPosix(p) { + return p.split(path.sep).join("/"); +} + +function relativeLink(fromDir, toFile) { + const rel = path.relative(fromDir, toFile); + return toPosix(rel || path.basename(toFile)); +} + +function ensureLocalTargetForPath(pathname) { + return pathname.endsWith("/") || !path.posix.basename(pathname).includes(".") + ? path.posix.join(pathname, "index.html") + : pathname; +} + +function normalizeBaseUrlInput(input) { + if (!input || typeof input !== "string") { + throw new Error("Base URL must be a non-empty string"); + } + + let raw = input.trim(); + if (!raw) { + throw new Error("Base URL must not be empty"); + } + + if (!/^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(raw)) { + raw = `https://${raw}`; + } + + let parsed; + try { + parsed = new URL(raw); + } catch (e) { + throw new Error(`Invalid URL: ${e.message}`); + } + + if (!/^https?:$/i.test(parsed.protocol)) { + throw new Error("Only http and https protocols are supported"); + } + + const asciiHost = parsed.hostname.toLowerCase(); + if (!asciiHost) { + throw new Error("URL must contain a hostname"); + } + + const bareHost = asciiHost.replace(/^www\./, ""); + const unicodeHost = domainToUnicode(bareHost); + const port = parsed.port ? `:${parsed.port}` : ""; + const basePath = parsed.pathname && parsed.pathname !== "/" ? 
parsed.pathname.replace(/\/+$/, "") : "";
+
+  const canonicalUrl = `https://${bareHost}${port}${basePath}`;
+
+  const hostSet = new Set([`${bareHost}${port}`]);
+  if (asciiHost !== bareHost) {
+    hostSet.add(`${asciiHost}${port}`);
+  } else if (bareHost && bareHost.includes(".")) {
+    hostSet.add(`www.${bareHost}${port}`);
+  }
+
+  const protocols = ["https:", "http:"];
+  const variants = new Set();
+  for (const protocol of protocols) {
+    for (const host of hostSet) {
+      variants.add(`${protocol}//${host}${basePath}`);
+    }
+  }
+
+  return {
+    canonicalUrl,
+    variants: Array.from(variants),
+    bareHost,
+    unicodeHost,
+  };
+}
+
+function isHtmlFile(filePath, contentType, firstBytes) {
+  if (contentType && /text\/html/i.test(String(contentType))) return true;
+  const ext = path.extname(filePath).toLowerCase();
+  if ([".html", ".htm", ".php", ".asp", ".aspx"].includes(ext)) return true;
+  const head = (firstBytes || "").toString("utf8", 0, 512);
+  return /<html[\s>]/i.test(head);
+}
+
+function isCssResource(filePath, resourceUrl, contentType) {
+  const ext = path.extname(filePath || "").toLowerCase();
+  if (ext === ".css") return true;
+  if (contentType && /text\/css/i.test(String(contentType))) return true;
+  if (resourceUrl) {
+    try {
+      const u = new URL(resourceUrl);
+      if (/\.css(?:$|\?)/i.test(u.pathname)) return true;
+    } catch {}
+  }
+  return false;
+}
+
+export {
+  renderProgress,
+  toPosix,
+  relativeLink,
+  ensureLocalTargetForPath,
+  normalizeBaseUrlInput,
+  isHtmlFile,
+  isCssResource,
+};
\ No newline at end of file
diff --git a/wayback-machine-downloader/package.json b/wayback-machine-downloader/package.json
index 42d5ba7..9b7c15a 100644
--- a/wayback-machine-downloader/package.json
+++ b/wayback-machine-downloader/package.json
@@ -1,14 +1,22 @@
 {
-  "name": "wayback-downloader",
+  "name": "wayback-machine-downloader",
   "version": "0.2.1",
   "description": "Interactive Wayback Machine downloader for archiving websites locally.",
   "type": "module",
-  "main": "downloader.js",
+  "main": "./index.js",
+  "exports": {
+    ".": "./index.js",
+    "./downloader": "./lib/downloader.js",
+    "./downloader.js": "./lib/downloader.js",
+    "./cli": "./cli.js",
+    "./package.json": "./package.json"
+  },
   "bin": {
-    "wayback-downloader": "downloader.js"
+    "wayback-machine-downloader": "./cli.js"
   },
   "scripts": {
-    "start": "node downloader.js"
+    "start": "node cli.js",
+    "download": "node cli.js"
   },
   "dependencies": {
     "cheerio": "^1.0.0-rc.12",
@@ -17,19 +25,25 @@
   "engines": {
     "node": ">=18"
   },
+  "files": [
+    "cli.js",
+    "index.js",
+    "lib"
+  ],
   "keywords": [
-    "wayback-machine-downloader",
-    "web-archive-downloder",
-    "archiver"
+    "wayback",
+    "archive",
+    "downloader",
+    "wayback-machine"
   ],
   "author": "birbwatcher",
   "license": "MIT",
   "repository": {
     "type": "git",
-    "url": "https://github.com/birbwatcher/wayback-downloader.git"
+    "url": "https://github.com/birbwatcher/wayback-machine-downloader.git"
   },
   "bugs": {
-    "url": "https://github.com/birbwatcher/wayback-downloader/issues"
+    "url": "https://github.com/birbwatcher/wayback-machine-downloader/issues"
   },
-  "homepage": "https://github.com/birbwatcher/wayback-downloader#readme"
+  "homepage": "https://github.com/birbwatcher/wayback-machine-downloader#readme"
 }
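
For reference, a minimal sketch of driving the refactored downloader programmatically rather than through the interactive `cli.js` prompts, based on the constructor options and exports introduced in this patch. The file name, domain, timestamps, and option values are illustrative placeholders, and the script assumes it is run from the package root so the relative imports resolve.

```js
// programmatic-run.js: illustrative sketch; run from the package root (ESM, Node >= 18).
import { WaybackMachineDownloader } from "./index.js";
import { normalizeBaseUrlInput } from "./lib/utils.js";

// normalizeBaseUrlInput() accepts a bare domain or a full URL and returns the
// canonical https form plus the http/https and www/non-www variants that are
// queried against the CDX API.
const normalized = normalizeBaseUrlInput("www.example.com/blog/");
// normalized.canonicalUrl === "https://example.com/blog"
// normalized.variants contains "https://example.com/blog", "https://www.example.com/blog",
//                              "http://example.com/blog" and "http://www.example.com/blog"

const dl = new WaybackMachineDownloader({
  base_url: normalized.canonicalUrl,
  normalized_base: normalized,      // optional; the constructor re-normalizes base_url if omitted
  exact_url: false,                 // also query the /* wildcard for sub-pages
  directory: null,                  // default output: websites/<domain>/
  from_timestamp: 0,                // or "YYYYMMDDhhmmss" bounds
  to_timestamp: 0,
  threads_count: 3,
  rewrite_mode: "relative",         // "as-is" keeps original links untouched
  canonical_action: "keep",
  download_external_assets: false,
});

await dl.download_files();
```

With the new `bin` entry, the same interactive flow is also available as `npx wayback-machine-downloader` once the package is installed under that name.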
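Likewise, a small sketch of how the new `SnapshotIndex` chooses timestamps for assets: the latest capture recorded for a path wins, an exact path-plus-query match takes precedence, and unknown URLs fall back to the page's own timestamp. The URLs and timestamps below are made up for illustration.

```js
// snapshot-index-demo.js: illustrative values; run from the package root (ESM).
import { SnapshotIndex } from "./lib/snapshot-index.js";

const index = new SnapshotIndex();

// register() keeps the newest timestamp seen for each path and for each path+query.
index.register("https://example.com/css/site.css", "20200101000000");
index.register("https://example.com/css/site.css", "20210101000000"); // newer capture wins
index.register("https://example.com/css/site.css?v=2", "20190101000000");

// Exact path+query match takes precedence:
console.log(index.resolve("https://example.com/css/site.css?v=2", "20220101000000")); // "20190101000000"

// Path-only match returns the latest capture for that path:
console.log(index.resolve("https://example.com/css/site.css", 0)); // "20210101000000"

// Unknown assets fall back to the page timestamp passed in:
console.log(index.resolve("https://example.com/img/logo.png", "20220101000000")); // "20220101000000"
```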