mirror of
https://github.com/birbwatcher/wayback-machine-downloader.git
synced 2026-01-29 01:40:41 +00:00
init: initial project setup
114 README.md
@@ -1 +1,113 @@
# Wayback Machine Downloader JS

![Wayback Machine Downloader](assets/webarchive-downloader.jpg)

A script written in **Node.js** for downloading websites from the [Web Archive](https://web.archive.org/).

Intended for use by:

- **Webmasters**, to restore their lost or hacked projects
- **OSINT researchers**, for local work with resources that no longer exist

This Web Archive website downloader has an interactive interface and can either keep original links as-is or rewrite them as relative links for local browsing.

---

## Features of the Web Archive Website Downloader

1. Download entire websites or individual pages from the archive, including HTML, images, scripts, styles, and other assets.
2. Rewrite internal links for correct local browsing.
3. Multithreaded downloading.
4. Save results into a chosen folder while keeping the original site structure.
5. Optionally download external assets (e.g., images or scripts from a CDN).

#### Special Features

- The script normalizes parameterized file names such as `main.css?ver=1.2` to `main.css` so they resolve correctly when browsed locally; see the sketch below.
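
A minimal sketch of that normalization, assuming only that the query string is dropped (the downloader gets this behavior by building local paths from `URL.pathname`):

```js
// Hypothetical helper for illustration; not part of downloader.js.
// Dropping the query string turns "main.css?ver=1.2" into "main.css".
function normalizeAssetPath(rawUrl, base = "https://example.com") {
  const u = new URL(rawUrl, base);
  return u.pathname; // the query string is not part of pathname
}

console.log(normalizeAssetPath("/assets/main.css?ver=1.2")); // "/assets/main.css"
```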

---

## Requirements

- Node.js version 18.x or higher

---

## Installation

```bash
git clone https://github.com/birbwatcher/wayback-machine-downloader.git
cd wayback-machine-downloader

# Install dependencies
npm install
```

---

## Run

```bash
node downloader.js
```

After launching, an interactive menu will appear with the following prompts:

- base URL (e.g., https://example.com)
- date range (from/to)
- number of threads
- link rewriting mode (keep as-is or convert to relative)
- whether to remove `rel=canonical` from the downloaded site
- whether to download external assets
- directory for saving the files

The exported class can also be driven without the menu, as sketched below.
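
A minimal sketch of programmatic use, assuming `downloader.js` sits next to your script; the constructor fields are the ones the script actually reads, and the values are illustrative:

```js
// Sketch: non-interactive use of the exported class (values are examples).
import { WaybackMachineDownloader } from "./downloader.js";

const dl = new WaybackMachineDownloader({
  base_url: "https://example.com",
  exact_url: false,              // false: also crawl https://example.com/*
  directory: null,               // null: default websites/<host>/
  from_timestamp: 20200101000000,
  to_timestamp: 20201231235959,
  threads_count: 5,
  rewrite_mode: "relative",      // "relative" or "as-is"
  canonical_action: "keep",      // "keep" or "remove"
  download_external_assets: false,
});

await dl.download_files();
```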

---

## Example

```bash
node downloader.js
```

Dialog example:

```bash
Enter base URL to archive (e.g., https://example.com): https://example.com
From timestamp (YYYYMMDDhhmmss) or leave blank: 20200101000000
To timestamp (YYYYMMDDhhmmss) or leave blank: 20201231235959
Rewrite links? (yes=relative / no=as-is, default no): yes
Canonical: "keep" (default) or "remove": keep
How many download threads? (default 3): 5
Only exact URL (no wildcard /*)? (yes/no, default no): no
Target directory (leave blank for default websites/<host>/):
Download external assets? (yes/no, default no): no
```

After this, the archive download will begin.

---

## Common Issues

#### The script downloads only the homepage

**Answer:** try specifying the base URL with `/*` at the end, e.g. `https://example.com/*`, or try a different time range; the sketch below shows how to check what the archive actually holds.
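
A minimal sketch for querying the Wayback CDX API directly, using the same query parameters the script sends:

```js
// Sketch: list archived captures for a whole site (note the /* wildcard).
const cdx = new URL("https://web.archive.org/cdx/search/cdx");
cdx.searchParams.set("url", "example.com/*");
cdx.searchParams.set("output", "json");
cdx.searchParams.set("fl", "timestamp,original");
cdx.searchParams.set("collapse", "digest");

const rows = await (await fetch(cdx)).json();
// The first row is a header: ["timestamp", "original"]
console.log(rows.slice(0, 3));
```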

---

## (Important) Download responsibly

Please note that downloading third-party websites may violate copyright law.
Use this tool responsibly and make sure you are not breaking the law.

---

## Contributing

Pull requests are welcome!
For major changes, please open an issue first to discuss what you would like to change.

1. Fork the project
2. Create your feature branch (`git checkout -b feature/fooBar`)
3. Commit your changes (`git commit -am 'Add some fooBar'`)
4. Push to the branch (`git push origin feature/fooBar`)
5. Create a new Pull Request
BIN assets/webarchive-downloader.jpg (new file, 23 KiB; binary file not shown)
508 wayback-machine-downloader/downloader.js
@@ -0,0 +1,508 @@
#!/usr/bin/env node
/*
 * Wayback Machine Downloader 0.1 by WhitelightSEO (interactive, Node.js, ESM)
 * Run: node downloader.js
 */

import fs from "fs";
import path from "path";
import { fileURLToPath, pathToFileURL } from "url";
import { mkdir } from "fs/promises";
import pLimit from "p-limit";
import { load } from "cheerio";
import { Readable } from "stream";
import readline from "readline";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// ----------------------------- PROGRESS BAR -----------------------------
function renderProgress(current, total) {
  const width = 40;
  const ratio = total > 0 ? current / total : 0;
  const filled = Math.round(ratio * width);
  const bar = "█".repeat(filled) + "-".repeat(width - filled);
  process.stdout.write(
    `\r[${bar}] ${Math.round(ratio * 100)}% (${current}/${total})`
  );
  if (current === total) process.stdout.write("\n");
}

// ----------------------------- HELPERS -----------------------------
function toPosix(p) {
  return p.split(path.sep).join("/");
}
function relativeLink(fromDir, toFile) {
  const rel = path.relative(fromDir, toFile);
  return toPosix(rel || path.basename(toFile));
}
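// Map a URL pathname to the local file that represents it:
// "/about/" and extension-less paths both resolve to ".../index.html".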
function ensureLocalTargetForPath(pathname) {
  return pathname.endsWith("/") || !path.posix.basename(pathname).includes(".")
    ? path.posix.join(pathname, "index.html")
    : pathname;
}

// ----------------------------- HTML CHECK -----------------------------
function isHtmlFile(filePath, contentType, firstBytes) {
  if (contentType && /text\/html/i.test(String(contentType))) return true;
  const ext = path.extname(filePath).toLowerCase();
  if ([".html", ".htm", ".php", ".asp", ".aspx"].includes(ext)) return true;
  const head = (firstBytes || "").toString("utf8", 0, 512);
  return /<!doctype html/i.test(head) || /<html[\s>]/i.test(head);
}

// ----------------------------- Archive API -----------------------------
async function getRawListFromApi({
  baseUrl,
  pageIndex,
  all,
  fromTimestamp,
  toTimestamp,
}) {
  const cdx = new URL("https://web.archive.org/cdx/search/cdx");
  const params = new URLSearchParams();
  params.set("output", "json");
  params.set("url", baseUrl);
  params.set("fl", "timestamp,original");
  params.set("collapse", "digest"); // skip consecutive captures with identical content
  params.set("gzip", "false");
  if (!all) params.append("filter", "statuscode:200");
  if (fromTimestamp && Number(fromTimestamp) !== 0)
    params.set("from", String(fromTimestamp));
  if (toTimestamp && Number(toTimestamp) !== 0)
    params.set("to", String(toTimestamp));
  if (pageIndex != null) params.set("page", String(pageIndex));
  cdx.search = params.toString();

  try {
    const res = await fetch(cdx.toString(), { method: "GET", redirect: "follow" });
    const text = await res.text();
    const json = JSON.parse(text);
    // Strip the CDX header row (["timestamp","original"]).
    if (
      Array.isArray(json) &&
      Array.isArray(json[0]) &&
      json[0].join(",") === "timestamp,original"
    ) {
      json.shift();
    }
    return json || [];
  } catch (e) {
    console.log(`ERROR getRawListFromApi: ${e}`);
    return [];
  }
}
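
// Example rows returned by getRawListFromApi after the header row is
// stripped (with fl=timestamp,original):
//   [ ["20200101000000", "https://example.com/"],
//     ["20200102030405", "https://example.com/css/main.css"] ]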

// ----------------------------- DOWNLOADER CLASS -----------------------------
class WaybackMachineDownloader {
  constructor(params) {
    this.base_url = params.base_url;
    this.exact_url = !!params.exact_url;
    this.directory = params.directory || null;
    this.from_timestamp = params.from_timestamp
      ? Number(params.from_timestamp)
      : 0;
    this.to_timestamp = params.to_timestamp ? Number(params.to_timestamp) : 0;
    this.threads_count =
      params.threads_count != null ? Number(params.threads_count) : 3;

    this.download_external_assets = params.download_external_assets || false;

    this.rewrite_mode = params.rewrite_mode || "as-is";
    this.rewrite_links = this.rewrite_mode === "relative";
    this.canonical_action = params.canonical_action || "keep";

    this._processed = 0;
  }

  backup_name() {
    try {
      if (this.base_url.includes("//")) {
        const u = new URL(this.base_url);
        return u.host;
      }
    } catch {}
    return this.base_url;
  }
  backup_path() {
    if (this.directory) {
      return this.directory.endsWith(path.sep)
        ? this.directory
        : this.directory + path.sep;
    }
    return path.join("websites", this.backup_name(), path.sep);
  }

  async get_all_snapshots_to_consider() {
    console.log("Getting snapshot pages");
    const httpOpts = {
      all: true,
      fromTimestamp: this.from_timestamp,
      toTimestamp: this.to_timestamp,
    };
    let list = [];

    // Exact URL first, then (unless exact_url is set) paginated host/* queries.
    list = list.concat(
      await getRawListFromApi({ baseUrl: this.base_url, pageIndex: null, ...httpOpts })
    );
    process.stdout.write(".");

    if (!this.exact_url) {
      const wildcard = this.base_url.endsWith("/*")
        ? this.base_url
        : this.base_url.replace(/\/*$/, "") + "/*";
      for (let i = 0; i < 100; i++) {
        const batch = await getRawListFromApi({
          baseUrl: wildcard,
          pageIndex: i,
          ...httpOpts,
        });
        if (!batch || batch.length === 0) break;
        list = list.concat(batch);
        process.stdout.write(".");
      }
    }
    console.log(` found ${list.length} snapshots to consider.\n`);
    return list;
  }

  async get_file_list_by_timestamp() {
    // Keep only the newest capture per pathname.
    const curated = new Map();
    const all = await this.get_all_snapshots_to_consider();
    for (const pair of all) {
      const ts = pair[0];
      const url = pair[1];
      try {
        const u = new URL(url);
        const file_id = u.pathname;
        const prev = curated.get(file_id);
        if (!prev || prev.timestamp <= ts) {
          curated.set(file_id, { file_url: url, timestamp: ts, file_id });
        }
      } catch {}
    }
    const arr = Array.from(curated, ([file_id, v]) => ({ ...v, file_id }));
    arr.sort((a, b) => String(b.timestamp).localeCompare(String(a.timestamp)));
    return arr;
  }

  // Escape characters that Windows forbids in file names. Applied to
  // individual URL-derived segments, never to whole joined paths.
  _windowsSanitize(p) {
    if (process.platform !== "win32") return p;
    return p.replace(/[:*?&=<>\\|]/g, (s) =>
      "%" + s.charCodeAt(0).toString(16)
    );
  }
  async _structure_dir_path(dir_path) {
    try {
      await mkdir(dir_path, { recursive: true });
    } catch (e) {
      if (!e || e.code !== "EEXIST") throw e;
    }
  }

  _determine_paths(file_url, file_id) {
    if (file_url.startsWith("data:") || file_url.startsWith("javascript:"))
      return null;
    if (file_id.length > 200) return null;

    const backup = this.backup_path();
    // Sanitize each segment separately so Windows path separators and
    // drive letters in the backup directory stay intact.
    const parts = file_id
      .split("/")
      .filter(Boolean)
      .map((part) => this._windowsSanitize(part));
    let dir_path, file_path;

    if (file_id === "" || file_id === "/") {
      // Site root maps to <backup>/index.html.
      dir_path = backup;
      file_path = path.join(backup, "index.html");
    } else if (
      file_url.endsWith("/") ||
      parts.length === 0 ||
      !parts[parts.length - 1].includes(".")
    ) {
      dir_path = path.join(backup, ...parts);
      file_path = path.join(dir_path, "index.html");
    } else {
      dir_path = path.join(backup, ...parts.slice(0, -1));
      file_path = path.join(backup, ...parts);
    }

    return { dir_path, file_path };
  }

  async _download_asset(assetUrl, pageTimestamp, file_path, dir_path) {
    try {
      if (fs.existsSync(file_path)) return file_path;

      await this._structure_dir_path(dir_path);
      // The "id_" flag requests the original capture bytes, without the
      // Wayback toolbar or rewritten links.
      const snapshotUrl = `https://web.archive.org/web/${pageTimestamp}id_/${assetUrl}`;
      let res;
      try {
        res = await fetch(snapshotUrl, { method: "GET", redirect: "follow" });
      } catch (e) {
        console.log(`Skipping asset ${assetUrl}, fetch failed: ${e}`);
        return null;
      }
      if (!res.ok || !res.body) {
        console.log(`Skipping asset ${assetUrl}, bad response ${res.status}`);
        return null;
      }

      await new Promise((resolve, reject) => {
        const ws = fs.createWriteStream(file_path);
        Readable.fromWeb(res.body).pipe(ws);
        ws.on("finish", resolve);
        ws.on("error", reject);
      });

      return file_path;
    } catch (e) {
      console.log(`Asset download failed: ${assetUrl} → ${e}`);
      return null;
    }
  }

  async _process_html_assets(htmlPath, pageUrl, pageTimestamp) {
    try {
      const backupRoot = this.backup_path();
      let html = fs.readFileSync(htmlPath, "utf8");
      const $ = load(html);
      const site = new URL(this.base_url);
      const siteHost = site.hostname.replace(/^www\./, "");
      const baseDir = path.dirname(htmlPath);

      const downloadTasks = [];

      // ----------- ASSETS -----------
      $(
        "img[src], script[src], link[href], source[src], video[src], audio[src], iframe[src]"
      ).each((_, el) => {
        const attr = el.tagName === "link" ? "href" : "src";
        const val = $(el).attr(attr);
        if (!val) return;

        try {
          const abs = new URL(val, pageUrl).toString();
          const u = new URL(abs);
          const isInternal = u.hostname.replace(/^www\./, "") === siteHost;

          if (isInternal || this.download_external_assets) {
            const file_id = u.pathname;
            const paths = this._determine_paths(abs, file_id);
            if (!paths) return;
            const { dir_path, file_path } = paths;

            if (this.rewrite_links) {
              const normPath = u.pathname + (u.hash || "");
              const localTarget = ensureLocalTargetForPath(normPath);
              const localAbsPath = path.join(backupRoot, localTarget);
              $(el).attr(attr, relativeLink(baseDir, localAbsPath));
            }

            if (!fs.existsSync(file_path)) {
              downloadTasks.push(
                this._download_asset(abs, pageTimestamp, file_path, dir_path)
              );
            }
          }
        } catch {}
      });

      // ----------- INTERNAL LINKS (pages/forms) -----------
      if (this.rewrite_links) {
        $("a[href], form[action]").each((_, el) => {
          const attr = el.tagName === "a" ? "href" : "action";
          const val = $(el).attr(attr);
          if (!val) return;

          try {
            const abs = new URL(val, pageUrl).toString();
            const u = new URL(abs);
            const isInternal = u.hostname.replace(/^www\./, "") === siteHost;

            if (isInternal) {
              const normPath = u.pathname + (u.hash || "");
              const localTarget = ensureLocalTargetForPath(normPath);
              const localAbsPath = path.join(backupRoot, localTarget);
              $(el).attr(attr, relativeLink(baseDir, localAbsPath));
            }
          } catch {}
        });
      }

      await Promise.all(downloadTasks);

      if (this.canonical_action === "remove") {
        $('link[rel="canonical"]').remove();
      }

      fs.writeFileSync(htmlPath, $.html(), "utf8");
    } catch (e) {
      console.log(`HTML processing error: ${e}`);
    }
  }

  async _download_single(file_remote_info, total) {
    const file_url = String(file_remote_info.file_url);
    const file_id = file_remote_info.file_id;
    const file_timestamp = file_remote_info.timestamp;
    const paths = this._determine_paths(file_url, file_id);
    if (!paths) {
      console.log(`Skipping invalid URL: ${file_url}`);
      this._processed++;
      renderProgress(this._processed, total);
      return;
    }
    const { dir_path, file_path } = paths;

    if (fs.existsSync(file_path)) {
      this._processed++;
      renderProgress(this._processed, total);
      return;
    }

    try {
      await this._structure_dir_path(dir_path);
      const snapshotUrl = `https://web.archive.org/web/${file_timestamp}id_/${file_url}`;
      let res;
      try {
        res = await fetch(snapshotUrl, { method: "GET", redirect: "follow" });
      } catch (e) {
        console.log(`Skipping ${file_url}, fetch failed: ${e}`);
        return;
      }

      if (!res.ok || !res.body) {
        console.log(`Skipping ${file_url}, bad response ${res.status}`);
        return;
      }

      await new Promise((resolve, reject) => {
        const ws = fs.createWriteStream(file_path);
        Readable.fromWeb(res.body).pipe(ws);
        ws.on("finish", resolve);
        ws.on("error", reject);
      });

      const contentType = res.headers.get("content-type");
      const ext = path.extname(file_path).toLowerCase();
      const looksHtml =
        isHtmlFile(file_path, contentType, null) ||
        ext === "" ||
        ext === ".html" ||
        ext === ".htm";
      if (looksHtml) {
        await this._process_html_assets(file_path, file_url, file_timestamp);
      }
    } catch (e) {
      console.log(`Download failed for ${file_url}: ${e}`);
    } finally {
      this._processed++;
      renderProgress(this._processed, total);
    }
  }

  async download_files() {
    const startTime = Date.now();
    console.log(
      `Downloading ${this.base_url} to ${this.backup_path()} from Wayback Machine archives.`
    );
    const list = await this.get_file_list_by_timestamp();
    if (list.length === 0) {
      console.log("No files to download.");
      return;
    }

    const concurrency =
      this.threads_count && this.threads_count > 0 ? this.threads_count : 1;
    const limit = pLimit(concurrency);
    this._processed = 0;
    await Promise.all(
      list.map((info) => limit(() => this._download_single(info, list.length)))
    );
    const endTime = Date.now();
    console.log(
      `\nDownload completed in ${((endTime - startTime) / 1000).toFixed(
        2
      )}s, saved in ${this.backup_path()} (${list.length} files)`
    );
  }
}

// ============================= INTERACTIVE RUN =============================
function ask(rl, question) {
  return new Promise((resolve) => rl.question(question, (answer) => resolve(answer.trim())));
}

async function interactiveMain() {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  let base_url;
  while (true) {
    base_url = await ask(rl, "Enter base URL to archive (e.g., https://example.com): ");
    if (!base_url) continue;
    try {
      new URL(base_url);
      break;
    } catch {
      console.log("Please enter a valid URL.\n");
    }
  }

  const from_timestamp = await ask(rl, "From timestamp (YYYYMMDDhhmmss) or leave blank: ");
  const to_timestamp = await ask(rl, "To timestamp (YYYYMMDDhhmmss) or leave blank: ");

  let rewrite_mode = "as-is";
  const m = await ask(rl, "Rewrite links? (yes=relative / no=as-is, default no): ");
  if (/^y(es)?$/i.test(m)) rewrite_mode = "relative";

  let canonical_action = "keep";
  if (rewrite_mode === "relative") {
    const c = await ask(rl, 'Canonical: "keep" (default) or "remove": ');
    if ((c || "").toLowerCase() === "remove") canonical_action = "remove";
  }

  let threads_count = await ask(rl, "How many download threads? (default 3): ");
  threads_count = parseInt(threads_count || "3", 10);
  if (!Number.isFinite(threads_count) || threads_count <= 0) threads_count = 3;

  const exact_url = /^y(es)?$/i.test(
    await ask(rl, "Only exact URL (no wildcard /*)? (yes/no, default no): ")
  );
  const directory = await ask(
    rl,
    "Target directory (leave blank for default websites/<host>/): "
  );

  const ext = await ask(rl, "Download external assets? (yes/no, default no): ");
  const download_external_assets = /^y(es)?$/i.test(ext);

  rl.close();

  const dl = new WaybackMachineDownloader({
    base_url,
    exact_url,
    directory: directory || null,
    from_timestamp: from_timestamp || 0,
    to_timestamp: to_timestamp || 0,
    threads_count,
    rewrite_mode,
    canonical_action,
    download_external_assets,
  });

  await dl.download_files();
}

// Guard: process.argv[1] is undefined when loaded from a REPL or test runner.
const isDirectRun =
  !!process.argv[1] &&
  (import.meta.url === `file://${process.argv[1]}` ||
    import.meta.url === pathToFileURL(process.argv[1]).href);

if (isDirectRun) {
  interactiveMain().catch((err) => {
    console.error(`FATAL: ${err?.stack || err}`);
    process.exit(1);
  });
}

export { WaybackMachineDownloader };
35 wayback-machine-downloader/package.json
@@ -0,0 +1,35 @@
{
  "name": "wayback-downloader",
  "version": "0.1.0",
  "description": "Interactive Wayback Machine downloader for archiving websites locally.",
  "type": "module",
  "main": "downloader.js",
  "bin": {
    "wayback-downloader": "downloader.js"
  },
  "scripts": {
    "start": "node downloader.js"
  },
  "dependencies": {
    "cheerio": "^1.0.0-rc.12",
    "p-limit": "^4.0.0"
  },
  "engines": {
    "node": ">=18"
  },
  "keywords": [
    "wayback-machine-downloader",
    "web-archive-downloader",
    "archiver"
  ],
  "author": "birbwatcher",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "https://github.com/birbwatcher/wayback-machine-downloader.git"
  },
  "bugs": {
    "url": "https://github.com/birbwatcher/wayback-machine-downloader/issues"
  },
  "homepage": "https://github.com/birbwatcher/wayback-machine-downloader#readme"
}