first commit

This commit is contained in:
Omer Sabic 2023-09-01 22:25:54 +02:00
commit 346413dc1a
4 changed files with 1446 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
node_modules/

164
main.js Executable file
View File

@ -0,0 +1,164 @@
#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
const prompt = require('prompt-sync')();
const url = require('url');
const cheerio = require('cheerio');
const { Readable, finished } = require('stream');
/**
 * Drops everything up to and including the first underscore of a string.
 *
 * @param {string} str - Text to trim, e.g. an asset file name.
 * @returns {string} The part after the first "_", or `str` unchanged when it
 *   contains no underscore.
 */
const removeTextBeforeUnderscore = (str) => {
  // indexOf yields -1 when there is no underscore, so `cut` is 0 in that case.
  const cut = str.indexOf('_') + 1;
  return cut === 0 ? str : str.slice(cut);
};
/**
 * Downloads one resource and stores it relative to the current working
 * directory.
 *
 * @param {string} url - URL of the resource; passed to `fetch` as-is.
 * @param {string} path - Destination beginning with "/" (for example
 *   "/images/logo.png"); the file is written to `.${path}`.
 * @returns {Promise<void>} Resolves once the file has been written, or once a
 *   failed download has been reported and skipped.
 */
async function downloadImage(url, path) {
  const response = await fetch(url);
  if (!response.ok) {
    // Saving the body of an HTTP error page under an image name would silently
    // corrupt the export, so report the failure and leave the file out.
    console.warn(`Skipping ${url}: HTTP ${response.status}`);
    return;
  }
  const buffer = await response.arrayBuffer();
  await fs.promises.writeFile(`.${path}`, Buffer.from(buffer));
}
/**
 * Downloads every <img> source and the favicon referenced by a page into
 * ./images and rewrites the markup to point at the local copies.
 *
 * @param {string} body - HTML source of the page.
 * @returns {Promise<string>} The HTML with image `src` values (and the favicon
 *   `href`, when present) rewritten to `/images/...`; `srcset` attributes are
 *   removed from rewritten images.
 */
async function downloadImages(body) {
  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync('images', { recursive: true });

  const $ = cheerio.load(body);
  const imgPromises = [];

  $('img').each((i, el) => {
    const imgUrl = $(el).attr('src');
    if (!imgUrl) {
      // An <img> without src has nothing to download; leave it untouched.
      return;
    }
    const imgName = removeTextBeforeUnderscore(path.basename(imgUrl));
    // Built as a URL path (always forward slashes), not a filesystem path.
    const localPath = `/images/${imgName}`;
    imgPromises.push(downloadImage(imgUrl, localPath));
    $(el).attr('src', localPath);
    // srcset would still point at the remote copies, so drop it.
    $(el).removeAttr('srcset');
  });

  const faviconLink = $('link[rel="icon"], link[rel="shortcut icon"]').first();
  const faviconUrl = faviconLink.attr('href');
  if (faviconUrl) {
    imgPromises.push(downloadImage(faviconUrl, '/images/favicon.ico'));
    // Must go through .attr(): assigning a property on the cheerio wrapper
    // does not change the serialized HTML.
    faviconLink.attr('href', '/images/favicon.ico');
  }

  await Promise.all(imgPromises);
  return $.html();
}
/**
 * Collects the paths of all same-site pages linked from an HTML document.
 *
 * Every `href` is resolved against `url`, and only links whose origin matches
 * the site's origin are kept. Results are URL pathnames (always starting with
 * "/"), so `/#section`, `/` and an absolute link to the home page all collapse
 * into the single entry "/".
 *
 * @param {string} body - HTML source to scan for <a> elements.
 * @param {string} url - Absolute URL of the site the HTML was fetched from.
 * @returns {Promise<Set<string>>} Unique pathnames of same-origin links.
 * @throws {TypeError} If `url` itself is not a valid absolute URL.
 */
async function findAllPages(body, url) {
  const $ = cheerio.load(body);
  const siteOrigin = new URL(url).origin;
  const results = new Set();

  $('a[href]').each((i, el) => {
    let target;
    try {
      target = new URL($(el).attr('href'), url);
    } catch {
      // Malformed hrefs cannot be crawled; skipping them is the expected path.
      return;
    }
    // Comparing origins rejects protocol-relative links to other hosts and
    // look-alike hosts that merely share a string prefix with the site URL.
    if (target.origin === siteOrigin) {
      results.add(target.pathname);
    }
  });

  return results;
}
/**
 * Turns off the branding flag in a script by rewriting the first
 * `shouldBrand = true;` assignment to `shouldBrand = false;`.
 *
 * @param {string} js - JavaScript source text.
 * @returns {string} The source with the first matching assignment flipped, or
 *   the input unchanged when the assignment is not present.
 */
function removeBranding(js) {
  const enabled = 'shouldBrand = true;';
  const disabled = 'shouldBrand = false;';
  const at = js.indexOf(enabled);
  if (at === -1) {
    return js;
  }
  return `${js.slice(0, at)}${disabled}${js.slice(at + enabled.length)}`;
}
/**
 * Downloads the stylesheets and scripts a page loads from
 * `https://uploads-ssl.webflow.com`, plus the site's sitemap.xml and
 * robots.txt, and rewrites the page to reference the local copies.
 *
 * Stylesheets are saved as ./css/style.css and scripts as ./js/script.js
 * (scripts are passed through `removeBranding` first). The returned promise
 * resolves only after every file has been completely written.
 *
 * @param {string} htmlContent - HTML source of the page.
 * @param {string} websiteUrl - Base URL of the site, without a trailing slash;
 *   `/sitemap.xml` and `/robots.txt` are appended to it.
 * @returns {Promise<string>} The HTML with matching stylesheet/script URLs
 *   replaced by `/css/style.css` and `/js/script.js`.
 */
async function downloadAssets(htmlContent, websiteUrl) {
  const assetHost = 'https://uploads-ssl.webflow.com';
  const $ = cheerio.load(htmlContent);

  // Make sure the output directories exist; a no-op when they already do.
  fs.mkdirSync('css', { recursive: true });
  fs.mkdirSync('js', { recursive: true });

  // Fetches `assetUrl` as text, optionally transforms it, and writes it to
  // `filePath`. Awaiting writeFile guarantees the data is on disk before the
  // caller continues, and write failures surface as promise rejections.
  const saveText = async (assetUrl, filePath, transform = (text) => text) => {
    const response = await fetch(assetUrl);
    const text = await response.text();
    await fs.promises.writeFile(filePath, transform(text));
  };

  const assetPromises = [];

  // NOTE(review): every matching stylesheet is written to the same file, so
  // with several matches the last write wins — confirm only one is expected.
  $('link[rel="stylesheet"][href]').each((i, el) => {
    const cssUrl = $(el).attr('href');
    if (!cssUrl.startsWith(assetHost)) {
      return;
    }
    const cssName = 'style.css';
    assetPromises.push(
      saveText(cssUrl, path.join('./css', cssName)).then(() => {
        // Only point at the local copy once it actually exists.
        $(el).attr('href', `/css/${cssName}`);
      })
    );
  });

  // NOTE(review): same single-file caveat as for stylesheets above.
  $('script[src]').each((i, el) => {
    const jsUrl = $(el).attr('src');
    if (!jsUrl.startsWith(assetHost)) {
      return;
    }
    const jsName = 'script.js';
    assetPromises.push(
      saveText(jsUrl, path.join('./js', jsName), removeBranding).then(() => {
        $(el).attr('src', `/js/${jsName}`);
      })
    );
  });

  assetPromises.push(saveText(`${websiteUrl}/sitemap.xml`, './sitemap.xml'));
  assetPromises.push(saveText(`${websiteUrl}/robots.txt`, './robots.txt'));

  await Promise.all(assetPromises);
  return $.html();
}
/**
 * Interactive entry point: asks for a website URL, discovers the pages linked
 * from it, and writes a local copy of each page (with its images and assets)
 * into the current working directory.
 *
 * Pages are processed one at a time because every page writes the same shared
 * output files (css/style.css, js/script.js, sitemap.xml, robots.txt). A page
 * that fails is reported and skipped so the remaining pages are still saved.
 *
 * @returns {Promise<void>} Resolves when every page has been attempted.
 * @throws {Error} If no URL is entered or the start page cannot be fetched.
 */
async function main() {
  const siteUrl = (prompt("Website URL: ") ?? '').trim();
  if (!siteUrl) {
    throw new Error('No website URL provided.');
  }

  const siteResponse = await fetch(siteUrl);
  const siteBody = await siteResponse.text();
  const pages = await findAllPages(siteBody, siteUrl);
  pages.add('/');
  // TODO: let the user confirm the discovered pages and add missing ones manually.

  // `recursive: true` keeps a second run from failing on existing directories.
  for (const dir of ['js', 'css', 'images']) {
    fs.mkdirSync(dir, { recursive: true });
  }

  for (const page of pages) {
    try {
      // Resolving against the site URL copes with both "/path" entries and
      // absolute links, and with a trailing slash on the entered URL.
      const pageUrl = new URL(page, siteUrl);
      const response = await fetch(pageUrl.href);
      let newHTML = await downloadImages(await response.text());
      newHTML = await downloadAssets(newHTML, siteUrl);

      // The last non-empty path segment names the file; the root (or any path
      // that is only slashes) becomes index.html.
      const slug = pageUrl.pathname.replace(/\/+$/, '').split('/').pop();
      await fs.promises.writeFile(`./${slug || 'index'}.html`, newHTML);
    } catch (error) {
      console.error(`Failed to export ${page}:`, error);
    }
  }
}

main().catch((error) => {
  console.error(error);
  // Signal failure to the calling shell without cutting off pending output.
  process.exitCode = 1;
});

1260
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

21
package.json Normal file
View File

@ -0,0 +1,21 @@
{
"name": "de-webflower",
"version": "1.0.0",
"description": "",
"main": "main.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"bin": {
"deflow": "./main.js"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"node-fetch": "^3.3.0",
"prompt-sync": "^4.2.0",
"request": "^2.88.2"
}
}