Revert captions to YouTube auto-generated caption tracks

This commit is contained in:
Omer Sabic 2024-08-09 20:08:39 +02:00
parent ac57317a55
commit bb78997490
7 changed files with 67 additions and 5419 deletions

12
package-lock.json generated
View File

@ -9,7 +9,7 @@
"version": "1.0.0", "version": "1.0.0",
"license": "none", "license": "none",
"dependencies": { "dependencies": {
"@distube/ytdl-core": "^4.13.5", "@distube/ytdl-core": "^4.14.4",
"@fastify/cookie": "^9.3.1", "@fastify/cookie": "^9.3.1",
"@fastify/cors": "^8.4.2", "@fastify/cors": "^8.4.2",
"@fastify/multipart": "^8.2.0", "@fastify/multipart": "^8.2.0",
@ -232,19 +232,19 @@
} }
}, },
"node_modules/@distube/ytdl-core": { "node_modules/@distube/ytdl-core": {
"version": "4.13.5", "version": "4.14.4",
"resolved": "https://registry.npmjs.org/@distube/ytdl-core/-/ytdl-core-4.13.5.tgz", "resolved": "https://registry.npmjs.org/@distube/ytdl-core/-/ytdl-core-4.14.4.tgz",
"integrity": "sha512-g+4UJIR/auAJbia7iB0aWvaJDbs22P53NySWa47b1NT4xMTDJYguxHFArPrvRkcJrb/AgKjv/XoSZGghpL0CJA==", "integrity": "sha512-dHb4GW3qATIjRsS6VIhm3Pop7FdUcDFhsnyQlsPeXW7UhTPuNS0BmraKiTpFbpp0Ky+rxBQjJBfPRFsM+dT1fg==",
"dependencies": { "dependencies": {
"http-cookie-agent": "^6.0.5", "http-cookie-agent": "^6.0.5",
"m3u8stream": "^0.8.6", "m3u8stream": "^0.8.6",
"miniget": "^4.2.3", "miniget": "^4.2.3",
"sax": "^1.4.1", "sax": "^1.4.1",
"tough-cookie": "^4.1.4", "tough-cookie": "^4.1.4",
"undici": "^6.19.2" "undici": "five"
}, },
"engines": { "engines": {
"node": ">=16" "node": ">=14.0"
}, },
"funding": { "funding": {
"url": "https://github.com/distubejs/ytdl-core?sponsor" "url": "https://github.com/distubejs/ytdl-core?sponsor"

View File

@ -30,7 +30,7 @@
"xo": "^0.56.0" "xo": "^0.56.0"
}, },
"dependencies": { "dependencies": {
"@distube/ytdl-core": "^4.13.5", "@distube/ytdl-core": "^4.14.4",
"@fastify/cookie": "^9.3.1", "@fastify/cookie": "^9.3.1",
"@fastify/cors": "^8.4.2", "@fastify/cors": "^8.4.2",
"@fastify/multipart": "^8.2.0", "@fastify/multipart": "^8.2.0",

View File

@ -54,7 +54,7 @@ export const main = async () => {
server.register(oauth, { server.register(oauth, {
name: 'googleOAuth2', name: 'googleOAuth2',
scope: ['https://www.googleapis.com/auth/youtube.readonly', "https://www.googleapis.com/auth/userinfo.email", "https://www.googleapis.com/auth/userinfo.profile"], scope: ['https://www.googleapis.com/auth/youtube.readonly', 'https://www.googleapis.com/auth/youtube.force-ssl', "https://www.googleapis.com/auth/userinfo.email", "https://www.googleapis.com/auth/userinfo.profile"],
credentials: { credentials: {
client: { client: {
id: env.GOOGLE_CLIENT_ID, id: env.GOOGLE_CLIENT_ID,

View File

@ -4,7 +4,8 @@ import { and, desc, eq, getTableColumns, inArray, notInArray, sql } from "drizzl
import { db } from "../db/index.js"; import { db } from "../db/index.js";
import { articles, articles as articlesTable, signups as signupsTable, sites, users } from "../db/schemas.js"; import { articles, articles as articlesTable, signups as signupsTable, sites, users } from "../db/schemas.js";
import { authMiddleware, authMiddlewareFn } from "../modules/middleware.js"; import { authMiddleware, authMiddlewareFn } from "../modules/middleware.js";
import { jsonToCsv, createBlogFromCaptions, createArticleSlug, getVideoById, env, getWhisperCaptions, getVideoDetails } from "../utils/index.js"; import { jsonToCsv, createBlogFromCaptions, createArticleSlug, getVideoById, env, getWhisperCaptions, getVideoDetails, getVideoWithCaptions } from "../utils/index.js";
import { escape } from "querystring";
const websubVerifyToken = "FQNI4Suzih"; const websubVerifyToken = "FQNI4Suzih";
@ -231,14 +232,13 @@ export const dashboardRoutes = (fastify, _, done) => {
const site = await db.select().from(sites).where(eq(sites.user_id, req.session.user_id)); const site = await db.select().from(sites).where(eq(sites.user_id, req.session.user_id));
const video_data = await getVideoDetails(req.body.video_id); // const video_data = await getVideoDetails(req.body.video_id);
const video_data = await getVideoWithCaptions(req.body.video_id);
reply.send({ reply.send({
success: true success: true
}); });
await delay(5000);
article = (await db.insert(articlesTable).values({ article = (await db.insert(articlesTable).values({
title: video_data.title, title: video_data.title,
source_video_id: match[2], source_video_id: match[2],
@ -247,7 +247,7 @@ export const dashboardRoutes = (fastify, _, done) => {
is_public: false is_public: false
}).returning({ id: articlesTable.id }))[0]; }).returning({ id: articlesTable.id }))[0];
video_data.captions = await getWhisperCaptions(req.body.video_id); // video_data.captions = await getWhisperCaptions(req.body.video_id);
// const video_data = await getVideoById(access_token, req.body.video_id); // const video_data = await getVideoById(access_token, req.body.video_id);
await db.update(articlesTable).set({ await db.update(articlesTable).set({
@ -275,13 +275,22 @@ export const dashboardRoutes = (fastify, _, done) => {
} catch (e) { } catch (e) {
console.log(e); console.log(e);
if(e.message == "no_captions") {
reply.status(400).send({
success: false,
message: "This video does not have captions!"
});
}
article ? await db.update(articlesTable).set({ article ? await db.update(articlesTable).set({
status: "error" status: "error"
}).where(eq(articlesTable.id, article.id)) : ""; }).where(eq(articlesTable.id, article.id)) : "";
let message = "problem_creating_article";
reply.status(500).send({ reply.status(500).send({
success: false, success: false,
message: "problem_creating_article" message: message
}) })
} }
}); });
@ -455,6 +464,3 @@ export const dashboardRoutes = (fastify, _, done) => {
done(); done();
}; };
/**
 * Returns a promise that settles once a timer of `time` has elapsed.
 *
 * @param {number} time - Delay handed straight to `setTimeout`.
 * @returns {Promise<void>} Resolves with no value when the timer fires.
 */
function delay(time) {
  return new Promise((wake) => {
    setTimeout(wake, time);
  });
}

View File

@ -118,7 +118,7 @@ export async function createBlogFromCaptions(captions, {
## OutputFormat: ## OutputFormat:
1. Do not include a title at the top of the article. 1. Do not include a title at the top of the article.
2. VERY IMPORTANT! Use markdown to add formatting to the article and make it easier to read. 2. VERY IMPORTANT! Use markdown to add formatting to the article and make it easier to read.
3. Length: The article should be roughly ${length || 700} words. 3. Length: The article should be roughly ${length || 1200} words.
${faq ? "4. FAQ Section: Add a FAQ section at the end of the article to address common questions." : ""} ${faq ? "4. FAQ Section: Add a FAQ section at the end of the article to address common questions." : ""}
## Workflow: ## Workflow:
1. First, analyze the provided transcript to understand the key points and factual content. 1. First, analyze the provided transcript to understand the key points and factual content.
@ -134,8 +134,6 @@ As an SEO Content Specialist, you must follow the specified rules and communicat
content: `# Information: content: `# Information:
## Title ## Title
${title} ${title}
## Description
${description}
## Transcript ## Transcript
${captions}` ${captions}`
}]; }];

View File

@ -5,6 +5,7 @@ import { google } from 'googleapis';
import ytdl from "@distube/ytdl-core"; import ytdl from "@distube/ytdl-core";
import { getWhisperCaptions } from './ai.js'; import { getWhisperCaptions } from './ai.js';
import { articles as articlesTable } from "../db/schemas.js"; import { articles as articlesTable } from "../db/schemas.js";
import * as xml2js from 'xml2js';
const service = google.youtube("v3"); const service = google.youtube("v3");
@ -127,14 +128,52 @@ export async function getVideoDetails(video_url) {
} }
} }
/**
 * Collapses blank-line-separated caption entries into one line of text.
 *
 * Each entry is split into lines. The first line is always discarded and the
 * remaining lines are joined with single spaces. Entries with fewer than two
 * lines, or whose second line contains `-->`, contribute nothing.
 *
 * @param {string} caption_text - Raw caption text with entries separated by a blank line.
 * @returns {string} The concatenated text; every kept entry is followed by one space.
 */
export function parseTextFromCaptions(caption_text) {
  const pieces = [];

  caption_text.split(/\n\n/).forEach((block) => {
    const rows = block.trim().split('\n');
    const tooShort = rows.length < 2;

    // Skip entries that have no body or whose second row is a timing row.
    if (tooShort || rows[1].includes('-->')) {
      return;
    }

    const body = rows.slice(1).join(' ').trim();
    pieces.push(`${body} `);
  });

  return pieces.join('');
}
/**
 * Extracts the caption text from an XML document whose root is `<transcript>`
 * and whose children are `<text>` elements.
 *
 * The text of each `<text>` element becomes one line of the result, in
 * document order, and the joined string is trimmed.
 *
 * @param {string} xmlString - XML caption document to parse.
 * @returns {Promise<string>} Newline-separated caption text; an empty string
 *   when the transcript contains no `<text>` elements.
 * @throws Rejects with the parser's error when `xmlString` cannot be parsed.
 */
function parseXMLCaptions(xmlString) {
  return new Promise((resolve, reject) => {
    const parser = new xml2js.Parser();
    parser.parseString(xmlString, (err, result) => {
      if (err) {
        reject(err);
        return;
      }

      // An empty transcript has no `text` property at all; treat it as zero
      // lines rather than throwing from inside the parser callback.
      const textElements = result?.transcript?.text ?? [];

      const captionText = textElements
        // xml2js yields a bare string for an element without attributes and
        // an object with the text under `_` for an element that has them.
        .map((element) => (typeof element === 'string' ? element : element?._ ?? ''))
        .join('\n');

      resolve(captionText.trim());
    });
  });
}
/**
 * Builds a promise that resolves after a `setTimeout` of `time`.
 *
 * @param {number} time - Timeout value forwarded to `setTimeout`.
 * @returns {Promise<void>} Settles with `undefined` once the timer fires.
 */
function delay(time) {
  const startTimer = (done) => {
    setTimeout(done, time);
  };
  return new Promise(startTimer);
}
/**
 * Returns the title, description and caption text for a video.
 *
 * NOTE(review): this span appears to show two revisions of the function side
 * by side on each line (previous revision first, current revision second).
 *
 * Current revision, as visible here:
 *  - awaits `ytdl.getInfo(video_url)` and reads
 *    `player_response.captions.playerCaptionsTracklistRenderer.captionTracks`;
 *  - throws `Error("no_captions")` when there are no tracks or no track whose
 *    `languageCode` is "en";
 *  - fetches `${track.baseUrl}&fmt=srv1` and passes the response text to
 *    `parseXMLCaptions`.
 *
 * Previous revision: called `getVideoDetails(video_url)` without `await` and
 * took captions from `getWhisperCaptions(video_url)`.
 *
 * NOTE(review): the property chain up to `captionTracks` is not guarded, so a
 * missing `captions` object would raise a TypeError before the "no_captions"
 * check runs. Confirm whether ytdl can return that shape.
 *
 * @param {string} video_url - Passed unchanged to `ytdl.getInfo`.
 * @returns {Promise<{title: *, description: *, captions: *}>}
 */
export async function getVideoWithCaptions(video_url) { export async function getVideoWithCaptions(video_url) {
const info = getVideoDetails(video_url); const info = await ytdl.getInfo(video_url);
const tracks = info
const captions = await getWhisperCaptions(video_url); .player_response.captions
.playerCaptionsTracklistRenderer.captionTracks;
if (!tracks || tracks.length < 1) throw new Error("no_captions");
const track = tracks.find(t => t.languageCode == "en");
if(!track) throw new Error("no_captions");
const captions = await (await fetch(`${track.baseUrl}&fmt=srv1`)).text();
return { return {
title: info.videoDetails.title, title: info.videoDetails.title,
description: info.videoDetails.description, description: info.videoDetails.description,
captions captions: await parseXMLCaptions(captions)
} }
} }

5395
yarn.lock

File diff suppressed because it is too large Load Diff