revert captions to yt auto-gen

This commit is contained in:
Omer Sabic 2024-08-09 20:08:39 +02:00
parent ac57317a55
commit bb78997490
7 changed files with 67 additions and 5419 deletions

12
package-lock.json generated
View File

@ -9,7 +9,7 @@
"version": "1.0.0",
"license": "none",
"dependencies": {
"@distube/ytdl-core": "^4.13.5",
"@distube/ytdl-core": "^4.14.4",
"@fastify/cookie": "^9.3.1",
"@fastify/cors": "^8.4.2",
"@fastify/multipart": "^8.2.0",
@ -232,19 +232,19 @@
}
},
"node_modules/@distube/ytdl-core": {
"version": "4.13.5",
"resolved": "https://registry.npmjs.org/@distube/ytdl-core/-/ytdl-core-4.13.5.tgz",
"integrity": "sha512-g+4UJIR/auAJbia7iB0aWvaJDbs22P53NySWa47b1NT4xMTDJYguxHFArPrvRkcJrb/AgKjv/XoSZGghpL0CJA==",
"version": "4.14.4",
"resolved": "https://registry.npmjs.org/@distube/ytdl-core/-/ytdl-core-4.14.4.tgz",
"integrity": "sha512-dHb4GW3qATIjRsS6VIhm3Pop7FdUcDFhsnyQlsPeXW7UhTPuNS0BmraKiTpFbpp0Ky+rxBQjJBfPRFsM+dT1fg==",
"dependencies": {
"http-cookie-agent": "^6.0.5",
"m3u8stream": "^0.8.6",
"miniget": "^4.2.3",
"sax": "^1.4.1",
"tough-cookie": "^4.1.4",
"undici": "^6.19.2"
"undici": "five"
},
"engines": {
"node": ">=16"
"node": ">=14.0"
},
"funding": {
"url": "https://github.com/distubejs/ytdl-core?sponsor"

View File

@ -30,7 +30,7 @@
"xo": "^0.56.0"
},
"dependencies": {
"@distube/ytdl-core": "^4.13.5",
"@distube/ytdl-core": "^4.14.4",
"@fastify/cookie": "^9.3.1",
"@fastify/cors": "^8.4.2",
"@fastify/multipart": "^8.2.0",

View File

@ -54,7 +54,7 @@ export const main = async () => {
server.register(oauth, {
name: 'googleOAuth2',
scope: ['https://www.googleapis.com/auth/youtube.readonly', "https://www.googleapis.com/auth/userinfo.email", "https://www.googleapis.com/auth/userinfo.profile"],
scope: ['https://www.googleapis.com/auth/youtube.readonly', 'https://www.googleapis.com/auth/youtube.force-ssl', "https://www.googleapis.com/auth/userinfo.email", "https://www.googleapis.com/auth/userinfo.profile"],
credentials: {
client: {
id: env.GOOGLE_CLIENT_ID,

View File

@ -4,7 +4,8 @@ import { and, desc, eq, getTableColumns, inArray, notInArray, sql } from "drizzl
import { db } from "../db/index.js";
import { articles, articles as articlesTable, signups as signupsTable, sites, users } from "../db/schemas.js";
import { authMiddleware, authMiddlewareFn } from "../modules/middleware.js";
import { jsonToCsv, createBlogFromCaptions, createArticleSlug, getVideoById, env, getWhisperCaptions, getVideoDetails } from "../utils/index.js";
import { jsonToCsv, createBlogFromCaptions, createArticleSlug, getVideoById, env, getWhisperCaptions, getVideoDetails, getVideoWithCaptions } from "../utils/index.js";
import { escape } from "querystring";
const websubVerifyToken = "FQNI4Suzih";
@ -231,14 +232,13 @@ export const dashboardRoutes = (fastify, _, done) => {
const site = await db.select().from(sites).where(eq(sites.user_id, req.session.user_id));
const video_data = await getVideoDetails(req.body.video_id);
// const video_data = await getVideoDetails(req.body.video_id);
const video_data = await getVideoWithCaptions(req.body.video_id);
reply.send({
success: true
});
await delay(5000);
article = (await db.insert(articlesTable).values({
title: video_data.title,
source_video_id: match[2],
@ -247,7 +247,7 @@ export const dashboardRoutes = (fastify, _, done) => {
is_public: false
}).returning({ id: articlesTable.id }))[0];
video_data.captions = await getWhisperCaptions(req.body.video_id);
// video_data.captions = await getWhisperCaptions(req.body.video_id);
// const video_data = await getVideoById(access_token, req.body.video_id);
await db.update(articlesTable).set({
@ -275,13 +275,22 @@ export const dashboardRoutes = (fastify, _, done) => {
} catch (e) {
console.log(e);
if(e.message == "no_captions") {
reply.status(400).send({
success: false,
message: "This video does not have captions!"
});
}
article ? await db.update(articlesTable).set({
status: "error"
}).where(eq(articlesTable.id, article.id)) : "";
let message = "problem_creating_article";
reply.status(500).send({
success: false,
message: "problem_creating_article"
message: message
})
}
});
@ -455,6 +464,3 @@ export const dashboardRoutes = (fastify, _, done) => {
done();
};
/**
 * Wait for the given amount of time before continuing.
 *
 * @param {number} time - Duration handed directly to `setTimeout` (milliseconds).
 * @returns {Promise<void>} Promise that settles once the timer fires.
 */
function delay(time) {
    return new Promise((done) => {
        setTimeout(done, time);
    });
}

View File

@ -118,7 +118,7 @@ export async function createBlogFromCaptions(captions, {
## OutputFormat:
1. Do not include a title at the top of the article.
2. VERY IMPORTANT! Use markdown to add formatting to the article and make it easier to read.
3. Length: The article should be roughly ${length || 700} words.
3. Length: The article should be roughly ${length || 1200} words.
${faq ? "4. FAQ Section: Add a FAQ section at the end of the article to address common questions." : ""}
## Workflow:
1. First, analyze the provided transcript to understand the key points and factual content.
@ -134,8 +134,6 @@ As an SEO Content Specialist, you must follow the specified rules and communicat
content: `# Information:
## Title
${title}
## Description
${description}
## Transcript
${captions}`
}];

View File

@ -5,6 +5,7 @@ import { google } from 'googleapis';
import ytdl from "@distube/ytdl-core";
import { getWhisperCaptions } from './ai.js';
import { articles as articlesTable } from "../db/schemas.js";
import * as xml2js from 'xml2js';
const service = google.youtube("v3");
@ -127,14 +128,52 @@ export async function getVideoDetails(video_url) {
}
}
/**
 * Extract the plain text from a cue-based caption document.
 *
 * The document is split into cues on blank lines. For every cue the first
 * line is discarded and the remaining lines are joined with single spaces.
 * A cue is skipped when it has fewer than two lines or when its second line
 * contains a `-->` timing arrow (so cues laid out as "index / timing / text"
 * contribute nothing).
 *
 * @param {string} caption_text - Raw caption document. Both LF and CRLF line
 *   endings are accepted.
 * @returns {string} The text of each accepted cue followed by one space, so a
 *   non-empty result always ends with a trailing space. Returns "" when no cue
 *   matches or when the input is null/undefined.
 */
export function parseTextFromCaptions(caption_text) {
    if (caption_text == null) return "";

    // Normalise CRLF first: otherwise "\r\n\r\n" never matches the blank-line
    // separator and the whole document is treated as a single cue.
    const normalized = caption_text.replace(/\r\n/g, "\n");

    let text_content = "";
    for (const entry of normalized.split(/\n\n/)) {
        // Drop the first line of the cue; the rest is the candidate text.
        const [, ...body] = entry.trim().split("\n");
        if (body.length === 0 || body[0].includes("-->")) continue;
        text_content += `${body.join(" ").trim()} `;
    }
    return text_content;
}
/**
 * Parse a caption XML document and return its text as newline-separated lines.
 *
 * The document is expected to have the shape
 * `<transcript><text ...>line</text>...</transcript>`; the character data of
 * every `<text>` element is collected in document order.
 *
 * @param {string} xmlString - Caption XML to parse.
 * @returns {Promise<string>} The `<text>` contents joined with "\n" and trimmed.
 * @throws {Error} Rejects with the xml2js error when the XML is malformed, and
 *   with `Error("no_captions")` when the document contains no `<text>` elements
 *   (the same message used elsewhere in this module for missing captions).
 */
function parseXMLCaptions(xmlString) {
    return new Promise((resolve, reject) => {
        const parser = new xml2js.Parser();
        parser.parseString(xmlString, (err, result) => {
            if (err) {
                reject(err);
                return;
            }

            // An empty or unexpected document yields no transcript.text array;
            // report that explicitly instead of failing on a property access.
            const textElements = result?.transcript?.text;
            if (!Array.isArray(textElements) || textElements.length === 0) {
                reject(new Error("no_captions"));
                return;
            }

            // With default parser options an element that has attributes is an
            // object whose character data is under `_`, while an element
            // without attributes is returned as a bare string.
            const captionText = textElements
                .map(element => (typeof element === "string" ? element : element?._ ?? ""))
                .join("\n");
            resolve(captionText.trim());
        });
    });
}
/**
 * Promise-based sleep helper.
 *
 * @param {number} time - Timeout passed straight through to `setTimeout` (milliseconds).
 * @returns {Promise<void>} Resolves with no value when the timeout elapses.
 */
function delay(time) {
    return new Promise((wake) => {
        setTimeout(wake, time);
    });
}
/**
 * Load a video's title, description and English caption text.
 *
 * Video metadata comes from `ytdl.getInfo`. The caption track whose
 * `languageCode` is exactly "en" is downloaded from its `baseUrl` with
 * `&fmt=srv1` appended, and the response body is handed to `parseXMLCaptions`.
 *
 * @param {string} video_url - Value passed unchanged to `ytdl.getInfo`.
 * @returns {Promise<{title: string, description: string, captions: string}>}
 *   Title and description from `info.videoDetails`, plus the parsed caption text.
 * @throws {Error} `Error("no_captions")` when the video exposes no caption
 *   tracks or none with language code "en"; an error starting with
 *   `caption_fetch_failed` when the caption download returns a non-2xx status.
 *   Errors from `ytdl.getInfo`, `fetch` and `parseXMLCaptions` propagate as-is.
 */
export async function getVideoWithCaptions(video_url) {
    const info = await ytdl.getInfo(video_url);

    // Any link of this chain may be absent (e.g. the video has no captions);
    // that must end in "no_captions", not a TypeError.
    const tracks = info.player_response?.captions
        ?.playerCaptionsTracklistRenderer?.captionTracks;
    if (!tracks || tracks.length < 1) throw new Error("no_captions");

    const track = tracks.find(t => t.languageCode === "en");
    if (!track) throw new Error("no_captions");

    const response = await fetch(`${track.baseUrl}&fmt=srv1`);
    if (!response.ok) {
        // Avoid feeding an HTTP error page to the XML parser.
        throw new Error(`caption_fetch_failed: HTTP ${response.status}`);
    }
    const captionsXml = await response.text();

    return {
        title: info.videoDetails.title,
        description: info.videoDetails.description,
        captions: await parseXMLCaptions(captionsXml)
    };
}

5395
yarn.lock

File diff suppressed because it is too large Load Diff