Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Components ready, still integrating #2

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
OPENAI_API_KEY =
LANGCHAIN_PROJECT=
LANGCHAIN_API_KEY=
LANGCHAIN_TRACING_V2=
ELEVENLABS_KEY=
31 changes: 9 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,10 @@
# Shortrocity
# short-videos
Generate short videos using unstructured, LangChain, cv2, ElevenLabs, and OpenAI.
# What is the difference?
1. This app uses LangChain.
2. It uses AI models to extract the narration and image descriptions instead of heuristics.
3. It includes some tests for matching lengths.
4. It uses LangSmith for monitoring.
5. The app accepts a URL for any HTML page instead of requiring the text to be copied manually.
6. [Future] Deploy as a REST API using LangServe.

Shortrocity is a tool for making AI generated short videos ("shorts" or "reels") with a ChatGPT generated script, narrated by ElevenLabs or OpenAI text-to-speech. DALL-E 3 generated background images are also added to the background.

## Quick Start

First, add your API-keys to the environment:

```console
$ export OPENAI_API_KEY=YOUR_OPENAI_API_KEY
$ export ELEVENLABS_API_KEY=YOUR_ELEVENLABS_API_KEY
```

Then, put your source content in a file, for example `source.txt` and run the `main.py`:

```console
$ ./main.py source.txt
Generating script...
Generating narration...
Generating images...
Generating video...
DONE! Here's your video: shorts/1701788183/short.avi
``````
Empty file added app/__init__.py
Empty file.
33 changes: 33 additions & 0 deletions app/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
create a narration audio out of a text
"""
from data_parser import parse_data
from templates import template_images , template_narrator
from utils import create_dict_pairs
from elevenlabs import set_api_key , generate , save
from load_dotenv import load_dotenv
load_dotenv()
import os
set_api_key(os.getenv("ELEVENLABS_KEY"))

def concatenate_text():
    """Build the full narration script as a single string.

    Parses the narration lines and the image descriptions, pairs them with
    ``create_dict_pairs`` and concatenates the ``"text"`` entry of every
    pair, terminating each entry with a blank line.
    """
    narrations = parse_data(template_narrator)
    image_descriptions = parse_data(template_images)
    pairs = create_dict_pairs(narrations, image_descriptions)
    return "".join(pair["text"] + "\n\n" for pair in pairs)

def generate_audio(
    text,
    voice="T7QGPtToiqH4S8VlIkMJ",
    model="eleven_multilingual_v2",
    filename="./data/audio.mp3",
):
    """Synthesize *text* to speech with ElevenLabs and save it to *filename*.

    Args:
        text: The narration to convert to audio.
        voice: ElevenLabs voice identifier passed to ``generate``.
        model: ElevenLabs model name passed to ``generate``.
        filename: Destination path of the audio file. The parent directory
            is created when it does not exist yet.
    """
    # Create the output directory up front so a missing folder is not
    # discovered only after the speech request has already been made.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    audio = generate(text=text, voice=voice, model=model)
    save(audio=audio, filename=filename)


# Script entry: build the narration text and synthesize it to audio.
generate_audio(text=concatenate_text())
55 changes: 55 additions & 0 deletions app/data_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate , SystemMessagePromptTemplate , ChatPromptTemplate
from langchain_community.chat_models import ChatOpenAI
from narration import call
from langchain.schema.messages import SystemMessage
from utils import clean
from templates import template_images , template_narrator

# context = call()
# output_parser = CommaSeparatedListOutputParser()
# format_instructions = output_parser.get_format_instructions()
# print(format_instructions)

# prompt = PromptTemplate(
# template=template,
# input_variables=["context"],
# )

# chat_template = ChatPromptTemplate.from_messages(
# [SystemMessagePromptTemplate.from_template(template=template_images)])

# model = ChatOpenAI(temperature=0 ,model_name="gpt-3.5-turbo-1106" , )

# input = chat_template.format_messages(context=context)

# messages = [
# SystemMessage(_input)
# ]
# print(input)

# chain = chat_template | model
# output = chain.invoke({"context":context}).content
# clean(data=output)
# print(f"model output is\n\n {output} , \nfirst element is \n {output[0]} , \n type is {type(output)}")
# print(output.split("\n")[0])
# text = output_parser.parse(output)
# print(context)
# print("\n\n***********\n\n")
# print(



# Holds the script returned by ``call()`` so that every extraction in this
# process works on the same generated text.
_SCRIPT_CACHE = []


def _generated_script():
    """Return the script from ``call()``, generating it only once per process."""
    if not _SCRIPT_CACHE:
        _SCRIPT_CACHE.append(call())
    return _SCRIPT_CACHE[0]


def parse_data(template: str, context=None) -> list:
    """Extract a list of values from the generated script using *template*.

    Args:
        template: System-prompt template containing a ``{context}``
            placeholder (e.g. ``template_narrator`` or ``template_images``).
        context: The script to extract from. When omitted, the script is
            generated via ``call()`` once and reused for later calls, so
            that narrations and image descriptions are extracted from the
            same script rather than from two independently generated ones.

    Returns:
        The model output split into lines and cleaned by ``clean``.
    """
    if context is None:
        context = _generated_script()

    chat_template = ChatPromptTemplate.from_messages(
        [SystemMessagePromptTemplate.from_template(template=template)]
    )
    model = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")

    chain = chat_template | model
    output = chain.invoke({"context": context}).content
    return clean(data=output)

45 changes: 45 additions & 0 deletions app/generate_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
a text to image module .
responsible for generating images by using DALLE-3 openai model
"""
import base64
from data_parser import parse_data
from templates import template_images , template_narrator
from utils import create_dict_pairs
import os


def get_images_descriptions():
    """Return the image description of every narration/image pair.

    Parses the narration lines and the image descriptions, pairs them with
    ``create_dict_pairs`` and collects the ``"image"`` entry of each pair.
    """
    narrations = parse_data(template_narrator)
    descriptions = parse_data(template_images)
    pairs = create_dict_pairs(narrations, descriptions)
    return [pair["image"] for pair in pairs]

def generate_images(images=None):
    """Generate one DALL-E 3 image per description and save it to disk.

    Args:
        images: Iterable of image prompts. When ``None`` the prompts are
            obtained from ``get_images_descriptions()`` at call time (not at
            function-definition time, which would run the whole extraction
            as a side effect of defining the function).

    Each image is requested as base64 JSON, decoded and written to
    ``./data/images/image_<index>.webp``.
    """
    if images is None:
        images = get_images_descriptions()
    if not images:
        # Nothing to render; avoid creating an API client for no work.
        return

    from openai import OpenAI

    client = OpenAI()
    # Create the output directory once instead of re-checking per image.
    os.makedirs("./data/images/", exist_ok=True)

    for i, img in enumerate(images):
        response = client.images.generate(
            model="dall-e-3",
            prompt=img,
            size="1024x1024",
            quality="standard",
            n=1,
            response_format="b64_json",
        )
        image_b64 = response.data[0].b64_json

        with open(f"./data/images/image_{i}.webp", "wb") as f:
            f.write(base64.b64decode(image_b64))



# Script entry: top-level call that generates the images with the default
# image descriptions.
generate_images()
12 changes: 12 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""
main file to handle all process
it should call all the main functions that handle the following scenario
1.generating data
2.clean the data generated
3.get structured data
4.generate audio
5.generate images
6.create the overall video
"""
from app.data_parser import parse_data
from app.templates import *
31 changes: 31 additions & 0 deletions app/narration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
create a narration text out of a raw text from a given website or an article
ideas:
1. scrape a website text , filter it and generate an add out of it .
2. create a youtube shorts app generator
"""
from load_dotenv import load_dotenv
load_dotenv()
import os
from utils import load_html_text
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate
from templates import template

def call():
    """Generate the advertisement script for the scraped page.

    Loads the page text with ``load_html_text()``, sends it to the chat
    model as the ``context`` variable of the ``template`` system prompt,
    writes the reply to ``./data/response.txt``, prints it and returns it.

    Returns:
        The text content of the model response.
    """
    context = load_html_text()
    chat_prompt = ChatPromptTemplate.from_messages([("system", template)])
    chain = chat_prompt | ChatOpenAI()
    response = chain.invoke({"context": context})
    script = response.content

    # Explicit UTF-8: the model output may contain non-ASCII characters
    # that the platform's default encoding cannot represent.
    with open("./data/response.txt", "w", encoding="utf-8") as f:
        f.write(script)
    print(script)
    return script

# print(call())
89 changes: 89 additions & 0 deletions app/templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from langchain.prompts import PromptTemplate

from langchain.prompts.chat import ChatPromptTemplate

# System prompt for generating the advertisement script. ``{context}`` is the
# only placeholder; narration.call() fills it with the scraped page text.
template = """
Craft a compelling advertisement script as if you were a seasoned content creator expert.
Your task is to create a persuasive and engaging promotional piece tailored to a specific context provided.
Consider the target audience, key messaging, and the overall tone to captivate and drive interest effectively.
Dive into the realm of creativity, utilizing your expertise to seamlessly blend innovation and consumer appeal into a seamless promotional narrative.
Remember to provide details that showcase the uniqueness of the product or service while delivering a memorable and impactful call-to-action.
###############################
your script will be used as a short reel/video along with images that describes the text .
your script will be passed to a text to speech model to convert it into an audio
use the following examples as a refrence
don't output music indicator like [Upbeat music playing] ,[Upbeat music fades out]'
**********************************************
examples :

Example Pair 1:

Image Description 1:
"A vibrant can of Bolt Boost energy drink surrounded by dynamic lightning bolts, symbolizing energy and power."
Ad Text 1:
"Unleash the Power Within! Introducing Bolt Boost – the energy drink that fuels your ambition. Tackle your day with vitality and focus. Time to elevate your energy game!"


Image Description 2:
"An energetic individual conquering challenges with a glowing aura, holding a can of Bolt Boost, surrounded by a vibrant, active environment."
Ad Text 2:
"Revitalize your day with Bolt Boost! Packed with natural ingredients and a burst of flavor, this energy elixir keeps you at your peak. Elevate your performance, embrace the Bolt Boost experience!"

Image Description 3:
"A creative workspace with Bolt Boost cans scattered around, featuring a laptop with artistic tools, showcasing the synergy between the energy drink and creative endeavors."
Ad Text 3:
"Fuel Your Passion! Bolt Boost, the ultimate energy companion for creators. Whether you're a designer, writer, or artist, power up your creativity and break through boundaries. Unleash your potential!"


Image Description 4:
"A visually stunning scene of a creative mind at work, surrounded by Bolt Boost cans and a burst of vibrant colors, highlighting the fusion of creativity and energy."
Ad Text 4:
"Create, Energize, Repeat! Bolt Boost – the choice of innovators. Sip on inspiration and crush creative blocks. Elevate your craft with the energy that matches your ambition."

************************************************
context:{context}
"""

# System prompt asking the model to extract the narrator lines from a
# generated script, one per line. ``{context}`` is the only placeholder.
# NOTE: this is a regular (non-raw) string, so the ``\n`` sequences below are
# real newlines in the prompt and ``\'`` is a plain apostrophe.
template_narrator = """
scrape all the narrator text from the following context
use the examples blow as a refrence
examples :
\n\nNarrator: "Passion. Quality. Commitment. At McDonald\'s, we\'re passionate about our food, always striving to provide you with the best dining experience possible.
"\n\n[Images of fresh ingredients being prepared and cooked]
\n\nNarrator: "From our balanced options in the Happy Meal to our Quarter Pounder burgers made with 100% fresh beef cooked to order, we\'re committed to serving you quality food.
"\n\n[Close-up shots of various menu items]
your output should be like below :

Passion. Quality. Commitment. At McDonald\'s, we\'re passionate about our food, always striving to provide you with the best dining experience possible.\n
From our balanced options in the Happy Meal to our Quarter Pounder burgers made with 100% fresh beef cooked to order, we\'re committed to serving you quality food.\n
context:{context}.
Your response should be single values seperated by a new line \n
append \n to every value you parse
don't forget any narration
the count of narrations extracted should be the same as image descriptions
"""

# System prompt asking the model to extract the bracketed image descriptions
# from a generated script, one per line. ``{context}`` is the only placeholder.
# NOTE: this is a regular (non-raw) string, so the ``\n`` sequences below are
# real newlines in the prompt and ``\'`` is a plain apostrophe.
template_images = """
scrape all the images description from the following context
images description are always enclosed in square brackets []
every Narrator text is followed by an image description . please scrape all the images
use the examples blow as a refrence
examples :
\n\nNarrator: "Passion. Quality. Commitment. At McDonald\'s, we\'re passionate about our food, always striving to provide you with the best dining experience possible.
"\n\n[Images of fresh ingredients being prepared and cooked]
\n\nNarrator: "From our balanced options in the Happy Meal to our Quarter Pounder burgers made with 100% fresh beef cooked to order, we\'re committed to serving you quality food.
"\n\n[Close-up shots of various menu items]

your output should be like below :

fresh ingredients being prepared and cooked\n
Close-up shots of various menu items\n

context:{context}.
Your response should be single values seperated by a new line \n
append \n to every value you parse
don't enclude any image indicator in the output . just extract all the image description.
don't add any text to it
the count of narrations extracted should be the same as image descriptions
don't forget any image.
"""
52 changes: 52 additions & 0 deletions app/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@

def download_html_from_url(url, timeout=30):
    """Download *url* and store its HTML in ``./data/file.html``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        ``True`` when the page was fetched (HTTP 200) and written to disk,
        ``False`` otherwise. On failure a message is printed and the file on
        disk is left untouched.
    """
    import os

    import requests

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return False

    os.makedirs("./data", exist_ok=True)
    # Explicit UTF-8 so pages with non-ASCII text can always be written,
    # regardless of the platform's default encoding.
    with open("./data/file.html", "w", encoding="utf-8") as f:
        f.write(response.text)
    return True


def load_html_text(url: str = "https://www.mcdonalds.com/us/en-us/about-our-food.html") -> str:
    """Download an HTML page and return its extracted text.

    The page is saved to ``./data/file.html`` via ``download_html_from_url``,
    parsed with ``UnstructuredHTMLLoader``, and the text of the first loaded
    document is also written to ``./data/file.txt``.

    Args:
        url: Address of the HTML page to scrape.

    Returns:
        The ``page_content`` string of the first loaded document (not the
        Document object itself).
    """
    from langchain.document_loaders import UnstructuredHTMLLoader

    download_html_from_url(url)
    loader = UnstructuredHTMLLoader(file_path="./data/file.html")
    page_text = loader.load()[0].page_content

    # Explicit UTF-8 so scraped text with non-ASCII characters can always be
    # written, regardless of the platform's default encoding.
    with open("./data/file.txt", "w", encoding="utf-8") as f:
        f.write(page_text)
    return page_text

# load_html_text()

# load_html_text(url="https://www.mcdonalds.com/us/en-us/about-our-food.html")

def clean(data: str) -> list:
    """Split *data* into lines and strip quoting/bracket decoration.

    Every line has all double quotes and square brackets removed and is then
    trimmed of surrounding whitespace. Trimming happens after the removal so
    that padding inside the quotes or brackets (e.g. ``[ text ]``) does not
    survive. Blank lines are kept as empty strings so positions are
    preserved for the caller.

    Args:
        data: Raw multi-line model output.

    Returns:
        One cleaned string per input line.
    """
    # One pass per line instead of a chain of ``replace`` calls.
    decoration = str.maketrans("", "", '"[]')
    return [line.translate(decoration).strip() for line in data.split("\n")]

def create_dict_pairs(text: list, images: list) -> list:
    """Pair narration lines with image descriptions.

    Blank entries are dropped from each list *before* pairing. Filtering
    after zipping would let a blank line in one list shift or discard valid
    entries of the other.

    Args:
        text: Narration strings; empty strings are ignored.
        images: Image description strings; empty strings are ignored.

    Returns:
        A list of ``{"text": ..., "image": ...}`` dicts, one per pair, as
        long as the shorter of the two filtered lists.
    """
    narrations = [entry for entry in text if entry]
    descriptions = [entry for entry in images if entry]
    return [
        {"text": narration, "image": description}
        for narration, description in zip(narrations, descriptions)
    ]

Binary file added data/audio.mp3
Binary file not shown.
Loading