Using Sarvam Translate to translate my film subtitles from English to Telugu
What I wanted to experiment with in subtitles:
In [3]:
# Import libraries(pysrt is used for subtitle operations)
import pysrt
import numpy as np
In [5]:
# Load the subtitle file into an object called `subs`; the later cells read from
# and modify this object.
subs = pysrt.open('subsdent.srt')
`subs` is a list-like object (a pysrt `SubRipFile`) whose items each hold one dialogue's text together with its start and end timestamps.
In [8]:
# Total number of dialogues (subtitle entries) in my film:
print("Number of dialogues in my short film: ", len(subs))
Number of dialogues in my short film: 356
In [16]:
# To read a particular dialogue, index into subs; 'i' is the position of the dialogue.
i = 10
text = subs[i].text
In [18]:
# Length of the text of the dialogue at index i (len() of its .text attribute)
len(subs[i].text)
Out[18]:
52
In [20]:
# To know the start and end time of the dialogue at index 'i':
starttime = subs[i].start
endtime = subs[i].end
# Each timestamp appears in the format (hours, minutes, seconds, milliseconds).
# To read a single component, use subs[i].start (or subs[i].end) followed by
# .hours, .minutes, .seconds or .milliseconds
In [26]:
# Collect, for every dialogue in the subtitle file, its duration and its text length.
# Durations are differences of whole seconds: only the hours, minutes and seconds
# fields are used, milliseconds are not part of the calculation.
durations = []
lengths = []
for entry in subs:
    begin, finish = entry.start, entry.end
    begin_seconds = begin.hours*3600 + begin.minutes*60 + begin.seconds
    finish_seconds = finish.hours*3600 + finish.minutes*60 + finish.seconds
    durations.append(finish_seconds - begin_seconds)  # seconds on screen
    lengths.append(len(entry.text))  # length of the dialogue text
In [38]:
# Report the greatest dialogue length found in `lengths`, then print every
# dialogue whose text has exactly that length, each followed by a separator line.
longest = np.array(lengths).max()
print("The longest dialogue length: ", longest)
print("This dialogue is:")
print("-----")
for entry in subs:
    if len(entry.text) != longest:
        continue
    print(entry.text)
    print("-----")
The longest dialogue length: 98 This dialogue is: ----- <b> I even bought alcohol just because you wanted to drink,</b> <b> What else do you wanna do?</b> ----- <b>I said NO to triples that day! But you said Nothing will happen and police won't be there..</b> -----
In [40]:
# Defined a function that returns the duration of the dialogue at index 'i'.
def GiveDurationof(i):
    """Return the duration, in whole seconds, of the dialogue at index i of subs.

    Only the hours, minutes and seconds fields of the timestamps are used
    (milliseconds are ignored), the same way the `durations` list is computed.
    """
    start = subs[i].start
    end = subs[i].end
    # Each timestamp is converted with its OWN hours, minutes and seconds.
    # Mixing the start's hours into the end time (and vice versa) gives a wrong
    # result for any dialogue that crosses an hour boundary.
    start_seconds = start.hours*3600 + start.minutes*60 + start.seconds
    end_seconds = end.hours*3600 + end.minutes*60 + end.seconds
    return end_seconds - start_seconds
In [42]:
# Print every dialogue whose duration equals the largest value in `durations`.
longest_duration = np.array(durations).max()
for index in range(len(subs)):
    if GiveDurationof(index) == longest_duration:
        print(subs[index].text)
<b>(singing a cute telugu song from KOT movie)</b>
In [44]:
# Defining a function to find the gap from the end of one dialogue to the beginning of the next one.
# Using 'i' as argument means the gap after the 'i'th dialogue ends until the 'i+1'th dialogue begins.
def FindGap(i):
    """Return the gap, in whole seconds, between dialogue i and dialogue i+1 of subs.

    i must be smaller than len(subs) - 1, because the dialogue at i+1 is read.
    Milliseconds are ignored.
    """
    previous_end = subs[i].end
    next_start = subs[i+1].start
    # Hours are included so that a gap spanning an hour boundary is still correct.
    end_seconds = previous_end.hours*3600 + previous_end.minutes*60 + previous_end.seconds
    start_seconds = next_start.hours*3600 + next_start.minutes*60 + next_start.seconds
    Gap = start_seconds - end_seconds
    return Gap
In [46]:
# To store all the gaps in the subtitle file in a list.
# A file with n dialogues has n - 1 gaps (after dialogues 0 .. n-2), so the loop
# runs over range(len(subs) - 1); stopping one index earlier would leave out the
# gap just before the last dialogue.
Gaps = []
for i in range(len(subs) - 1):
    Gaps.append(FindGap(i))
In [50]:
# Printing the maximum gap between two consecutive dialogues.
# The entries of Gaps come from FindGap, which works in seconds.
print(np.array(Gaps).max())
59
In [74]:
# Report after which dialogue(s) the largest gap recorded in `Gaps` occurs.
largest_gap = np.array(Gaps).max()
message = 'The maximum gap is occuring after the dialogue {} at index {}: '
for i in range(len(subs) - 1):
    if FindGap(i) != largest_gap:
        continue
    print(message.format(subs[i].text, i))
The maximum gap is occuring after the dialogue <b>serious.</b> at index 333:
In [52]:
# To change the dialogue at index 'i', assign a new string to its .text attribute:
subs[i].text = 'NEW DIALOGUE!'
# The dialogue at index i now reads "NEW DIALOGUE!"
In [54]:
# Display the dialogue at index i to confirm the change made above it in this notebook.
subs[i].text
Out[54]:
'NEW DIALOGUE!'
In [249]:
# Pick the dialogues that start after 10:00 and end before 19:00, then shift
# all of them 2 seconds earlier.
window_start = {'minutes': 10, 'seconds': 0}
window_end = {'minutes': 19, 'seconds': 0}
part = subs.slice(starts_after=window_start, ends_before=window_end)
part.shift(seconds=-2)
In [404]:
from pysrt import SubRipFile
In [261]:
# To select a section of dialogues that start after a particular minute and end before another particular minute.
def slice(start, end):
    """Return the dialogues lying between two minute marks as a SubRipFile.

    NOTE: this function's name shadows Python's built-in ``slice``.

    start: a dialogue is kept only if the minute of its start time is greater than this.
    end: a dialogue is kept only if the minute of its end time is smaller than this.

    Minutes are counted from the beginning of the file (hours*60 + minutes), so
    the selection stays correct for subtitles that run past one hour.
    """
    def whole_minutes(stamp):
        # Fold the hours field into the minute count instead of ignoring it.
        return stamp.hours*60 + stamp.minutes

    part = [
        sub for sub in subs
        if whole_minutes(sub.start) > start and whole_minutes(sub.end) < end
    ]
    return SubRipFile(items=part)
# It returns a SubRipFile that contains all the dialogues satisfying the given condition.
In [ ]:
# Install 'sarvamai' package
!pip install sarvamai
In [273]:
# Import SarvamAI object from the package
from sarvamai import SarvamAI
In [279]:
# Create the Sarvam AI client (this is the syntax given in the Sarvam API documentation).
# The key is read from the SARVAM_API_KEY environment variable so that a real key
# never has to be pasted into the notebook. If the variable is not set, the
# "ENTERKEYHERE" placeholder is used and must be replaced with your own API key.
import os

client = SarvamAI(
    api_subscription_key=os.environ.get("SARVAM_API_KEY", "ENTERKEYHERE"),
)
In [ ]:
# This is the cake! The "client.text.translate()" function is called here with 4 arguments:
# the input text, the source and target language codes, and the speaker gender.
# The result is stored in `response` and printed.
# NOTE(review): `input` is an empty string in this cell - put the text to translate there.
response = client.text.translate(
input = "",
source_language_code="auto",
target_language_code="te-IN",
speaker_gender="Male"
)
print(response)
In [ ]:
# Translate every dialogue into Telugu, one API call per dialogue, with the same
# language and speaker settings for all of them.
# The translations are only collected in `translatedtext` (in the same order as
# the dialogues in `subs`); `subs` itself is not modified by this cell.
translation_settings = {
    "source_language_code": "auto",
    "target_language_code": "te-IN",
    "speaker_gender": "Male",
}
translatedtext = []
for entry in subs:
    response = client.text.translate(input=entry.text, **translation_settings)
    translatedtext.append(response.translated_text)
In [ ]:
# After making the necessary changes to `subs`, save your subtitle file! There we go!
# 'Directory\\Filename.srt' is a placeholder - replace it with the real output path.
# Only the contents of `subs` are passed to this call; the `translatedtext` list is a separate object.
subs.save('Directory\\Filename.srt')
References:
In [ ]: