diff --git a/docs/changelog.rst b/docs/changelog.rst
index 74e79dd5..cc8c053d 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,8 +1,20 @@
Changelog
---------
+2.2.13
+^^^^^^
+- Mid-row codes add a space only if there isn't one before.
+- Mid-row codes add spaces only if they affect the text in the same row (not adding if it follows break or PACS).
+- Remove spaces at the end of lines.
+- Close italics on receiving another style setting command.
+- Throw a CaptionReadNoCaptions error when an empty input file is provided.
+- Ignore repositioning commands which are not followed by any text before breaks.
+- Mid-row codes will not add the space if it is in front of punctuation.
+- Fix a bug with background codes when the InstructionNodeCreator collection is empty.
+- Fix a bug with the WebVTT writer adding double line breaks.
+
2.2.12
^^^^^^
-- Pinned nltk version to 3.8.0
+- Pinned nltk to 3.8.0
2.2.11
^^^^^^
diff --git a/docs/conf.py b/docs/conf.py
index 9b455abf..36146434 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -53,9 +53,9 @@
# built documents.
#
# The short X.Y version.
-version = '2.2.11'
+version = '2.2.12.dev2'
# The full version, including alpha/beta/rc tags.
-release = '2.2.11'
+release = '2.2.12.dev2'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/pycaption/__init__.py b/pycaption/__init__.py
index be44c55c..adc9b501 100644
--- a/pycaption/__init__.py
+++ b/pycaption/__init__.py
@@ -34,6 +34,9 @@ def detect_format(caps):
:returns: the reader class for the detected format.
"""
+ if not len(caps):
+ raise CaptionReadNoCaptions("Empty caption file")
+
for reader in SUPPORTED_READERS:
if reader().detect(caps):
return reader
diff --git a/pycaption/base.py b/pycaption/base.py
index 8e3da975..1fa77895 100644
--- a/pycaption/base.py
+++ b/pycaption/base.py
@@ -1,18 +1,19 @@
import os
+from collections import defaultdict
from datetime import timedelta
from numbers import Number
from .exceptions import CaptionReadError, CaptionReadTimingError
# `und` a special identifier for an undetermined language according to ISO 639-2
-DEFAULT_LANGUAGE_CODE = os.getenv('PYCAPTION_DEFAULT_LANG', 'und')
+DEFAULT_LANGUAGE_CODE = os.getenv("PYCAPTION_DEFAULT_LANG", "und")
def force_byte_string(content):
try:
- return content.encode('UTF-8')
+ return content.encode("UTF-8")
except UnicodeEncodeError:
- raise RuntimeError('Invalid content encoding')
+ raise RuntimeError("Invalid content encoding")
except UnicodeDecodeError:
return content
@@ -50,8 +51,9 @@ def read(self, content):
class BaseWriter:
- def __init__(self, relativize=True, video_width=None, video_height=None,
- fit_to_screen=True):
+ def __init__(
+ self, relativize=True, video_width=None, video_height=None, fit_to_screen=True
+ ):
"""
Initialize writer with the given parameters.
@@ -81,7 +83,8 @@ def _relativize_and_fit_to_screen(self, layout_info):
if self.relativize:
# Transform absolute values (e.g. px) into percentages
layout_info = layout_info.as_percentage_of(
- self.video_width, self.video_height)
+ self.video_width, self.video_height
+ )
if self.fit_to_screen:
# Make sure origin + extent <= 100%
layout_info = layout_info.fit_to_screen()
@@ -115,7 +118,7 @@ class CaptionNode:
BREAK = 3
def __init__(
- self, type_, layout_info=None, content=None, start=None, position=None
+ self, type_, layout_info=None, content=None, start=None, position=None
):
"""
:type type_: int
@@ -135,30 +138,34 @@ def __repr__(self):
if t == CaptionNode.TEXT:
return repr(self.content)
elif t == CaptionNode.BREAK:
- return repr('BREAK')
+ return repr("BREAK")
elif t == CaptionNode.STYLE:
- return repr(f'STYLE: {self.start} {self.content}')
+ return repr(f"STYLE: {self.start} {self.content}")
else:
- raise RuntimeError(f'Unknown node type: {t}')
+ raise RuntimeError(f"Unknown node type: {t}")
@staticmethod
def create_text(text, layout_info=None, position=None):
return CaptionNode(
- type_=CaptionNode.TEXT, layout_info=layout_info,
- position=position, content=text
+ type_=CaptionNode.TEXT,
+ layout_info=layout_info,
+ position=position,
+ content=text,
)
@staticmethod
def create_style(start, content, layout_info=None):
return CaptionNode(
- type_=CaptionNode.STYLE, layout_info=layout_info, content=content,
- start=start)
+ type_=CaptionNode.STYLE,
+ layout_info=layout_info,
+ content=content,
+ start=start,
+ )
@staticmethod
def create_break(layout_info=None, content=None):
return CaptionNode(
- type_=CaptionNode.BREAK, layout_info=layout_info,
- content=content
+ type_=CaptionNode.BREAK, layout_info=layout_info, content=content
)
@@ -184,11 +191,13 @@ def __init__(self, start, end, nodes, style={}, layout_info=None):
:type layout_info: Layout
"""
if not isinstance(start, Number):
- raise CaptionReadTimingError("Captions must be initialized with a"
- " valid start time")
+ raise CaptionReadTimingError(
+ "Captions must be initialized with a" " valid start time"
+ )
if not isinstance(end, Number):
- raise CaptionReadTimingError("Captions must be initialized with a"
- " valid end time")
+ raise CaptionReadTimingError(
+ "Captions must be initialized with a" " valid end time"
+ )
if not nodes:
raise CaptionReadError("Node list cannot be empty")
self.start = start
@@ -216,9 +225,7 @@ def format_end(self, msec_separator=None):
return self._format_timestamp(self.end, msec_separator)
def __repr__(self):
- return repr(
- f'{self.format_start()} --> {self.format_end()}\n{self.get_text()}'
- )
+ return repr(f"{self.format_start()} --> {self.format_end()}\n{self.get_text()}")
def get_text_nodes(self):
"""
@@ -229,22 +236,24 @@ def get_text_for_node(node):
if node.type_ == CaptionNode.TEXT:
return node.content
if node.type_ == CaptionNode.BREAK:
- return '\n'
- return ''
+ return "\n"
+ return ""
return [get_text_for_node(node) for node in self.nodes]
def get_text(self):
text_nodes = self.get_text_nodes()
- return ''.join(text_nodes).strip()
+ return "".join(text_nodes).strip()
def _format_timestamp(self, microseconds, msec_separator=None):
duration = timedelta(microseconds=microseconds)
hours, rem = divmod(duration.seconds, 3600)
minutes, seconds = divmod(rem, 60)
milliseconds = f"{duration.microseconds // 1000:03d}"
- timestamp = (f"{hours:02d}:{minutes:02d}:{seconds:02d}"
- f"{msec_separator or '.'}{milliseconds:.3s}")
+ timestamp = (
+ f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+ f"{msec_separator or '.'}{milliseconds:.3s}"
+ )
return timestamp
@@ -261,8 +270,7 @@ def __init__(self, iterable=None, layout_info=None):
super().__init__(*args)
def __getslice__(self, i, j):
- return CaptionList(
- list.__getslice__(self, i, j), layout_info=self.layout_info)
+ return CaptionList(list.__getslice__(self, i, j), layout_info=self.layout_info)
def __getitem__(self, y):
item = list.__getitem__(self, y)
@@ -272,20 +280,19 @@ def __getitem__(self, y):
def __add__(self, other):
add_is_safe = (
- not hasattr(other, 'layout_info')
+ not hasattr(other, "layout_info")
or not other.layout_info
or self.layout_info == other.layout_info
)
if add_is_safe:
- return CaptionList(
- list.__add__(self, other), layout_info=self.layout_info)
+ return CaptionList(list.__add__(self, other), layout_info=self.layout_info)
else:
raise ValueError(
- "Cannot add CaptionList objects with different layout_info")
+ "Cannot add CaptionList objects with different layout_info"
+ )
def __mul__(self, other):
- return CaptionList(
- list.__mul__(self, other), layout_info=self.layout_info)
+ return CaptionList(list.__mul__(self, other), layout_info=self.layout_info)
__rmul__ = __mul__
@@ -341,9 +348,7 @@ def set_styles(self, styles):
self._styles = styles
def is_empty(self):
- return all(
- [len(captions) == 0 for captions in list(self._captions.values())]
- )
+ return all([len(captions) == 0 for captions in list(self._captions.values())])
def set_layout_info(self, lang, layout_info):
self._captions[lang].layout_info = layout_info
@@ -412,6 +417,5 @@ def merge(captions):
new_nodes.append(CaptionNode.create_break())
for node in caption.nodes:
new_nodes.append(node)
- caption = Caption(
- captions[0].start, captions[0].end, new_nodes, captions[0].style)
+ caption = Caption(captions[0].start, captions[0].end, new_nodes, captions[0].style)
return caption
diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py
index ef74b406..398745ed 100644
--- a/pycaption/scc/__init__.py
+++ b/pycaption/scc/__init__.py
@@ -81,24 +81,38 @@
import math
import re
import textwrap
-from collections import deque, defaultdict
+from collections import defaultdict, deque
from copy import deepcopy
-from pycaption.base import (
- BaseReader, BaseWriter, CaptionSet
+from pycaption.base import BaseReader, BaseWriter, CaptionNode, CaptionSet
+from pycaption.exceptions import (
+ CaptionLineLengthError,
+ CaptionReadNoCaptions,
+ CaptionReadTimingError,
+ InvalidInputError,
)
-from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError, \
- CaptionReadTimingError, CaptionLineLengthError
+
from .constants import (
- HEADER, COMMANDS, SPECIAL_CHARS, EXTENDED_CHARS, CHARACTERS,
- MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE,
- SPECIAL_OR_EXTENDED_CHAR_TO_CODE, PAC_BYTES_TO_POSITIONING_MAP,
- PAC_HIGH_BYTE_BY_ROW, PAC_LOW_BYTE_BY_ROW_RESTRICTED,
- PAC_TAB_OFFSET_COMMANDS, CUE_STARTING_COMMAND
+ CHARACTER_TO_CODE,
+ CHARACTERS,
+ COMMANDS,
+ CUE_STARTING_COMMAND,
+ EXTENDED_CHARS,
+ HEADER,
+ MICROSECONDS_PER_CODEWORD,
+ PAC_BYTES_TO_POSITIONING_MAP,
+ PAC_HIGH_BYTE_BY_ROW,
+ PAC_LOW_BYTE_BY_ROW_RESTRICTED,
+ PAC_TAB_OFFSET_COMMANDS,
+ SPECIAL_CHARS,
+ SPECIAL_OR_EXTENDED_CHAR_TO_CODE,
)
-from .specialized_collections import ( # noqa: F401
- TimingCorrectingCaptionList, NotifyingDict, CaptionCreator,
- InstructionNodeCreator, PopOnCue,
+from .specialized_collections import CaptionCreator # noqa: F401
+from .specialized_collections import (
+ InstructionNodeCreator,
+ NotifyingDict,
+ PopOnCue,
+ TimingCorrectingCaptionList,
)
from .state_machines import DefaultProvidingPositionTracker
@@ -112,8 +126,8 @@ class NodeCreatorFactory:
this information must be erased after the reader's .read() operation
completes.
"""
- def __init__(self, position_tracker,
- node_creator=InstructionNodeCreator):
+
+ def __init__(self, position_tracker, node_creator=InstructionNodeCreator):
self.position_tracker = position_tracker
self.node_creator = node_creator
@@ -131,8 +145,7 @@ def from_list(self, roll_rows):
:return: a node_creator instance
"""
return self.node_creator.from_list(
- roll_rows,
- position_tracker=self.position_tracker
+ roll_rows, position_tracker=self.position_tracker
)
@@ -155,6 +168,7 @@ class SCCReader(BaseReader):
This can be then later used for converting into any other supported formats
"""
+
def __init__(self, *args, **kw):
self.caption_stash = CaptionCreator()
self.time_translator = _SccTimeTranslator()
@@ -163,18 +177,18 @@ def __init__(self, *args, **kw):
DefaultProvidingPositionTracker()
)
- self.last_command = ''
+ self.last_command = ""
self.double_starter = False
self.buffer_dict = NotifyingDict()
- self.buffer_dict['pop'] = self.node_creator_factory.new_creator()
- self.buffer_dict['paint'] = self.node_creator_factory.new_creator()
- self.buffer_dict['roll'] = self.node_creator_factory.new_creator()
+ self.buffer_dict["pop"] = self.node_creator_factory.new_creator()
+ self.buffer_dict["paint"] = self.node_creator_factory.new_creator()
+ self.buffer_dict["roll"] = self.node_creator_factory.new_creator()
# Call this method when the active key changes
self.buffer_dict.add_change_observer(self._flush_implicit_buffers)
- self.buffer_dict.set_active('pop')
+ self.buffer_dict.set_active("pop")
self.pop_ons_queue = deque()
@@ -197,7 +211,7 @@ def detect(self, content):
else:
return False
- def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
+ def read(self, content, lang="en-US", simulate_roll_up=False, offset=0):
"""Converts the unicode string into a CaptionSet
:type content: str
@@ -217,14 +231,13 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
:rtype: CaptionSet
"""
if not isinstance(content, str):
- raise InvalidInputError('The content is not a unicode string.')
+ raise InvalidInputError("The content is not a unicode string.")
self.simulate_roll_up = simulate_roll_up
self.time_translator.offset = offset * 1000000
# split lines
lines = content.splitlines()
-
# loop through each line except the first
for line in lines[1:]:
self._translate_line(line)
@@ -238,7 +251,9 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
for caption in self.caption_stash._collection:
caption_start = caption.to_real_caption().format_start()
caption_text = "".join(caption.to_real_caption().get_text_nodes())
- text_too_long = [line for line in caption_text.split("\n") if len(line) > 32]
+ text_too_long = [
+ line for line in caption_text.split("\n") if len(line) > 32
+ ]
if caption_start in lines_too_long:
lines_too_long[caption_start] = text_too_long
else:
@@ -264,9 +279,10 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
# EOC marker in the SCC file)
if 0 < cap.end - cap.start < 50000:
raise CaptionReadTimingError(
- f'Unsupported cue duration around {cap.format_start()} '
+ f"Unsupported cue duration around {cap.format_start()} "
f'for line beginning with "{cap.get_text()}". Duration '
- f'must be at least 0.05 seconds.')
+ f"must be at least 0.05 seconds."
+ )
if captions.is_empty():
raise CaptionReadNoCaptions("empty caption file")
@@ -286,22 +302,22 @@ def _flush_implicit_buffers(self, old_key=None, *args):
If they're on the last row however, or if the caption type is changing,
we make sure to convert the buffers to text, so we don't lose any info.
"""
- if old_key == 'pop':
+ if old_key == "pop":
if self.pop_ons_queue:
self._pop_on()
- elif old_key == 'roll':
+ elif old_key == "roll":
if not self.buffer.is_empty():
self._roll_up()
- elif old_key == 'paint':
+ elif old_key == "paint":
if not self.buffer.is_empty():
self.caption_stash.create_and_store(self.buffer, self.time)
self.buffer = self.node_creator_factory.new_creator()
def _translate_line(self, line):
# ignore blank lines
- if line.strip() == '':
+ if line.strip() == "":
return
# split line in timestamp and words
@@ -309,20 +325,15 @@ def _translate_line(self, line):
parts = r.findall(line.lower())
self.time_translator.start_at(parts[0][0])
- word_list = parts[0][2].split(' ')
+ word_list = parts[0][2].split(" ")
for idx, word in enumerate(word_list):
word = word.strip()
- previous_is_pac_or_tab = len(word_list) > 1 and (
- _is_pac_command(word_list[idx - 1]) or word_list[idx - 1] in PAC_TAB_OFFSET_COMMANDS
- )
if len(word) == 4:
- self._translate_word(
- word=word,
- previous_is_pac_or_tab=previous_is_pac_or_tab,
- )
+ next_command = word_list[idx + 1] if idx + 1 < len(word_list) else None
+ self._translate_word(word=word, next_command=next_command)
- def _translate_word(self, word, previous_is_pac_or_tab):
+ def _translate_word(self, word, next_command=None):
if self._handle_double_command(word):
# count frames for timing
self.time_translator.increment_frames()
@@ -331,7 +342,7 @@ def _translate_word(self, word, previous_is_pac_or_tab):
# TODO - check that all the positioning commands are here, or use
# some other strategy to determine if the word is a command.
if word in COMMANDS or _is_pac_command(word):
- self._translate_command(word=word, previous_is_pac_or_tab=previous_is_pac_or_tab)
+ self._translate_command(word=word, next_command=next_command)
# second, check if word is a special character
elif word in SPECIAL_CHARS:
@@ -358,7 +369,12 @@ def _handle_double_command(self, word):
doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word)
if self.double_starter:
- doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" or word in SPECIAL_CHARS
+ doubled_types = (
+ doubled_types
+ or word in EXTENDED_CHARS
+ or word == "94a1"
+ or word in SPECIAL_CHARS
+ )
if word in CUE_STARTING_COMMAND and word != self.last_command:
self.double_starter = False
@@ -366,12 +382,12 @@ def _handle_double_command(self, word):
if doubled_types and word == self.last_command:
if word in CUE_STARTING_COMMAND:
self.double_starter = True
- self.last_command = ''
+ self.last_command = ""
return True
# Fix for the
# repetition
elif _is_pac_command(word) and word in self.last_command:
- self.last_command = ''
+ self.last_command = ""
return True
elif word in PAC_TAB_OFFSET_COMMANDS:
if _is_pac_command(self.last_command):
@@ -398,40 +414,37 @@ def _translate_extended_char(self, word):
# add to buffer
self.buffer.add_chars(EXTENDED_CHARS[word])
- def _translate_command(self, word, previous_is_pac_or_tab):
+ def _translate_command(self, word, next_command=None):
# if command is pop_up
- if word == '9420':
- self.buffer_dict.set_active('pop')
+ if word == "9420":
+ self.buffer_dict.set_active("pop")
# command is paint_on [Resume Direct Captioning]
- elif word == '9429':
- self.buffer_dict.set_active('paint')
+ elif word == "9429":
+ self.buffer_dict.set_active("paint")
self.roll_rows_expected = 1
if not self.buffer.is_empty():
- self.caption_stash.create_and_store(
- self.buffer, self.time
- )
+ self.caption_stash.create_and_store(self.buffer, self.time)
self.buffer = self.node_creator_factory.new_creator()
self.time = self.time_translator.get_time()
# if command is roll_up 2, 3 or 4 rows
- elif word in ('9425', '9426', '94a7'):
- self.buffer_dict.set_active('roll')
+ elif word in ("9425", "9426", "94a7"):
+ self.buffer_dict.set_active("roll")
# count how many lines are expected
- if word == '9425':
+ if word == "9425":
self.roll_rows_expected = 2
- elif word == '9426':
+ elif word == "9426":
self.roll_rows_expected = 3
- elif word == '94a7':
+ elif word == "94a7":
self.roll_rows_expected = 4
# if content is in the queue, turn it into a caption
if not self.buffer.is_empty():
- self.caption_stash.create_and_store(
- self.buffer, self.time)
+ self.caption_stash.create_and_store(self.buffer, self.time)
self.buffer = self.node_creator_factory.new_creator()
# set rows to empty, configure start time for caption
@@ -439,11 +452,11 @@ def _translate_command(self, word, previous_is_pac_or_tab):
self.time = self.time_translator.get_time()
# clear pop_on buffer
- elif word == '94ae':
+ elif word == "94ae":
self.buffer = self.node_creator_factory.new_creator()
# display pop_on buffer [End Of Caption]
- elif word == '942f':
+ elif word == "942f":
self.time = self.time_translator.get_time()
if self.pop_ons_queue:
# there's a pop-on cue not ended by the 942c command
@@ -455,22 +468,19 @@ def _translate_command(self, word, previous_is_pac_or_tab):
self.buffer = self.node_creator_factory.new_creator()
# roll up captions [Carriage Return]
- elif word == '94ad':
+ elif word == "94ad":
# display roll-up buffer
if not self.buffer.is_empty():
self._roll_up()
# 942c - Erase Displayed Memory - Clear the current screen of any
# displayed captions or text.
- elif word == '942c' and self.pop_ons_queue:
+ elif word == "942c" and self.pop_ons_queue:
self._pop_on(end=self.time_translator.get_time())
# If command is not one of the aforementioned, add it to buffer
else:
- self.buffer.interpret_command(
- command=word,
- previous_is_pac_or_tab=previous_is_pac_or_tab
- )
+ self.buffer.interpret_command(command=word, next_command=next_command)
def _translate_characters(self, word):
# split word into the 2 bytes
@@ -508,8 +518,7 @@ def _roll_up(self):
self.roll_rows.pop(0)
self.roll_rows.append(self.buffer)
- self.buffer = self.node_creator_factory.from_list(
- self.roll_rows)
+ self.buffer = self.node_creator_factory.from_list(self.roll_rows)
# convert buffer and empty
self.caption_stash.create_and_store(self.buffer, self.time)
@@ -523,8 +532,7 @@ def _roll_up(self):
def _pop_on(self, end=0):
pop_on_cue = self.pop_ons_queue.pop()
- self.caption_stash.create_and_store(
- pop_on_cue.buffer, pop_on_cue.start, end)
+ self.caption_stash.create_and_store(pop_on_cue.buffer, pop_on_cue.start, end)
class SCCWriter(BaseWriter):
@@ -532,7 +540,7 @@ def __init__(self, *args, **kw):
super().__init__(*args, **kw)
def write(self, caption_set):
- output = HEADER + '\n\n'
+ output = HEADER + "\n\n"
if caption_set.is_empty():
return output
@@ -544,8 +552,10 @@ def write(self, caption_set):
captions = caption_set.get_captions(lang)
# PASS 1: compute codes for each caption
- codes = [(self._text_to_code(caption), caption.start, caption.end)
- for caption in captions]
+ codes = [
+ (self._text_to_code(caption), caption.start, caption.end)
+ for caption in captions
+ ]
# PASS 2:
# Advance start times so as to have time to write to the pop-on
@@ -563,13 +573,13 @@ def write(self, caption_set):
# PASS 3:
# Write captions.
- for (code, start, end) in codes:
- output += f'{self._format_timestamp(start)}\t'
- output += '94ae 94ae 9420 9420 '
+ for code, start, end in codes:
+ output += f"{self._format_timestamp(start)}\t"
+ output += "94ae 94ae 9420 9420 "
output += code
- output += '942c 942c 942f 942f\n\n'
+ output += "942c 942c 942f 942f\n\n"
if end is not None:
- output += f'{self._format_timestamp(end)}\t942c 942c\n\n'
+ output += f"{self._format_timestamp(end)}\t942c 942c\n\n"
return output
@@ -577,21 +587,21 @@ def write(self, caption_set):
@staticmethod
def _layout_line(caption):
caption_text = "".join(caption.get_text_nodes())
- inner_lines = caption_text.split('\n')
+ inner_lines = caption_text.split("\n")
inner_lines_laid_out = [textwrap.fill(x, 32) for x in inner_lines]
- return '\n'.join(inner_lines_laid_out)
+ return "\n".join(inner_lines_laid_out)
@staticmethod
def _maybe_align(code):
# Finish a half-word with a no-op so we can move to a full word
if len(code) % 5 == 2:
- code += '80 '
+ code += "80 "
return code
@staticmethod
def _maybe_space(code):
if len(code) % 5 == 4:
- code += ' '
+ code += " "
return code
def _print_character(self, code, char):
@@ -601,7 +611,7 @@ def _print_character(self, code, char):
try:
char_code = SPECIAL_OR_EXTENDED_CHAR_TO_CODE[char]
except KeyError:
- char_code = '91b6' # Use £ as "unknown character" symbol
+ char_code = "91b6" # Use £ as "unknown character" symbol
if len(char_code) == 2:
return code + char_code
@@ -612,14 +622,16 @@ def _print_character(self, code, char):
return code
def _text_to_code(self, s):
- code = ''
- lines = self._layout_line(s).split('\n')
+ code = ""
+ lines = self._layout_line(s).split("\n")
for row, line in enumerate(lines):
row += 16 - len(lines)
# Move cursor to column 0 of the destination row
for _ in range(2):
- code += (PAC_HIGH_BYTE_BY_ROW[row]
- + f'{PAC_LOW_BYTE_BY_ROW_RESTRICTED[row]} ')
+ code += (
+ PAC_HIGH_BYTE_BY_ROW[row]
+ + f"{PAC_LOW_BYTE_BY_ROW_RESTRICTED[row]} "
+ )
# Print the line using the SCC encoding
for char in line:
code = self._print_character(code, char)
@@ -639,14 +651,14 @@ def _format_timestamp(microseconds):
seconds = math.floor(seconds_float)
seconds_float -= seconds
frames = math.floor(seconds_float * 30)
- return f'{hours:02}:{minutes:02}:{seconds:02}:{frames:02}'
+ return f"{hours:02}:{minutes:02}:{seconds:02}:{frames:02}"
class _SccTimeTranslator:
"""Converts SCC time to microseconds, keeping track of frames passed"""
def __init__(self):
- self._time = '00:00:00;00'
+ self._time = "00:00:00;00"
# microseconds. The offset from which we begin the time calculation
self.offset = 0
@@ -659,8 +671,7 @@ def get_time(self):
:rtype: int
"""
return self._translate_time(
- self._time[:-2] + str(int(self._time[-2:]) + self._frames),
- self.offset
+ self._time[:-2] + str(int(self._time[-2:]) + self._frames), self.offset
)
@staticmethod
@@ -672,13 +683,14 @@ def _translate_time(stamp, offset):
Helpful for when the captions are off by some time interval.
:rtype: int
"""
- if not re.match(r'\d{2}:\d{2}:\d{2}[:;]\d{1,2}', stamp):
+ if not re.match(r"\d{2}:\d{2}:\d{2}[:;]\d{1,2}", stamp):
raise CaptionReadTimingError(
"Timestamps should follow the hour:minute:seconds"
";frames or hour:minute:seconds:frames format. Please correct "
- f"the following time: {stamp}.")
+ f"the following time: {stamp}."
+ )
- if ';' in stamp:
+ if ";" in stamp:
# Drop-frame timebase runs at the same rate as wall clock
seconds_per_timestamp_second = 1.0
else:
@@ -686,12 +698,14 @@ def _translate_time(stamp, offset):
# 1 second of timecode is longer than an actual second (1.001s)
seconds_per_timestamp_second = 1001.0 / 1000.0
- time_split = stamp.replace(';', ':').split(':')
+ time_split = stamp.replace(";", ":").split(":")
- timestamp_seconds = (int(time_split[0]) * 3600
- + int(time_split[1]) * 60
- + int(time_split[2])
- + int(time_split[3]) / 30.0)
+ timestamp_seconds = (
+ int(time_split[0]) * 3600
+ + int(time_split[1]) * 60
+ + int(time_split[2])
+ + int(time_split[3]) / 30.0
+ )
seconds = timestamp_seconds * seconds_per_timestamp_second
microseconds = seconds * 1000 * 1000 - offset
diff --git a/pycaption/scc/constants.py b/pycaption/scc/constants.py
index bc2fcd50..c8d5cefb 100644
--- a/pycaption/scc/constants.py
+++ b/pycaption/scc/constants.py
@@ -521,22 +521,22 @@
'97a8': '',
'9729': '',
'972a': '',
- '9120': '<$>{end-italic}<$>',
- '91a1': '',
- '91a2': '',
- '9123': '',
- '91a4': '',
- '9125': '',
- '9126': '',
- '91a7': '',
- '91a8': '',
- '9129': '',
- '912a': '',
- '91ab': '',
- '912c': '',
- '91ad': '',
- '97ae': '',
- '972f': '',
+ '9120': '<$>{end-italic}<$>', # plain white
+ '91a1': '<$>{end-italic}<$>', # white underlined
+ '91a2': '<$>{end-italic}<$>', # plain green
+ '9123': '<$>{end-italic}<$>', # green underlined
+ '91a4': '<$>{end-italic}<$>', # plain blue
+ '9125': '<$>{end-italic}<$>', # blue underlined
+ '9126': '<$>{end-italic}<$>', # plain cyan
+ '91a7': '<$>{end-italic}<$>', # cyan underlined
+ '91a8': '<$>{end-italic}<$>', # plain red
+ '9129': '<$>{end-italic}<$>', # red underlined
+ '912a': '<$>{end-italic}<$>', # plain yellow
+ '91ab': '<$>{end-italic}<$>', # yellow underlined
+ '912c': '<$>{end-italic}<$>', # plain magenta
+ '91ad': '<$>{end-italic}<$>', # magenta underlined
+ '97ae': '<$>{end-italic}<$>', # plain black
+ '972f': '<$>{end-italic}<$>', # black underlined
'91ae': '<$>{italic}<$>',
'912f': '<$>{italic}<$>',
'94a8': '',
@@ -1060,3 +1060,569 @@ def _restructure_bytes_to_position_map(byte_to_pos_map):
]
CUE_STARTING_COMMAND = ['9425', '9426', '94a7', '9429', '9420']
+
+ALL_CHARACTERS = {**CHARACTERS, **SPECIAL_CHARS, **EXTENDED_CHARS}  # merged lookup; on duplicate keys the later mapping wins
+
+COMMAND_LABELS = {  # four-hex-digit SCC command code -> human-readable description
+    "9420": "Resume Caption Loading",
+    "9429": "Resume Direct Captioning",
+    "9425": "Roll-Up Captions--2 Rows",
+    "9426": "Roll-Up Captions--3 Rows",
+    "94a7": "Roll-Up Captions--4 Rows",
+    "942a": "Text Restart",
+    "94ab": "Resume Text Display",
+    "942c": "Erase Displayed Memory",
+    "94ae": "Erase Non-displayed Memory",
+    "942f": "End Of Caption",
+    "9140": "row 01, column 00, with plain white text.",
+    "91c1": "row 01, column 00, with white underlined text.",
+    "91c2": "row 01, column 00, with plain green text.",
+    "9143": "row 01, column 00, with green underlined text.",
+    "91c4": "row 01, column 00, with plain blue text.",
+    "9145": "row 01, column 00, with blue underlined text.",
+    "9146": "row 01, column 00, with plain cyan text.",
+    "91c7": "row 01, column 00, with cyan underlined text.",
+    "91c8": "row 01, column 00, with plain red text.",
+    "9149": "row 01, column 00, with red underlined text.",
+    "914a": "row 01, column 00, with plain yellow text.",
+    "91cb": "row 01, column 00, with yellow underlined text.",
+    "914c": "row 01, column 00, with plain magenta text.",
+    "91cd": "row 01, column 00, with magenta underlined text.",
+    "91ce": "row 01, column 00, with white italicized text.",
+    "914f": "row 01, column 00, with white underlined italicized text.",
+    "91d0": "row 01, column 00, with plain white text.",
+    "9151": "row 01, column 00, with white underlined text.",
+    "9152": "row 01, column 04, with plain white text.",
+    "91d3": "row 01, column 04, with white underlined text.",
+    "9154": "row 01, column 08, with plain white text.",
+    "91d5": "row 01, column 08, with white underlined text.",
+    "91d6": "row 01, column 12, with plain white text.",
+    "9157": "row 01, column 12, with white underlined text.",
+    "9158": "row 01, column 16, with plain white text.",
+    "91d9": "row 01, column 16, with white underlined text.",
+    "91da": "row 01, column 20, with plain white text.",
+    "915b": "row 01, column 20, with white underlined text.",
+    "91dc": "row 01, column 24, with plain white text.",
+    "915d": "row 01, column 24, with white underlined text.",
+    "915e": "row 01, column 28, with plain white text.",
+    "91df": "row 01, column 28, with white underlined text.",
+    "91e0": "row 02, column 00, with plain white text.",
+    "9161": "row 02, column 00, with white underlined text.",
+    "9162": "row 02, column 00, with plain green text.",
+    "91e3": "row 02, column 00, with green underlined text.",
+    "9164": "row 02, column 00, with plain blue text.",
+    "91e5": "row 02, column 00, with blue underlined text.",
+    "91e6": "row 02, column 00, with plain cyan text.",
+    "9167": "row 02, column 00, with cyan underlined text.",
+    "9168": "row 02, column 00, with plain red text.",
+    "91e9": "row 02, column 00, with red underlined text.",
+    "91ea": "row 02, column 00, with plain yellow text.",
+    "916b": "row 02, column 00, with yellow underlined text.",
+    "91ec": "row 02, column 00, with plain magenta text.",
+    "916d": "row 02, column 00, with magenta underlined text.",
+    "916e": "row 02, column 00, with white italicized text.",
+    "91ef": "row 02, column 00, with white underlined italicized text.",
+    "9170": "row 02, column 00, with plain white text.",
+    "91f1": "row 02, column 00, with white underlined text.",
+    "91f2": "row 02, column 04, with plain white text.",
+    "9173": "row 02, column 04, with white underlined text.",
+    "91f4": "row 02, column 08, with plain white text.",
+    "9175": "row 02, column 08, with white underlined text.",
+    "9176": "row 02, column 12, with plain white text.",
+    "91f7": "row 02, column 12, with white underlined text.",
+    "91f8": "row 02, column 16, with plain white text.",
+    "9179": "row 02, column 16, with white underlined text.",
+    "917a": "row 02, column 20, with plain white text.",
+    "91fb": "row 02, column 20, with white underlined text.",
+    "917c": "row 02, column 24, with plain white text.",
+    "91fd": "row 02, column 24, with white underlined text.",
+    "91fe": "row 02, column 28, with plain white text.",
+    "917f": "row 02, column 28, with white underlined text.",
+    "9240": "row 03, column 00, with plain white text.",
+    "92c1": "row 03, column 00, with white underlined text.",
+    "92c2": "row 03, column 00, with plain green text.",
+    "9243": "row 03, column 00, with green underlined text.",
+    "92c4": "row 03, column 00, with plain blue text.",
+    "9245": "row 03, column 00, with blue underlined text.",
+    "9246": "row 03, column 00, with plain cyan text.",
+    "92c7": "row 03, column 00, with cyan underlined text.",
+    "92c8": "row 03, column 00, with plain red text.",
+    "9249": "row 03, column 00, with red underlined text.",
+    "924a": "row 03, column 00, with plain yellow text.",
+    "92cb": "row 03, column 00, with yellow underlined text.",
+    "924c": "row 03, column 00, with plain magenta text.",
+    "92cd": "row 03, column 00, with magenta underlined text.",
+    "92ce": "row 03, column 00, with white italicized text.",
+    "924f": "row 03, column 00, with white underlined italicized text.",
+    "92d0": "row 03, column 00, with plain white text.",
+    "9251": "row 03, column 00, with white underlined text.",
+    "9252": "row 03, column 04, with plain white text.",
+    "92d3": "row 03, column 04, with white underlined text.",
+    "9254": "row 03, column 08, with plain white text.",
+    "92d5": "row 03, column 08, with white underlined text.",
+    "92d6": "row 03, column 12, with plain white text.",
+    "9257": "row 03, column 12, with white underlined text.",
+    "9258": "row 03, column 16, with plain white text.",
+    "92d9": "row 03, column 16, with white underlined text.",
+    "92da": "row 03, column 20, with plain white text.",
+    "925b": "row 03, column 20, with white underlined text.",
+    "92dc": "row 03, column 24, with plain white text.",
+    "925d": "row 03, column 24, with white underlined text.",
+    "925e": "row 03, column 28, with plain white text.",
+    "92df": "row 03, column 28, with white underlined text.",
+    "92e0": "row 04, column 00, with plain white text.",
+    "9261": "row 04, column 00, with white underlined text.",
+    "9262": "row 04, column 00, with plain green text.",
+    "92e3": "row 04, column 00, with green underlined text.",
+    "9264": "row 04, column 00, with plain blue text.",
+    "92e5": "row 04, column 00, with blue underlined text.",
+    "92e6": "row 04, column 00, with plain cyan text.",
+    "9267": "row 04, column 00, with cyan underlined text.",
+    "9268": "row 04, column 00, with plain red text.",
+    "92e9": "row 04, column 00, with red underlined text.",
+    "92ea": "row 04, column 00, with plain yellow text.",
+    "926b": "row 04, column 00, with yellow underlined text.",
+    "92ec": "row 04, column 00, with plain magenta text.",
+    "926d": "row 04, column 00, with magenta underlined text.",
+    "926e": "row 04, column 00, with white italicized text.",
+    "92ef": "row 04, column 00, with white underlined italicized text.",
+    "9270": "row 04, column 00, with plain white text.",
+    "92f1": "row 04, column 00, with white underlined text.",
+    "92f2": "row 04, column 04, with plain white text.",
+    "9273": "row 04, column 04, with white underlined text.",
+    "92f4": "row 04, column 08, with plain white text.",
+    "9275": "row 04, column 08, with white underlined text.",
+    "9276": "row 04, column 12, with plain white text.",
+    "92f7": "row 04, column 12, with white underlined text.",
+    "92f8": "row 04, column 16, with plain white text.",
+    "9279": "row 04, column 16, with white underlined text.",
+    "927a": "row 04, column 20, with plain white text.",
+    "92fb": "row 04, column 20, with white underlined text.",
+    "927c": "row 04, column 24, with plain white text.",
+    "92fd": "row 04, column 24, with white underlined text.",
+    "92fe": "row 04, column 28, with plain white text.",
+    "927f": "row 04, column 28, with white underlined text.",
+    "1540": "row 05, column 00, with plain white text.",
+    "15c1": "row 05, column 00, with white underlined text.",
+    "15c2": "row 05, column 00, with plain green text.",
+    "1543": "row 05, column 00, with green underlined text.",
+    "15c4": "row 05, column 00, with plain blue text.",
+    "1545": "row 05, column 00, with blue underlined text.",
+    "1546": "row 05, column 00, with plain cyan text.",
+    "15c7": "row 05, column 00, with cyan underlined text.",
+    "15c8": "row 05, column 00, with plain red text.",
+    "1549": "row 05, column 00, with red underlined text.",
+    "154a": "row 05, column 00, with plain yellow text.",
+    "15cb": "row 05, column 00, with yellow underlined text.",
+    "154c": "row 05, column 00, with plain magenta text.",
+    "15cd": "row 05, column 00, with magenta underlined text.",
+    "15ce": "row 05, column 00, with white italicized text.",
+    "154f": "row 05, column 00, with white underlined italicized text.",
+    "15d0": "row 05, column 00, with plain white text.",
+    "1551": "row 05, column 00, with white underlined text.",
+    "1552": "row 05, column 04, with plain white text.",
+    "15d3": "row 05, column 04, with white underlined text.",
+    "1554": "row 05, column 08, with plain white text.",
+    "15d5": "row 05, column 08, with white underlined text.",
+    "15d6": "row 05, column 12, with plain white text.",
+    "1557": "row 05, column 12, with white underlined text.",
+    "1558": "row 05, column 16, with plain white text.",
+    "15d9": "row 05, column 16, with white underlined text.",
+    "15da": "row 05, column 20, with plain white text.",
+    "155b": "row 05, column 20, with white underlined text.",
+    "15dc": "row 05, column 24, with plain white text.",
+    "155d": "row 05, column 24, with white underlined text.",
+    "155e": "row 05, column 28, with plain white text.",
+    "15df": "row 05, column 28, with white underlined text.",
+    "15e0": "row 06, column 00, with plain white text.",
+    "1561": "row 06, column 00, with white underlined text.",
+    "1562": "row 06, column 00, with plain green text.",
+    "15e3": "row 06, column 00, with green underlined text.",
+    "1564": "row 06, column 00, with plain blue text.",
+    "15e5": "row 06, column 00, with blue underlined text.",
+    "15e6": "row 06, column 00, with plain cyan text.",
+    "1567": "row 06, column 00, with cyan underlined text.",
+    "1568": "row 06, column 00, with plain red text.",
+    "15e9": "row 06, column 00, with red underlined text.",
+    "15ea": "row 06, column 00, with plain yellow text.",
+    "156b": "row 06, column 00, with yellow underlined text.",
+    "15ec": "row 06, column 00, with plain magenta text.",
+    "156d": "row 06, column 00, with magenta underlined text.",
+    "156e": "row 06, column 00, with white italicized text.",
+    "15ef": "row 06, column 00, with white underlined italicized text.",
+    "1570": "row 06, column 00, with plain white text.",
+    "15f1": "row 06, column 00, with white underlined text.",
+    "15f2": "row 06, column 04, with plain white text.",
+    "1573": "row 06, column 04, with white underlined text.",
+    "15f4": "row 06, column 08, with plain white text.",
+    "1575": "row 06, column 08, with white underlined text.",
+    "1576": "row 06, column 12, with plain white text.",
+    "15f7": "row 06, column 12, with white underlined text.",
+    "15f8": "row 06, column 16, with plain white text.",
+    "1579": "row 06, column 16, with white underlined text.",
+    "157a": "row 06, column 20, with plain white text.",
+    "15fb": "row 06, column 20, with white underlined text.",
+    "157c": "row 06, column 24, with plain white text.",
+    "15fd": "row 06, column 24, with white underlined text.",
+    "15fe": "row 06, column 28, with plain white text.",
+    "157f": "row 06, column 28, with white underlined text.",
+    "1640": "row 07, column 00, with plain white text.",
+    "16c1": "row 07, column 00, with white underlined text.",
+    "16c2": "row 07, column 00, with plain green text.",
+    "1643": "row 07, column 00, with green underlined text.",
+    "16c4": "row 07, column 00, with plain blue text.",
+    "1645": "row 07, column 00, with blue underlined text.",
+    "1646": "row 07, column 00, with plain cyan text.",
+    "16c7": "row 07, column 00, with cyan underlined text.",
+    "16c8": "row 07, column 00, with plain red text.",
+    "1649": "row 07, column 00, with red underlined text.",
+    "164a": "row 07, column 00, with plain yellow text.",
+    "16cb": "row 07, column 00, with yellow underlined text.",
+    "164c": "row 07, column 00, with plain magenta text.",
+    "16cd": "row 07, column 00, with magenta underlined text.",
+    "16ce": "row 07, column 00, with white italicized text.",
+    "164f": "row 07, column 00, with white underlined italicized text.",
+    "16d0": "row 07, column 00, with plain white text.",
+    "1651": "row 07, column 00, with white underlined text.",
+    "1652": "row 07, column 04, with plain white text.",
+    "16d3": "row 07, column 04, with white underlined text.",
+    "1654": "row 07, column 08, with plain white text.",
+    "16d5": "row 07, column 08, with white underlined text.",
+    "16d6": "row 07, column 12, with plain white text.",
+    "1657": "row 07, column 12, with white underlined text.",
+    "1658": "row 07, column 16, with plain white text.",
+    "16d9": "row 07, column 16, with white underlined text.",
+    "16da": "row 07, column 20, with plain white text.",
+    "165b": "row 07, column 20, with white underlined text.",
+    "16dc": "row 07, column 24, with plain white text.",
+    "165d": "row 07, column 24, with white underlined text.",
+    "165e": "row 07, column 28, with plain white text.",
+    "16df": "row 07, column 28, with white underlined text.",
+    "16e0": "row 08, column 00, with plain white text.",
+    "1661": "row 08, column 00, with white underlined text.",
+    "1662": "row 08, column 00, with plain green text.",
+    "16e3": "row 08, column 00, with green underlined text.",
+    "1664": "row 08, column 00, with plain blue text.",
+    "16e5": "row 08, column 00, with blue underlined text.",
+    "16e6": "row 08, column 00, with plain cyan text.",
+    "1667": "row 08, column 00, with cyan underlined text.",
+    "1668": "row 08, column 00, with plain red text.",
+    "16e9": "row 08, column 00, with red underlined text.",
+    "16ea": "row 08, column 00, with plain yellow text.",
+    "166b": "row 08, column 00, with yellow underlined text.",
+    "16ec": "row 08, column 00, with plain magenta text.",
+    "166d": "row 08, column 00, with magenta underlined text.",
+    "166e": "row 08, column 00, with white italicized text.",
+    "16ef": "row 08, column 00, with white underlined italicized text.",
+    "1670": "row 08, column 00, with plain white text.",
+    "16f1": "row 08, column 00, with white underlined text.",
+    "16f2": "row 08, column 04, with plain white text.",
+    "1673": "row 08, column 04, with white underlined text.",
+    "16f4": "row 08, column 08, with plain white text.",
+    "1675": "row 08, column 08, with white underlined text.",
+    "1676": "row 08, column 12, with plain white text.",
+    "16f7": "row 08, column 12, with white underlined text.",
+    "16f8": "row 08, column 16, with plain white text.",
+    "1679": "row 08, column 16, with white underlined text.",
+    "167a": "row 08, column 20, with plain white text.",
+    "16fb": "row 08, column 20, with white underlined text.",
+    "167c": "row 08, column 24, with plain white text.",
+    "16fd": "row 08, column 24, with white underlined text.",
+    "16fe": "row 08, column 28, with plain white text.",
+    "167f": "row 08, column 28, with white underlined text.",
+    "9740": "row 09, column 00, with plain white text.",
+    "97c1": "row 09, column 00, with white underlined text.",
+    "97c2": "row 09, column 00, with plain green text.",
+    "9743": "row 09, column 00, with green underlined text.",
+    "97c4": "row 09, column 00, with plain blue text.",
+    "9745": "row 09, column 00, with blue underlined text.",
+    "9746": "row 09, column 00, with plain cyan text.",
+    "97c7": "row 09, column 00, with cyan underlined text.",
+    "97c8": "row 09, column 00, with plain red text.",
+    "9749": "row 09, column 00, with red underlined text.",
+    "974a": "row 09, column 00, with plain yellow text.",
+    "97cb": "row 09, column 00, with yellow underlined text.",
+    "974c": "row 09, column 00, with plain magenta text.",
+    "97cd": "row 09, column 00, with magenta underlined text.",
+    "97ce": "row 09, column 00, with white italicized text.",
+    "974f": "row 09, column 00, with white underlined italicized text.",
+    "97d0": "row 09, column 00, with plain white text.",
+    "9751": "row 09, column 00, with white underlined text.",
+    "9752": "row 09, column 04, with plain white text.",
+    "97d3": "row 09, column 04, with white underlined text.",
+    "9754": "row 09, column 08, with plain white text.",
+    "97d5": "row 09, column 08, with white underlined text.",
+    "97d6": "row 09, column 12, with plain white text.",
+    "9757": "row 09, column 12, with white underlined text.",
+    "9758": "row 09, column 16, with plain white text.",
+    "97d9": "row 09, column 16, with white underlined text.",
+    "97da": "row 09, column 20, with plain white text.",
+    "975b": "row 09, column 20, with white underlined text.",
+    "97dc": "row 09, column 24, with plain white text.",
+    "975d": "row 09, column 24, with white underlined text.",
+    "975e": "row 09, column 28, with plain white text.",
+    "97df": "row 09, column 28, with white underlined text.",
+    "97e0": "row 10, column 00, with plain white text.",
+    "9761": "row 10, column 00, with white underlined text.",
+    "9762": "row 10, column 00, with plain green text.",
+    "97e3": "row 10, column 00, with green underlined text.",
+    "9764": "row 10, column 00, with plain blue text.",
+    "97e5": "row 10, column 00, with blue underlined text.",
+    "97e6": "row 10, column 00, with plain cyan text.",
+    "9767": "row 10, column 00, with cyan underlined text.",
+    "9768": "row 10, column 00, with plain red text.",
+    "97e9": "row 10, column 00, with red underlined text.",
+    "97ea": "row 10, column 00, with plain yellow text.",
+    "976b": "row 10, column 00, with yellow underlined text.",
+    "97ec": "row 10, column 00, with plain magenta text.",
+    "976d": "row 10, column 00, with magenta underlined text.",
+    "976e": "row 10, column 00, with white italicized text.",
+    "97ef": "row 10, column 00, with white underlined italicized text.",
+    "9770": "row 10, column 00, with plain white text.",
+    "97f1": "row 10, column 00, with white underlined text.",
+    "97f2": "row 10, column 04, with plain white text.",
+    "9773": "row 10, column 04, with white underlined text.",
+    "97f4": "row 10, column 08, with plain white text.",
+    "9775": "row 10, column 08, with white underlined text.",
+    "9776": "row 10, column 12, with plain white text.",
+    "97f7": "row 10, column 12, with white underlined text.",
+    "97f8": "row 10, column 16, with plain white text.",
+    "9779": "row 10, column 16, with white underlined text.",
+    "977a": "row 10, column 20, with plain white text.",
+    "97fb": "row 10, column 20, with white underlined text.",
+    "977c": "row 10, column 24, with plain white text.",
+    "97fd": "row 10, column 24, with white underlined text.",
+    "97fe": "row 10, column 28, with plain white text.",
+    "977f": "row 10, column 28, with white underlined text.",
+    "1040": "row 11, column 00, with plain white text.",
+    "10c1": "row 11, column 00, with white underlined text.",
+    "10c2": "row 11, column 00, with plain green text.",
+    "1043": "row 11, column 00, with green underlined text.",
+    "10c4": "row 11, column 00, with plain blue text.",
+    "1045": "row 11, column 00, with blue underlined text.",
+    "1046": "row 11, column 00, with plain cyan text.",
+    "10c7": "row 11, column 00, with cyan underlined text.",
+    "10c8": "row 11, column 00, with plain red text.",
+    "1049": "row 11, column 00, with red underlined text.",
+    "104a": "row 11, column 00, with plain yellow text.",
+    "10cb": "row 11, column 00, with yellow underlined text.",
+    "104c": "row 11, column 00, with plain magenta text.",
+    "10cd": "row 11, column 00, with magenta underlined text.",
+    "10ce": "row 11, column 00, with white italicized text.",
+    "104f": "row 11, column 00, with white underlined italicized text.",
+    "10d0": "row 11, column 00, with plain white text.",
+    "1051": "row 11, column 00, with white underlined text.",
+    "1052": "row 11, column 04, with plain white text.",
+    "10d3": "row 11, column 04, with white underlined text.",
+    "1054": "row 11, column 08, with plain white text.",
+    "10d5": "row 11, column 08, with white underlined text.",
+    "10d6": "row 11, column 12, with plain white text.",
+    "1057": "row 11, column 12, with white underlined text.",
+    "1058": "row 11, column 16, with plain white text.",
+    "10d9": "row 11, column 16, with white underlined text.",
+    "10da": "row 11, column 20, with plain white text.",
+    "105b": "row 11, column 20, with white underlined text.",
+    "10dc": "row 11, column 24, with plain white text.",
+    "105d": "row 11, column 24, with white underlined text.",
+    "105e": "row 11, column 28, with plain white text.",
+    "10df": "row 11, column 28, with white underlined text.",
+    "1340": "row 12, column 00, with plain white text.",
+    "13c1": "row 12, column 00, with white underlined text.",
+    "13c2": "row 12, column 00, with plain green text.",
+    "1343": "row 12, column 00, with green underlined text.",
+    "13c4": "row 12, column 00, with plain blue text.",
+    "1345": "row 12, column 00, with blue underlined text.",
+    "1346": "row 12, column 00, with plain cyan text.",
+    "13c7": "row 12, column 00, with cyan underlined text.",
+    "13c8": "row 12, column 00, with plain red text.",
+    "1349": "row 12, column 00, with red underlined text.",
+    "134a": "row 12, column 00, with plain yellow text.",
+    "13cb": "row 12, column 00, with yellow underlined text.",
+    "134c": "row 12, column 00, with plain magenta text.",
+    "13cd": "row 12, column 00, with magenta underlined text.",
+    "13ce": "row 12, column 00, with white italicized text.",
+    "134f": "row 12, column 00, with white underlined italicized text.",
+    "13d0": "row 12, column 00, with plain white text.",
+    "1351": "row 12, column 00, with white underlined text.",
+    "1352": "row 12, column 04, with plain white text.",
+    "13d3": "row 12, column 04, with white underlined text.",
+    "1354": "row 12, column 08, with plain white text.",
+    "13d5": "row 12, column 08, with white underlined text.",
+    "13d6": "row 12, column 12, with plain white text.",
+    "1357": "row 12, column 12, with white underlined text.",
+    "1358": "row 12, column 16, with plain white text.",
+    "13d9": "row 12, column 16, with white underlined text.",
+    "13da": "row 12, column 20, with plain white text.",
+    "135b": "row 12, column 20, with white underlined text.",
+    "13dc": "row 12, column 24, with plain white text.",
+    "135d": "row 12, column 24, with white underlined text.",
+    "135e": "row 12, column 28, with plain white text.",
+    "13df": "row 12, column 28, with white underlined text.",
+    "13e0": "row 13, column 00, with plain white text.",
+    "1361": "row 13, column 00, with white underlined text.",
+    "1362": "row 13, column 00, with plain green text.",
+    "13e3": "row 13, column 00, with green underlined text.",
+    "1364": "row 13, column 00, with plain blue text.",
+    "13e5": "row 13, column 00, with blue underlined text.",
+    "13e6": "row 13, column 00, with plain cyan text.",
+    "1367": "row 13, column 00, with cyan underlined text.",
+    "1368": "row 13, column 00, with plain red text.",
+    "13e9": "row 13, column 00, with red underlined text.",
+    "13ea": "row 13, column 00, with plain yellow text.",
+    "136b": "row 13, column 00, with yellow underlined text.",
+    "13ec": "row 13, column 00, with plain magenta text.",
+    "136d": "row 13, column 00, with magenta underlined text.",
+    "136e": "row 13, column 00, with white italicized text.",
+    "13ef": "row 13, column 00, with white underlined italicized text.",
+    "1370": "row 13, column 00, with plain white text.",
+    "13f1": "row 13, column 00, with white underlined text.",
+    "13f2": "row 13, column 04, with plain white text.",
+    "1373": "row 13, column 04, with white underlined text.",
+    "13f4": "row 13, column 08, with plain white text.",
+    "1375": "row 13, column 08, with white underlined text.",
+    "1376": "row 13, column 12, with plain white text.",
+    "13f7": "row 13, column 12, with white underlined text.",
+    "13f8": "row 13, column 16, with plain white text.",
+    "1379": "row 13, column 16, with white underlined text.",
+    "137a": "row 13, column 20, with plain white text.",
+    "13fb": "row 13, column 20, with white underlined text.",
+    "137c": "row 13, column 24, with plain white text.",
+    "13fd": "row 13, column 24, with white underlined text.",
+    "13fe": "row 13, column 28, with plain white text.",
+    "137f": "row 13, column 28, with white underlined text.",
+    "9440": "row 14, column 00, with plain white text.",
+    "94c1": "row 14, column 00, with white underlined text.",
+    "94c2": "row 14, column 00, with plain green text.",
+    "9443": "row 14, column 00, with green underlined text.",
+    "94c4": "row 14, column 00, with plain blue text.",
+    "9445": "row 14, column 00, with blue underlined text.",
+    "9446": "row 14, column 00, with plain cyan text.",
+    "94c7": "row 14, column 00, with cyan underlined text.",
+    "94c8": "row 14, column 00, with plain red text.",
+    "9449": "row 14, column 00, with red underlined text.",
+    "944a": "row 14, column 00, with plain yellow text.",
+    "94cb": "row 14, column 00, with yellow underlined text.",
+    "944c": "row 14, column 00, with plain magenta text.",
+    "94cd": "row 14, column 00, with magenta underlined text.",
+    "94ce": "row 14, column 00, with white italicized text.",
+    "944f": "row 14, column 00, with white underlined italicized text.",
+    "94d0": "row 14, column 00, with plain white text.",
+    "9451": "row 14, column 00, with white underlined text.",
+    "9452": "row 14, column 04, with plain white text.",
+    "94d3": "row 14, column 04, with white underlined text.",
+    "9454": "row 14, column 08, with plain white text.",
+    "94d5": "row 14, column 08, with white underlined text.",
+    "94d6": "row 14, column 12, with plain white text.",
+    "9457": "row 14, column 12, with white underlined text.",
+    "9458": "row 14, column 16, with plain white text.",
+    "94d9": "row 14, column 16, with white underlined text.",
+    "94da": "row 14, column 20, with plain white text.",
+    "945b": "row 14, column 20, with white underlined text.",
+    "94dc": "row 14, column 24, with plain white text.",
+    "945d": "row 14, column 24, with white underlined text.",
+    "945e": "row 14, column 28, with plain white text.",
+    "94df": "row 14, column 28, with white underlined text.",
+    "94e0": "row 15, column 00, with plain white text.",
+    "9461": "row 15, column 00, with white underlined text.",
+    "9462": "row 15, column 00, with plain green text.",
+    "94e3": "row 15, column 00, with green underlined text.",
+    "9464": "row 15, column 00, with plain blue text.",
+    "94e5": "row 15, column 00, with blue underlined text.",
+    "94e6": "row 15, column 00, with plain cyan text.",
+    "9467": "row 15, column 00, with cyan underlined text.",
+    "9468": "row 15, column 00, with plain red text.",
+    "94e9": "row 15, column 00, with red underlined text.",
+    "94ea": "row 15, column 00, with plain yellow text.",
+    "946b": "row 15, column 00, with yellow underlined text.",
+    "94ec": "row 15, column 00, with plain magenta text.",
+    "946d": "row 15, column 00, with magenta underlined text.",
+    "946e": "row 15, column 00, with white italicized text.",
+    "94ef": "row 15, column 00, with white underlined italicized text.",
+    "9470": "row 15, column 00, with plain white text.",
+    "94f1": "row 15, column 00, with white underlined text.",
+    "94f2": "row 15, column 04, with plain white text.",
+    "9473": "row 15, column 04, with white underlined text.",
+    "94f4": "row 15, column 08, with plain white text.",
+    "9475": "row 15, column 08, with white underlined text.",
+    "9476": "row 15, column 12, with plain white text.",
+    "94f7": "row 15, column 12, with white underlined text.",
+    "94f8": "row 15, column 16, with plain white text.",
+    "9479": "row 15, column 16, with white underlined text.",
+    "947a": "row 15, column 20, with plain white text.",
+    "94fb": "row 15, column 20, with white underlined text.",
+    "947c": "row 15, column 24, with plain white text.",
+    "94fd": "row 15, column 24, with white underlined text.",
+    "94fe": "row 15, column 28, with plain white text.",
+    "947f": "row 15, column 28, with white underlined text.",
+    "97a1": "Tab Offset 1 column",
+    "97a2": "Tab Offset 2 columns",
+    "9723": "Tab Offset 3 columns",
+    "94a1": "BackSpace",
+    "94a4": "Delete to End of Row",
+    "94ad": "Carriage Return",
+    "1020": "Background White",
+    "10a1": "Background Semi-Transparent White",
+    "10a2": "Background Green",
+    "1023": "Background Semi-Transparent Green",
+    "10a4": "Background Blue",
+    "1025": "Background Semi-Transparent Blue",
+    "1026": "Background Cyan",
+    "10a7": "Background Semi-Transparent Cyan",
+    "10a8": "Background Red",
+    "1029": "Background Semi-Transparent Red",
+    "102a": "Background Yellow",
+    "10ab": "Background Semi-Transparent Yellow",
+    "102c": "Background Magenta",
+    "10ad": "Background Semi-Transparent Magenta",
+    "10ae": "Background Black",
+    "102f": "Background Semi-Transparent Black",
+    "97ad": "Background Transparent",
+    "97a4": "Standard Character Set",
+    "9725": "Double-Size Character Set",
+    "9726": "First Private Character Set",
+    "97a7": "Second Private Character Set",
+    "97a8": "People`s Republic of China Character Set",
+    "9729": "Korean Standard Character Set",
+    "972a": "First Registered Character Set",
+    "9120": "White Plain",
+    "91a1": "White Underline",
+    "91a2": "Green Plain",
+    "9123": "Green Underline",
+    "91a4": "Blue Plain",
+    "9125": "Blue Underline",
+    "9126": "Cyan Plain",
+    "91a7": "Cyan Underline",
+    "91a8": "Red Plain",
+    "9129": "Red Underline",
+    "912a": "Yellow Plain",
+    "91ab": "Yellow Underline",
+    "912c": "Magenta Plain",
+    "91ad": "Magenta Underline",
+    "97ae": "Black Plain",
+    "972f": "Black Underline",
+    "91ae": "Italics",
+    "912f": "Italics Underline",
+    "94a8": "Flash ON",
+    "9423": "Alarm Off",
+    "94a2": "Alarm On"
+}
+
+SCC_STYLES = ["bold", "italic", "underline", "plain", "underlined italicized"]  # style keywords; all but "bold" occur in COMMAND_LABELS descriptions
+
+ITALICS_COMMANDS = {  # codes whose label mentions "italic" (case-insensitive)
+    code: label for code, label in COMMAND_LABELS.items() if "italic" in label.lower()
+}
+
+UNDERLINE_COMMANDS = {  # codes whose label mentions "underline" but not "italic"
+    code: label
+    for code, label in COMMAND_LABELS.items()
+    if "underline" in label.lower() and "italic" not in label.lower()
+}
+
+PLAIN_TEXT_COMMANDS = {  # codes whose label mentions "plain" (case-insensitive)
+    code: label for code, label in COMMAND_LABELS.items() if "plain" in label.lower()
+}
+
+STYLE_SETTING_COMMANDS = {  # union of the italics, underline and plain-text command maps
+ **ITALICS_COMMANDS, **UNDERLINE_COMMANDS, **PLAIN_TEXT_COMMANDS
+}
diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py
index 4b8800ed..1e8f12ee 100644
--- a/pycaption/scc/specialized_collections.py
+++ b/pycaption/scc/specialized_collections.py
@@ -1,14 +1,27 @@
import collections
-from ..base import CaptionList, Caption, CaptionNode
+from ..base import Caption, CaptionList, CaptionNode
from ..geometry import (
- UnitEnum, Size, Layout, Point, Alignment,
- VerticalAlignmentEnum, HorizontalAlignmentEnum
+ Alignment,
+ HorizontalAlignmentEnum,
+ Layout,
+ Point,
+ Size,
+ UnitEnum,
+ VerticalAlignmentEnum,
)
from .constants import (
- PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS,
- MICROSECONDS_PER_CODEWORD, BACKGROUND_COLOR_CODES,
- MID_ROW_CODES, EXTENDED_CHARS
+ BACKGROUND_COLOR_CODES,
+ COMMANDS,
+ EXTENDED_CHARS,
+ ITALICS_COMMANDS,
+ MICROSECONDS_PER_CODEWORD,
+ MID_ROW_CODES,
+ PAC_BYTES_TO_POSITIONING_MAP,
+ PAC_TAB_OFFSET_COMMANDS,
+ PLAIN_TEXT_COMMANDS,
+ STYLE_SETTING_COMMANDS,
+ UNDERLINE_COMMANDS,
)
PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end")
@@ -31,9 +44,7 @@ def __init__(self, start=0, end=0):
self.layout_info = None
def to_real_caption(self):
- return Caption(
- self.start, self.end, self.nodes, self.style, self.layout_info
- )
+ return Caption(self.start, self.end, self.nodes, self.style, self.layout_info)
class TimingCorrectingCaptionList(list):
@@ -45,6 +56,7 @@ class TimingCorrectingCaptionList(list):
Also, doesn't allow Nones or empty captions
"""
+
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._last_batch = ()
@@ -98,9 +110,10 @@ def _update_last_batch(batch, *new_captions):
new_caption = new_captions[0]
- if batch and (batch[-1].end == 0
- or new_caption.start - batch[-1].end
- < 5 * MICROSECONDS_PER_CODEWORD + 1):
+ if batch and (
+ batch[-1].end == 0
+ or new_caption.start - batch[-1].end < 5 * MICROSECONDS_PER_CODEWORD + 1
+ ):
for caption in batch:
caption.end = new_caption.start
@@ -109,6 +122,7 @@ class NotifyingDict(dict):
"""Dictionary-like object, that treats one key as 'active',
and notifies observers if the active key changed
"""
+
# Need an unhashable object as initial value for the active key.
# That way we're sure this was never a key in the dict.
_guard = {}
@@ -124,7 +138,7 @@ def set_active(self, key):
:param key: any hashable object
"""
if key not in self:
- raise ValueError('No such key present')
+ raise ValueError("No such key present")
# Notify observers of the change
if key != self.active_key:
@@ -136,7 +150,7 @@ def set_active(self, key):
def get_active(self):
"""Returns the value corresponding to the active key"""
if self.active_key is self._guard:
- raise KeyError('No active key set')
+ raise KeyError("No active key set")
return self[self.active_key]
@@ -150,13 +164,14 @@ def add_change_observer(self, observer):
arguments
"""
if not callable(observer):
- raise TypeError('The observer should be callable')
+ raise TypeError("The observer should be callable")
self.observers.append(observer)
class CaptionCreator:
"""Creates and maintains a collection of Captions"""
+
def __init__(self):
self._collection = TimingCorrectingCaptionList()
@@ -226,28 +241,31 @@ def create_and_store(self, node_buffer, start, end=0):
# handle line breaks
elif instruction.is_explicit_break():
- caption.nodes.append(CaptionNode.create_break(
- layout_info=_get_layout_from_tuple(instruction.position)
- ))
+ caption.nodes.append(
+ CaptionNode.create_break(
+ layout_info=_get_layout_from_tuple(instruction.position)
+ )
+ )
# handle open italics
elif instruction.sets_italics_on():
caption.nodes.append(
CaptionNode.create_style(
- True, {'italics': True},
- layout_info=_get_layout_from_tuple(
- instruction.position
- ))
+ True,
+ {"italics": True},
+ layout_info=_get_layout_from_tuple(instruction.position),
+ )
)
# handle clone italics
elif instruction.sets_italics_off():
caption.nodes.append(
CaptionNode.create_style(
- False, {'italics': True},
- layout_info=_get_layout_from_tuple(
- instruction.position)
- ))
+ False,
+ {"italics": True},
+ layout_info=_get_layout_from_tuple(instruction.position),
+ )
+ )
# handle text
elif instruction.is_text_node():
@@ -256,7 +274,7 @@ def create_and_store(self, node_buffer, start, end=0):
CaptionNode.create_text(
text=instruction.text,
layout_info=layout_info,
- position=instruction.position
+ position=instruction.position,
)
)
caption.layout_info = layout_info
@@ -278,6 +296,7 @@ class InstructionNodeCreator:
"""Creates _InstructionNode instances from characters and commands, storing
them internally
"""
+
def __init__(self, collection=None, position_tracker=None):
"""
:param collection: an optional collection of nodes
@@ -290,8 +309,9 @@ def __init__(self, collection=None, position_tracker=None):
else:
self._collection = collection
- self.last_style = None
-
+ self.last_style = (
+ None # can be italic on or italic off as we only support italics
+ )
self._position_tracer = position_tracker
def is_empty(self):
@@ -309,8 +329,11 @@ def add_chars(self, *chars):
current_position = self._position_tracer.get_current_position()
# get or create a usable node
- if (self._collection and self._collection[-1].is_text_node()
- and not self._position_tracer.is_repositioning_required()):
+ if (
+ self._collection
+ and self._collection[-1].is_text_node()
+ and not self._position_tracer.is_repositioning_required()
+ ):
node = self._collection[-1]
else:
# create first node
@@ -319,19 +342,21 @@ def add_chars(self, *chars):
# handle a simple line break
if self._position_tracer.is_linebreak_required():
- # must insert a line break here
- self._collection.append(_InstructionNode.create_break(
- position=current_position))
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
+ self._position_tracer.acknowledge_linebreak_consumed()
node = _InstructionNode.create_text(current_position)
self._collection.append(node)
- self._position_tracer.acknowledge_linebreak_consumed()
+ if self._position_tracer.is_repositioning_required():
+ # it means we have a reposition command which was not followed by
+ # any text, so we just ignore it and break
+ self._position_tracer.acknowledge_position_changed()
# handle completely new positioning
elif self._position_tracer.is_repositioning_required():
self._collection.append(
- _InstructionNode.create_repositioning_command(
- current_position
- )
+ _InstructionNode.create_repositioning_command(current_position)
)
node = _InstructionNode.create_text(current_position)
self._collection.append(node)
@@ -339,20 +364,29 @@ def add_chars(self, *chars):
node.add_chars(*chars)
- def interpret_command(self, command, previous_is_pac_or_tab=False):
+    @staticmethod
+    def get_style_for_command(command):
+        """Return the name of the style that a style setting command sets.
+
+        :param command: the command code to classify
+        :type command: str
+        :return: "italic" if the command is in ITALICS_COMMANDS, "underline"
+            if it is in UNDERLINE_COMMANDS, "plaintext" otherwise
+        :rtype: str
+        """
+        if command in ITALICS_COMMANDS:
+            return "italic"
+        if command in UNDERLINE_COMMANDS:
+            return "underline"
+        # as we only check STYLE_SETTING_COMMANDS, anything that is neither
+        # italic nor underline can only be a plain text command
+        return "plaintext"
+
+ def interpret_command(self, command, next_command=None):
"""Given a command determines whether to turn italics on or off,
or to set the positioning
This is mostly used to convert from the legacy-style commands
:type command: str
- :type previous_is_pac_or_tab: previous command code is for a PAC command
or a PAC_TAB_OFFSET_COMMANDS
+ :type next_command: the command that follows next
"""
self._update_positioning(command)
- text = COMMANDS.get(command, '')
-
if command == "94a1":
self.handle_backspace("94a1")
@@ -362,41 +396,75 @@ def interpret_command(self, command, previous_is_pac_or_tab=False):
# which will be deleted when the code is applied.
# ex: 2080 97ad 94a1
if (
- self._collection[-1].is_text_node() and
- self._collection[-1].text[-1].isspace()
+ len(self._collection) > 0
+ and self._collection[-1].is_text_node()
+ and self._collection[-1].text[-1].isspace()
):
self._collection[-1].text = self._collection[-1].text[:-1]
- if 'italic' in text:
- if self._position_tracer.is_linebreak_required():
- self._collection.append(_InstructionNode.create_break(
- position=self._position_tracer.get_current_position()))
- self._position_tracer.acknowledge_linebreak_consumed()
- if 'end' not in text:
- self._collection.append(
- _InstructionNode.create_italics_style(
- self._position_tracer.get_current_position())
- )
- self.last_style = "italics on"
+ if command in STYLE_SETTING_COMMANDS:
+ current_position = self._position_tracer.get_current_position()
+ # which style is command setting
+ command_style = self.get_style_for_command(command)
+ if command_style == "italic":
+ if self.last_style is None or self.last_style == "italics off":
+ # if we don't have any style yet, or we have a closed italics tag
+ # it should open italic tag
+ # if break is required, break then add style tag
+ if self._position_tracer.is_linebreak_required():
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
+ self._position_tracer.acknowledge_linebreak_consumed()
+ self._collection.append(
+ _InstructionNode.create_italics_style(current_position)
+ )
+ self.last_style = "italics on"
else:
- self._collection.append(
- _InstructionNode.create_italics_style(
- self._position_tracer.get_current_position(),
- turn_on=False
+ # command sets a different style (underline, plain)
+ # so we need to close italics if we have an open italics tag
+ # otherwise we ignore it
+ # if break is required, add style tag then break
+ if self.last_style == "italics on":
+ self._collection.append(
+ _InstructionNode.create_italics_style(
+ self._position_tracer.get_current_position(), turn_on=False
+ )
)
- )
- self.last_style = "italics off"
-
- # mid row code that is not first code on the line
- # (previous node is not a break node)
- if command in MID_ROW_CODES and not previous_is_pac_or_tab:
+ self.last_style = "italics off"
+ if self._position_tracer.is_linebreak_required():
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
+ self._position_tracer.acknowledge_linebreak_consumed()
+
+ # handle mid-row codes that follow a text node
+ # don't add space if the next command adds one of
+ # ['.', '!', '?', ',']
+ punctuation = ["ae", "a1", "bf", "2c"]
+ next_is_punctuation = next_command and next_command[:2] in punctuation
+ prev_text_node = self.get_previous_text_node()
+ prev_node_is_break = prev_text_node is not None and any(
+ x.is_explicit_break()
+ for x in self._collection[self._collection.index(prev_text_node) :]
+ )
+ if (
+ command in MID_ROW_CODES
+ and prev_text_node
+ and not prev_node_is_break
+ and not prev_text_node.text[-1].isspace()
+ and command not in PAC_TAB_OFFSET_COMMANDS
+ and not next_is_punctuation
+ ):
if self.last_style == "italics off":
- self.add_chars(' ')
+ # need to open italics tag, add a space
+ # to the beginning of the next text node
+ self.add_chars(" ")
else:
- for node in self._collection[::-1]:
- if node.is_text_node() and node.text:
- node.text += ' '
- break
+ # italics on
+ # need to close italics tag, add a space
+ # to the end of the previous text node
+ prev_text_node.text = prev_text_node.text + " "
def _update_positioning(self, command):
"""Sets the positioning information to use for the next nodes
@@ -404,16 +472,16 @@ def _update_positioning(self, command):
:type command: str
"""
if command in PAC_TAB_OFFSET_COMMANDS:
- tab_offset = PAC_TAB_OFFSET_COMMANDS[command]
prev_positioning = self._position_tracer.default
- positioning = (prev_positioning[0],
- prev_positioning[1] + tab_offset)
+ tab_offset = PAC_TAB_OFFSET_COMMANDS[command]
+ positioning = (prev_positioning[0], prev_positioning[1] + tab_offset)
else:
first, second = command[:2], command[2:]
-
try:
+ # is PAC
positioning = PAC_BYTES_TO_POSITIONING_MAP[first][second]
except KeyError:
+ # if not PAC or OFFSET we're not changing position
return
self._position_tracer.update_positioning(positioning)
@@ -444,7 +512,7 @@ def from_list(cls, stash_list, position_tracker):
# use space to separate the stashes, but don't add final space
if idx < len(stash_list) - 1:
try:
- instance._collection[-1].add_chars(' ')
+ instance._collection[-1].add_chars(" ")
except AttributeError:
pass
@@ -462,9 +530,8 @@ def handle_backspace(self, word):
return
last_char = node.text[-1]
delete_previous_condition = (
- (word in EXTENDED_CHARS and last_char not in EXTENDED_CHARS.values()) or
- word == "94a1"
- )
+ word in EXTENDED_CHARS and last_char not in EXTENDED_CHARS.values()
+ ) or word == "94a1"
# in case extended char, perform backspace
# only if the previous character in not also extended
if delete_previous_condition:
@@ -496,10 +563,10 @@ def _get_layout_from_tuple(position_tuple):
horizontal = Size(80 * column / 32.0 + 10, UnitEnum.PERCENT)
# Vertical safe area between 5% and 95%
vertical = Size(90 * (row - 1) / 15.0 + 5, UnitEnum.PERCENT)
- return Layout(origin=Point(horizontal, vertical),
- alignment=Alignment(HorizontalAlignmentEnum.LEFT,
- VerticalAlignmentEnum.TOP)
- )
+ return Layout(
+ origin=Point(horizontal, vertical),
+ alignment=Alignment(HorizontalAlignmentEnum.LEFT, VerticalAlignmentEnum.TOP),
+ )
class _InstructionNode:
@@ -509,6 +576,7 @@ class _InstructionNode:
These nodes will be aggregated into a RepresentableNode, which will then
be easily converted to a CaptionNode.
"""
+
TEXT = 0
BREAK = 1
ITALICS_ON = 2
@@ -533,9 +601,9 @@ def add_chars(self, *args):
:return:
"""
if self.text is None:
- self.text = ''
+ self.text = ""
- self.text += ''.join(args)
+ self.text += "".join(args)
def is_text_node(self):
"""
@@ -585,7 +653,7 @@ def requires_repositioning(self):
def get_text(self):
"""A little legacy code."""
- return ' '.join(self.text.split())
+ return " ".join(self.text.split())
@classmethod
def create_break(cls, position):
@@ -610,7 +678,7 @@ def create_text(cls, position, *chars):
:rtype: _InstructionNode
"""
- return cls(''.join(chars), position=position)
+ return cls("".join(chars), position=position)
@classmethod
def create_italics_style(cls, position, turn_on=True):
@@ -625,8 +693,7 @@ def create_italics_style(cls, position, turn_on=True):
:rtype: _InstructionNode
"""
return cls(
- position=position,
- type_=cls.ITALICS_ON if turn_on else cls.ITALICS_OFF
+ position=position, type_=cls.ITALICS_ON if turn_on else cls.ITALICS_OFF
)
@classmethod
@@ -638,19 +705,19 @@ def create_repositioning_command(cls, position=None):
"""
return cls(type_=cls.CHANGE_POSITION, position=position)
- def __repr__(self): # pragma: no cover
+ def __repr__(self): # pragma: no cover
if self._type == self.BREAK:
- extra = 'BR'
+ extra = "BR"
elif self._type == self.TEXT:
extra = f'"{self.text}"'
elif self._type in (self.ITALICS_ON, self.ITALICS_OFF):
- extra = 'italics {}'.format(
- 'on' if self._type == self.ITALICS_ON else 'off'
+ extra = "italics {}".format(
+ "on" if self._type == self.ITALICS_ON else "off"
)
else:
- extra = 'change position'
+ extra = "change position"
- return f''
+ return f""
def _format_italics(collection):
@@ -689,9 +756,27 @@ def _format_italics(collection):
# removes pairs of italics nodes that don't do anything noticeable
new_collection = _remove_noop_italics(new_collection)
+ # remove spaces at the end of the lines
+ new_collection = _remove_spaces_at_end_of_the_line(new_collection)
+
return new_collection
+def _remove_spaces_at_end_of_the_line(collection):
+    """Strip trailing whitespace from the text nodes that end a line.
+
+    A text node ends a line when it is immediately followed by a break node
+    or when it is the last node of the collection. The nodes are modified in
+    place and the same list object is returned.
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    for idx, node in enumerate(collection):
+        if (
+            idx > 0
+            and node._type == _InstructionNode.BREAK
+            and collection[idx - 1].is_text_node()
+            and collection[idx - 1].text
+        ):
+            collection[idx - 1].text = collection[idx - 1].text.rstrip()
+    # handle last node; an empty collection has no last node (indexing it
+    # with [-1] would raise IndexError), and a text node without text has
+    # nothing to strip
+    if collection and collection[-1].is_text_node() and collection[-1].text:
+        collection[-1].text = collection[-1].text.rstrip()
+    return collection
+
+
def _remove_noop_on_off_italics(collection):
"""Return an equivalent list to `collection`. It removes the italics node
pairs that don't surround text nodes, if those nodes are in the order:
@@ -798,8 +883,9 @@ def _skip_empty_text_nodes(collection):
:type collection: list[_InstructionNode]
:rtype: list[_InstructionNode]
"""
- return [node for node in collection
- if not (node.is_text_node() and node.is_empty())]
+ return [
+ node for node in collection if not (node.is_text_node() and node.is_empty())
+ ]
def _skip_redundant_italics_nodes(collection):
@@ -817,7 +903,8 @@ def _skip_redundant_italics_nodes(collection):
if node.is_italics_node():
if state is None:
state = node.sets_italics_on()
- new_collection.append(node)
+ if node.sets_italics_on():
+ new_collection.append(node)
continue
# skip the nodes that are like the previous
if node.sets_italics_on() is state:
@@ -831,19 +918,19 @@ def _skip_redundant_italics_nodes(collection):
def _close_italics_before_repositioning(collection):
"""Make sure that for every opened italic node, there's a corresponding
- closing node.
+ closing node.
- Will insert a closing italic node, before each repositioning node
+ Will insert a closing italic node, before each repositioning node
- :type collection: list[_InstructionNode]
- :rtype: list[_InstructionNode]
+ :type collection: list[_InstructionNode]
+ :rtype: list[_InstructionNode]
"""
new_collection = []
italics_on = False
last_italics_on_node = None
- for idx, node in enumerate(collection):
+ for node in collection:
if node.is_italics_node() and node.sets_italics_on():
italics_on = True
last_italics_on_node = node
@@ -855,17 +942,16 @@ def _close_italics_before_repositioning(collection):
_InstructionNode.create_italics_style(
# The position info of this new node should be the same
position=last_italics_on_node.position,
- turn_on=False
+ turn_on=False,
)
)
new_collection.append(node)
# Append an italics opening node after the positioning change
new_collection.append(
- _InstructionNode.create_italics_style(
- position=node.position
- )
+ _InstructionNode.create_italics_style(position=node.position)
)
continue
+
new_collection.append(node)
return new_collection
@@ -892,8 +978,7 @@ def _ensure_final_italics_node_closes(collection):
if italics_on:
new_collection.append(
_InstructionNode.create_italics_style(
- position=last_italics_on_node.position,
- turn_on=False
+ position=last_italics_on_node.position, turn_on=False
)
)
return new_collection
diff --git a/pycaption/scc/state_machines.py b/pycaption/scc/state_machines.py
index 04fc632b..af5cd537 100644
--- a/pycaption/scc/state_machines.py
+++ b/pycaption/scc/state_machines.py
@@ -5,6 +5,7 @@ class _PositioningTracker:
"""Helps determine the positioning of a node, having kept track of
positioning-related commands.
"""
+
def __init__(self, positioning=None):
"""
:param positioning: positioning information (row, column)
@@ -39,10 +40,9 @@ def update_positioning(self, positioning):
col = self._last_column
new_row, new_col = positioning
is_tab_offset = new_row == row and col + 1 <= new_col <= col + 3
-
# One line below will be treated as line break, not repositioning
if new_row == row + 1:
- self._positions.append((new_row, col))
+ self._positions.append((new_row, new_col))
self._break_required = True
self._last_column = new_col
# Tab offsets after line breaks will be ignored to avoid repositioning
@@ -64,9 +64,7 @@ def get_current_position(self):
:raise: CaptionReadSyntaxError
"""
if not any(self._positions):
- raise CaptionReadSyntaxError(
- 'No Preamble Address Code [PAC] was provided'
- )
+ raise CaptionReadSyntaxError("No Preamble Address Code [PAC] was provided")
else:
return self._positions[0]
@@ -97,6 +95,7 @@ class DefaultProvidingPositionTracker(_PositioningTracker):
"""A _PositioningTracker that provides if needed a default value (14, 0), or
uses the last positioning value set anywhere in the document
"""
+
default = (14, 0)
def __init__(self, positioning=None, default=None):
diff --git a/pycaption/scc/translator.py b/pycaption/scc/translator.py
index 88fc36e0..aba7f8a2 100644
--- a/pycaption/scc/translator.py
+++ b/pycaption/scc/translator.py
@@ -1,556 +1,11 @@
-from pycaption.scc.constants import CHARACTERS, SPECIAL_CHARS, EXTENDED_CHARS
-
-ALL_CHARACTERS = {**CHARACTERS, **SPECIAL_CHARS, **EXTENDED_CHARS}
-COMMAND_LABELS = {
- "9420": "Resume Caption Loading",
- "9429": "Resume Direct Captioning",
- "9425": "Roll-Up Captions--2 Rows",
- "9426": "Roll-Up Captions--3 Rows",
- "94a7": "Roll-Up Captions--4 Rows",
- "942a": "Text Restart",
- "94ab": "Resume Text Display",
- "942c": "Erase Displayed Memory",
- "94ae": "Erase Non-displayed Memory",
- "942f": "End Of Caption",
- "9140": "row 01, column 00, with plain white text.",
- "91c1": "row 01, column 00, with white underlined text.",
- "91c2": "row 01, column 00, with plain green text.",
- "9143": "row 01, column 00, with green underlined text.",
- "91c4": "row 01, column 00, with plain blue text.",
- "9145": "row 01, column 00, with blue underlined text.",
- "9146": "row 01, column 00, with plain cyan text.",
- "91c7": "row 01, column 00, with cyan underlined text.",
- "91c8": "row 01, column 00, with plain red text.",
- "9149": "row 01, column 00, with red underlined text.",
- "914a": "row 01, column 00, with plain yellow text.",
- "91cb": "row 01, column 00, with yellow underlined text.",
- "914c": "row 01, column 00, with plain magenta text.",
- "91cd": "row 01, column 00, with magenta underlined text.",
- "91ce": "row 01, column 00, with white italicized text.",
- "914f": "row 01, column 00, with white underlined italicized text.",
- "91d0": "row 01, column 00, with plain white text.",
- "9151": "row 01, column 00, with white underlined text.",
- "9152": "row 01, column 04, with plain white text.",
- "91d3": "row 01, column 04, with white underlined text.",
- "9154": "row 01, column 08, with plain white text.",
- "91d5": "row 01, column 08, with white underlined text.",
- "91d6": "row 01, column 12, with plain white text.",
- "9157": "row 01, column 12, with white underlined text.",
- "9158": "row 01, column 16, with plain white text.",
- "91d9": "row 01, column 16, with white underlined text.",
- "91da": "row 01, column 20, with plain white text.",
- "915b": "row 01, column 20, with white underlined text.",
- "91dc": "row 01, column 24, with plain white text.",
- "915d": "row 01, column 24, with white underlined text.",
- "915e": "row 01, column 28, with plain white text.",
- "91df": "row 01, column 28, with white underlined text.",
- "91e0": "row 02, column 00, with plain white text.",
- "9161": "row 02, column 00, with white underlined text.",
- "9162": "row 02, column 00, with plain green text.",
- "91e3": "row 02, column 00, with green underlined text.",
- "9164": "row 02, column 00, with plain blue text.",
- "91e5": "row 02, column 00, with blue underlined text.",
- "91e6": "row 02, column 00, with plain cyan text.",
- "9167": "row 02, column 00, with cyan underlined text.",
- "9168": "row 02, column 00, with plain red text.",
- "91e9": "row 02, column 00, with red underlined text.",
- "91ea": "row 02, column 00, with plain yellow text.",
- "916b": "row 02, column 00, with yellow underlined text.",
- "91ec": "row 02, column 00, with plain magenta text.",
- "916d": "row 02, column 00, with magenta underlined text.",
- "916e": "row 02, column 00, with white italicized text.",
- "91ef": "row 02, column 00, with white underlined italicized text.",
- "9170": "row 02, column 00, with plain white text.",
- "91f1": "row 02, column 00, with white underlined text.",
- "91f2": "row 02, column 04, with plain white text.",
- "9173": "row 02, column 04, with white underlined text.",
- "91f4": "row 02, column 08, with plain white text.",
- "9175": "row 02, column 08, with white underlined text.",
- "9176": "row 02, column 12, with plain white text.",
- "91f7": "row 02, column 12, with white underlined text.",
- "91f8": "row 02, column 16, with plain white text.",
- "9179": "row 02, column 16, with white underlined text.",
- "917a": "row 02, column 20, with plain white text.",
- "91fb": "row 02, column 20, with white underlined text.",
- "91fc": "row 02, column 24, with plain white text.",
- "91fd": "row 02, column 24, with white underlined text.",
- "91fe": "row 02, column 28, with plain white text.",
- "917f": "row 02, column 28, with white underlined text.",
- "9240": "row 03, column 00, with plain white text.",
- "92c1": "row 03, column 00, with white underlined text.",
- "92c2": "row 03, column 00, with plain green text.",
- "9243": "row 03, column 00, with green underlined text.",
- "92c4": "row 03, column 00, with plain blue text.",
- "9245": "row 03, column 00, with blue underlined text.",
- "9246": "row 03, column 00, with plain cyan text.",
- "92c7": "row 03, column 00, with cyan underlined text.",
- "92c8": "row 03, column 00, with plain red text.",
- "9249": "row 03, column 00, with red underlined text.",
- "924a": "row 03, column 00, with plain yellow text.",
- "92cb": "row 03, column 00, with yellow underlined text.",
- "924c": "row 03, column 00, with plain magenta text.",
- "92cd": "row 03, column 00, with magenta underlined text.",
- "92ce": "row 03, column 00, with white italicized text.",
- "924f": "row 03, column 00, with white underlined italicized text.",
- "92d0": "row 03, column 00, with plain white text.",
- "9251": "row 03, column 00, with white underlined text.",
- "9252": "row 03, column 04, with plain white text.",
- "92d3": "row 03, column 04, with white underlined text.",
- "9254": "row 03, column 08, with plain white text.",
- "92d5": "row 03, column 08, with white underlined text.",
- "92d6": "row 03, column 12, with plain white text.",
- "9257": "row 03, column 12, with white underlined text.",
- "9258": "row 03, column 16, with plain white text.",
- "92d9": "row 03, column 16, with white underlined text.",
- "92da": "row 03, column 20, with plain white text.",
- "925b": "row 03, column 20, with white underlined text.",
- "92dc": "row 03, column 24, with plain white text.",
- "925d": "row 03, column 24, with white underlined text.",
- "925e": "row 03, column 28, with plain white text.",
- "92df": "row 03, column 28, with white underlined text.",
- "92e0": "row 04, column 00, with plain white text.",
- "9261": "row 04, column 00, with white underlined text.",
- "9262": "row 04, column 00, with plain green text.",
- "92e3": "row 04, column 00, with green underlined text.",
- "9264": "row 04, column 00, with plain blue text.",
- "92e5": "row 04, column 00, with blue underlined text.",
- "92e6": "row 04, column 00, with plain cyan text.",
- "9267": "row 04, column 00, with cyan underlined text.",
- "9268": "row 04, column 00, with plain red text.",
- "92e9": "row 04, column 00, with red underlined text.",
- "92ea": "row 04, column 00, with plain yellow text.",
- "926b": "row 04, column 00, with yellow underlined text.",
- "92ec": "row 04, column 00, with plain magenta text.",
- "926d": "row 04, column 00, with magenta underlined text.",
- "926e": "row 04, column 00, with white italicized text.",
- "92ef": "row 04, column 00, with white underlined italicized text.",
- "9270": "row 04, column 00, with plain white text.",
- "92f1": "row 04, column 00, with white underlined text.",
- "92f2": "row 04, column 04, with plain white text.",
- "9273": "row 04, column 04, with white underlined text.",
- "92f4": "row 04, column 08, with plain white text.",
- "9275": "row 04, column 08, with white underlined text.",
- "9276": "row 04, column 12, with plain white text.",
- "92f7": "row 04, column 12, with white underlined text.",
- "92f8": "row 04, column 16, with plain white text.",
- "9279": "row 04, column 16, with white underlined text.",
- "927a": "row 04, column 20, with plain white text.",
- "92fb": "row 04, column 20, with white underlined text.",
- "92fc": "row 04, column 24, with plain white text.",
- "92fd": "row 04, column 24, with white underlined text.",
- "92fe": "row 04, column 28, with plain white text.",
- "927f": "row 04, column 28, with white underlined text.",
- "1540": "row 05, column 00, with plain white text.",
- "15c1": "row 05, column 00, with white underlined text.",
- "15c2": "row 05, column 00, with plain green text.",
- "1543": "row 05, column 00, with green underlined text.",
- "15c4": "row 05, column 00, with plain blue text.",
- "1545": "row 05, column 00, with blue underlined text.",
- "1546": "row 05, column 00, with plain cyan text.",
- "15c7": "row 05, column 00, with cyan underlined text.",
- "15c8": "row 05, column 00, with plain red text.",
- "1549": "row 05, column 00, with red underlined text.",
- "154a": "row 05, column 00, with plain yellow text.",
- "15cb": "row 05, column 00, with yellow underlined text.",
- "154c": "row 05, column 00, with plain magenta text.",
- "15cd": "row 05, column 00, with magenta underlined text.",
- "15ce": "row 05, column 00, with white italicized text.",
- "154f": "row 05, column 00, with white underlined italicized text.",
- "15d0": "row 05, column 00, with plain white text.",
- "1551": "row 05, column 00, with white underlined text.",
- "1552": "row 05, column 04, with plain white text.",
- "15d3": "row 05, column 04, with white underlined text.",
- "1554": "row 05, column 08, with plain white text.",
- "15d5": "row 05, column 08, with white underlined text.",
- "15d6": "row 05, column 12, with plain white text.",
- "1557": "row 05, column 12, with white underlined text.",
- "1558": "row 05, column 16, with plain white text.",
- "15d9": "row 05, column 16, with white underlined text.",
- "15da": "row 05, column 20, with plain white text.",
- "155b": "row 05, column 20, with white underlined text.",
- "15dc": "row 05, column 24, with plain white text.",
- "155d": "row 05, column 24, with white underlined text.",
- "155e": "row 05, column 28, with plain white text.",
- "15df": "row 05, column 28, with white underlined text.",
- "15e0": "row 06, column 00, with plain white text.",
- "1561": "row 06, column 00, with white underlined text.",
- "15462": "row 06, column 00, with plain green text.",
- "15e3": "row 06, column 00, with green underlined text.",
- "1564": "row 06, column 00, with plain blue text.",
- "15e5": "row 06, column 00, with blue underlined text.",
- "15e6": "row 06, column 00, with plain cyan text.",
- "1567": "row 06, column 00, with cyan underlined text.",
- "1568": "row 06, column 00, with plain red text.",
- "15e9": "row 06, column 00, with red underlined text.",
- "15ea": "row 06, column 00, with plain yellow text.",
- "156b": "row 06, column 00, with yellow underlined text.",
- "15ec": "row 06, column 00, with plain magenta text.",
- "156d": "row 06, column 00, with magenta underlined text.",
- "156e": "row 06, column 00, with white italicized text.",
- "15ef": "row 06, column 00, with white underlined italicized text.",
- "1570": "row 06, column 00, with plain white text.",
- "15f1": "row 06, column 00, with white underlined text.",
- "15f2": "row 06, column 04, with plain white text.",
- "1573": "row 06, column 04, with white underlined text.",
- "15f4": "row 06, column 08, with plain white text.",
- "1575": "row 06, column 08, with white underlined text.",
- "1576": "row 06, column 12, with plain white text.",
- "15f7": "row 06, column 12, with white underlined text.",
- "15f8": "row 06, column 16, with plain white text.",
- "1579": "row 06, column 16, with white underlined text.",
- "157a": "row 06, column 20, with plain white text.",
- "15fb": "row 06, column 20, with white underlined text.",
- "15fc": "row 06, column 24, with plain white text.",
- "15fd": "row 06, column 24, with white underlined text.",
- "15fe": "row 06, column 28, with plain white text.",
- "157f": "row 06, column 28, with white underlined text.",
- "1640": "row 07, column 00, with plain white text.",
- "16c1": "row 07, column 00, with white underlined text.",
- "16c2": "row 07, column 00, with plain green text.",
- "1643": "row 07, column 00, with green underlined text.",
- "16c4": "row 07, column 00, with plain blue text.",
- "1645": "row 07, column 00, with blue underlined text.",
- "1646": "row 07, column 00, with plain cyan text.",
- "16c7": "row 07, column 00, with cyan underlined text.",
- "16c8": "row 07, column 00, with plain red text.",
- "1649": "row 07, column 00, with red underlined text.",
- "164a": "row 07, column 00, with plain yellow text.",
- "16cb": "row 07, column 00, with yellow underlined text.",
- "164c": "row 07, column 00, with plain magenta text.",
- "16cd": "row 07, column 00, with magenta underlined text.",
- "16ce": "row 07, column 00, with white italicized text.",
- "164f": "row 07, column 00, with white underlined italicized text.",
- "16d0": "row 07, column 00, with plain white text.",
- "1651": "row 07, column 00, with white underlined text.",
- "1652": "row 07, column 04, with plain white text.",
- "16d3": "row 07, column 04, with white underlined text.",
- "1654": "row 07, column 08, with plain white text.",
- "16d5": "row 07, column 08, with white underlined text.",
- "16d6": "row 07, column 12, with plain white text.",
- "1657": "row 07, column 12, with white underlined text.",
- "1658": "row 07, column 16, with plain white text.",
- "16d9": "row 07, column 16, with white underlined text.",
- "16da": "row 07, column 20, with plain white text.",
- "165b": "row 07, column 20, with white underlined text.",
- "16dc": "row 07, column 24, with plain white text.",
- "165d": "row 07, column 24, with white underlined text.",
- "165e": "row 07, column 28, with plain white text.",
- "16df": "row 07, column 28, with white underlined text.",
- "16e0": "row 08, column 00, with plain white text.",
- "1661": "row 08, column 00, with white underlined text.",
- "16462": "row 08, column 00, with plain green text.",
- "16e3": "row 08, column 00, with green underlined text.",
- "1664": "row 08, column 00, with plain blue text.",
- "16e5": "row 08, column 00, with blue underlined text.",
- "16e6": "row 08, column 00, with plain cyan text.",
- "1667": "row 08, column 00, with cyan underlined text.",
- "1668": "row 08, column 00, with plain red text.",
- "16e9": "row 08, column 00, with red underlined text.",
- "16ea": "row 08, column 00, with plain yellow text.",
- "166b": "row 08, column 00, with yellow underlined text.",
- "16ec": "row 08, column 00, with plain magenta text.",
- "166d": "row 08, column 00, with magenta underlined text.",
- "166e": "row 08, column 00, with white italicized text.",
- "16ef": "row 08, column 00, with white underlined italicized text.",
- "1670": "row 08, column 00, with plain white text.",
- "16f1": "row 08, column 00, with white underlined text.",
- "16f2": "row 08, column 04, with plain white text.",
- "1673": "row 08, column 04, with white underlined text.",
- "16f4": "row 08, column 08, with plain white text.",
- "1675": "row 08, column 08, with white underlined text.",
- "1676": "row 08, column 12, with plain white text.",
- "16f7": "row 08, column 12, with white underlined text.",
- "16f8": "row 08, column 16, with plain white text.",
- "1679": "row 08, column 16, with white underlined text.",
- "167a": "row 08, column 20, with plain white text.",
- "16fb": "row 08, column 20, with white underlined text.",
- "16fc": "row 08, column 24, with plain white text.",
- "16fd": "row 08, column 24, with white underlined text.",
- "16fe": "row 08, column 28, with plain white text.",
- "167f": "row 08, column 28, with white underlined text.",
- "9740": "row 09, column 00, with plain white text.",
- "97c1": "row 09, column 00, with white underlined text.",
- "97c2": "row 09, column 00, with plain green text.",
- "9743": "row 09, column 00, with green underlined text.",
- "97c4": "row 09, column 00, with plain blue text.",
- "9745": "row 09, column 00, with blue underlined text.",
- "9746": "row 09, column 00, with plain cyan text.",
- "97c7": "row 09, column 00, with cyan underlined text.",
- "97c8": "row 09, column 00, with plain red text.",
- "9749": "row 09, column 00, with red underlined text.",
- "974a": "row 09, column 00, with plain yellow text.",
- "97cb": "row 09, column 00, with yellow underlined text.",
- "974c": "row 09, column 00, with plain magenta text.",
- "97cd": "row 09, column 00, with magenta underlined text.",
- "97ce": "row 09, column 00, with white italicized text.",
- "974f": "row 09, column 00, with white underlined italicized text.",
- "97d0": "row 09, column 00, with plain white text.",
- "9751": "row 09, column 00, with white underlined text.",
- "9752": "row 09, column 04, with plain white text.",
- "97d3": "row 09, column 04, with white underlined text.",
- "9754": "row 09, column 08, with plain white text.",
- "97d5": "row 09, column 08, with white underlined text.",
- "97d6": "row 09, column 12, with plain white text.",
- "9757": "row 09, column 12, with white underlined text.",
- "9758": "row 09, column 16, with plain white text.",
- "97d9": "row 09, column 16, with white underlined text.",
- "97da": "row 09, column 20, with plain white text.",
- "975b": "row 09, column 20, with white underlined text.",
- "97dc": "row 09, column 24, with plain white text.",
- "975d": "row 09, column 24, with white underlined text.",
- "975e": "row 09, column 28, with plain white text.",
- "97df": "row 09, column 28, with white underlined text.",
- "97e0": "row 10, column 00, with plain white text.",
- "9761": "row 10, column 00, with white underlined text.",
- "9762": "row 10, column 00, with plain green text.",
- "97e3": "row 10, column 00, with green underlined text.",
- "9764": "row 10, column 00, with plain blue text.",
- "97e5": "row 10, column 00, with blue underlined text.",
- "97e6": "row 10, column 00, with plain cyan text.",
- "9767": "row 10, column 00, with cyan underlined text.",
- "9768": "row 10, column 00, with plain red text.",
- "97e9": "row 10, column 00, with red underlined text.",
- "97ea": "row 10, column 00, with plain yellow text.",
- "976b": "row 10, column 00, with yellow underlined text.",
- "97ec": "row 10, column 00, with plain magenta text.",
- "976d": "row 10, column 00, with magenta underlined text.",
- "976e": "row 10, column 00, with white italicized text.",
- "97ef": "row 10, column 00, with white underlined italicized text.",
- "9770": "row 10, column 00, with plain white text.",
- "97f1": "row 10, column 00, with white underlined text.",
- "97f2": "row 10, column 04, with plain white text.",
- "9773": "row 10, column 04, with white underlined text.",
- "97f4": "row 10, column 08, with plain white text.",
- "9775": "row 10, column 08, with white underlined text.",
- "9776": "row 10, column 12, with plain white text.",
- "97f7": "row 10, column 12, with white underlined text.",
- "97f8": "row 10, column 16, with plain white text.",
- "9779": "row 10, column 16, with white underlined text.",
- "977a": "row 10, column 20, with plain white text.",
- "97fb": "row 10, column 20, with white underlined text.",
- "97fc": "row 10, column 24, with plain white text.",
- "97fd": "row 10, column 24, with white underlined text.",
- "97fe": "row 10, column 28, with plain white text.",
- "977f": "row 10, column 28, with white underlined text.",
- "1040": "row 11, column 00, with plain white text.",
- "10c1": "row 11, column 00, with white underlined text.",
- "10c2": "row 11, column 00, with plain green text.",
- "1043": "row 11, column 00, with green underlined text.",
- "10c4": "row 11, column 00, with plain blue text.",
- "1045": "row 11, column 00, with blue underlined text.",
- "1046": "row 11, column 00, with plain cyan text.",
- "10c7": "row 11, column 00, with cyan underlined text.",
- "10c8": "row 11, column 00, with plain red text.",
- "1049": "row 11, column 00, with red underlined text.",
- "104a": "row 11, column 00, with plain yellow text.",
- "10cb": "row 11, column 00, with yellow underlined text.",
- "104c": "row 11, column 00, with plain magenta text.",
- "10cd": "row 11, column 00, with magenta underlined text.",
- "10ce": "row 11, column 00, with white italicized text.",
- "104f": "row 11, column 00, with white underlined italicized text.",
- "10d0": "row 11, column 00, with plain white text.",
- "1051": "row 11, column 00, with white underlined text.",
- "1052": "row 11, column 04, with plain white text.",
- "10d3": "row 11, column 04, with white underlined text.",
- "1054": "row 11, column 08, with plain white text.",
- "10d5": "row 11, column 08, with white underlined text.",
- "10d6": "row 11, column 12, with plain white text.",
- "1057": "row 11, column 12, with white underlined text.",
- "1058": "row 11, column 16, with plain white text.",
- "10d9": "row 11, column 16, with white underlined text.",
- "10da": "row 11, column 20, with plain white text.",
- "105b": "row 11, column 20, with white underlined text.",
- "10dc": "row 11, column 24, with plain white text.",
- "105d": "row 11, column 24, with white underlined text.",
- "105e": "row 11, column 28, with plain white text.",
- "10df": "row 11, column 28, with white underlined text.",
- "1340": "row 12, column 00, with plain white text.",
- "13c1": "row 12, column 00, with white underlined text.",
- "13c2": "row 12, column 00, with plain green text.",
- "1343": "row 12, column 00, with green underlined text.",
- "13c4": "row 12, column 00, with plain blue text.",
- "1345": "row 12, column 00, with blue underlined text.",
- "1346": "row 12, column 00, with plain cyan text.",
- "13c7": "row 12, column 00, with cyan underlined text.",
- "13c8": "row 12, column 00, with plain red text.",
- "1349": "row 12, column 00, with red underlined text.",
- "134a": "row 12, column 00, with plain yellow text.",
- "13cb": "row 12, column 00, with yellow underlined text.",
- "134c": "row 12, column 00, with plain magenta text.",
- "13cd": "row 12, column 00, with magenta underlined text.",
- "13ce": "row 12, column 00, with white italicized text.",
- "134f": "row 12, column 00, with white underlined italicized text.",
- "13d0": "row 12, column 00, with plain white text.",
- "1351": "row 12, column 00, with white underlined text.",
- "1352": "row 12, column 04, with plain white text.",
- "13d3": "row 12, column 04, with white underlined text.",
- "1354": "row 12, column 08, with plain white text.",
- "13d5": "row 12, column 08, with white underlined text.",
- "13d6": "row 12, column 12, with plain white text.",
- "1357": "row 12, column 12, with white underlined text.",
- "1358": "row 12, column 16, with plain white text.",
- "13d9": "row 12, column 16, with white underlined text.",
- "13da": "row 12, column 20, with plain white text.",
- "135b": "row 12, column 20, with white underlined text.",
- "13dc": "row 12, column 24, with plain white text.",
- "135d": "row 12, column 24, with white underlined text.",
- "135e": "row 12, column 28, with plain white text.",
- "13df": "row 12, column 28, with white underlined text.",
- "13e0": "row 13, column 00, with plain white text.",
- "1361": "row 13, column 00, with white underlined text.",
- "13462": "row 13, column 00, with plain green text.",
- "13e3": "row 13, column 00, with green underlined text.",
- "1364": "row 13, column 00, with plain blue text.",
- "13e5": "row 13, column 00, with blue underlined text.",
- "13e6": "row 13, column 00, with plain cyan text.",
- "1367": "row 13, column 00, with cyan underlined text.",
- "1368": "row 13, column 00, with plain red text.",
- "13e9": "row 13, column 00, with red underlined text.",
- "13ea": "row 13, column 00, with plain yellow text.",
- "136b": "row 13, column 00, with yellow underlined text.",
- "13ec": "row 13, column 00, with plain magenta text.",
- "136d": "row 13, column 00, with magenta underlined text.",
- "136e": "row 13, column 00, with white italicized text.",
- "13ef": "row 13, column 00, with white underlined italicized text.",
- "1370": "row 13, column 00, with plain white text.",
- "13f1": "row 13, column 00, with white underlined text.",
- "13f2": "row 13, column 04, with plain white text.",
- "1373": "row 13, column 04, with white underlined text.",
- "13f4": "row 13, column 08, with plain white text.",
- "1375": "row 13, column 08, with white underlined text.",
- "1376": "row 13, column 12, with plain white text.",
- "13f7": "row 13, column 12, with white underlined text.",
- "13f8": "row 13, column 16, with plain white text.",
- "1379": "row 13, column 16, with white underlined text.",
- "137a": "row 13, column 20, with plain white text.",
- "13fb": "row 13, column 20, with white underlined text.",
- "13fc": "row 13, column 24, with plain white text.",
- "13fd": "row 13, column 24, with white underlined text.",
- "13fe": "row 13, column 28, with plain white text.",
- "137f": "row 13, column 28, with white underlined text.",
- "9440": "row 14, column 00, with plain white text.",
- "94c1": "row 14, column 00, with white underlined text.",
- "94c2": "row 14, column 00, with plain green text.",
- "9443": "row 14, column 00, with green underlined text.",
- "94c4": "row 14, column 00, with plain blue text.",
- "9445": "row 14, column 00, with blue underlined text.",
- "9446": "row 14, column 00, with plain cyan text.",
- "94c7": "row 14, column 00, with cyan underlined text.",
- "94c8": "row 14, column 00, with plain red text.",
- "9449": "row 14, column 00, with red underlined text.",
- "944a": "row 14, column 00, with plain yellow text.",
- "94cb": "row 14, column 00, with yellow underlined text.",
- "944c": "row 14, column 00, with plain magenta text.",
- "94cd": "row 14, column 00, with magenta underlined text.",
- "94ce": "row 14, column 00, with white italicized text.",
- "944f": "row 14, column 00, with white underlined italicized text.",
- "94d0": "row 14, column 00, with plain white text.",
- "9451": "row 14, column 00, with white underlined text.",
- "9452": "row 14, column 04, with plain white text.",
- "94d3": "row 14, column 04, with white underlined text.",
- "9454": "row 14, column 08, with plain white text.",
- "94d5": "row 14, column 08, with white underlined text.",
- "94d6": "row 14, column 12, with plain white text.",
- "9457": "row 14, column 12, with white underlined text.",
- "9458": "row 14, column 16, with plain white text.",
- "94d9": "row 14, column 16, with white underlined text.",
- "94da": "row 14, column 20, with plain white text.",
- "945b": "row 14, column 20, with white underlined text.",
- "94dc": "row 14, column 24, with plain white text.",
- "945d": "row 14, column 24, with white underlined text.",
- "945e": "row 14, column 28, with plain white text.",
- "94df": "row 14, column 28, with white underlined text.",
- "94e0": "row 15, column 00, with plain white text.",
- "9461": "row 15, column 00, with white underlined text.",
- "9462": "row 15, column 00, with plain green text.",
- "94e3": "row 15, column 00, with green underlined text.",
- "9464": "row 15, column 00, with plain blue text.",
- "94e5": "row 15, column 00, with blue underlined text.",
- "94e6": "row 15, column 00, with plain cyan text.",
- "9467": "row 15, column 00, with cyan underlined text.",
- "9468": "row 15, column 00, with plain red text.",
- "94e9": "row 15, column 00, with red underlined text.",
- "94ea": "row 15, column 00, with plain yellow text.",
- "946b": "row 15, column 00, with yellow underlined text.",
- "94ec": "row 15, column 00, with plain magenta text.",
- "946d": "row 15, column 00, with magenta underlined text.",
- "946e": "row 15, column 00, with white italicized text.",
- "94ef": "row 15, column 00, with white underlined italicized text.",
- "9470": "row 15, column 00, with plain white text.",
- "94f1": "row 15, column 00, with white underlined text.",
- "94f2": "row 15, column 04, with plain white text.",
- "9473": "row 15, column 04, with white underlined text.",
- "94f4": "row 15, column 08, with plain white text.",
- "9475": "row 15, column 08, with white underlined text.",
- "9476": "row 15, column 12, with plain white text.",
- "94f7": "row 15, column 12, with white underlined text.",
- "94f8": "row 15, column 16, with plain white text.",
- "9479": "row 15, column 16, with white underlined text.",
- "947a": "row 15, column 20, with plain white text.",
- "94fb": "row 15, column 20, with white underlined text.",
- "94fc": "row 15, column 24, with plain white text.",
- "94fd": "row 15, column 24, with white underlined text.",
- "94fe": "row 15, column 28, with plain white text.",
- "947f": "row 15, column 28, with white underlined text.",
- "97a1": "Tab Offset 1 column",
- "97a2": "Tab Offset 2 columns",
- "9723": "Tab Offset 3 columns",
- "94a1": "BackSpace",
- "94a4": "Delete to End of Row",
- "94ad": "Carriage Return",
- "1020": "Background White",
- "10a1": "Background Semi-Transparent White",
- "10a2": "Background Green",
- "1023": "Background Semi-Transparent Green",
- "10a4": "Background Blue",
- "1025": "Background Semi-Transparent Blue",
- "1026": "Background Cyan",
- "10a7": "Background Semi-Transparent Cyan",
- "10a8": "Background Red",
- "1029": "Background Semi-Transparent Red",
- "102a": "Background Yellow",
- "10ab": "Background Semi-Transparent Yellow",
- "102c": "Background Magenta",
- "10ad": "Background Semi-Transparent Magenta",
- "10ae": "Background Black",
- "102f": "Background Semi-Transparent Black",
- "97ad": "Background Transparent",
- "97a4": "Standard Character Set",
- "9725": "Double-Size Character Set",
- "9726": "First Private Character Set",
- "97a7": "Second Private Character Set",
- "97a8": "People`s Republic of China Character Set",
- "9729": "Korean Standard Character Set",
- "972a": "First Registered Character Set",
- "9120": "White",
- "91a1": "White Underline",
- "91a2": "Green",
- "9123": "Green Underline",
- "91a4": "Blue",
- "9125": "Blue Underline",
- "9126": "Cyan",
- "91a7": "Cyan Underline",
- "91a8": "Red",
- "9129": "Red Underline",
- "912a": "Yellow",
- "91ab": "Yellow Underline",
- "912c": "Magenta",
- "91ad": "Magenta Underline",
- "97ae": "Black",
- "972f": "Black Underline",
- "91ae": "Italics",
- "912f": "Italics Underline",
- "94a8": "Flash ON",
- "9423": "Alarm Off",
- "94a2": "Alarm On"
-}
+from pycaption.scc.constants import ALL_CHARACTERS, COMMAND_LABELS
def translate_scc(scc_content, brackets='[]'):
"""
Replaces hexadecimal words with their meaning
- In order to make SCC files more human readable and easier to debug,
+ In order to make SCC files more human-readable and easier to debug,
this function is used to replace command codes with their labels and
character bytes with their actual characters
diff --git a/pycaption/transcript.py b/pycaption/transcript.py
index a65d9b3b..46df9f95 100644
--- a/pycaption/transcript.py
+++ b/pycaption/transcript.py
@@ -4,6 +4,7 @@
import nltk.data
except ModuleNotFoundError:
nltk = None
+
from pycaption.base import BaseWriter, CaptionNode
diff --git a/pycaption/webvtt.py b/pycaption/webvtt.py
index d40f02c3..805663f8 100644
--- a/pycaption/webvtt.py
+++ b/pycaption/webvtt.py
@@ -3,11 +3,11 @@
import sys
from copy import deepcopy
-from .base import (
- BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode,
-)
+from .base import BaseReader, BaseWriter, Caption, CaptionList, CaptionNode, CaptionSet
from .exceptions import (
- CaptionReadError, CaptionReadSyntaxError, CaptionReadNoCaptions,
+ CaptionReadError,
+ CaptionReadNoCaptions,
+ CaptionReadSyntaxError,
InvalidInputError,
)
from .geometry import HorizontalAlignmentEnum, Layout
@@ -15,22 +15,22 @@
# A WebVTT timing line has both start/end times and layout related settings
# (referred to as 'cue settings' in the documentation)
# The following pattern captures [start], [end] and [cue settings] if existent
-TIMING_LINE_PATTERN = re.compile(r'^(\S+)\s+-->\s+(\S+)(?:\s+(.*?))?\s*$')
-TIMESTAMP_PATTERN = re.compile(r'^(\d+):(\d{2})(:\d{2})?\.(\d{3})')
-VOICE_SPAN_PATTERN = re.compile(']*)>')
+TIMING_LINE_PATTERN = re.compile(r"^(\S+)\s+-->\s+(\S+)(?:\s+(.*?))?\s*$")
+TIMESTAMP_PATTERN = re.compile(r"^(\d+):(\d{2})(:\d{2})?\.(\d{3})")
+VOICE_SPAN_PATTERN = re.compile("]*)>")
OTHER_SPAN_PATTERN = re.compile(
- r'?([cibuv]|ruby|rt|lang|(\d+):(\d{2})(:\d{2})?\.(\d{3})).*?>'
+ r"?([cibuv]|ruby|rt|lang|(\d+):(\d{2})(:\d{2})?\.(\d{3})).*?>"
) # These WebVTT tags are stripped off the cues on conversion
WEBVTT_VERSION_OF = {
- HorizontalAlignmentEnum.LEFT: 'left',
- HorizontalAlignmentEnum.CENTER: 'center',
- HorizontalAlignmentEnum.RIGHT: 'right',
- HorizontalAlignmentEnum.START: 'start',
- HorizontalAlignmentEnum.END: 'end'
+ HorizontalAlignmentEnum.LEFT: "left",
+ HorizontalAlignmentEnum.CENTER: "center",
+ HorizontalAlignmentEnum.RIGHT: "right",
+ HorizontalAlignmentEnum.START: "start",
+ HorizontalAlignmentEnum.END: "end",
}
-DEFAULT_ALIGN = 'start'
+DEFAULT_ALIGN = "start"
def microseconds(h, m, s, f):
@@ -42,7 +42,9 @@ def microseconds(h, m, s, f):
class WebVTTReader(BaseReader):
- def __init__(self, ignore_timing_errors=True, time_shift_milliseconds=0, *args, **kwargs):
+ def __init__(
+ self, ignore_timing_errors=True, time_shift_milliseconds=0, *args, **kwargs
+ ):
"""
:param ignore_timing_errors: Whether to ignore timing checks
:type ignore_timing_errors: bool
@@ -53,11 +55,11 @@ def __init__(self, ignore_timing_errors=True, time_shift_milliseconds=0, *args,
self.time_shift_microseconds = time_shift_milliseconds * 1000
def detect(self, content):
- return 'WEBVTT' in content
+ return "WEBVTT" in content
- def read(self, content, lang='en-US'):
+ def read(self, content, lang="en-US"):
if not isinstance(content, str):
- raise InvalidInputError('The content is not a unicode string.')
+ raise InvalidInputError("The content is not a unicode string.")
caption_set = CaptionSet({lang: self._parse(content.splitlines())})
@@ -76,31 +78,30 @@ def _parse(self, lines):
for i, line in enumerate(lines):
- if '-->' in line:
+ if "-->" in line:
found_timing = True
timing_line = i
last_start_time = captions[-1].start if captions else 0
try:
start, end, layout_info = self._parse_timing_line(
- line, last_start_time)
+ line, last_start_time
+ )
except CaptionReadError as e:
- new_msg = f'{e.args[0]} (line {timing_line})'
+ new_msg = f"{e.args[0]} (line {timing_line})"
tb = sys.exc_info()[2]
raise type(e)(new_msg).with_traceback(tb) from None
- elif '' == line:
+ elif "" == line:
if found_timing and nodes:
found_timing = False
- caption = Caption(
- start, end, nodes, layout_info=layout_info)
+ caption = Caption(start, end, nodes, layout_info=layout_info)
captions.append(caption)
nodes = []
else:
if found_timing:
if nodes:
nodes.append(CaptionNode.create_break())
- nodes.append(CaptionNode.create_text(
- self._decode(line)))
+ nodes.append(CaptionNode.create_text(self._decode(line)))
else:
# it's a comment or some metadata; ignore it
pass
@@ -113,21 +114,21 @@ def _parse(self, lines):
return captions
def _remove_styles(self, line):
- partial_result = VOICE_SPAN_PATTERN.sub('\\2: ', line)
- return OTHER_SPAN_PATTERN.sub('', partial_result)
+ partial_result = VOICE_SPAN_PATTERN.sub("\\2: ", line)
+ return OTHER_SPAN_PATTERN.sub("", partial_result)
def _validate_timings(self, start, end, last_start_time):
if start is None:
- raise CaptionReadSyntaxError('Invalid cue start timestamp.')
+ raise CaptionReadSyntaxError("Invalid cue start timestamp.")
if end is None:
- raise CaptionReadSyntaxError('Invalid cue end timestamp.')
+ raise CaptionReadSyntaxError("Invalid cue end timestamp.")
if start > end:
- raise CaptionReadError(
- 'End timestamp is not greater than start timestamp.')
+ raise CaptionReadError("End timestamp is not greater than start timestamp.")
if start < last_start_time:
raise CaptionReadError(
- 'Start timestamp is not greater than or equal'
- 'to start timestamp of previous cue.')
+ "Start timestamp is not greater than or equal"
+ "to start timestamp of previous cue."
+ )
def _parse_timing_line(self, line, last_start_time):
"""
@@ -135,7 +136,7 @@ def _parse_timing_line(self, line, last_start_time):
"""
m = TIMING_LINE_PATTERN.search(line)
if not m:
- raise CaptionReadSyntaxError('Invalid timing format.')
+ raise CaptionReadSyntaxError("Invalid timing format.")
start = self._parse_timestamp(m.group(1)) + self.time_shift_microseconds
end = self._parse_timestamp(m.group(2)) + self.time_shift_microseconds
@@ -157,7 +158,7 @@ def _parse_timestamp(self, timestamp):
"""
m = TIMESTAMP_PATTERN.search(timestamp)
if not m:
- raise CaptionReadSyntaxError('Invalid timing format.')
+ raise CaptionReadSyntaxError("Invalid timing format.")
m = m.groups()
@@ -175,23 +176,23 @@ def _decode(self, s):
"""
s = s.strip()
# Covert voice span
- s = VOICE_SPAN_PATTERN.sub('\\2: ', s)
+ s = VOICE_SPAN_PATTERN.sub("\\2: ", s)
# TODO: Add support for other WebVTT tags. For now just strip them
# off the text.
- s = OTHER_SPAN_PATTERN.sub('', s)
+ s = OTHER_SPAN_PATTERN.sub("", s)
# Replace WebVTT special XML codes with plain unicode values
- s = s.replace('<', '<')
- s = s.replace('>', '>')
- s = s.replace('', '\u200e')
- s = s.replace('', '\u200f')
- s = s.replace(' ', '\u00a0')
+ s = s.replace("<", "<")
+ s = s.replace(">", ">")
+ s = s.replace("", "\u200e")
+ s = s.replace("", "\u200f")
+ s = s.replace(" ", "\u00a0")
# Must do ampersand last
- s = s.replace('&', '&')
+ s = s.replace("&", "&")
return s
class WebVTTWriter(BaseWriter):
- HEADER = 'WEBVTT\n\n'
+ HEADER = "WEBVTT\n\n"
global_layout = None
video_width = None
video_height = None
@@ -219,9 +220,9 @@ def write(self, caption_set, lang=None):
captions = caption_set.get_captions(lang)
- return output + '\n'.join(
- [self._convert_caption(caption_set, caption)
- for caption in captions])
+ return output + "\n".join(
+ [self._convert_caption(caption_set, caption) for caption in captions]
+ )
def _timestamp(self, ts):
td = datetime.timedelta(microseconds=ts)
@@ -234,23 +235,23 @@ def _timestamp(self, ts):
@staticmethod
def _convert_style_to_text_tag(style):
- if style == 'italics':
- return ['', '']
- elif style == 'underline':
- return ['', '']
- elif style == 'bold':
- return ['', '']
+ if style == "italics":
+ return ["", ""]
+ elif style == "underline":
+ return ["", ""]
+ elif style == "bold":
+ return ["", ""]
else:
- return ['', '']
+ return ["", ""]
def _calculate_resulting_style(self, style, caption_set):
resulting_style = {}
style_classes = []
- if 'classes' in style:
- style_classes = style['classes']
- elif 'class' in style:
- style_classes = [style['class']]
+ if "classes" in style:
+ style_classes = style["classes"]
+ elif "class" in style:
+ style_classes = [style["class"]]
for style_class in style_classes:
sub_style = caption_set.get_style(style_class).copy()
@@ -271,11 +272,11 @@ def _convert_caption(self, caption_set, caption):
start = self._timestamp(caption.start)
end = self._timestamp(caption.end)
- timespan = f'{start} --> {end}'
+ timespan = f"{start} --> {end}"
- output = ''
+ output = ""
- cue_style_tags = ['', '']
+ cue_style_tags = ["", ""]
# Text styling
style = self._calculate_resulting_style(caption.style, caption_set)
@@ -289,8 +290,8 @@ def _convert_caption(self, caption_set, caption):
if not layout:
layout = caption.layout_info or self.global_layout
cue_settings = self._convert_positioning(layout)
- output += timespan + cue_settings + '\n'
- output += cue_style_tags[0] + cue_text + cue_style_tags[1] + '\n'
+ output += timespan + cue_settings + "\n"
+ output += cue_style_tags[0] + cue_text + cue_style_tags[1] + "\n"
return output
@@ -301,12 +302,12 @@ def _convert_positioning(self, layout):
:rtype: str
"""
if not layout:
- return ''
+ return ""
# If it's converting from WebVTT to WebVTT, keep positioning info
# unchanged
if layout.webvtt_positioning:
- return f' {layout.webvtt_positioning}'
+ return f" {layout.webvtt_positioning}"
left_offset = None
top_offset = None
@@ -320,15 +321,14 @@ def _convert_positioning(self, layout):
# There are absolute positioning values for this cue but the
# Writer is explicitly configured not to do any relativization.
# Ignore all positioning for this cue.
- return ''
+ return ""
# Ensure that all positioning values are measured using percentage.
# This may raise an exception if layout.is_relative() == False
# If you want to avoid it, you have to turn off relativization by
# initializing this Writer with relativize=False.
if not already_relative:
- layout = layout.as_percentage_of(
- self.video_width, self.video_height)
+ layout = layout.as_percentage_of(self.video_width, self.video_height)
# Ensure that when there's a left offset the caption is not pushed out
# of the screen. If the execution got this far it means origin and
@@ -366,13 +366,13 @@ def _convert_positioning(self, layout):
if layout.alignment:
alignment = WEBVTT_VERSION_OF.get(
- layout.alignment.horizontal, DEFAULT_ALIGN)
+ layout.alignment.horizontal, DEFAULT_ALIGN
+ )
else:
alignment = DEFAULT_ALIGN
- cue_settings = ''
+ cue_settings = ""
- if alignment and \
- alignment != WEBVTT_VERSION_OF[HorizontalAlignmentEnum.CENTER]:
+ if alignment and alignment != WEBVTT_VERSION_OF[HorizontalAlignmentEnum.CENTER]:
# Not sure why this condition was here, maybe because center
# alignment is applied automatically without needing to specify it
cue_settings += f" align:{alignment}"
@@ -402,23 +402,22 @@ def _group_cues_by_layout(self, nodes, caption_set):
layout_groups = []
# A properly encoded WebVTT string (plain unicode must be properly
# escaped before being appended to this string)
- s = ''
- row, column, prev_row, prev_column = 0, 0, 0, 0
+ s = ""
for i, node in enumerate(nodes):
if node.type_ == CaptionNode.TEXT:
if s and current_layout and node.layout_info != current_layout:
# If the positioning changes from one text node to
# another, a new WebVTT cue has to be created.
row, column = node.position if node.position else (0, 0)
- prev_row, prev_column = current_node.position if current_node.position else (0, 0)
- if row == prev_row + 1:
- s += '\n'
- else:
+ prev_row, prev_column = (
+ current_node.position if current_node.position else (0, 0)
+ )
+ if row != prev_row + 1:
layout_groups.append((s, current_layout))
- s = ''
+ s = ""
# ATTENTION: This is where the plain unicode node content is
# finally encoded as WebVTT.
- s += self._encode_illegal_characters(node.content) or ' '
+ s += self._encode_illegal_characters(node.content) or " "
current_layout = node.layout_info
current_node = node
elif node.type_ == CaptionNode.STYLE:
@@ -426,7 +425,7 @@ def _group_cues_by_layout(self, nodes, caption_set):
node.content, caption_set
)
- styles = ['italics', 'underline', 'bold']
+ styles = ["italics", "underline", "bold"]
if not node.start:
styles.reverse()
@@ -442,10 +441,10 @@ def _group_cues_by_layout(self, nodes, caption_set):
# "Style node"
elif node.type_ == CaptionNode.BREAK:
if i > 0 and nodes[i - 1].type_ != CaptionNode.TEXT:
- s += ' '
+ s += " "
if i == 0: # cue text starts with a break
- s += ' '
- s += '\n'
+ s += " "
+ s += "\n"
if s:
layout_groups.append((s, current_layout))
@@ -458,12 +457,12 @@ def _encode_illegal_characters(self, s):
- http://dev.w3.org/html5/webvtt/#dfn-webvtt-cue-text-span
:type s: str
"""
- s = s.replace('&', '&')
- s = s.replace('<', '<')
+ s = s.replace("&", "&")
+ s = s.replace("<", "<")
# The substring "-->" is also not allowed according to this:
# - http://dev.w3.org/html5/webvtt/#dfn-webvtt-cue-block
- s = s.replace('-->', '-->')
+ s = s.replace("-->", "-->")
# The following characters have escaping codes for some reason, but
# they're not illegal, so for now I'll leave this commented out so that
diff --git a/setup.py b/setup.py
index c1c2cf3e..9be9c5d6 100644
--- a/setup.py
+++ b/setup.py
@@ -1,62 +1,54 @@
#!/usr/bin/env python
import os
-from setuptools import setup, find_packages
+
+from setuptools import find_packages, setup
README_PATH = os.path.join(
os.path.abspath(os.path.dirname(__file__)),
- 'README.rst',
+ "README.rst",
)
dependencies = [
- 'beautifulsoup4>=4.12.1',
- 'lxml>=4.9.1',
- 'cssutils>=2.0.0',
+ "beautifulsoup4>=4.12.1",
+ "lxml>=4.9.1",
+ "cssutils>=2.0.0",
]
-dev_dependencies = [
- 'pytest',
- 'pytest-lazy-fixture'
-]
+dev_dependencies = ["pytest", "pytest-lazy-fixture"]
-transcript_dependencies = [
- 'nltk==3.8.0'
-]
+transcript_dependencies = ["nltk==3.8.0"]
setup(
- name='pycaption',
- version='2.2.12',
- description='Closed caption converter',
+ name="pycaption",
+ version="2.2.12.dev8",
+ description="Closed caption converter",
long_description=open(README_PATH).read(),
- author='Joe Norton',
- author_email='joey@nortoncrew.com',
+ author="Joe Norton",
+ author_email="joey@nortoncrew.com",
project_urls={
- 'Source': 'https://github.com/pbs/pycaption',
- 'Documentation': 'https://pycaption.readthedocs.io/',
- 'Release notes': 'https://pycaption.readthedocs.io'
- '/en/stable/changelog.html',
+ "Source": "https://github.com/pbs/pycaption",
+ "Documentation": "https://pycaption.readthedocs.io/",
+ "Release notes": "https://pycaption.readthedocs.io" "/en/stable/changelog.html",
},
- python_requires='>=3.8,<4.0',
+ python_requires=">=3.8,<4.0",
install_requires=dependencies,
- extras_require={
- 'dev': dev_dependencies,
- 'transcript': transcript_dependencies
- },
+ extras_require={"dev": dev_dependencies, "transcript": transcript_dependencies},
packages=find_packages(),
include_package_data=True,
classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'License :: OSI Approved :: Apache Software License',
- 'Operating System :: OS Independent',
- 'Programming Language :: Python',
- 'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.8',
- 'Programming Language :: Python :: 3.9',
- 'Programming Language :: Python :: 3.10',
- 'Programming Language :: Python :: 3.11',
- 'Programming Language :: Python :: 3.12',
- 'Topic :: Software Development :: Libraries',
- 'Topic :: Software Development :: Libraries :: Python Modules',
- 'Topic :: Multimedia :: Video',
+ "Development Status :: 5 - Production/Stable",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Software Development :: Libraries",
+ "Topic :: Software Development :: Libraries :: Python Modules",
+ "Topic :: Multimedia :: Video",
],
test_suite="tests",
)
diff --git a/tests/conftest.py b/tests/conftest.py
index 74530ae8..acb97edb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -61,7 +61,20 @@
sample_scc_with_ampersand_character, sample_scc_multiple_formats,
sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters,
sample_scc_tab_offset, sample_scc_with_unknown_commands,
- sample_scc_special_and_extended_characters,
+ sample_scc_special_and_extended_characters, sample_scc_mid_row_before_text_pop,
+ sample_scc_mid_row_before_text_roll, sample_scc_mid_row_before_text_paint,
+ sample_scc_mid_row_following_text_no_text_before_italics_off_pop,
+ sample_scc_mid_row_following_text_no_text_before_italics_off_roll,
+ sample_scc_mid_row_following_text_no_text_before_italics_off_paint,
+ sample_scc_mid_row_following_text_no_text_before_italics_on_pop,
+ sample_scc_mid_row_following_text_no_text_before_italics_on_roll,
+ sample_scc_mid_row_following_text_no_text_before_italics_on_paint,
+ sample_scc_mid_row_with_space_before_pop,
+ sample_scc_mid_row_with_space_before_roll,
+ sample_scc_mid_row_with_space_before_paint,
+ sample_scc_with_spaces_at_eol_pop,
+ sample_scc_with_spaces_at_eol_roll,
+ sample_scc_with_spaces_at_eol_paint,
)
from tests.fixtures.srt import ( # noqa: F401
sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty,
diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py
index 714e5d52..864f3e7a 100644
--- a/tests/fixtures/dfxp.py
+++ b/tests/fixtures/dfxp.py
@@ -729,6 +729,7 @@ def sample_dfxp_to_render_with_only_default_positioning_input():
-
+
cccccc
c!c!
-
+
bbbb
-
- cccc
- bbaa
+
+ cccc
+ bbaa
-
+
aa
-
- bb
- cc
+
+ bb
+ cc
-
+
abcd
-
+
abcd
@@ -1525,4 +1526,4 @@ def sample_dfxp_default_styling_p_tags():