Source: lib/text/vtt_text_parser.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.text.VttTextParser');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.log');
  9. goog.require('shaka.media.ManifestParser');
  10. goog.require('shaka.text.Cue');
  11. goog.require('shaka.text.CueRegion');
  12. goog.require('shaka.text.TextEngine');
  13. goog.require('shaka.util.Error');
  14. goog.require('shaka.util.StringUtils');
  15. goog.require('shaka.util.TextParser');
  16. goog.require('shaka.util.XmlUtils');
  17. /**
  18. * @implements {shaka.extern.TextParser}
  19. * @export
  20. */
  21. shaka.text.VttTextParser = class {
  22. /** Constructs a VTT parser. */
  23. constructor() {
  24. /** @private {boolean} */
  25. this.sequenceMode_ = false;
  26. /** @private {string} */
  27. this.manifestType_ = shaka.media.ManifestParser.UNKNOWN;
  28. }
  29. /**
  30. * @override
  31. * @export
  32. */
  33. parseInit(data) {
  34. goog.asserts.assert(false, 'VTT does not have init segments');
  35. }
  36. /**
  37. * @override
  38. * @export
  39. */
  40. setSequenceMode(sequenceMode) {
  41. this.sequenceMode_ = sequenceMode;
  42. }
  43. /**
  44. * @override
  45. * @export
  46. */
  47. setManifestType(manifestType) {
  48. this.manifestType_ = manifestType;
  49. }
  50. /**
  51. * @override
  52. * @export
  53. */
  54. parseMedia(data, time) {
  55. const VttTextParser = shaka.text.VttTextParser;
  56. // Get the input as a string. Normalize newlines to \n.
  57. let str = shaka.util.StringUtils.fromUTF8(data);
  58. str = str.replace(/\r\n|\r(?=[^\n]|$)/gm, '\n');
  59. const blocks = str.split(/\n{2,}/m);
  60. if (!/^WEBVTT($|[ \t\n])/m.test(blocks[0])) {
  61. throw new shaka.util.Error(
  62. shaka.util.Error.Severity.CRITICAL,
  63. shaka.util.Error.Category.TEXT,
  64. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  65. }
  66. // Depending on "segmentRelativeVttTiming" configuration,
  67. // "vttOffset" will correspond to either "periodStart" (default)
  68. // or "segmentStart", for segmented VTT where timings are relative
  69. // to the beginning of each segment.
  70. // NOTE: "periodStart" is the timestamp offset applied via TextEngine.
  71. // It is no longer closely tied to periods, but the name stuck around.
  72. // NOTE: This offset and the flag choosing its meaning have no effect on
  73. // HLS content, which should use X-TIMESTAMP-MAP and periodStart instead.
  74. let offset = time.vttOffset;
  75. // Only use 'X-TIMESTAMP-MAP' with HLS. This overrides offset above.
  76. if (blocks[0].includes('X-TIMESTAMP-MAP') &&
  77. this.manifestType_ == shaka.media.ManifestParser.HLS) {
  78. if (this.sequenceMode_) {
  79. // Compute a different, rollover-based offset for sequence mode.
  80. offset = this.computeHlsSequenceModeOffset_(blocks[0], time);
  81. } else {
  82. // Calculate the offset from the segment startTime.
  83. offset = time.segmentStart;
  84. }
  85. }
  86. // Parse VTT regions.
  87. /* !Array.<!shaka.text.CueRegion> */
  88. const regions = [];
  89. for (const line of blocks[0].split('\n')) {
  90. if (/^Region:/.test(line)) {
  91. const region = VttTextParser.parseRegion_(line);
  92. regions.push(region);
  93. }
  94. }
  95. /** @type {!Map.<string, shaka.text.Cue>} */
  96. const styles = new Map();
  97. VttTextParser.addDefaultTextColor_(styles);
  98. // Parse cues.
  99. const ret = [];
  100. for (const block of blocks.slice(1)) {
  101. const lines = block.split('\n');
  102. VttTextParser.parseStyle_(lines, styles);
  103. const cue = VttTextParser.parseCue_(lines, offset, regions, styles);
  104. if (cue) {
  105. ret.push(cue);
  106. }
  107. }
  108. return ret;
  109. }
  110. /**
  111. * @param {string} headerBlock Contains X-TIMESTAMP-MAP.
  112. * @param {shaka.extern.TextParser.TimeContext} time
  113. * @return {number}
  114. * @private
  115. */
  116. computeHlsSequenceModeOffset_(headerBlock, time) {
  117. // https://bit.ly/2K92l7y
  118. // The 'X-TIMESTAMP-MAP' header is used in HLS to align text with
  119. // the rest of the media.
  120. // The header format is 'X-TIMESTAMP-MAP=MPEGTS:n,LOCAL:m'
  121. // (the attributes can go in any order)
  122. // where n is MPEG-2 time and m is cue time it maps to.
  123. // For example 'X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000'
  124. // means an offset of 10 seconds
  125. // 900000/MPEG_TIMESCALE - cue time.
  126. const cueTimeMatch = headerBlock.match(
  127. /LOCAL:((?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3}))/m);
  128. const mpegTimeMatch = headerBlock.match(/MPEGTS:(\d+)/m);
  129. if (!cueTimeMatch || !mpegTimeMatch) {
  130. throw new shaka.util.Error(
  131. shaka.util.Error.Severity.CRITICAL,
  132. shaka.util.Error.Category.TEXT,
  133. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  134. }
  135. const parser = new shaka.util.TextParser(cueTimeMatch[1]);
  136. const cueTime = shaka.text.VttTextParser.parseTime_(parser);
  137. if (cueTime == null) {
  138. throw new shaka.util.Error(
  139. shaka.util.Error.Severity.CRITICAL,
  140. shaka.util.Error.Category.TEXT,
  141. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  142. }
  143. const mpegTime = Number(mpegTimeMatch[1]);
  144. const mpegTimescale = shaka.text.VttTextParser.MPEG_TIMESCALE_;
  145. return time.periodStart + mpegTime / mpegTimescale - cueTime;
  146. }
  147. /**
  148. * Add default color
  149. *
  150. * @param {!Map.<string, shaka.text.Cue>} styles
  151. * @private
  152. */
  153. static addDefaultTextColor_(styles) {
  154. const textColor = shaka.text.Cue.defaultTextColor;
  155. for (const [key, value] of Object.entries(textColor)) {
  156. const cue = new shaka.text.Cue(0, 0, '');
  157. cue.color = value;
  158. styles.set('.' + key, cue);
  159. }
  160. const bgColor = shaka.text.Cue.defaultTextBackgroundColor;
  161. for (const [key, value] of Object.entries(bgColor)) {
  162. const cue = new shaka.text.Cue(0, 0, '');
  163. cue.backgroundColor = value;
  164. styles.set('.' + key, cue);
  165. }
  166. }
  167. /**
  168. * Parses a string into a Region object.
  169. *
  170. * @param {string} text
  171. * @return {!shaka.text.CueRegion}
  172. * @private
  173. */
  174. static parseRegion_(text) {
  175. const VttTextParser = shaka.text.VttTextParser;
  176. const parser = new shaka.util.TextParser(text);
  177. // The region string looks like this:
  178. // Region: id=fred width=50% lines=3 regionanchor=0%,100%
  179. // viewportanchor=10%,90% scroll=up
  180. const region = new shaka.text.CueRegion();
  181. // Skip 'Region:'
  182. parser.readWord();
  183. parser.skipWhitespace();
  184. let word = parser.readWord();
  185. while (word) {
  186. if (!VttTextParser.parseRegionSetting_(region, word)) {
  187. shaka.log.warning(
  188. 'VTT parser encountered an invalid VTTRegion setting: ', word,
  189. ' The setting will be ignored.');
  190. }
  191. parser.skipWhitespace();
  192. word = parser.readWord();
  193. }
  194. return region;
  195. }
  196. /**
  197. * Parses a style block into a Cue object.
  198. *
  199. * @param {!Array.<string>} text
  200. * @param {!Map.<string, shaka.text.Cue>} styles
  201. * @private
  202. */
  203. static parseStyle_(text, styles) {
  204. // Skip empty blocks.
  205. if (text.length == 1 && !text[0]) {
  206. return;
  207. }
  208. // Skip comment blocks.
  209. if (/^NOTE($|[ \t])/.test(text[0])) {
  210. return;
  211. }
  212. // Only style block are allowed.
  213. if (text[0] != 'STYLE') {
  214. return;
  215. }
  216. /** @type {!Array.<!Array.<string>>} */
  217. const styleBlocks = [];
  218. let lastBlockIndex = -1;
  219. for (let i = 1; i < text.length; i++) {
  220. if (text[i].includes('::cue')) {
  221. styleBlocks.push([]);
  222. lastBlockIndex = styleBlocks.length - 1;
  223. }
  224. if (lastBlockIndex == -1) {
  225. continue;
  226. }
  227. styleBlocks[lastBlockIndex].push(text[i]);
  228. if (text[i].includes('}')) {
  229. lastBlockIndex = -1;
  230. }
  231. }
  232. for (const styleBlock of styleBlocks) {
  233. let styleSelector = 'global';
  234. // Look for what is within parentheses. For example:
  235. // <code>:: cue (b) {</code>, what we are looking for is <code>b</code>
  236. const selector = styleBlock[0].match(/\((.*)\)/);
  237. if (selector) {
  238. styleSelector = selector.pop();
  239. }
  240. // We start at 1 to avoid '::cue' and end earlier to avoid '}'
  241. let propertyLines = styleBlock.slice(1, -1);
  242. if (styleBlock[0].includes('}')) {
  243. const payload = /\{(.*?)\}/.exec(styleBlock[0]);
  244. if (payload) {
  245. propertyLines = payload[1].split(';');
  246. }
  247. }
  248. // Continue styles over multiple selectors if necessary.
  249. // For example,
  250. // ::cue(b) { background: white; } ::cue(b) { color: blue; }
  251. // should set both the background and foreground of bold tags.
  252. let cue = styles.get(styleSelector);
  253. if (!cue) {
  254. cue = new shaka.text.Cue(0, 0, '');
  255. }
  256. let validStyle = false;
  257. for (let i = 0; i < propertyLines.length; i++) {
  258. // We look for CSS properties. As a general rule they are separated by
  259. // <code>:</code>. Eg: <code>color: red;</code>
  260. const lineParts = /^\s*([^:]+):\s*(.*)/.exec(propertyLines[i]);
  261. if (lineParts) {
  262. const name = lineParts[1].trim();
  263. const value = lineParts[2].trim().replace(';', '');
  264. switch (name) {
  265. case 'background-color':
  266. case 'background':
  267. validStyle = true;
  268. cue.backgroundColor = value;
  269. break;
  270. case 'color':
  271. validStyle = true;
  272. cue.color = value;
  273. break;
  274. case 'font-family':
  275. validStyle = true;
  276. cue.fontFamily = value;
  277. break;
  278. case 'font-size':
  279. validStyle = true;
  280. cue.fontSize = value;
  281. break;
  282. case 'font-weight':
  283. if (parseInt(value, 10) >= 700 || value == 'bold') {
  284. validStyle = true;
  285. cue.fontWeight = shaka.text.Cue.fontWeight.BOLD;
  286. }
  287. break;
  288. case 'font-style':
  289. switch (value) {
  290. case 'normal':
  291. validStyle = true;
  292. cue.fontStyle = shaka.text.Cue.fontStyle.NORMAL;
  293. break;
  294. case 'italic':
  295. validStyle = true;
  296. cue.fontStyle = shaka.text.Cue.fontStyle.ITALIC;
  297. break;
  298. case 'oblique':
  299. validStyle = true;
  300. cue.fontStyle = shaka.text.Cue.fontStyle.OBLIQUE;
  301. break;
  302. }
  303. break;
  304. case 'opacity':
  305. validStyle = true;
  306. cue.opacity = parseFloat(value);
  307. break;
  308. case 'text-combine-upright':
  309. validStyle = true;
  310. cue.textCombineUpright = value;
  311. break;
  312. case 'text-shadow':
  313. validStyle = true;
  314. cue.textShadow = value;
  315. break;
  316. case 'white-space':
  317. validStyle = true;
  318. cue.wrapLine = value != 'noWrap';
  319. break;
  320. default:
  321. shaka.log.warning('VTT parser encountered an unsupported style: ',
  322. lineParts);
  323. break;
  324. }
  325. }
  326. }
  327. if (validStyle) {
  328. styles.set(styleSelector, cue);
  329. }
  330. }
  331. }
  332. /**
  333. * Parses a text block into a Cue object.
  334. *
  335. * @param {!Array.<string>} text
  336. * @param {number} timeOffset
  337. * @param {!Array.<!shaka.text.CueRegion>} regions
  338. * @param {!Map.<string, shaka.text.Cue>} styles
  339. * @return {shaka.text.Cue}
  340. * @private
  341. */
  342. static parseCue_(text, timeOffset, regions, styles) {
  343. const VttTextParser = shaka.text.VttTextParser;
  344. // Skip empty blocks.
  345. if (text.length == 1 && !text[0]) {
  346. return null;
  347. }
  348. // Skip comment blocks.
  349. if (/^NOTE($|[ \t])/.test(text[0])) {
  350. return null;
  351. }
  352. // Skip style and region blocks.
  353. if (text[0] == 'STYLE' || text[0] == 'REGION') {
  354. return null;
  355. }
  356. let id = null;
  357. if (!text[0].includes('-->')) {
  358. id = text[0];
  359. text.splice(0, 1);
  360. }
  361. // Parse the times.
  362. const parser = new shaka.util.TextParser(text[0]);
  363. let start = VttTextParser.parseTime_(parser);
  364. const expect = parser.readRegex(/[ \t]+-->[ \t]+/g);
  365. let end = VttTextParser.parseTime_(parser);
  366. if (start == null || expect == null || end == null) {
  367. shaka.log.alwaysWarn(
  368. 'Failed to parse VTT time code. Cue skipped:', id, text);
  369. return null;
  370. }
  371. start += timeOffset;
  372. end += timeOffset;
  373. // Get the payload.
  374. const payload = text.slice(1).join('\n').trim();
  375. let cue = null;
  376. if (styles.has('global')) {
  377. cue = styles.get('global').clone();
  378. cue.startTime = start;
  379. cue.endTime = end;
  380. cue.payload = '';
  381. } else {
  382. cue = new shaka.text.Cue(start, end, '');
  383. }
  384. // Parse optional settings.
  385. parser.skipWhitespace();
  386. let word = parser.readWord();
  387. while (word) {
  388. if (!VttTextParser.parseCueSetting(cue, word, regions)) {
  389. shaka.log.warning('VTT parser encountered an invalid VTT setting: ',
  390. word,
  391. ' The setting will be ignored.');
  392. }
  393. parser.skipWhitespace();
  394. word = parser.readWord();
  395. }
  396. VttTextParser.parseCueStyles(payload, cue, styles);
  397. if (id != null) {
  398. cue.id = id;
  399. }
  400. return cue;
  401. }
  402. /**
  403. * Parses a WebVTT styles from the given payload.
  404. *
  405. * @param {string} payload
  406. * @param {!shaka.text.Cue} rootCue
  407. * @param {!Map.<string, shaka.text.Cue>} styles
  408. */
  409. static parseCueStyles(payload, rootCue, styles) {
  410. const VttTextParser = shaka.text.VttTextParser;
  411. // Optimization for unstyled payloads.
  412. if (!payload.includes('<')) {
  413. rootCue.payload = VttTextParser.htmlUnescape_(payload);
  414. return;
  415. }
  416. if (styles.size === 0) {
  417. VttTextParser.addDefaultTextColor_(styles);
  418. }
  419. payload = VttTextParser.replaceColorPayload_(payload);
  420. payload = VttTextParser.replaceKaraokeStylePayload_(payload);
  421. payload = VttTextParser.replaceVoiceStylePayload_(payload);
  422. const xmlPayload = '<span>' + payload + '</span>';
  423. const element = shaka.util.XmlUtils.parseXmlString(xmlPayload, 'span');
  424. if (element) {
  425. const childNodes = element.childNodes;
  426. if (childNodes.length == 1) {
  427. const childNode = childNodes[0];
  428. if (childNode.nodeType == Node.TEXT_NODE ||
  429. childNode.nodeType == Node.CDATA_SECTION_NODE) {
  430. rootCue.payload = VttTextParser.htmlUnescape_(payload);
  431. return;
  432. }
  433. }
  434. for (const childNode of childNodes) {
  435. VttTextParser.generateCueFromElement_(childNode, rootCue, styles);
  436. }
  437. } else {
  438. shaka.log.warning('The cue\'s markup could not be parsed: ', payload);
  439. rootCue.payload = VttTextParser.htmlUnescape_(payload);
  440. }
  441. }
  442. /**
  443. * Converts voice style tag to be valid for xml parsing
  444. * For example,
  445. * input: <v Shaka>Test
  446. * output: <v.voice-Shaka>Test</v.voice-Shaka>
  447. *
  448. * @param {string} payload
  449. * @return {string} processed payload
  450. * @private
  451. */
  452. static replaceVoiceStylePayload_(payload) {
  453. const voiceTag = 'v';
  454. const names = [];
  455. let nameStart = -1;
  456. let newPayload = '';
  457. let hasVoiceEndTag = false;
  458. for (let i = 0; i < payload.length; i++) {
  459. // This condition is used to manage tags that have end tags.
  460. if (payload[i] === '/') {
  461. const end = payload.indexOf('>', i);
  462. if (end === -1) {
  463. return payload;
  464. }
  465. const tagEnd = payload.substring(i + 1, end);
  466. if (!tagEnd || tagEnd != voiceTag) {
  467. newPayload += payload[i];
  468. continue;
  469. }
  470. hasVoiceEndTag = true;
  471. let tagStart = null;
  472. if (names.length) {
  473. tagStart = names[names.length -1];
  474. }
  475. if (!tagStart) {
  476. newPayload += payload[i];
  477. } else if (tagStart === tagEnd) {
  478. newPayload += '/' + tagEnd + '>';
  479. i += tagEnd.length + 1;
  480. } else {
  481. if (!tagStart.startsWith(voiceTag)) {
  482. newPayload += payload[i];
  483. continue;
  484. }
  485. newPayload += '/' + tagStart + '>';
  486. i += tagEnd.length + 1;
  487. }
  488. } else {
  489. // Here we only want the tag name, not any other payload.
  490. if (payload[i] === '<') {
  491. nameStart = i + 1;
  492. if (payload[nameStart] != voiceTag) {
  493. nameStart = -1;
  494. }
  495. } else if (payload[i] === '>') {
  496. if (nameStart > 0) {
  497. names.push(payload.substr(nameStart, i - nameStart));
  498. nameStart = -1;
  499. }
  500. }
  501. newPayload += payload[i];
  502. }
  503. }
  504. for (const name of names) {
  505. const newName = name.replace(' ', '.voice-');
  506. newPayload = newPayload.replace(`<${name}>`, `<${newName}>`);
  507. newPayload = newPayload.replace(`</${name}>`, `</${newName}>`);
  508. if (!hasVoiceEndTag) {
  509. newPayload += `</${newName}>`;
  510. }
  511. }
  512. return newPayload;
  513. }
  514. /**
  515. * Converts karaoke style tag to be valid for xml parsing
  516. * For example,
  517. * input: Text <00:00:00.450> time <00:00:01.450> 1
  518. * output: Text <div time="00:00:00.450"> time
  519. * <div time="00:00:01.450"> 1</div></div>
  520. *
  521. * @param {string} payload
  522. * @return {string} processed payload
  523. * @private
  524. */
  525. static replaceKaraokeStylePayload_(payload) {
  526. const names = [];
  527. let nameStart = -1;
  528. for (let i = 0; i < payload.length; i++) {
  529. if (payload[i] === '<') {
  530. nameStart = i + 1;
  531. } else if (payload[i] === '>') {
  532. if (nameStart > 0) {
  533. const name = payload.substr(nameStart, i - nameStart);
  534. if (name.match(shaka.text.VttTextParser.timeFormat_)) {
  535. names.push(name);
  536. }
  537. nameStart = -1;
  538. }
  539. }
  540. }
  541. let newPayload = payload;
  542. for (const name of names) {
  543. const replaceTag = '<' + name + '>';
  544. const startTag = '<div time="' + name + '">';
  545. const endTag = '</div>';
  546. newPayload = newPayload.replace(replaceTag, startTag);
  547. newPayload += endTag;
  548. }
  549. return newPayload;
  550. }
  551. /**
  552. * Converts color end tag to be valid for xml parsing
  553. * For example,
  554. * input: <c.yellow.bg_blue>Yellow text on blue bg</c>
  555. * output: <c.yellow.bg_blue>Yellow text on blue bg</c.yellow.bg_blue>
  556. *
  557. * Returns original payload if invalid tag is found.
  558. * Invalid tag example: <c.yellow><b>Example</c></b>
  559. *
  560. * @param {string} payload
  561. * @return {string} processed payload
  562. * @private
  563. */
  564. static replaceColorPayload_(payload) {
  565. const names = [];
  566. let nameStart = -1;
  567. let newPayload = '';
  568. for (let i = 0; i < payload.length; i++) {
  569. if (payload[i] === '/' && i > 0 && payload[i - 1] === '<') {
  570. const end = payload.indexOf('>', i);
  571. if (end <= i) {
  572. return payload;
  573. }
  574. const tagEnd = payload.substring(i + 1, end);
  575. if (!tagEnd || tagEnd !== 'c') {
  576. newPayload += payload[i];
  577. continue;
  578. }
  579. const tagStart = names.pop();
  580. if (!tagStart) {
  581. newPayload += payload[i];
  582. } else if (tagStart === tagEnd) {
  583. newPayload += '/' + tagEnd + '>';
  584. i += tagEnd.length + 1;
  585. } else {
  586. if (!tagStart.startsWith('c.')) {
  587. newPayload += payload[i];
  588. continue;
  589. }
  590. i += tagEnd.length + 1;
  591. newPayload += '/' + tagStart + '>';
  592. }
  593. } else {
  594. if (payload[i] === '<') {
  595. nameStart = i + 1;
  596. if (payload[nameStart] != 'c') {
  597. nameStart = -1;
  598. }
  599. } else if (payload[i] === '>') {
  600. if (nameStart > 0) {
  601. names.push(payload.substr(nameStart, i - nameStart));
  602. nameStart = -1;
  603. }
  604. }
  605. newPayload += payload[i];
  606. }
  607. }
  608. return newPayload;
  609. }
  610. /**
  611. * @param {string} value
  612. * @param {string} defaultValue
  613. * @private
  614. */
  615. static getOrDefault_(value, defaultValue) {
  616. if (value && value.length > 0) {
  617. return value;
  618. }
  619. return defaultValue;
  620. }
  621. /**
  622. * Merges values created in parseStyle_
  623. * @param {!shaka.text.Cue} cue
  624. * @param {shaka.text.Cue} refCue
  625. * @private
  626. */
  627. static mergeStyle_(cue, refCue) {
  628. if (!refCue) {
  629. return;
  630. }
  631. const VttTextParser = shaka.text.VttTextParser;
  632. // Overwrites if new value string length > 0
  633. cue.backgroundColor = VttTextParser.getOrDefault_(
  634. refCue.backgroundColor, cue.backgroundColor);
  635. cue.color = VttTextParser.getOrDefault_(
  636. refCue.color, cue.color);
  637. cue.fontFamily = VttTextParser.getOrDefault_(
  638. refCue.fontFamily, cue.fontFamily);
  639. cue.fontSize = VttTextParser.getOrDefault_(
  640. refCue.fontSize, cue.fontSize);
  641. cue.textShadow = VttTextParser.getOrDefault_(
  642. refCue.textShadow, cue.textShadow);
  643. // Overwrite with new values as unable to determine
  644. // if new value is set or not
  645. cue.fontWeight = refCue.fontWeight;
  646. cue.fontStyle = refCue.fontStyle;
  647. cue.opacity = refCue.opacity;
  648. cue.rubyTag = refCue.rubyTag;
  649. cue.textCombineUpright = refCue.textCombineUpright;
  650. cue.wrapLine = refCue.wrapLine;
  651. }
  652. /**
  653. * @param {!Node} element
  654. * @param {!shaka.text.Cue} rootCue
  655. * @param {!Map.<string, shaka.text.Cue>} styles
  656. * @private
  657. */
  658. static generateCueFromElement_(element, rootCue, styles) {
  659. const VttTextParser = shaka.text.VttTextParser;
  660. const nestedCue = rootCue.clone();
  661. // We don't want propagate some properties.
  662. nestedCue.nestedCues = [];
  663. nestedCue.payload = '';
  664. nestedCue.rubyTag = '';
  665. // We don't want propagate some position settings
  666. nestedCue.line = null;
  667. nestedCue.region = new shaka.text.CueRegion();
  668. nestedCue.position = null;
  669. nestedCue.size = 0;
  670. if (element.nodeType === Node.ELEMENT_NODE && element.nodeName) {
  671. const bold = shaka.text.Cue.fontWeight.BOLD;
  672. const italic = shaka.text.Cue.fontStyle.ITALIC;
  673. const underline = shaka.text.Cue.textDecoration.UNDERLINE;
  674. const tags = element.nodeName.split(/(?=[ .])+/g);
  675. for (const tag of tags) {
  676. let styleTag = tag;
  677. // White blanks at start indicate that the style is a voice
  678. if (styleTag.startsWith('.voice-')) {
  679. const voice = styleTag.split('-').pop();
  680. styleTag = `v[voice="${voice}"]`;
  681. // The specification allows to have quotes and not, so we check to
  682. // see which one is being used.
  683. if (!styles.has(styleTag)) {
  684. styleTag = `v[voice=${voice}]`;
  685. }
  686. }
  687. if (styles.has(styleTag)) {
  688. VttTextParser.mergeStyle_(nestedCue, styles.get(styleTag));
  689. }
  690. switch (tag) {
  691. case 'br': {
  692. const lineBreakCue = shaka.text.Cue.lineBreak(
  693. nestedCue.startTime, nestedCue.endTime);
  694. rootCue.nestedCues.push(lineBreakCue);
  695. return;
  696. }
  697. case 'b':
  698. nestedCue.fontWeight = bold;
  699. break;
  700. case 'i':
  701. nestedCue.fontStyle = italic;
  702. break;
  703. case 'u':
  704. nestedCue.textDecoration.push(underline);
  705. break;
  706. case 'font': {
  707. const color =
  708. /** @type {!Element} */(element).getAttribute('color');
  709. if (color) {
  710. nestedCue.color = color;
  711. }
  712. break;
  713. }
  714. case 'div': {
  715. const time = /** @type {!Element} */(element).getAttribute('time');
  716. if (!time) {
  717. break;
  718. }
  719. const parser = new shaka.util.TextParser(time);
  720. const cueTime = shaka.text.VttTextParser.parseTime_(parser);
  721. if (cueTime) {
  722. nestedCue.startTime = cueTime;
  723. }
  724. break;
  725. }
  726. case 'ruby':
  727. case 'rp':
  728. case 'rt':
  729. nestedCue.rubyTag = tag;
  730. break;
  731. default:
  732. break;
  733. }
  734. }
  735. }
  736. const isTextNode = (item) => shaka.util.XmlUtils.isText(item);
  737. const childNodes = element.childNodes;
  738. if (isTextNode(element) ||
  739. (childNodes.length == 1 && isTextNode(childNodes[0]))) {
  740. // Trailing line breaks may lost when convert cue to HTML tag
  741. // Need to insert line break cue to preserve line breaks
  742. const textArr = element.textContent.split('\n');
  743. let isFirst = true;
  744. for (const text of textArr) {
  745. if (!isFirst) {
  746. const lineBreakCue = shaka.text.Cue.lineBreak(
  747. nestedCue.startTime, nestedCue.endTime);
  748. rootCue.nestedCues.push(lineBreakCue);
  749. }
  750. if (text.length > 0) {
  751. const textCue = nestedCue.clone();
  752. textCue.payload = VttTextParser.htmlUnescape_(text);
  753. rootCue.nestedCues.push(textCue);
  754. }
  755. isFirst = false;
  756. }
  757. } else {
  758. rootCue.nestedCues.push(nestedCue);
  759. for (const childNode of childNodes) {
  760. VttTextParser.generateCueFromElement_(childNode, nestedCue, styles);
  761. }
  762. }
  763. }
  764. /**
  765. * Parses a WebVTT setting from the given word.
  766. *
  767. * @param {!shaka.text.Cue} cue
  768. * @param {string} word
  769. * @param {!Array.<!shaka.text.CueRegion>} regions
  770. * @return {boolean} True on success.
  771. */
  772. static parseCueSetting(cue, word, regions) {
  773. const VttTextParser = shaka.text.VttTextParser;
  774. let results = null;
  775. if ((results = /^align:(start|middle|center|end|left|right)$/.exec(word))) {
  776. VttTextParser.setTextAlign_(cue, results[1]);
  777. } else if ((results = /^vertical:(lr|rl)$/.exec(word))) {
  778. VttTextParser.setVerticalWritingMode_(cue, results[1]);
  779. } else if ((results = /^size:([\d.]+)%$/.exec(word))) {
  780. cue.size = Number(results[1]);
  781. } else if ((results =
  782. // eslint-disable-next-line max-len
  783. /^position:([\d.]+)%(?:,(line-left|line-right|middle|center|start|end|auto))?$/
  784. .exec(word))) {
  785. cue.position = Number(results[1]);
  786. if (results[2]) {
  787. VttTextParser.setPositionAlign_(cue, results[2]);
  788. }
  789. } else if ((results = /^region:(.*)$/.exec(word))) {
  790. const region = VttTextParser.getRegionById_(regions, results[1]);
  791. if (region) {
  792. cue.region = region;
  793. }
  794. } else {
  795. return VttTextParser.parsedLineValueAndInterpretation_(cue, word);
  796. }
  797. return true;
  798. }
  799. /**
  800. *
  801. * @param {!Array.<!shaka.text.CueRegion>} regions
  802. * @param {string} id
  803. * @return {?shaka.text.CueRegion}
  804. * @private
  805. */
  806. static getRegionById_(regions, id) {
  807. const regionsWithId = regions.filter((region) => {
  808. return region.id == id;
  809. });
  810. if (!regionsWithId.length) {
  811. shaka.log.warning('VTT parser could not find a region with id: ',
  812. id,
  813. ' The region will be ignored.');
  814. return null;
  815. }
  816. goog.asserts.assert(regionsWithId.length == 1,
  817. 'VTTRegion ids should be unique!');
  818. return regionsWithId[0];
  819. }
  820. /**
  821. * Parses a WebVTTRegion setting from the given word.
  822. *
  823. * @param {!shaka.text.CueRegion} region
  824. * @param {string} word
  825. * @return {boolean} True on success.
  826. * @private
  827. */
  828. static parseRegionSetting_(region, word) {
  829. let results = null;
  830. if ((results = /^id=(.*)$/.exec(word))) {
  831. region.id = results[1];
  832. } else if ((results = /^width=(\d{1,2}|100)%$/.exec(word))) {
  833. region.width = Number(results[1]);
  834. } else if ((results = /^lines=(\d+)$/.exec(word))) {
  835. region.height = Number(results[1]);
  836. region.heightUnits = shaka.text.CueRegion.units.LINES;
  837. } else if ((results = /^regionanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
  838. .exec(word))) {
  839. region.regionAnchorX = Number(results[1]);
  840. region.regionAnchorY = Number(results[2]);
  841. } else if ((results = /^viewportanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
  842. .exec(word))) {
  843. region.viewportAnchorX = Number(results[1]);
  844. region.viewportAnchorY = Number(results[2]);
  845. } else if ((results = /^scroll=up$/.exec(word))) {
  846. region.scroll = shaka.text.CueRegion.scrollMode.UP;
  847. } else {
  848. return false;
  849. }
  850. return true;
  851. }
  852. /**
  853. * @param {!shaka.text.Cue} cue
  854. * @param {string} align
  855. * @private
  856. */
  857. static setTextAlign_(cue, align) {
  858. const Cue = shaka.text.Cue;
  859. if (align == 'middle') {
  860. cue.textAlign = Cue.textAlign.CENTER;
  861. } else {
  862. goog.asserts.assert(align.toUpperCase() in Cue.textAlign,
  863. align.toUpperCase() +
  864. ' Should be in Cue.textAlign values!');
  865. cue.textAlign = Cue.textAlign[align.toUpperCase()];
  866. }
  867. }
  868. /**
  869. * @param {!shaka.text.Cue} cue
  870. * @param {string} align
  871. * @private
  872. */
  873. static setPositionAlign_(cue, align) {
  874. const Cue = shaka.text.Cue;
  875. if (align == 'line-left' || align == 'start') {
  876. cue.positionAlign = Cue.positionAlign.LEFT;
  877. } else if (align == 'line-right' || align == 'end') {
  878. cue.positionAlign = Cue.positionAlign.RIGHT;
  879. } else if (align == 'center' || align == 'middle') {
  880. cue.positionAlign = Cue.positionAlign.CENTER;
  881. } else {
  882. cue.positionAlign = Cue.positionAlign.AUTO;
  883. }
  884. }
  885. /**
  886. * @param {!shaka.text.Cue} cue
  887. * @param {string} value
  888. * @private
  889. */
  890. static setVerticalWritingMode_(cue, value) {
  891. const Cue = shaka.text.Cue;
  892. if (value == 'lr') {
  893. cue.writingMode = Cue.writingMode.VERTICAL_LEFT_TO_RIGHT;
  894. } else {
  895. cue.writingMode = Cue.writingMode.VERTICAL_RIGHT_TO_LEFT;
  896. }
  897. }
  898. /**
  899. * @param {!shaka.text.Cue} cue
  900. * @param {string} word
  901. * @return {boolean}
  902. * @private
  903. */
  904. static parsedLineValueAndInterpretation_(cue, word) {
  905. const Cue = shaka.text.Cue;
  906. let results = null;
  907. if ((results = /^line:([\d.]+)%(?:,(start|end|center))?$/.exec(word))) {
  908. cue.lineInterpretation = Cue.lineInterpretation.PERCENTAGE;
  909. cue.line = Number(results[1]);
  910. if (results[2]) {
  911. goog.asserts.assert(
  912. results[2].toUpperCase() in Cue.lineAlign,
  913. results[2].toUpperCase() + ' Should be in Cue.lineAlign values!');
  914. cue.lineAlign = Cue.lineAlign[results[2].toUpperCase()];
  915. }
  916. } else if ((results =
  917. /^line:(-?\d+)(?:,(start|end|center))?$/.exec(word))) {
  918. cue.lineInterpretation = Cue.lineInterpretation.LINE_NUMBER;
  919. cue.line = Number(results[1]);
  920. if (results[2]) {
  921. goog.asserts.assert(
  922. results[2].toUpperCase() in Cue.lineAlign,
  923. results[2].toUpperCase() + ' Should be in Cue.lineAlign values!');
  924. cue.lineAlign = Cue.lineAlign[results[2].toUpperCase()];
  925. }
  926. } else {
  927. return false;
  928. }
  929. return true;
  930. }
  931. /**
  932. * Parses a WebVTT time from the given parser.
  933. *
  934. * @param {!shaka.util.TextParser} parser
  935. * @return {?number}
  936. * @private
  937. */
  938. static parseTime_(parser) {
  939. const results = parser.readRegex(shaka.text.VttTextParser.timeFormat_);
  940. if (results == null) {
  941. return null;
  942. }
  943. // This capture is optional, but will still be in the array as undefined,
  944. // in which case it is 0.
  945. const hours = Number(results[1]) || 0;
  946. const minutes = Number(results[2]);
  947. const seconds = Number(results[3]);
  948. const milliseconds = Number(results[4]);
  949. if (minutes > 59 || seconds > 59) {
  950. return null;
  951. }
  952. return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600);
  953. }
  954. /**
  955. * This method converts the HTML entities &amp;, &lt;, &gt;, &quot;, &#39;,
  956. * &nbsp;, &lrm; and &rlm; in string to their corresponding characters.
  957. *
  958. * @param {!string} input
  959. * @return {string}
  960. * @private
  961. */
  962. static htmlUnescape_(input) {
  963. // Used to map HTML entities to characters.
  964. const htmlUnescapes = {
  965. '&amp;': '&',
  966. '&lt;': '<',
  967. '&gt;': '>',
  968. '&quot;': '"',
  969. '&#39;': '\'',
  970. '&nbsp;': '\u{a0}',
  971. '&lrm;': '\u{200e}',
  972. '&rlm;': '\u{200f}',
  973. };
  974. // Used to match HTML entities and HTML characters.
  975. const reEscapedHtml = /&(?:amp|lt|gt|quot|#(0+)?39|nbsp|lrm|rlm);/g;
  976. const reHasEscapedHtml = RegExp(reEscapedHtml.source);
  977. // This check is an optimization, since replace always makes a copy
  978. if (input && reHasEscapedHtml.test(input)) {
  979. return input.replace(reEscapedHtml, (entity) => {
  980. // The only thing that might not match the dictionary above is the
  981. // single quote, which can be matched by many strings in the regex, but
  982. // only has a single entry in the dictionary.
  983. return htmlUnescapes[entity] || '\'';
  984. });
  985. }
  986. return input || '';
  987. }
  988. };
  /**
   * Divisor that converts the MPEGTS value of an X-TIMESTAMP-MAP header
   * into seconds.
   *
   * @const {number}
   * @private
   */
  shaka.text.VttTextParser.MPEG_TIMESCALE_ = 90000;
  /**
   * Pattern of a WebVTT timestamp: optional hours, two-digit minutes and
   * seconds, and two or three fractional digits.  Captures hours, minutes,
   * seconds and the fraction, in that order.
   *
   * @const
   * @private {!RegExp}
   * @example 00:00.000 or 00:00:00.000 or 0:00:00.000 or
   *   00:00.00 or 00:00:00.00 or 0:00:00.00
   */
  shaka.text.VttTextParser.timeFormat_ =
      /(?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{2,3})/g;
  // Register this parser for every MIME type under which WebVTT is served.
  for (const mimeType of [
    'text/vtt',
    'text/vtt; codecs="vtt"',
    'text/vtt; codecs="wvtt"',
  ]) {
    shaka.text.TextEngine.registerParser(
        mimeType, () => new shaka.text.VttTextParser());
  }