Timestamp: Handle Safari + better parsing
## Issue - The previous regex used a lookbehind assertion (ES2018, I think), which Safari has yet to implement. - There were a few bugs in the previous regex too, such as missing multiple timestamps in a line, or parsing "62:02" as "2:02" (although YT does this as well). - The previous method searched too deeply (matchAll) on each 'locate' call, which was wasteful.
This commit is contained in:
parent
897128a168
commit
267c019d7a
1 changed files with 78 additions and 7 deletions
|
@ -1,15 +1,86 @@
|
||||||
import visit from 'unist-util-visit';
|
import visit from 'unist-util-visit';
|
||||||
|
|
||||||
const TIMESTAMP_NODE_TYPE = 'timestamp';
|
const TIMESTAMP_NODE_TYPE = 'timestamp';
|
||||||
const TIMESTAMP_REGEX = /(?<!\d|:)([01]?\d|2[0-3]):([0-5]\d)(?::([0-5]\d))?(?!\d|:)/g;
|
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
// Tokenize timestamp
|
// Tokenize timestamp
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
||||||
|
/**
 * Scans 'value' for the next run of digits/colons that forms a valid timestamp.
 *
 * Accepted shapes (after trailing colons are stripped from the run):
 *   "9:59", "59:59", "9:59:59" and "00:00:00" through "23:59:59".
 *
 * @param {string} value - Text to scan.
 * @param {number} fromIndex - Reference index. When non-zero, a run that
 *   contains this index is skipped (it is assumed to be the remainder of a
 *   previously rejected word).
 * @param {boolean} strictlyFromIndex - When true, only a run starting exactly
 *   at 'fromIndex' is considered; anything else yields null.
 * @returns {{text: string, index: number} | null} The timestamp text (without
 *   trailing colons) and its index in 'value', or null when nothing matches.
 */
function findNextTimestamp(value, fromIndex, strictlyFromIndex) {
  // A fresh regex per call: with the 'g' flag, 'exec' resumes from 'lastIndex',
  // so each iteration continues right after the previous rough match without
  // copying the remainder of the string.
  const roughRegex = /[0-9:]+/g;
  let match;

  // Start with a rough match; 'match.index' is always relative to 'value'.
  while ((match = roughRegex.exec(value)) !== null) {
    const matchEnd = match.index + match[0].length;

    if (strictlyFromIndex && match.index !== fromIndex) {
      if (match.index > fromIndex) {
        // Already gone past desired index. Skip the rest.
        return null;
      }
      // Next match might fit 'fromIndex'.
      continue;
    }

    if (fromIndex > 0 && fromIndex >= match.index && fromIndex < matchEnd) {
      // Skip previously-rejected word, preventing "62:01" from being tokenized as "2:01", for example.
      // This assumes that a non-zero 'fromIndex' means that a previous lookup has failed.
      continue;
    }

    // Exclude trailing colons to allow "0:12: Start of section", for example.
    const str = match[0].replace(/:+$/, '');

    let isValidTimestamp;
    switch (str.length) {
      case 4: // "9:59"
        isValidTimestamp = /^[0-9]:[0-5][0-9]$/.test(str);
        break;
      case 5: // "59:59"
        isValidTimestamp = /^[0-5][0-9]:[0-5][0-9]$/.test(str);
        break;
      case 7: // "9:59:59"
        isValidTimestamp = /^[0-9]:[0-5][0-9]:[0-5][0-9]$/.test(str);
        break;
      case 8: // "00:00:00" .. "23:59:59"
        // Hours must be checked as a two-digit unit: testing each digit on its
        // own ("[0-2][0-3]") wrongly rejects 04-09 and 14-19.
        isValidTimestamp = /^(?:[01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]$/.test(str);
        break;
      default:
        // Reject
        isValidTimestamp = false;
        break;
    }

    if (isValidTimestamp) {
      return {
        text: str,
        index: match.index,
      };
    }

    if (strictlyFromIndex && match.index >= fromIndex) {
      return null; // Since it failed and we've gone past the desired index, skip the rest.
    }
  }

  return null;
}
|
||||||
|
|
||||||
/**
 * Locator used by the timestamp tokenizer.
 *
 * @param {string} value - Text to scan.
 * @param {number} fromIndex - Index passed through to 'findNextTimestamp'.
 * @returns {number} Index of the next timestamp in 'value', or -1 if none.
 */
function locateTimestamp(value, fromIndex) {
  // Non-strict lookup: the timestamp may start anywhere, not only at 'fromIndex'.
  const found = findNextTimestamp(value, fromIndex, false);
  if (!found) {
    return -1;
  }
  return found.index;
}
|
||||||
|
|
||||||
// Generate 'timestamp' markdown node
|
// Generate 'timestamp' markdown node
|
||||||
|
@ -25,10 +96,10 @@ function tokenizeTimestamp(eat, value, silent) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const match = value.match(TIMESTAMP_REGEX);
|
const ts = findNextTimestamp(value, 0, true);
|
||||||
if (match) {
|
if (ts) {
|
||||||
try {
|
try {
|
||||||
const text = match[0];
|
const text = ts.text;
|
||||||
return eat(text)(createTimestampNode(text));
|
return eat(text)(createTimestampNode(text));
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
// Do nothing
|
// Do nothing
|
||||||
|
@ -37,7 +108,7 @@ function tokenizeTimestamp(eat, value, silent) {
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenizeTimestamp.locator = locateTimestamp;
|
tokenizeTimestamp.locator = locateTimestamp;
|
||||||
tokenizeTimestamp.notInList = true;
|
tokenizeTimestamp.notInList = true; // Flag doesn't work? It'll always tokenizes in List and never in Bullet.
|
||||||
tokenizeTimestamp.notInLink = true;
|
tokenizeTimestamp.notInLink = true;
|
||||||
tokenizeTimestamp.notInBlock = true;
|
tokenizeTimestamp.notInBlock = true;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue