diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoParser.java b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoParser.java index a1ef801c62..619de9e74b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoParser.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoParser.java @@ -7,21 +7,14 @@ import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; -import java.util.ArrayList; -import java.util.List; import java.util.Map; import java.util.regex.Pattern; -import java.util.regex.Matcher; -import java.util.regex.MatchResult; /** * A helper class that is meant to be used by services that need to parse durations such as * {@code 23 seconds} and/or upload dates in the format {@code 2 days ago} or similar. */ public class TimeAgoParser { - - private static final Pattern DURATION_PATTERN = Pattern.compile("(?:(\\d+) )?([A-z]+)"); - private final PatternsHolder patternsHolder; private final OffsetDateTime now; @@ -35,8 +28,22 @@ public class TimeAgoParser { * language word separator. */ public TimeAgoParser(final PatternsHolder patternsHolder) { + this(patternsHolder, OffsetDateTime.now(ZoneOffset.UTC)); + } + + /** + * Creates a helper to parse upload dates in the format '2 days ago'. + *
<p>
+ * Instantiate a new {@link TimeAgoParser} every time you extract a new batch of items.
+ * </p>
+ * + * @param patternsHolder An object that holds the "time ago" patterns, special cases, and the + * language word separator. + * @param now The current time + */ + public TimeAgoParser(final PatternsHolder patternsHolder, final OffsetDateTime now) { this.patternsHolder = patternsHolder; - now = OffsetDateTime.now(ZoneOffset.UTC); + this.now = now; } /** @@ -50,13 +57,11 @@ public TimeAgoParser(final PatternsHolder patternsHolder) { * @throws ParsingException if the time unit could not be recognized */ public DateWrapper parse(final String textualDate) throws ParsingException { - for (final Map.Entry> caseUnitEntry - : patternsHolder.specialCases().entrySet()) { + for (final var caseUnitEntry : patternsHolder.specialCases().entrySet()) { final ChronoUnit chronoUnit = caseUnitEntry.getKey(); - for (final Map.Entry caseMapToAmountEntry - : caseUnitEntry.getValue().entrySet()) { + for (final var caseMapToAmountEntry : caseUnitEntry.getValue().entrySet()) { final String caseText = caseMapToAmountEntry.getKey(); - final Integer caseAmount = caseMapToAmountEntry.getValue(); + final int caseAmount = caseMapToAmountEntry.getValue(); if (textualDateMatches(textualDate, caseText)) { return getResultFor(caseAmount, chronoUnit); @@ -67,48 +72,6 @@ public DateWrapper parse(final String textualDate) throws ParsingException { return getResultFor(parseTimeAgoAmount(textualDate), parseChronoUnit(textualDate)); } - /** - * Parses a textual duration into a duration computer number. 
- * - * @param textualDuration the textual duration to parse - * @return the textual duration parsed, as a primitive {@code long} - * @throws ParsingException if the textual duration could not be parsed - */ - public long parseDuration(final String textualDuration) throws ParsingException { - // We can't use Matcher.results, as it is only available on Android 14 and above - final Matcher matcher = DURATION_PATTERN.matcher(textualDuration); - final List results = new ArrayList<>(); - while (matcher.find()) { - results.add(matcher.toMatchResult()); - } - - return results.stream() - .map(match -> { - final String digits = match.group(1); - final String word = match.group(2); - - int amount; - try { - amount = Integer.parseInt(digits); - } catch (final NumberFormatException ignored) { - amount = 1; - } - - final ChronoUnit unit; - try { - unit = parseChronoUnit(word); - } catch (final ParsingException ignored) { - return 0L; - } - - return amount * unit.getDuration().getSeconds(); - }) - .filter(n -> n > 0) - .reduce(Long::sum) - .orElseThrow(() -> new ParsingException( - "Could not parse duration \"" + textualDuration + "\"")); - } - private int parseTimeAgoAmount(final String textualDate) { try { return Integer.parseInt(textualDate.replaceAll("\\D+", "")); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoPatternsManager.java b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoPatternsManager.java index 19b697661c..47889a5d32 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoPatternsManager.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoPatternsManager.java @@ -3,6 +3,8 @@ import org.schabi.newpipe.extractor.timeago.PatternsHolder; import org.schabi.newpipe.extractor.timeago.PatternsManager; +import java.time.OffsetDateTime; + import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -26,4 +28,17 @@ public static 
TimeAgoParser getTimeAgoParserFor(@Nonnull final Localization loca return new TimeAgoParser(holder); } + + @Nullable + public static TimeAgoParser getTimeAgoParserFor( + @Nonnull final Localization localization, + @Nonnull final OffsetDateTime now) { + final PatternsHolder holder = getPatternsFor(localization); + + if (holder == null) { + return null; + } + + return new TimeAgoParser(holder, now); + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index c2502139a3..8e23f19122 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -239,8 +239,8 @@ private YoutubeParsingHelper() { private static final String IOS_OS_VERSION = "18.1.0.22B83"; /** - * Spoofing an iPhone 15 Pro Max running iOS 18.1.0 with the hardcoded version of the iOS app. To be - * used in the user agent for requests. + * Spoofing an iPhone 15 Pro Max running iOS 18.1.0 with the hardcoded version of the iOS app. + * To be used in the user agent for requests. 
* * @see #IOS_OS_VERSION */ @@ -1412,7 +1412,8 @@ public static String getAndroidUserAgent(@Nullable final Localization localizati */ @Nonnull public static String getIosUserAgent(@Nullable final Localization localization) { - // Spoofing an iPhone 15 Pro Max running iOS 18.1.0 with the hardcoded version of the iOS app + // Spoofing an iPhone 15 Pro Max running iOS 18.1.0 + // with the hardcoded version of the iOS app return "com.google.ios.youtube/" + IOS_YOUTUBE_CLIENT_VERSION + "(" + IOS_DEVICE_MODEL + "; U; CPU iOS " + IOS_USER_AGENT_VERSION + " like Mac OS X; " diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/localization/TimeAgoParserTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/localization/TimeAgoParserTest.java index 4d12b3da93..db45e807b9 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/localization/TimeAgoParserTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/localization/TimeAgoParserTest.java @@ -1,31 +1,114 @@ package org.schabi.newpipe.extractor.localization; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.schabi.newpipe.extractor.exceptions.ParsingException; - +import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.schabi.newpipe.extractor.localization.TimeAgoParserTest.ParseTimeAgoTestData.greaterThanDay; +import static org.schabi.newpipe.extractor.localization.TimeAgoParserTest.ParseTimeAgoTestData.lessThanDay; -class TimeAgoParserTest { - private static TimeAgoParser timeAgoParser; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import 
java.time.temporal.ChronoUnit; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Stream; - @BeforeAll - static void setUp() { - timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT); +class TimeAgoParserTest { + public static Stream parseTimeAgo() { + return Stream.of( + lessThanDay(Duration.ofSeconds(1), "1 second", "1 sec"), + lessThanDay(Duration.ofSeconds(12), "12 second", "12 sec"), + lessThanDay(Duration.ofMinutes(1), "1 minute", "1 min"), + lessThanDay(Duration.ofMinutes(23), "23 minutes", "23 min"), + lessThanDay(Duration.ofHours(1), "1 hour", "1 hr"), + lessThanDay(Duration.ofHours(8), "8 hour", "8 hr"), + greaterThanDay(d -> d.minusDays(1), "1 day", "1 day"), + greaterThanDay(d -> d.minusDays(3), "3 days", "3 day"), + greaterThanDay(d -> d.minusWeeks(1), "1 week", "1 wk"), + greaterThanDay(d -> d.minusWeeks(3), "3 weeks", "3 wk"), + greaterThanDay(d -> d.minusMonths(1), "1 month", "1 mo"), + greaterThanDay(d -> d.minusMonths(3), "3 months", "3 mo"), + greaterThanDay(d -> d.minusYears(1).minusDays(1), "1 year", "1 yr"), + greaterThanDay(d -> d.minusYears(3).minusDays(1), "3 years", "3 yr") + ).map(Arguments::of); } - @Test - void testGetDuration() throws ParsingException { - assertEquals(1, timeAgoParser.parseDuration("one second")); - assertEquals(1, timeAgoParser.parseDuration("second")); - assertEquals(49, timeAgoParser.parseDuration("49 seconds")); - assertEquals(61, timeAgoParser.parseDuration("1 minute, 1 second")); + @ParameterizedTest + @MethodSource + void parseTimeAgo(final ParseTimeAgoTestData testData) { + final OffsetDateTime now = OffsetDateTime.of( + LocalDateTime.of(2020, 1, 1, 1, 1, 1), + ZoneOffset.UTC); + final TimeAgoParser parser = Objects.requireNonNull( + TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT, now)); + + final OffsetDateTime expected = testData.getExpectedApplyToNow().apply(now); + + assertAll( + Stream.of( + testData.getTextualDateLong(), + 
testData.getTextualDateShort()) + .map(textualDate -> () -> assertEquals( + expected, + parser.parse(textualDate).offsetDateTime(), + "Expected " + expected + " for " + textualDate + )) + ); } - @Test - void testGetDurationError() { - assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("abcd")); - assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("12 abcd")); + static class ParseTimeAgoTestData { + public static final String AGO_SUFFIX = " ago"; + private final Function expectedApplyToNow; + private final String textualDateLong; + private final String textualDateShort; + + ParseTimeAgoTestData( + final Function expectedApplyToNow, + final String textualDateLong, + final String textualDateShort + ) { + this.expectedApplyToNow = expectedApplyToNow; + this.textualDateLong = textualDateLong; + this.textualDateShort = textualDateShort; + } + + public static ParseTimeAgoTestData lessThanDay( + final Duration duration, + final String textualDateLong, + final String textualDateShort + ) { + return new ParseTimeAgoTestData( + d -> d.minus(duration), + textualDateLong + AGO_SUFFIX, + textualDateShort + AGO_SUFFIX); + } + + public static ParseTimeAgoTestData greaterThanDay( + final Function expectedApplyToNow, + final String textualDateLong, + final String textualDateShort + ) { + return new ParseTimeAgoTestData( + d -> expectedApplyToNow.apply(d).truncatedTo(ChronoUnit.HOURS), + textualDateLong + AGO_SUFFIX, + textualDateShort + AGO_SUFFIX); + } + + public Function getExpectedApplyToNow() { + return expectedApplyToNow; + } + + public String getTextualDateLong() { + return textualDateLong; + } + + public String getTextualDateShort() { + return textualDateShort; + } } -} \ No newline at end of file +} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/TimeagoTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/TimeagoTest.java deleted file mode 100644 index 0b4eecb50b..0000000000 --- 
a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/TimeagoTest.java +++ /dev/null @@ -1,154 +0,0 @@ -package org.schabi.newpipe.extractor.utils; - -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.localization.Localization; -import org.schabi.newpipe.extractor.localization.TimeAgoParser; -import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager; - -import java.time.OffsetDateTime; -import java.time.ZoneOffset; -import java.time.temporal.ChronoUnit; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class TimeagoTest { - private static TimeAgoParser parser; - private static OffsetDateTime now; - - @BeforeAll - public static void setUp() { - parser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT); - now = OffsetDateTime.now(ZoneOffset.UTC); - } - - @Test - void parseTimeago() throws ParsingException { - assertTimeWithin1s( - now.minus(1, ChronoUnit.SECONDS), - parser.parse("1 second ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(12, ChronoUnit.SECONDS), - parser.parse("12 second ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(1, ChronoUnit.MINUTES), - parser.parse("1 minute ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(23, ChronoUnit.MINUTES), - parser.parse("23 minutes ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(1, ChronoUnit.HOURS), - parser.parse("1 hour ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(8, ChronoUnit.HOURS), - parser.parse("8 hours ago").offsetDateTime() - ); - assertEquals( - now.minus(1, ChronoUnit.DAYS).truncatedTo(ChronoUnit.HOURS), - parser.parse("1 day ago").offsetDateTime() - ); - assertEquals( - now.minus(3, ChronoUnit.DAYS).truncatedTo(ChronoUnit.HOURS), - parser.parse("3 days ago").offsetDateTime() - ); - 
assertEquals( - now.minus(1, ChronoUnit.WEEKS).truncatedTo(ChronoUnit.HOURS), - parser.parse("1 week ago").offsetDateTime() - ); - assertEquals( - now.minus(3, ChronoUnit.WEEKS).truncatedTo(ChronoUnit.HOURS), - parser.parse("3 weeks ago").offsetDateTime() - ); - assertEquals( - now.minus(1, ChronoUnit.MONTHS).truncatedTo(ChronoUnit.HOURS), - parser.parse("1 month ago").offsetDateTime() - ); - assertEquals( - now.minus(3, ChronoUnit.MONTHS).truncatedTo(ChronoUnit.HOURS), - parser.parse("3 months ago").offsetDateTime() - ); - assertEquals( - now.minus(1, ChronoUnit.YEARS).minusDays(1).truncatedTo(ChronoUnit.HOURS), - parser.parse("1 year ago").offsetDateTime() - ); - assertEquals( - now.minus(3, ChronoUnit.YEARS).minusDays(1).truncatedTo(ChronoUnit.HOURS), - parser.parse("3 years ago").offsetDateTime() - ); - } - - @Test - void parseTimeagoShort() throws ParsingException { - final TimeAgoParser parser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT); - final OffsetDateTime now = OffsetDateTime.now(ZoneOffset.UTC); - - assertTimeWithin1s( - now.minus(1, ChronoUnit.SECONDS), - parser.parse("1 sec ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(12, ChronoUnit.SECONDS), - parser.parse("12 sec ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(1, ChronoUnit.MINUTES), - parser.parse("1 min ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(23, ChronoUnit.MINUTES), - parser.parse("23 min ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(1, ChronoUnit.HOURS), - parser.parse("1 hr ago").offsetDateTime() - ); - assertTimeWithin1s( - now.minus(8, ChronoUnit.HOURS), - parser.parse("8 hr ago").offsetDateTime() - ); - assertEquals( - now.minus(1, ChronoUnit.DAYS).truncatedTo(ChronoUnit.HOURS), - parser.parse("1 day ago").offsetDateTime() - ); - assertEquals( - now.minus(3, ChronoUnit.DAYS).truncatedTo(ChronoUnit.HOURS), - parser.parse("3 days ago").offsetDateTime() - ); - assertEquals( - now.minus(1, 
ChronoUnit.WEEKS).truncatedTo(ChronoUnit.HOURS), - parser.parse("1 wk ago").offsetDateTime() - ); - assertEquals( - now.minus(3, ChronoUnit.WEEKS).truncatedTo(ChronoUnit.HOURS), - parser.parse("3 wk ago").offsetDateTime() - ); - assertEquals( - now.minus(1, ChronoUnit.MONTHS).truncatedTo(ChronoUnit.HOURS), - parser.parse("1 mo ago").offsetDateTime() - ); - assertEquals( - now.minus(3, ChronoUnit.MONTHS).truncatedTo(ChronoUnit.HOURS), - parser.parse("3 mo ago").offsetDateTime() - ); - assertEquals( - now.minus(1, ChronoUnit.YEARS).minusDays(1).truncatedTo(ChronoUnit.HOURS), - parser.parse("1 yr ago").offsetDateTime() - ); - assertEquals( - now.minus(3, ChronoUnit.YEARS).minusDays(1).truncatedTo(ChronoUnit.HOURS), - parser.parse("3 yr ago").offsetDateTime() - ); - } - - void assertTimeWithin1s(final OffsetDateTime expected, final OffsetDateTime actual) { - final long delta = Math.abs(expected.toEpochSecond() - actual.toEpochSecond()); - assertTrue(delta <= 1, String.format("Expected: %s\nActual: %s", expected, actual)); - } -}