Skip to content

Commit

Permalink
Remove dependency on recommendation-api service. (#4905)
Browse files Browse the repository at this point in the history
* Rip out recommendation-api service.

* Untangle adding description.

* Untangle description translation.

* Untangle adding captions.

* Untangle translating image captions.

* Put try-catch around Commons calls.

* Enforce null.

---------

Co-authored-by: Cooltey Feng <[email protected]>
  • Loading branch information
dbrant and cooltey authored Sep 12, 2024
1 parent 80bad1f commit bcbc54b
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 104 deletions.
20 changes: 0 additions & 20 deletions app/src/main/java/org/wikipedia/dataclient/RestService.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import org.wikipedia.readinglist.sync.SyncedReadingLists.RemoteIdResponseBatch
import org.wikipedia.readinglist.sync.SyncedReadingLists.RemoteReadingList
import org.wikipedia.readinglist.sync.SyncedReadingLists.RemoteReadingListEntry
import org.wikipedia.readinglist.sync.SyncedReadingLists.RemoteReadingListEntryBatch
import org.wikipedia.suggestededits.provider.SuggestedEditItem
import retrofit2.Call
import retrofit2.Response
import retrofit2.http.Body
Expand Down Expand Up @@ -197,25 +196,6 @@ interface RestService {
@Query("csrf_token") token: String?
): Call<Unit>

// ------- Recommendations -------
@Headers("Cache-Control: no-cache")
@GET("data/recommendation/caption/addition/{lang}")
suspend fun getImagesWithoutCaptions(@Path("lang") lang: String): List<SuggestedEditItem>

@Headers("Cache-Control: no-cache")
@GET("data/recommendation/caption/translation/from/{fromLang}/to/{toLang}")
suspend fun getImagesWithTranslatableCaptions(@Path("fromLang") fromLang: String,
@Path("toLang") toLang: String): List<SuggestedEditItem>

@Headers("Cache-Control: no-cache")
@GET("data/recommendation/description/addition/{lang}")
suspend fun getArticlesWithoutDescriptions(@Path("lang") lang: String): List<SuggestedEditItem>

@Headers("Cache-Control: no-cache")
@GET("data/recommendation/description/translation/from/{fromLang}/to/{toLang}")
suspend fun getArticlesWithTranslatableDescriptions(@Path("fromLang") fromLang: String,
@Path("toLang") toLang: String): List<SuggestedEditItem>

// ------- Talk pages -------
@Headers("Cache-Control: no-cache")
@GET("page/talk/{title}")
Expand Down
11 changes: 9 additions & 2 deletions app/src/main/java/org/wikipedia/dataclient/Service.kt
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ interface Service {
@Query("wbetlanguage") entityLang: String
): MwQueryResponse

/**
 * Runs an `action=query` request with `prop=info&inprop=protection`, i.e. asks for
 * page info including protection data for the given titles.
 *
 * @param titles value sent as the `titles` query parameter (presumably several titles
 * may be joined with `|`, as is usual for the MediaWiki API — confirm against callers).
 * @return the decoded [MwQueryResponse] for the query.
 */
@GET(MW_API_PREFIX + "action=query&prop=info&inprop=protection")
suspend fun getProtection(@Query("titles") titles: String): MwQueryResponse

/**
 * Runs an `action=query` request that combines page protection info
 * (`prop=info&inprop=protection`) with `meta=userinfo&uiprop=groups`, so a single
 * response carries both the protection data for the titles and the user's groups.
 *
 * @param titles value sent as the `titles` query parameter.
 * @return the decoded [MwQueryResponse] for the query.
 */
@GET(MW_API_PREFIX + "action=query&meta=userinfo&prop=info&inprop=protection&uiprop=groups")
suspend fun getProtectionWithUserInfo(@Query("titles") titles: String): MwQueryResponse

Expand Down Expand Up @@ -190,8 +193,12 @@ interface Service {
@Query("gcmcontinue") continueStr: String?
): MwQueryResponse

@GET(MW_API_PREFIX + "action=query&generator=random&redirects=1&grnnamespace=6&prop=description|imageinfo|revisions&rvprop=ids|timestamp|flags|comment|user|content&rvslots=mediainfo&iiprop=timestamp|user|url|mime|extmetadata&iiurlwidth=" + PREFERRED_THUMB_SIZE)
@Headers("Cache-Control: no-cache")
/**
 * Requests a batch of random pages via `generator=random` restricted to namespace 0
 * (`grnnamespace=0`), with `redirects=1`, asking for `pageprops`, `description` and
 * `info` (including protection via `inprop=protection`) on each page.
 *
 * @param count value sent as `grnlimit`, i.e. how many random pages to request; defaults to 50.
 * @return the decoded [MwQueryResponse] for the query.
 */
@GET(MW_API_PREFIX + "action=query&generator=random&redirects=1&grnnamespace=0&prop=pageprops|description|info&inprop=protection")
suspend fun getRandomPages(
@Query("grnlimit") count: Int = 50,
): MwQueryResponse

/**
 * Requests a batch of random pages via `generator=random` restricted to namespace 6
 * (`grnnamespace=6`; presumably the File namespace — confirm), asking for `info`
 * (with protection), `description`, `imageinfo`, `revisions` and `globalusage`.
 *
 * Revision content is requested from the `mediainfo` slot (`rvslots=mediainfo`), and
 * image info is requested with thumbnail width `PREFERRED_THUMB_SIZE` (`iiurlwidth`).
 *
 * @param count value sent as `grnlimit`, i.e. how many random pages to request; defaults to 10.
 * @return the decoded [MwQueryResponse] for the query.
 */
@GET(MW_API_PREFIX + "action=query&generator=random&redirects=1&grnnamespace=6&prop=info|description|imageinfo|revisions|globalusage&inprop=protection&gunamespace=0&rvprop=ids|timestamp|flags|comment|user|content&rvslots=mediainfo&iiprop=timestamp|user|url|mime|extmetadata&iilocalonly=1&iiurlwidth=" + PREFERRED_THUMB_SIZE)
suspend fun getRandomImages(
@Query("grnlimit") count: Int = 10,
): MwQueryResponse
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ class SuggestedEditsCardItemViewModel(bundle: Bundle) : ViewModel() {

private suspend fun addDescription(langFromCode: String): PageSummaryForEdit {
val pageSummary = EditingSuggestionsProvider.getNextArticleWithMissingDescription(
WikiSite.forLanguageCode(langFromCode),
SuggestedEditsCardItemFragment.MAX_RETRY_LIMIT)
WikiSite.forLanguageCode(langFromCode))

return PageSummaryForEdit(
pageSummary.apiTitle,
Expand All @@ -102,8 +101,7 @@ class SuggestedEditsCardItemViewModel(bundle: Bundle) : ViewModel() {

private suspend fun translateDescription(langFromCode: String, targetLanguage: String): Pair<PageSummaryForEdit, PageSummaryForEdit> {
val pair = EditingSuggestionsProvider.getNextArticleWithMissingDescription(
WikiSite.forLanguageCode(langFromCode),
targetLanguage, SuggestedEditsCardItemFragment.MAX_RETRY_LIMIT)
WikiSite.forLanguageCode(langFromCode), targetLanguage)
val source = pair.first
val target = pair.second

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ internal class DeveloperSettingsPreferenceLoader(fragment: PreferenceFragmentCom
.show()
}) {
val summary = EditingSuggestionsProvider.getNextArticleWithMissingDescription(WikipediaApp.instance.wikiSite,
WikipediaApp.instance.languageState.appLanguageCodes[1], 10)
WikipediaApp.instance.languageState.appLanguageCodes[1])
MaterialAlertDialogBuilder(fragment.requireActivity())
.setTitle(fromHtml(summary.second.displayTitle))
.setMessage(fromHtml(summary.second.extract))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@ import kotlinx.coroutines.withContext
import org.wikipedia.Constants
import org.wikipedia.dataclient.ServiceFactory
import org.wikipedia.dataclient.WikiSite
import org.wikipedia.dataclient.mwapi.MwException
import org.wikipedia.dataclient.mwapi.MwQueryPage
import org.wikipedia.dataclient.mwapi.MwQueryResult
import org.wikipedia.dataclient.page.PageSummary
import org.wikipedia.dataclient.wikidata.Entities
import org.wikipedia.descriptions.DescriptionEditUtil
import org.wikipedia.json.JsonUtil
import org.wikipedia.page.PageTitle
import org.wikipedia.suggestededits.SuggestedEditsRecentEditsViewModel
import org.wikipedia.util.log.L
import java.time.Instant
import java.util.concurrent.Semaphore
import kotlin.math.abs
Expand Down Expand Up @@ -42,7 +47,7 @@ object EditingSuggestionsProvider {
private var revertCandidateLastRevId = 0L
private var revertCandidateLastTimeStamp = Instant.now()

private const val MAX_RETRY_LIMIT: Long = 50
private const val MAX_RETRY_LIMIT: Long = 20

suspend fun getNextArticleWithMissingDescription(wiki: WikiSite, retryLimit: Long = MAX_RETRY_LIMIT): PageSummary {
var pageSummary: PageSummary
Expand All @@ -59,21 +64,35 @@ object EditingSuggestionsProvider {
}

var tries = 0
do {
val listOfSuggestedEditItem = ServiceFactory.getRest(Constants.wikidataWikiSite)
.getArticlesWithoutDescriptions(WikiSite.normalizeLanguageCode(wiki.languageCode))
val mwQueryResponse = ServiceFactory.get(wiki)
.getDescription(listOfSuggestedEditItem.joinToString("|") { it.title() })
while (tries++ <= retryLimit && title.isEmpty()) {
// Fetch a batch of random articles, and get the ones that have no description.
val resultsWithNoDescription = ServiceFactory.get(wiki).getRandomPages().query?.pages?.filter {
it.description.isNullOrEmpty()
}.orEmpty()

articlesWithMissingDescriptionCacheLang = wiki.languageCode
mwQueryResponse.query?.pages?.forEach {
if (it.description.isNullOrEmpty()) {

if (resultsWithNoDescription.isEmpty() || DescriptionEditUtil.wikiUsesLocalDescriptions(wiki.languageCode)) {
resultsWithNoDescription.forEach {
articlesWithMissingDescriptionCache.addFirst(it.title)
}
} else {
// If the wiki uses Wikidata descriptions, check protection status of the Wikidata items.
val qNums = resultsWithNoDescription.mapNotNull { it.pageProps?.wikiBaseItem.orEmpty().ifEmpty { null } }
val wdResponse = ServiceFactory.get(Constants.wikidataWikiSite).getProtection(qNums.joinToString("|"))
val unprotectedQNums = wdResponse.query?.pages?.filter { it.protection.isEmpty() }?.map { it.title }

resultsWithNoDescription.forEach {
if (unprotectedQNums?.contains(it.pageProps?.wikiBaseItem) == true) {
articlesWithMissingDescriptionCache.addFirst(it.title)
}
}
}

if (!articlesWithMissingDescriptionCache.isEmpty()) {
title = articlesWithMissingDescriptionCache.removeFirst()
}
} while (tries++ < retryLimit && title.isEmpty())
}

pageSummary = ServiceFactory.getRest(wiki).getPageSummary(null, title)
} finally {
Expand All @@ -83,8 +102,7 @@ object EditingSuggestionsProvider {
return pageSummary
}

suspend fun getNextArticleWithMissingDescription(sourceWiki: WikiSite, targetLang: String,
retryLimit: Long = MAX_RETRY_LIMIT): Pair<PageSummary, PageSummary> {
suspend fun getNextArticleWithMissingDescription(sourceWiki: WikiSite, targetLang: String, retryLimit: Long = MAX_RETRY_LIMIT): Pair<PageSummary, PageSummary> {
var pair = Pair(PageSummary(), PageSummary())
withContext(Dispatchers.IO) {
mutex.acquire()
Expand All @@ -100,41 +118,39 @@ object EditingSuggestionsProvider {
titles = articlesWithTranslatableDescriptionCache.removeFirst()
}
var tries = 0
do {
val listOfSuggestedEditItem = ServiceFactory.getRest(Constants.wikidataWikiSite)
.getArticlesWithTranslatableDescriptions(WikiSite.normalizeLanguageCode(sourceWiki.languageCode),
WikiSite.normalizeLanguageCode(targetLang))
val mwQueryPages = ServiceFactory.get(targetWiki)
.getDescription(listOfSuggestedEditItem.joinToString("|") { it.title() }).query?.pages
while (tries++ <= retryLimit && titles == null) {
// Fetch a batch of random articles from the target language wiki, and get ones that have no description.
val resultsWithNoDescription = ServiceFactory.get(targetWiki).getRandomPages().query?.pages?.filter {
it.description.isNullOrEmpty()
}.orEmpty()

articlesWithTranslatableDescriptionCacheFromLang = sourceWiki.languageCode
articlesWithTranslatableDescriptionCacheToLang = targetLang

listOfSuggestedEditItem.forEach { item ->
val page = mwQueryPages?.find { it.title == item.title() }
if (page != null && !page.description.isNullOrEmpty()) {
return@forEach
}
val descriptions = item.entity?.getDescriptions().orEmpty()
val siteLinks = item.entity?.getSiteLinks().orEmpty()
if (descriptions.containsKey(targetLang) ||
!descriptions.containsKey(sourceWiki.languageCode) ||
!siteLinks.containsKey(sourceWiki.dbName()) ||
!siteLinks.containsKey(targetWiki.dbName())
) {
return@forEach
}
val sourceTitle = PageTitle(siteLinks[sourceWiki.dbName()]!!.title, sourceWiki).apply {
description = descriptions[sourceWiki.languageCode]?.value
// Get the Wikidata entities for the articles, to see if they have descriptions in the source language.
val qNums = resultsWithNoDescription.mapNotNull { it.pageProps?.wikiBaseItem.orEmpty().ifEmpty { null } }
val wdResponse = ServiceFactory.get(Constants.wikidataWikiSite).getWikidataLabelsAndDescriptions(
qNums.joinToString("|"),
WikiSite.normalizeLanguageCode(sourceWiki.languageCode) + "|" + WikiSite.normalizeLanguageCode(targetLang),
sourceWiki.dbName() + "|" + targetWiki.dbName())

// Get the Q numbers for which the source language description exists
val sourceLangEntities = wdResponse.entities.filter {
it.value.getDescriptions()[sourceWiki.languageCode]?.value.orEmpty().isNotEmpty() &&
it.value.getSiteLinks()[sourceWiki.dbName()]?.title.orEmpty().isNotEmpty() }

sourceLangEntities.values.forEach { entity ->
val sourceTitle = PageTitle(entity.getSiteLinks()[sourceWiki.dbName()]!!.title, sourceWiki).apply {
description = entity.getDescriptions()[sourceWiki.languageCode]?.value
}
val targetTitle = PageTitle(siteLinks[targetWiki.dbName()]!!.title, targetWiki)
val targetTitle = PageTitle(entity.getSiteLinks()[targetWiki.dbName()]!!.title, targetWiki)
articlesWithTranslatableDescriptionCache.addFirst(sourceTitle to targetTitle)
}

if (!articlesWithTranslatableDescriptionCache.isEmpty()) {
titles = articlesWithTranslatableDescriptionCache.removeFirst()
}
} while (tries++ < retryLimit && titles == null)
}

titles?.let {
val sourcePageSummary = async {
Expand Down Expand Up @@ -170,16 +186,33 @@ object EditingSuggestionsProvider {
}
imagesWithMissingCaptionsCacheLang = lang
var tries = 0
do {
val listOfSuggestedEditItem = ServiceFactory.getRest(Constants.commonsWikiSite)
.getImagesWithoutCaptions(WikiSite.normalizeLanguageCode(lang))
listOfSuggestedEditItem.forEach {
imagesWithMissingCaptionsCache.addFirst(it.title())
}
if (!imagesWithMissingCaptionsCache.isEmpty()) {
title = imagesWithMissingCaptionsCache.removeFirst()
while (tries++ <= retryLimit && title.isEmpty()) {
try {
val candidates = ServiceFactory.get(Constants.commonsWikiSite).getRandomImages()
.query?.pages?.filter {
it.imageInfo()?.mime.orEmpty().startsWith("image") &&
it.protection.isEmpty()
}.orEmpty()

candidates.forEach { candidate ->
val entityJson = candidate.revisions.firstOrNull()?.getContentFromSlot("mediainfo")
if (entityJson.isNullOrEmpty()) {
return@forEach
}
JsonUtil.decodeFromString<Entities.Entity>(entityJson)?.let { entity ->
if (entity.getLabels()[WikiSite.normalizeLanguageCode(lang)]?.value.isNullOrEmpty()) {
imagesWithMissingCaptionsCache.addFirst(candidate.title)
}
}
}

if (!imagesWithMissingCaptionsCache.isEmpty()) {
title = imagesWithMissingCaptionsCache.removeFirst()
}
} catch (e: MwException) {
L.w(e)
}
} while (tries++ < retryLimit && title.isEmpty())
}
} finally {
mutex.release()
}
Expand All @@ -205,21 +238,35 @@ object EditingSuggestionsProvider {
imagesWithTranslatableCaptionCacheFromLang = sourceLang
imagesWithTranslatableCaptionCacheToLang = targetLang
var tries = 0
do {
val listOfSuggestedEditItem = ServiceFactory.getRest(Constants.commonsWikiSite).getImagesWithTranslatableCaptions(
WikiSite.normalizeLanguageCode(sourceLang),
WikiSite.normalizeLanguageCode(targetLang)
)
listOfSuggestedEditItem.forEach {
if (!it.captions.containsKey(sourceLang) || it.captions.containsKey(targetLang)) {
return@forEach
while (tries++ <= retryLimit && (pair.first.isEmpty() || pair.second.isEmpty())) {
try {
val candidates = ServiceFactory.get(Constants.commonsWikiSite).getRandomImages(50)
.query?.pages?.filter {
it.imageInfo()?.mime.orEmpty().startsWith("image") &&
it.protection.isEmpty()
}.orEmpty()

candidates.forEach { candidate ->
val entityJson = candidate.revisions.firstOrNull()?.getContentFromSlot("mediainfo")
if (entityJson.isNullOrEmpty()) {
return@forEach
}
JsonUtil.decodeFromString<Entities.Entity>(entityJson)?.let { entity ->
val labels = entity.getLabels()
if (labels[WikiSite.normalizeLanguageCode(sourceLang)]?.value.orEmpty().isNotEmpty() &&
labels[WikiSite.normalizeLanguageCode(targetLang)]?.value.isNullOrEmpty()) {
imagesWithTranslatableCaptionCache.addFirst(labels[sourceLang]?.value.orEmpty() to candidate.title)
}
}
}
imagesWithTranslatableCaptionCache.addFirst((it.captions[sourceLang] ?: error("")) to it.title())
} catch (e: MwException) {
L.w(e)
}

if (!imagesWithTranslatableCaptionCache.isEmpty()) {
pair = imagesWithTranslatableCaptionCache.removeFirst()
}
} while (tries++ < retryLimit && (pair.first.isEmpty() || pair.second.isEmpty()))
}
} finally {
mutex.release()
}
Expand Down

This file was deleted.

0 comments on commit bcbc54b

Please sign in to comment.