diff --git a/src/data/download/downloadLinkGetter.ts b/src/data/download/downloadLinkGetter.ts deleted file mode 100644 index 7bcd81d..0000000 --- a/src/data/download/downloadLinkGetter.ts +++ /dev/null @@ -1,23 +0,0 @@ -import cheerio from 'cheerio'; -import fetch from 'node-fetch'; - -const BASE_URL = 'https://app.thedigitalbiblelibrary.org'; - -export default async function getDownloadLink(url: string): Promise { - try { - const initialResponse = await fetch(url); - const initialData = await initialResponse.text(); - const $1 = cheerio.load(initialData); - const downloadResponse = await fetch( - BASE_URL + $1('.list-group-item > a').attr('href') - ); - - const downloadData = await downloadResponse.text(); - const $2 = cheerio.load(downloadData); - const href: string = BASE_URL + $2('#download_button').attr('href'); - - return href; - } catch (error) { - throw new Error(`getDownloadLink failed: ${error.message}`); - } -} diff --git a/src/data/download/downloader.ts b/src/data/download/downloader.ts deleted file mode 100644 index cef9612..0000000 --- a/src/data/download/downloader.ts +++ /dev/null @@ -1,16 +0,0 @@ -import fs from 'fs'; -import fetch, { Blob, Response } from 'node-fetch'; - -export default async function downloadZip( - url: string, - downloadPath: string -): Promise { - try { - const response: Response = await fetch(url); - const blob: Blob = await response.blob(); - const buffer = Buffer.from(await (blob as any).arrayBuffer()); - await fs.writeFileSync(downloadPath, buffer); - } catch (error) { - throw new Error(`download failed: ${error.message}`); - } -} diff --git a/src/data/download/index.ts b/src/data/download/index.ts deleted file mode 100644 index 9ca315b..0000000 --- a/src/data/download/index.ts +++ /dev/null @@ -1,7 +0,0 @@ -import getDownloadLink from './downloadLinkGetter'; -import downloadZip from './downloader'; - -export default class Download { - static downloadZip = downloadZip; - static getDownloadLink = getDownloadLink; -} diff --git a/src/data/files/delete.ts b/src/data/files/delete.ts deleted file mode 100644 index 7d83211..0000000 --- a/src/data/files/delete.ts +++ /dev/null @@ -1,17 +0,0 @@ -import fs from 'fs'; - -const deleteFolder = (path: string) => { - if (fs.existsSync(path)) { - fs.readdirSync(path).forEach((file) => { - const curPath = path + '/' + file; - if (fs.lstatSync(curPath).isDirectory()) { - deleteFolder(curPath); - } else { - fs.unlinkSync(curPath); - } - }); - fs.rmdirSync(path); - } -}; - -export default deleteFolder; diff --git a/src/data/files/index.ts b/src/data/files/index.ts deleted file mode 100644 index 2f21831..0000000 --- a/src/data/files/index.ts +++ /dev/null @@ -1,7 +0,0 @@ -import deleteFolder from './delete'; -import unzip from './unzipper'; - -export default class Files { - static unzip = unzip; - static deleteFolder = deleteFolder; -} diff --git a/src/data/files/unzipper.ts b/src/data/files/unzipper.ts deleted file mode 100644 index 25486dd..0000000 --- a/src/data/files/unzipper.ts +++ /dev/null @@ -1,18 +0,0 @@ -import * as unzipFile from 'unzip-stream'; -import fs from 'fs'; - -export default async function unzip( - outPath: string, - downloadPath: string -): Promise { - try { - await new Promise((resolve, reject) => { - fs.createReadStream(downloadPath) - .pipe(unzipFile.Extract({ path: outPath })) - .on('error', reject) - .on('finish', resolve); - }); - } catch (error) { - throw new Error(`unzip failed: ${error.message}`); - } -} diff --git a/src/data/importer/folderImporter.ts b/src/data/importer/folderImporter.ts deleted file mode 100644 index 8008871..0000000 --- a/src/data/importer/folderImporter.ts +++ /dev/null @@ -1,17 +0,0 @@ -import fs from 'fs'; - -import Download from '../download'; -import Files from '../files'; - -export default async function importFolder(url: string, outPath: string) { - try { - const downloadPath = `./${Math.random().toString(36).substring(2)}.zip`; - const downloadLink: string = await Download.getDownloadLink(url); - - await Download.downloadZip(downloadLink, downloadPath); - await Files.unzip(outPath, downloadPath); - await fs.promises.unlink(downloadPath); - } catch (error) { - throw new Error(`importFolder failed: ${error.message}`); - } -} diff --git a/src/data/importer/index.ts b/src/data/importer/index.ts deleted file mode 100644 index d3f47ab..0000000 --- a/src/data/importer/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -import importFolder from './folderImporter'; - -export default class Importer { - static importFolder = importFolder; -} diff --git a/src/errors/processingError.ts b/src/errors/processingError.ts deleted file mode 100644 index e69de29..0000000 diff --git a/src/index.ts b/src/index.ts index d4273b1..ea17cae 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,3 +1,3 @@ -import main from './main'; +import main from './main' -main(); +main() diff --git a/src/main.ts b/src/main.ts index 901d1e4..7ee3612 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,44 +1,462 @@ -import fetch, { Response } from 'node-fetch'; - -import Importer from './data/importer'; -import Content from './processing/content'; -import Directory from './processing/directory'; +import * as types from 'src/types' +import * as unzipFile from 'unzip-stream' +import cheerio from 'cheerio' +import fs from 'fs' +import fetch, { Blob, Response } from 'node-fetch' +import path from 'path' +import { DOMParser } from 'xmldom' const url = - 'https://app.thedigitalbiblelibrary.org/entries/_public_domain_entries_tabledata.json'; + 'https://app.thedigitalbiblelibrary.org/entries/_public_domain_entries_tabledata.json' + +const BASE_URL = 'https://app.thedigitalbiblelibrary.org' export default async function main(): Promise { try { - let count = 0; - const response: Response = await fetch(url); - const initialData = await response.json(); - const array = initialData.aaData; + let count = 0 + const response: Response = await fetch(url) + const initialData = await response.json() + const array = initialData.aaData for await (const item of array) { - count++; - console.log(`(${count + '/' + array.length}) Setting up: ${item[4]}`); + count++ + console.log(`(${count + '/' + array.length}) Setting up: ${item[4]}`) await setupBible( `https://app.thedigitalbiblelibrary.org/entry?id=${item[0]}` - ); + ) } } catch (error) { - throw new Error(`test failed: ${error.message}`); + throw new Error(`test failed: ${error.message}`) } } async function setupBible(url: string): Promise { try { - const startTime = performance.now(); - const outPath = `./${Math.random().toString(36).substring(2)}`; + const startTime = performance.now() + const outPath = `./delete/${Math.random().toString(36).substring(2)}` + + await importFolder(url, outPath) + await createDirs(outPath) + await populate(outPath) + + const endTime = performance.now() + const timeTaken = endTime - startTime + console.log( + `Finished In ${(timeTaken / 1000).toFixed( + 2 + )} seconds @ ${new Date().toLocaleString('en-US', { + timeZone: 'America/Los_Angeles' + })}}` + ) + } catch (error) { + console.error(`Error setting up Bible: ${error.message}`) + } +} + +async function importFolder(url: string, outPath: string) { + try { + const downloadPath = `./bibles/${Math.random() + .toString(36) + .substring(2)}.zip` + const downloadLink: string = await getDownloadLink(url) + + await downloadZip(downloadLink, downloadPath) + await unzip(outPath, downloadPath) + await fs.promises.unlink(downloadPath) + } catch (error) { + throw new Error(`importFolder failed: ${error.message}`) + } +} + +async function unzip(outPath: string, downloadPath: string): Promise { + try { + await new Promise((resolve, reject) => { + fs.createReadStream(downloadPath) + .pipe(unzipFile.Extract({ path: outPath })) + .on('error', reject) + .on('finish', resolve) + }) + } catch (error) { + throw new Error(`unzip failed: ${error.message}`) + } +} + +async function downloadZip(url: string, downloadPath: string): Promise { + try { + const response: Response = await fetch(url) + const blob: Blob = await response.blob() + const buffer = Buffer.from(await (blob as any).arrayBuffer()) + await fs.writeFileSync(downloadPath, buffer) + } catch (error) { + throw new Error(`download failed: ${error.message}`) + } +} +async function getDownloadLink(url: string): Promise { + try { + const initialResponse = await fetch(url) + const initialData = await initialResponse.text() + const $1 = cheerio.load(initialData) + const downloadResponse = await fetch( + BASE_URL + $1('.list-group-item > a').attr('href') + ) + + const downloadData = await downloadResponse.text() + const $2 = cheerio.load(downloadData) + const href: string = BASE_URL + $2('#download_button').attr('href') + + return href + } catch (error) { + throw new Error(`getDownloadLink failed: ${error.message}`) + } +} + +async function createDirs(outPath: string): Promise { + const bibleInfo = await getInfo(outPath) + const books = await booksInfo(outPath) + const filteredBooks = getHighestChapters(books) + + for (const book of filteredBooks) { + for (let i = 1; i <= Number(book.chapter); i++) { + const dir = `./bibles/${bibleInfo.id}/books/${formatBookName( + book.name + )}/chapters/${i}/verses` + + fs.mkdirSync(dir, { recursive: true }) + } + } +} + +function getHighestChapters(data: types.DataItem[]): types.DataItem[] { + return data.reduce((acc: types.DataItem[], curr: types.DataItem) => { + const existing = acc.find((item) => item.name === curr.name) + if (existing) { + if (Number(existing.chapter) < Number(curr.chapter)) { + existing.chapter = curr.chapter + existing.verses = curr.verses + } + } else { + acc.push(curr) + } + return acc + }, []) +} + +async function populate(outPath: string): Promise { + const biblesPath = path.join('./bibles', 'bibles.json') + const data = JSON.parse(await fs.promises.readFile(biblesPath, 'utf8')) + const bibleInfo: types.versionInfo = await getInfo(outPath) + + if (data.some((bible: types.versionInfo) => bible.id === bibleInfo.id)) { + await deleteFolder(outPath) + throw new Error('Already imported bible') + } + + data.push(bibleInfo) + await fs.promises.writeFile(biblesPath, JSON.stringify(data, null, 2)) + + const biblePath = path.join('./bibles', bibleInfo.id) + await fs.promises.mkdir(biblePath, { recursive: true }) + await fs.promises.writeFile( + path.join(biblePath, `${bibleInfo.id}.json`), + JSON.stringify(bibleInfo, null, 2) + ) + + const contents: types.ContentItem[] = await getContent(outPath) + await processContents(bibleInfo, contents) + + await deleteFolder(outPath) +} + +async function processContents( + bibleInfo: types.versionInfo, + contents: types.ContentItem[] +): Promise { + for (const content of contents) { + const sanitizedBookName = sanitizeBookName(content.book) + const bookPath = path.join( + './bibles', + bibleInfo.id, + 'books', + sanitizedBookName + ) + await fs.promises.mkdir(path.join(bookPath, 'chapters'), { + recursive: true + }) + + const chapterPath = path.join( + bookPath, + 'chapters', + `${content.chapter}.json` + ) + await fs.promises.writeFile( + chapterPath, + JSON.stringify({ data: content.verses }) + ) + + for (const verse of content.verses) { + const versePath = path.join( + bookPath, + 'chapters', + content.chapter, + 'verses', + `${verse.verse}.json` + ) + await fs.promises.mkdir(path.dirname(versePath), { recursive: true }) + await fs.promises.writeFile( + versePath, + JSON.stringify({ verse: verse.verse, text: verse.text }) + ) + } + } +} + +function sanitizeBookName(bookName: string): string { + return bookName + .toLowerCase() + .replaceAll(' ', '') + .replace(/first/i, '1') + .replace(/second/i, '2') + .replace(/third/i, '3') +} + +async function getContent(outPath: string): Promise { + const bookInfos = await booksInfo(outPath) + const usxData = await parseUSX(outPath) + return bookInfos + .map((bookInfo) => ({ + book: sanitizeBookName(bookInfo.name), + chapter: bookInfo.chapter, + verses: usxData.filter( + (verse) => + verse.book === sanitizeBookName(bookInfo.name) && + verse.chapter === bookInfo.chapter + ) + })) + .filter((content) => content.verses.length > 0) +} + +async function booksInfo(outPath: string) { + try { + // Asynchronously read the metadata and versification data + const [metadata, versificationData] = await Promise.all([ + fs.promises.readFile(path.join(outPath, 'metadata.xml'), 'utf8'), + fs.promises.readFile( + path.join(outPath, 'release', 'versification.vrs'), + 'utf8' + ) + ]) - await Importer.importFolder(url, outPath); - await Directory.createDirs(outPath); - await Content.populate(outPath); + const $ = cheerio.load(metadata) + const lines = versificationData.replace(/\r\n/g, '\n').split('\n') + const result = [] - const endTime = performance.now(); - const timeTaken = endTime - startTime; - console.log(`Finished In ${(timeTaken / 1000).toFixed(2)} seconds @ ${new Date().toLocaleString('en-US', {timeZone: 'America/Los_Angeles'})}}`); + for (const line of lines) { + if (line.includes('=') || line.includes('#')) continue + + const parts = line.split(' ') + for (const part of parts) { + if (part && part.includes(':')) { + const [bookCode, verseInfo] = part.split(':') + const chapter = verseInfo.split('-')[0] // Assuming verseInfo format is "chapter-verse" + const sanitizedBookCode = sanitizeBookName(bookCode) + + const bookName = $(`name[id="book-${sanitizedBookCode}"] > short`) + .first() + .text() + if (bookName) { + result.push({ name: bookName, chapter, verses: verseInfo }) + } + } + } + } + + return result.filter(({ name, verses }) => name && verses) } catch (error) { - console.error(`Error setting up Bible: ${error.message}`); + console.error('Failed to load book info:', error) + return [] + } +} + +async function readFolder(currentDir: string): Promise { + let files: string[] = [] + + const entries = await fs.promises.readdir(currentDir, { + withFileTypes: true + }) + + for (const entry of entries) { + const fullPath: string = path.join(currentDir, entry.name) + + if (entry.isDirectory()) { + files = files.concat(await readFolder(fullPath)) + } else { + files.push(fullPath) + } } + + return files +} + +async function parseUSX(folder: string): Promise { + let array: types.Verse[] = [] + const infoFile = fs.readFileSync(path.join(folder, 'metadata.xml')) + const $I = cheerio.load(infoFile) + + const files = await readFolder(folder) + const usxFiles = files.filter((path) => path.endsWith('.usx')) + + for (const file of usxFiles) { + let verses + const usxData = await fs.promises.readFile(file) + const $ = cheerio.load(usxData, { xmlMode: true }) + + const sid = $('*').filter(function () { + return $(this).attr('sid') !== undefined + }) + + if (sid.length > 0) { + let xmlString = fs.readFileSync(file).toString() + let parser = new DOMParser() + let xmlDoc = parser.parseFromString(xmlString, 'text/xml') + let tags = xmlDoc.getElementsByTagName('verse') + verses = Array.from(tags) + .map((verse) => { + if ((verse as any).hasAttribute('sid')) { + let chapterVerse = (verse as any).getAttribute('sid').split(' ')[1] + let [chapter, verseNumber] = chapterVerse.split(':') + let textContent = '' + let nextSibling = (verse as any).nextSibling + while (nextSibling && nextSibling.nodeName !== 'verse') { + textContent += nextSibling.textContent + nextSibling = nextSibling.nextSibling + } + return { + book: $I( + `name[id="book-${path + .basename(file) + .replace('.usx', '') + .toLowerCase()}"] > short` + ) + .first() + .text() + .replace(/first/i, '1') + .replace(/second/i, '2') + .replace(/third/i, '3'), + chapter: chapter, + verse: verseNumber, + text: textContent + .trim() + .split(`\n`) + [textContent.trim().split(`\n`).length - 1].trim() + } + } + }) + .filter((a) => a) + .filter((a) => a.verse && a.text) + } else { + verses = $('para').map(function () { + let chapterNumber = $(this).prevAll('chapter').first().attr('number') + return $(this) + .find('verse') + .map(function () { + let verseNumber = $(this).attr('number') + let verseText = ($(this)[0] as any)?.nextSibling?.nodeValue?.trim() + return { + book: $I( + `name[id="book-${path + .basename(file) + .replace('.usx', '') + .toLowerCase()}"] > short` + ) + .first() + .text() + .replace(/first/i, '1') + .replace(/second/i, '2') + .replace(/third/i, '3'), + chapter: chapterNumber, + verse: verseNumber, + text: verseText.trim() + } + }) + .get() + }) + } + + array = [...array, ...verses] + } + + return array } + +async function getInfo(outPath: string): Promise { + const infoFile = await fs.promises.readFile(outPath + '/metadata.xml') + const $ = cheerio.load(infoFile) + return { + id: `${$('ldml').text() || $('language > iso').text()}-${ + $('abbreviationLocal').first().text().toLowerCase() || + $('abbreviation').first().text().toLowerCase() + }`, + version: $('name').first().text(), + description: $('description').first().text(), + scope: $('scope').text(), + language: { + name: $('language > name').text(), + code: $('language > iso').text(), + level: $('audience').text() + }, + country: { + name: $('country > name').text(), + code: $('country > iso').text() + }, + numeralSystem: $('numerals').text(), + script: $('script').text(), + archivist: $('archivistName').text(), + copyright: $( + 'copyright > fullStatement > statementContent > p > strong' + ).text(), + localVersionName: $('nameLocal').first().text(), + localVersionAbbreviation: $('abbreviationLocal').first().text() + } +} + +/** + * Formats the book name for directory creation. + * + * @param {string} bookName - The book name to format. + * @returns {string} - The formatted book name. + */ +function formatBookName(bookName: string): string { + return bookName + .toLowerCase() + .replaceAll(' ', '') + .replace(/first/i, '1') + .replace(/second/i, '2') + .replace(/third/i, '3') +} + +const deleteFolder = (path: string) => { + if (fs.existsSync(path)) { + fs.readdirSync(path).forEach((file) => { + const curPath = path + '/' + file + if (fs.lstatSync(curPath).isDirectory()) { + deleteFolder(curPath) + } else { + fs.unlinkSync(curPath) + } + }) + fs.rmdirSync(path) + } +} + +async function cleanup() { + await fs.promises.rmdir(path.join(__dirname, '../delete'), { + recursive: true + }) +} + +process.on('SIGINT', cleanup) +process.on('SIGTERM', cleanup) + +process.on('exit', (code) => { + cleanup() +}) diff --git a/src/processing/content/contentGetter.ts b/src/processing/content/contentGetter.ts deleted file mode 100644 index ad1ba40..0000000 --- a/src/processing/content/contentGetter.ts +++ /dev/null @@ -1,97 +0,0 @@ -import * as global from 'src/types'; -import * as types from 'src/types'; -import cheerio from 'cheerio'; -import fs from 'fs'; - -import usxParser from '../usxParser'; - -export async function booksInfo(outPath: string) { - const infoFile = fs.readFileSync(outPath + '/metadata.xml'); - const $ = cheerio.load(infoFile); - const data = fs.readFileSync(outPath + '/release/versification.vrs'); - - const result = []; - - const lines = data.toString().replace(/\r\n/g, '\n').split('\n'); - - for (var i = 0; i < lines.length; i++) { - if (lines[i].includes('=') || lines[i].includes('#')) {continue;} - const parts = lines[i].split(' '); - - for (var x = 0; x < parts.length; x++) { - if (parts[i] !== undefined) {if (!parts[i].includes(':')) {continue;}} - - result.push({ - name: $( - `name[id="book-${parts[0] - .toLowerCase() - .replace(/first/i, '1') - .replace(/second/i, '2') - .replace(/third/i, '3')}"] > short` - ) - .first() - .text(), - chapter: parts[x].split(':')[0], - verses: parts[x].split(':')[1], - }); - } - } - - return result.filter((a) => a.name !== '' && a.verses); -} - -export async function getInfo(outPath: string): Promise { - const infoFile = await fs.promises.readFile(outPath + '/metadata.xml'); - const $ = cheerio.load(infoFile); - return { - id: `${$('ldml').text() || $('language > iso').text()}-${ - $('abbreviationLocal').first().text().toLowerCase() || - $('abbreviation').first().text().toLowerCase() - }`, - version: $('name').first().text(), - description: $('description').first().text(), - scope: $('scope').text(), - language: { - name: $('language > name').text(), - code: $('language > iso').text(), - level: $('audience').text(), - }, - country: { - name: $('country > name').text(), - code: $('country > iso').text(), - }, - numeralSystem: $('numerals').text(), - script: $('script').text(), - archivist: $('archivistName').text(), - copyright: $( - 'copyright > fullStatement > statementContent > p > strong' - ).text(), - localVersionName: $('nameLocal').first().text(), - localVersionAbbreviation: $('abbreviationLocal').first().text(), - }; -} - -export async function getContent(outPath: string) { - const arr = await booksInfo(outPath); - const usx = await usxParser.parseUSX(outPath); - const array = []; - - for (var i = 0; i < arr.length; i++) { - array.push({ - book: arr[i].name - .replace(/first/i, '1') - .replace(/second/i, '2') - .replace(/third/i, '3'), - chapter: arr[i].chapter, - verses: usx.filter( - (a: any) => - a.book == - arr[i].name - .replace(/first/i, '1') - .replace(/second/i, '2') - .replace(/third/i, '3') && a.chapter == arr[i].chapter - ), - }); - } - return array.filter((a) => a.verses.length > 0); -} diff --git a/src/processing/content/contentPopulator.ts b/src/processing/content/contentPopulator.ts deleted file mode 100644 index 780829b..0000000 --- a/src/processing/content/contentPopulator.ts +++ /dev/null @@ -1,58 +0,0 @@ -import * as global from 'src/types'; -import * as types from 'src/types'; -import fs from 'fs'; - -import Content from '.'; -import Files from '../../../src/data/files'; - -export default async function populate(outPath: string): Promise { - const data = JSON.parse(fs.readFileSync('./bibles/bibles.json', 'utf8')); - - const bibleInfo: global.versionInfo = await Content.getInfo(outPath); - - if (data.some((bible: global.versionInfo) => bible.id === bibleInfo.id)) { - await Files.deleteFolder(outPath); - throw new Error('Already imported bible'); - } - - data.push(bibleInfo); - fs.writeFileSync('./bibles/bibles.json', JSON.stringify(data, null)); - fs.writeFileSync( - `./bibles/${bibleInfo.id}/${bibleInfo.id}.json`, - JSON.stringify(bibleInfo, null) - ); - - const contents: types.ContentItem[] = await Content.getContent(outPath); - - for (let i = 0; i < contents.length; i++) { - const chapterPath = `./bibles/${bibleInfo.id}/books/${contents[i]?.book - .toLowerCase() - .replaceAll(' ', '') - .replace(/first/i, '1') - .replace(/second/i, '2') - .replace(/third/i, '3')}/chapters/${contents[i].chapter}.json`; - fs.writeFileSync( - chapterPath, - JSON.stringify({ data: contents[i].verses.map((a) => a) }) - ); - for (let x = 0; x < contents[i].verses.length; x++) { - const versePath = `./bibles/${bibleInfo.id}/books/${contents[i].book - .toLowerCase() - .replaceAll(' ', '') - .replace(/first/i, '1') - .replace(/second/i, '2') - .replace(/third/i, '3')}/chapters/${contents[i].chapter}/verses/${ - contents[i].verses[x].verse - }.json`; - fs.writeFileSync( - versePath, - JSON.stringify({ - verse: contents[i].verses[x].verse, - text: contents[i].verses[x].text, - }) - ); - } - } - - await Files.deleteFolder(outPath); -} diff --git a/src/processing/content/index.ts b/src/processing/content/index.ts deleted file mode 100644 index 2397de9..0000000 --- a/src/processing/content/index.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { booksInfo, getContent, getInfo } from './contentGetter'; -import populate from './contentPopulator'; - -export default class Content { - static booksInfo = booksInfo; - static getInfo = getInfo; - static getContent = getContent; - static populate = populate; -} diff --git a/src/processing/directory/directoryCreator.ts b/src/processing/directory/directoryCreator.ts deleted file mode 100644 index 4e811f7..0000000 --- a/src/processing/directory/directoryCreator.ts +++ /dev/null @@ -1,41 +0,0 @@ -import fs from 'fs'; - -import Directory from '.'; -import Content from '../content'; - -/** - * Creates directories for Bible books and chapters. - * - * @param {string} outPath - The output path for the directories. - * @returns {Promise} - */ -export default async function createDirs(outPath: string): Promise { - const bibleInfo = await Content.getInfo(outPath); - const books = await Content.booksInfo(outPath); - const filteredBooks = Directory.getHighestChapters(books); - - for (const book of filteredBooks) { - for (let i = 1; i <= Number(book.chapter); i++) { - const dir = `./bibles/${bibleInfo.id}/books/${formatBookName( - book.name - )}/chapters/${i}/verses`; - - fs.mkdirSync(dir, { recursive: true }); - } - } -} - -/** - * Formats the book name for directory creation. - * - * @param {string} bookName - The book name to format. - * @returns {string} - The formatted book name. - */ -function formatBookName(bookName: string): string { - return bookName - .toLowerCase() - .replaceAll(' ', '') - .replace(/first/i, '1') - .replace(/second/i, '2') - .replace(/third/i, '3'); -} diff --git a/src/processing/directory/directoryReader.ts b/src/processing/directory/directoryReader.ts deleted file mode 100644 index 4040706..0000000 --- a/src/processing/directory/directoryReader.ts +++ /dev/null @@ -1,38 +0,0 @@ -import fs from 'fs'; -import path from 'path'; -import { DataItem } from 'src/types'; - -export async function readFolder(currentDir: string): Promise { - let files: string[] = []; - - const entries = await fs.promises.readdir(currentDir, { - withFileTypes: true, - }); - - for (const entry of entries) { - const fullPath: string = path.join(currentDir, entry.name); - - if (entry.isDirectory()) { - files = files.concat(await readFolder(fullPath)); - } else { - files.push(fullPath); - } - } - - return files; -} - -export function getHighestChapters(data: DataItem[]): DataItem[] { - return data.reduce((acc: DataItem[], curr: DataItem) => { - const existing = acc.find((item) => item.name === curr.name); - if (existing) { - if (Number(existing.chapter) < Number(curr.chapter)) { - existing.chapter = curr.chapter; - existing.verses = curr.verses; - } - } else { - acc.push(curr); - } - return acc; - }, []); -} diff --git a/src/processing/directory/index.ts b/src/processing/directory/index.ts deleted file mode 100644 index 376d597..0000000 --- a/src/processing/directory/index.ts +++ /dev/null @@ -1,8 +0,0 @@ -import createDirs from './directoryCreator'; -import { getHighestChapters, readFolder } from './directoryReader'; - -export default class Directory { - static readFolder = readFolder; - static getHighestChapters = getHighestChapters; - static createDirs = createDirs; -} diff --git a/src/processing/usxParser/index.ts b/src/processing/usxParser/index.ts deleted file mode 100644 index 8a57f09..0000000 --- a/src/processing/usxParser/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -import parseUSX from './usxContentParser'; - -export default class usxParser { - static parseUSX = parseUSX; -} diff --git a/src/processing/usxParser/usxContentParser.ts b/src/processing/usxParser/usxContentParser.ts deleted file mode 100644 index d5476f8..0000000 --- a/src/processing/usxParser/usxContentParser.ts +++ /dev/null @@ -1,99 +0,0 @@ -//rewrite -import * as types from 'src/types'; -import cheerio from 'cheerio'; -import fs from 'fs'; -import path from 'path'; -import { DOMParser } from 'xmldom'; - -import Directory from '../directory'; - -export default async function parseUSX(folder: string): Promise { - let array: types.Verse[] = []; - const infoFile = fs.readFileSync(path.join(folder, 'metadata.xml')); - const $I = cheerio.load(infoFile); - - const files = await Directory.readFolder(folder); - const usxFiles = files.filter((path) => path.endsWith('.usx')); - - for (const file of usxFiles) { - let verses; - const usxData = await fs.promises.readFile(file); - const $ = cheerio.load(usxData, { xmlMode: true }); - - const sid = $('*').filter(function() { - return $(this).attr('sid') !== undefined; - }); - - if (sid.length > 0) { - let xmlString = fs.readFileSync(file).toString(); - let parser = new DOMParser(); - let xmlDoc = parser.parseFromString(xmlString, 'text/xml'); - let tags = xmlDoc.getElementsByTagName('verse'); - verses = Array.from(tags) - .map((verse) => { - if ((verse as any).hasAttribute('sid')) { - let chapterVerse = (verse as any).getAttribute('sid').split(' ')[1]; - let [chapter, verseNumber] = chapterVerse.split(':'); - let textContent = ''; - let nextSibling = (verse as any).nextSibling; - while (nextSibling && nextSibling.nodeName !== 'verse') { - textContent += nextSibling.textContent; - nextSibling = nextSibling.nextSibling; - } - return { - book: $I( - `name[id="book-${path - .basename(file) - .replace('.usx', '') - .toLowerCase()}"] > short` - ) - .first() - .text() - .replace(/first/i, '1') - .replace(/second/i, '2') - .replace(/third/i, '3'), - chapter: chapter, - verse: verseNumber, - text: textContent - .trim() - .split(`\n`) - [textContent.trim().split(`\n`).length - 1].trim(), - }; - } - }) - .filter((a) => a) - .filter((a) => a.verse && a.text); - } else { - verses = $('para').map(function() { - let chapterNumber = $(this).prevAll('chapter').first().attr('number'); - return $(this) - .find('verse') - .map(function() { - let verseNumber = $(this).attr('number'); - let verseText = ($(this)[0] as any)?.nextSibling?.nodeValue?.trim(); - return { - book: $I( - `name[id="book-${path - .basename(file) - .replace('.usx', '') - .toLowerCase()}"] > short` - ) - .first() - .text() - .replace(/first/i, '1') - .replace(/second/i, '2') - .replace(/third/i, '3'), - chapter: chapterNumber, - verse: verseNumber, - text: verseText.trim(), - }; - }) - .get(); - }); - } - - array = [...array, ...verses]; - } - - return array; -} diff --git a/src/types.ts b/src/types.ts index 7de75ce..15bfecb 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,42 +1,40 @@ export interface Verse { - book: string | undefined - chapter: string | undefined - verse: string | undefined - text: string | null - } - - export interface DataItem { + book: string | undefined + chapter: string | undefined + verse: string | undefined + text: string | null +} + +export interface DataItem { + name: string + chapter: string + verses: string +} + +export interface ContentItem { + book: string + chapter: string + verses: Verse[] +} + +export interface versionInfo { + id: string + version: string + description: string + scope: string + language: { name: string - chapter: string - verses: string + code: string + level: string } - - export interface ContentItem { - book: string - chapter: string - verses: Verse[] - } - - export interface versionInfo { - id: string - version: string - description: string - scope: string - language: { - name: string - code: string - level: string - } - country: { - name: string - code: string - } - numeralSystem: string - script: string - archivist: string - copyright: string - localVersionName: string - localVersionAbbreviation: string + country: { + name: string + code: string } - - \ No newline at end of file + numeralSystem: string + script: string + archivist: string + copyright: string + localVersionName: string + localVersionAbbreviation: string +}