import axios from 'axios';
import { randomUUID } from 'crypto';
import * as fs from 'fs';
import { parse } from 'node-html-parser';
import config from '../config/config.js';

export namespace DownloaderProvider {
    const bibleserver_endpoint = 'https://www.bibleserver.com';

    /** Lifecycle states reported by `Downloader.getStatus()`. */
    export type DownloadStatus = 'idle' | 'running' | 'completed' | 'error';

    /** One verse record as it is serialized into a per-book JSON file. */
    interface Verse {
        translation: string;
        book: string;
        chapter: number;
        verse: number;
        text: string;
    }

    /**
     * Downloads every book of one translation from the bibleserver endpoint,
     * chapter by chapter, and stores each book as
     * `data/<translation>/<book>.json`.
     *
     * Progress is reported through `logs` (also echoed to the console) and
     * `getStatus()`. `start()` returns immediately; the download itself
     * continues asynchronously in the background.
     */
    export class Downloader {
        private _translation: string;
        private _logs: string[] = [];
        private _status: DownloadStatus = 'idle';
        private _books: string[] = [];
        private _data_directory: string = 'data';
        private _operation_id: string = '';
        private _time_start: Date | undefined;
        private _time_end: Date | undefined;

        /** Translation identifier passed to the constructor. */
        public get translation(): string {
            return this._translation;
        }

        /** Random UUID identifying this download operation. */
        public get uuid(): string {
            return this._operation_id;
        }

        /** All log lines produced so far (live array, not a copy). */
        public get logs(): string[] {
            return this._logs;
        }

        /** Time `start()` was called, or `undefined` before that. */
        public get time_start(): Date | undefined {
            return this._time_start;
        }

        /** Time the download completed successfully; `undefined` otherwise. */
        public get time_end(): Date | undefined {
            return this._time_end;
        }

        /**
         * @param translation_in Translation identifier; used both as a URL path
         *   segment and as the output directory name.
         */
        // The `private` parameter property is kept so the instance shape stays
        // exactly as before, although only `_translation` is ever read.
        constructor(private translation_in: string) {
            this._translation = translation_in;
            this._operation_id = randomUUID();
            this._status = 'idle';
            this.log(`Initialized downloader for translation: ${this._translation}`);
            this.log(`Using bibleserver endpoint: ${bibleserver_endpoint}`);
        }

        /**
         * Loads the book list, ensures the output directory exists and kicks
         * off the background download.
         *
         * Never throws: setup failures are logged and reflected as status
         * `'error'`.
         */
        public start(): void {
            this._time_start = new Date();
            this.log(`Starting download for translation: ${this._translation}`);
            this._status = 'running';

            // Load the list of books to download from the reference file.
            try {
                const book_list_file = `${this._data_directory}/books.json`;
                this.log(`Loading book list from ${book_list_file}`);
                const parsed: unknown = JSON.parse(fs.readFileSync(book_list_file, 'utf-8'));
                const books = (parsed as { books?: unknown } | null)?.books;
                // Validate the shape up front so a malformed file fails here,
                // with a clear message, instead of somewhere in the fetch loop.
                if (!Array.isArray(books) || !books.every((b): b is string => typeof b === 'string')) {
                    throw new Error(`${book_list_file} must contain a "books" array of strings`);
                }
                this._books = books;
                this.log(`Loaded ${this._books.length} books to download`);
            } catch (error) {
                this.log(`Error loading book list: ${error}`);
                this._status = 'error';
                return;
            }

            // Create the per-translation output directory if it is missing.
            try {
                const translation_directory = `${this._data_directory}/${this._translation}`;
                if (!fs.existsSync(translation_directory)) {
                    fs.mkdirSync(translation_directory, { recursive: true });
                    this.log(`Created directory: ${translation_directory}`);
                }
            } catch (error) {
                // The message keeps its original embedded line break.
                this.log(`Error creating translation directory: \n${error}`);
                this._status = 'error';
                return;
            }

            // Intentionally not awaited: fetch_all_books handles its own
            // errors and reports the outcome through the status field.
            void this.fetch_all_books();
        }

        /** Current lifecycle state of this download. */
        public getStatus(): DownloadStatus {
            return this._status;
        }

        /** Resolves after `ms` milliseconds. */
        private delay(ms: number): Promise<void> {
            return new Promise(resolve => setTimeout(resolve, ms));
        }

        /**
         * Downloads every book in the loaded list that does not yet have an
         * output file. Chapters are requested sequentially starting at 1 until
         * a chapter comes back empty; the collected verses are then written as
         * one JSON file per book.
         *
         * Sets status to `'completed'` (and records the end time) on success,
         * or `'error'` if any fetch or write fails.
         */
        private async fetch_all_books(): Promise<void> {
            try {
                for (const book of this._books) {
                    const book_file = `${this._data_directory}/${this._translation}/${book}.json`;
                    if (fs.existsSync(book_file)) {
                        this.log(`Book ${book} already exists for ${this._translation}, skipping`);
                        continue;
                    }

                    this.log(`Fetching book: ${book} for translation: ${this._translation}`);
                    const book_content: Verse[] = [];
                    for (let chapter = 1; ; chapter++) {
                        this.log(`Trying to fetch chapter ${chapter} of book ${book}`);
                        // Throttle requests so the remote server is not flooded.
                        await this.delay(config.downloadDelay * 1000);
                        const chapter_content = await this.fetch_chapter(book, chapter);
                        if (chapter_content.length === 0) {
                            // An empty chapter marks the end of the book.
                            break;
                        }
                        book_content.push(...chapter_content);
                    }

                    fs.writeFileSync(book_file, JSON.stringify(book_content, null, 4));
                    this.log(`Saved book ${book} for translation ${this._translation} with ${book_content.length} verses`);
                    this.log(`Completed fetching book: ${book} for translation: ${this._translation}`);
                }
                this._time_end = new Date();
                this._status = 'completed';
            } catch (error) {
                this.log(`Error fetching books: ${error}`);
                this._status = 'error';
            }
        }

        /**
         * Fetches one chapter page and extracts its verses.
         *
         * @param book Book identifier as used in the bibleserver URL.
         * @param chapter 1-based chapter number.
         * @returns The verses of the chapter, or an empty array when the page
         *   heading does not match `"<book> <chapter>"` or the server answers
         *   with HTTP 404.
         * @throws Re-throws any other request or parsing error after logging it.
         */
        private async fetch_chapter(book: string, chapter: number): Promise<Verse[]> {
            const bibleserver_url = `${bibleserver_endpoint}/${this._translation}/${book}${chapter}`;
            this.log(`Fetching URL from: ${bibleserver_url}`);
            try {
                const response = await axios.get(bibleserver_url);
                this.log('Received response');
                const root = parse(response.data);

                // Compare the page heading with the requested chapter so that
                // a redirect to a different page is not mistaken for content.
                const book_verify_name = root
                    .querySelector('.chapter')
                    ?.querySelector('header')
                    ?.querySelector('h1')
                    ?.text.trim() ?? '';
                if (book_verify_name !== `${book} ${chapter}`) {
                    // Chapter does not exist.
                    return [];
                }

                const result_array: Verse[] = [];
                for (const verse_element of root.querySelectorAll('.verse')) {
                    // Strip footnotes before reading any text.
                    verse_element.querySelectorAll('.footnote').forEach(fn => fn.remove());

                    // `childNodes[0]` may be missing on an empty element; the
                    // optional chain turns that into the fallback values below
                    // instead of a TypeError that would abort the download.
                    const verse_raw = verse_element.querySelector('.verse-number')?.childNodes[0]?.text;
                    const text = verse_element.querySelector('.verse-content')?.childNodes[0]?.text ?? '';

                    if (verse_raw?.includes('-')) {
                        // A range such as "3-5" yields one record per verse
                        // number, all sharing the same text.
                        const [first_raw = '', last_raw = ''] = verse_raw.split('-');
                        const first = Number.parseInt(first_raw, 10);
                        const last = Number.parseInt(last_raw, 10);
                        if (Number.isNaN(first) || Number.isNaN(last)) {
                            this.log(`Could not parse verse range "${verse_raw}" in ${book} ${chapter}, skipping`);
                            continue;
                        }
                        for (let v = first; v <= last; v++) {
                            result_array.push({ translation: this._translation, book, chapter, verse: v, text });
                        }
                    } else {
                        result_array.push({
                            translation: this._translation,
                            book,
                            chapter,
                            // `||` on purpose: NaN and 0 both map to the -1 sentinel.
                            verse: Number(verse_raw) || -1,
                            text,
                        });
                    }
                }
                return result_array;
            } catch (error: unknown) {
                // Duck-typed on purpose: any error carrying `response.status`
                // is inspected, regardless of its class.
                const status = (error as { response?: { status?: unknown } } | null | undefined)?.response?.status;
                if (status === 404) {
                    // Translation does not exist.
                    this.log(`Translation ${this._translation} does not exist for book ${book}`);
                    return [];
                }
                this.log(`Error fetching ${this._translation} ${book} ${chapter}- ${error}`);
                this.log(`URL: ${bibleserver_url}`);
                throw error;
            }
        }

        /** Appends a timestamped, tagged entry to the log and prints it. */
        private log(message: string): void {
            const log_entry = `[Downloader<${this._operation_id}>:${this._translation}][${new Date().toISOString()}] ${message}`;
            this._logs.push(log_entry);
            console.log(log_entry);
        }
    }
}