// Files
// bible-api/api-gateway/src/providers/downloader.provider.ts
// 2026-01-08 19:10:51 +01:00
//
// 185 lines
// 7.8 KiB
// TypeScript

import axios from 'axios';
import { randomUUID } from 'crypto';
import * as fs from 'fs';
import { parse } from 'node-html-parser';
import config from '../config/config.js';
/**
 * Downloads one Bible translation from bibleserver.com and stores it as one
 * JSON file per book below the local data directory.
 */
export namespace DownloaderProvider {
  /** Base URL every chapter page is requested from. */
  const bibleserver_endpoint = 'https://www.bibleserver.com';

  /** Lifecycle state of a {@link Downloader}. */
  export type DownloaderStatus = 'idle' | 'running' | 'completed' | 'error';

  /** One entry of the per-book JSON output. */
  interface Verse {
    translation: string;
    book: string;
    chapter: number;
    /** Verse number, or -1 when no number could be read from the page. */
    verse: number;
    text: string;
  }

  /**
   * A single download operation for one translation.
   *
   * Create an instance, call {@link Downloader.start}, then poll
   * {@link Downloader.getStatus} / {@link Downloader.logs} for progress.
   */
  export class Downloader {
    private _translation: string;
    private _logs: string[] = [];
    private _status: DownloaderStatus = 'idle';
    private _books: string[] = [];
    private _data_directory: string = 'data';
    private _operation_id: string = '';
    private _time_start: Date | undefined;
    private _time_end: Date | undefined;

    /** Translation identifier this downloader was created for. */
    public get translation(): string {
      return this._translation;
    }

    /** Random UUID identifying this download operation. */
    public get uuid(): string {
      return this._operation_id;
    }

    /** All log lines produced so far, oldest first. */
    public get logs(): string[] {
      return this._logs;
    }

    /** Time of the most recent `start()` call, or `undefined` if never started. */
    public get time_start(): Date | undefined {
      return this._time_start;
    }

    /**
     * Time the most recent run finished (successfully or with an error), or
     * `undefined` while it has not finished yet.
     */
    public get time_end(): Date | undefined {
      return this._time_end;
    }

    /**
     * @param translation_in Translation identifier; used both as URL path
     *   segment and as directory name below the data directory.
     */
    constructor(translation_in: string) {
      this._translation = translation_in;
      this._operation_id = randomUUID();
      this._status = 'idle';
      this.log(`Initialized downloader for translation: ${this._translation}`);
      this.log(`Using bibleserver endpoint: ${bibleserver_endpoint}`);
    }

    /**
     * Validates the setup and kicks off the download in the background.
     *
     * Loads the book list from `<data>/books.json`, makes sure the translation
     * directory exists and then starts fetching without waiting for it. The
     * call returns immediately; any failure is reported through the status
     * (`'error'`) and the log, never by throwing.
     */
    public start(): void {
      // A second loop would write the same files and double the request rate.
      if (this._status === 'running') {
        this.log(`Download for translation ${this._translation} is already running, ignoring start request`);
        return;
      }

      this._time_start = new Date();
      this._time_end = undefined;
      this.log(`Starting download for translation: ${this._translation}`);
      this._status = 'running';

      // The translation becomes a directory name; refuse anything that could
      // leave the data directory.
      if (!Downloader.is_safe_directory_name(this._translation)) {
        this.fail(`Invalid translation identifier: "${this._translation}"`);
        return;
      }

      // load the list of books from the reference file
      try {
        const book_list_file = `${this._data_directory}/books.json`;
        this.log(`Loading book list from ${book_list_file}`);
        const parsed: unknown = JSON.parse(fs.readFileSync(book_list_file, 'utf-8'));
        const books = (parsed as { books?: unknown } | null)?.books;
        if (!Array.isArray(books) || !books.every((entry): entry is string => typeof entry === 'string')) {
          throw new Error('"books" must be an array of strings');
        }
        this._books = books;
        this.log(`Loaded ${this._books.length} books to download`);
      } catch (error) {
        this.fail(`Error loading book list: ${error}`);
        return;
      }

      // create directory
      try {
        const translation_directory = `${this._data_directory}/${this._translation}`;
        if (!fs.existsSync(translation_directory)) {
          fs.mkdirSync(translation_directory, { recursive: true });
          this.log(`Created directory: ${translation_directory}`);
        }
      } catch (error) {
        this.fail(`Error creating translation directory: ${error}`);
        return;
      }

      // Intentionally not awaited: fetch_all_books handles its own errors.
      void this.fetch_all_books();
    }

    /** Current lifecycle state of this download operation. */
    public getStatus(): DownloaderStatus {
      return this._status;
    }

    /** Resolves after `ms` milliseconds. */
    private delay(ms: number): Promise<void> {
      return new Promise((resolve) => setTimeout(resolve, ms));
    }

    /** Logs `message` and marks the operation as finished with an error. */
    private fail(message: string): void {
      this.log(message);
      this._status = 'error';
      this._time_end = new Date();
    }

    /**
     * Downloads every book of the loaded book list, chapter by chapter, and
     * writes each book to `<data>/<translation>/<book>.json`.
     *
     * Books whose file already exists are skipped. A book ends at the first
     * chapter that yields no verses. Never rejects: errors end the operation
     * with status `'error'`.
     */
    private async fetch_all_books(): Promise<void> {
      try {
        for (const book of this._books) {
          const book_file = `${this._data_directory}/${this._translation}/${book}.json`;
          if (fs.existsSync(book_file)) {
            this.log(`Book ${book} already exists for ${this._translation}, skipping`);
            continue;
          }
          this.log(`Fetching book: ${book} for translation: ${this._translation}`);

          const book_content: Verse[] = [];
          for (let chapter = 1; ; chapter++) {
            this.log(`Trying to fetch chapter ${chapter} of book ${book}`);
            await this.delay(config.downloadDelay * 1000); // throttle requests to the remote server
            const chapter_content = await this.fetch_chapter(book, chapter);
            if (chapter_content.length === 0) {
              break;
            }
            book_content.push(...chapter_content);
          }

          fs.writeFileSync(book_file, JSON.stringify(book_content, null, 4));
          this.log(`Saved book ${book} for translation ${this._translation} with ${book_content.length} verses`);
          this.log(`Completed fetching book: ${book} for translation: ${this._translation}`);
        }
        this._time_end = new Date();
        this._status = 'completed';
      } catch (error) {
        this.fail(`Error fetching books: ${error}`);
      }
    }

    /**
     * Fetches one chapter page and extracts its verses.
     *
     * @returns The verses of the chapter, or an empty array when the page
     *   heading is not `"<book> <chapter>"` (the request ended up on another
     *   page) or the server answered 404.
     * @throws Re-throws any request or parsing error other than an HTTP 404.
     */
    private async fetch_chapter(book: string, chapter: number): Promise<Verse[]> {
      const bibleserver_url = `${bibleserver_endpoint}/${this._translation}/${book}${chapter}`;
      this.log(`Fetching URL from: ${bibleserver_url}`);
      try {
        const response = await axios.get(bibleserver_url);
        this.log('Received response');
        const root = parse(response.data);

        // verify the heading to detect a redirect to a different page
        const heading =
          root.querySelector('.chapter')?.querySelector('header')?.querySelector('h1')?.text.trim() ?? '';
        if (heading !== `${book} ${chapter}`) {
          // chapter does not exist, return empty list
          return [];
        }

        const verses: Verse[] = [];
        for (const verse_element of root.querySelectorAll('.verse')) {
          verse_element.querySelectorAll('.footnote').forEach((footnote) => footnote.remove()); // remove footnotes
          // Only the first child node is read; an element without children
          // yields undefined instead of throwing.
          const verse_raw = verse_element.querySelector('.verse-number')?.childNodes[0]?.text;
          const text = verse_element.querySelector('.verse-content')?.childNodes[0]?.text || '';
          for (const verse of Downloader.expand_verse_numbers(verse_raw)) {
            verses.push({ translation: this._translation, book, chapter, verse, text });
          }
        }
        return verses;
      } catch (error: unknown) {
        if (Downloader.http_status_of(error) === 404) {
          // translation does not exist
          this.log(`Translation ${this._translation} does not exist for book ${book}`);
          return [];
        }
        this.log(`Error fetching ${this._translation} ${book} ${chapter}- ${error}`);
        this.log(`URL: ${bibleserver_url}`);
        throw error;
      }
    }

    /**
     * Turns the raw verse label into the verse numbers it stands for.
     *
     * `"4"` gives `[4]`, a range such as `"2-3"` gives `[2, 3]`. A missing,
     * non-numeric or malformed label gives `[-1]` so the verse text is kept
     * rather than dropped.
     */
    private static expand_verse_numbers(verse_raw: string | undefined): number[] {
      if (verse_raw?.includes('-')) {
        const [first_raw = '', last_raw = ''] = verse_raw.split('-');
        const first = parseInt(first_raw, 10);
        const last = parseInt(last_raw, 10);
        if (!Number.isNaN(first) && !Number.isNaN(last) && first <= last) {
          return Array.from({ length: last - first + 1 }, (_, offset) => first + offset);
        }
      }
      return [Number(verse_raw) || -1];
    }

    /** Reads `error.response.status` from an unknown error value, if it is a number. */
    private static http_status_of(error: unknown): number | undefined {
      if (typeof error !== 'object' || error === null) {
        return undefined;
      }
      const response = (error as { response?: unknown }).response;
      if (typeof response !== 'object' || response === null) {
        return undefined;
      }
      const status = (response as { status?: unknown }).status;
      return typeof status === 'number' ? status : undefined;
    }

    /** True when `name` is non-empty and cannot address another directory. */
    private static is_safe_directory_name(name: string): boolean {
      return name.length > 0 && name !== '.' && name !== '..' && !name.includes('/') && !name.includes('\\');
    }

    /** Appends a line tagged with operation id, translation and timestamp to the log and stdout. */
    private log(message: string): void {
      const log_entry = `[Downloader<${this._operation_id}>:${this._translation}][${new Date().toISOString()}] ${message}`;
      this._logs.push(log_entry);
      console.log(log_entry);
    }
  }
}