185 lines
7.8 KiB
TypeScript
185 lines
7.8 KiB
TypeScript
import axios from 'axios';
|
|
import { randomUUID } from 'crypto';
|
|
import * as fs from 'fs';
|
|
import { parse } from 'node-html-parser';
|
|
import config from '../config/config.js';
|
|
|
|
export namespace DownloaderProvider {
    /** Base URL of the site whose chapter pages are scraped. */
    const bibleserver_endpoint = 'https://www.bibleserver.com';

    /** One verse as it is stored in a book's JSON file. */
    interface VerseRecord {
        translation: string;
        book: string;
        chapter: number;
        /** Verse number, or -1 when the verse label on the page could not be parsed. */
        verse: number;
        text: string;
    }

    /**
     * Extracts `error.response.status` from a caught value, if present.
     *
     * @param error - Whatever was thrown by the HTTP request.
     * @returns The numeric HTTP status, or `undefined` when the value carries none.
     */
    function http_status_of(error: unknown): number | undefined {
        if (typeof error !== 'object' || error === null) {
            return undefined;
        }
        const response = (error as { response?: { status?: unknown } | null }).response;
        const status = response?.status;
        return typeof status === 'number' ? status : undefined;
    }

    /**
     * Downloads every book listed in `<data dir>/books.json` for one translation
     * by scraping chapter pages, and stores each book as
     * `<data dir>/<translation>/<book>.json`.
     *
     * The download runs in the background after {@link Downloader.start}; progress
     * is observable through {@link Downloader.getStatus} and {@link Downloader.logs}.
     */
    export class Downloader {
        private readonly _translation: string;
        private readonly _logs: string[] = [];
        private _status: 'idle' | 'running' | 'completed' | 'error' = 'idle';
        private _books: string[] = [];
        private _data_directory: string = 'data';
        private readonly _operation_id: string;
        private _time_start: Date | undefined;
        private _time_end: Date | undefined;

        /** Translation identifier this downloader was created for. */
        public get translation(): string {
            return this._translation;
        }

        /** Random identifier generated for this downloader instance. */
        public get uuid(): string {
            return this._operation_id;
        }

        /** All log entries produced so far (the live internal array, not a copy). */
        public get logs(): string[] {
            return this._logs;
        }

        /** Time of the most recent `start()` call, or `undefined` if never started. */
        public get time_start(): Date | undefined {
            return this._time_start;
        }

        /**
         * Time the most recent run finished (successfully or with an error), or
         * `undefined` while idle or still running.
         */
        public get time_end(): Date | undefined {
            return this._time_end;
        }

        /**
         * @param translation_in - Translation identifier; used both as the URL path
         *   segment and as the output directory name.
         */
        constructor(translation_in: string) {
            this._translation = translation_in;
            this._operation_id = randomUUID();
            this._status = 'idle';
            this.log(`Initialized downloader for translation: ${this._translation}`);
            this.log(`Using bibleserver endpoint: ${bibleserver_endpoint}`);
        }

        /**
         * Loads the book list, ensures the output directory exists and kicks off
         * the background download. Returns immediately; it never throws — failures
         * are logged and reflected as status `'error'`.
         */
        public start(): void {
            this._time_start = new Date();
            // A previous run's end time must not leak into this one.
            this._time_end = undefined;
            this.log(`Starting download for translation: ${this._translation}`);
            this._status = 'running';

            // Load the list of books from the reference file.
            try {
                const book_list_file = `${this._data_directory}/books.json`;
                this.log(`Loading book list from ${book_list_file}`);
                const parsed: unknown = JSON.parse(fs.readFileSync(book_list_file, 'utf-8'));
                const books = (parsed as { books?: unknown } | null)?.books;
                if (!Array.isArray(books) || !books.every((entry): entry is string => typeof entry === 'string')) {
                    throw new Error(`${book_list_file} must contain a "books" array of strings`);
                }
                this._books = books;
                this.log(`Loaded ${this._books.length} books to download`);
            } catch (error) {
                this.fail(`Error loading book list: ${error}`);
                return;
            }

            // Create the per-translation output directory.
            try {
                const target_directory = this.translation_directory();
                if (!fs.existsSync(target_directory)) {
                    fs.mkdirSync(target_directory, { recursive: true });
                    this.log(`Created directory: ${target_directory}`);
                }
            } catch (error) {
                this.fail(`Error creating translation directory: ${error}`);
                return;
            }

            // Fire and forget: fetch_all_books handles its own errors.
            void this.fetch_all_books();
        }

        /** Current lifecycle state of the downloader. */
        public getStatus(): 'idle' | 'running' | 'completed' | 'error' {
            return this._status;
        }

        /** Resolves after `ms` milliseconds. */
        private delay(ms: number): Promise<void> {
            return new Promise(resolve => setTimeout(resolve, ms));
        }

        /** Directory that holds this translation's book files. */
        private translation_directory(): string {
            return `${this._data_directory}/${this._translation}`;
        }

        /** Path of the JSON file for `book` inside the translation directory. */
        private book_file(book: string): string {
            return `${this.translation_directory()}/${book}.json`;
        }

        /** Logs `message` and marks the run as failed, recording when it ended. */
        private fail(message: string): void {
            this.log(message);
            this._status = 'error';
            this._time_end = new Date();
        }

        /**
         * Expands a verse label from the page into the verse numbers it covers.
         *
         * `"3-5"` yields `[3, 4, 5]`; a single label yields that one number. A label
         * that cannot be interpreted (missing, non-numeric, zero, reversed range)
         * yields `[-1]` so the verse text is still kept.
         */
        private static verse_numbers(label: string | undefined): number[] {
            if (label?.includes('-')) {
                const [first_raw = '', last_raw = ''] = label.split('-');
                const first = parseInt(first_raw, 10);
                const last = parseInt(last_raw, 10);
                if (Number.isNaN(first) || Number.isNaN(last) || last < first) {
                    return [-1];
                }
                return Array.from({ length: last - first + 1 }, (_, offset) => first + offset);
            }
            return [Number(label) || -1];
        }

        /**
         * Downloads all books in `_books` sequentially. Books whose output file
         * already exists are skipped. Chapters are requested one after another,
         * starting at 1, until a chapter comes back empty. Any error ends the run
         * with status `'error'`.
         */
        private async fetch_all_books(): Promise<void> {
            try {
                for (const book of this._books) {
                    const output_file = this.book_file(book);
                    if (fs.existsSync(output_file)) {
                        this.log(`Book ${book} already exists for ${this._translation}, skipping`);
                        continue;
                    }
                    this.log(`Fetching book: ${book} for translation: ${this._translation}`);

                    const book_content: VerseRecord[] = [];
                    for (let chapter = 1; ; chapter++) {
                        this.log(`Trying to fetch chapter ${chapter} of book ${book}`);
                        // Throttle requests so the remote server is not flooded.
                        // NOTE(review): downloadDelay is presumably in seconds — confirm in config.
                        await this.delay(config.downloadDelay * 1000);
                        const chapter_content = await this.fetch_chapter(book, chapter);
                        if (chapter_content.length === 0) {
                            // An empty chapter marks the end of the book.
                            break;
                        }
                        book_content.push(...chapter_content);
                    }

                    fs.writeFileSync(output_file, JSON.stringify(book_content, null, 4));
                    this.log(`Saved book ${book} for translation ${this._translation} with ${book_content.length} verses`);
                    this.log(`Completed fetching book: ${book} for translation: ${this._translation}`);
                }
                this._time_end = new Date();
                this._status = 'completed';
            } catch (error) {
                this.fail(`Error fetching books: ${error}`);
            }
        }

        /**
         * Fetches and parses a single chapter page.
         *
         * @param book - Book name as used in the URL and in the page heading.
         * @param chapter - 1-based chapter number.
         * @returns The chapter's verses, or an empty array when the page heading is
         *   not exactly `"<book> <chapter>"` or the request returns HTTP 404.
         * @throws Re-throws any request or parsing error other than HTTP 404.
         */
        private async fetch_chapter(book: string, chapter: number): Promise<VerseRecord[]> {
            const bibleserver_url = `${bibleserver_endpoint}/${this._translation}/${book}${chapter}`;
            this.log(`Fetching URL from: ${bibleserver_url}`);
            try {
                const response = await axios.get(bibleserver_url);
                this.log("Received response");
                const root = parse(response.data);

                // The heading must match the requested chapter; a mismatch is treated
                // as "chapter does not exist" (e.g. the site redirected elsewhere).
                const heading = root.querySelector('.chapter')?.querySelector('header')?.querySelector('h1')?.text.trim() || '';
                if (heading !== `${book} ${chapter}`) {
                    return [];
                }

                const verses: VerseRecord[] = [];
                for (const verse_element of root.querySelectorAll('.verse')) {
                    // Strip footnotes before reading the text.
                    verse_element.querySelectorAll('.footnote').forEach(footnote => footnote.remove());

                    // childNodes may be empty, so the index access is guarded as well.
                    const verse_label = verse_element.querySelector('.verse-number')?.childNodes[0]?.text;
                    const text = verse_element.querySelector('.verse-content')?.childNodes[0]?.text || '';

                    // A ranged label ("3-5") stores the same text under every verse it covers.
                    for (const verse of Downloader.verse_numbers(verse_label)) {
                        verses.push({
                            translation: this._translation,
                            book: book,
                            chapter: chapter,
                            verse: verse,
                            text: text,
                        });
                    }
                }
                return verses;
            } catch (error: unknown) {
                if (http_status_of(error) === 404) {
                    // The translation has no such page.
                    this.log(`Translation ${this._translation} does not exist for book ${book}`);
                    return [];
                }
                this.log(`Error fetching ${this._translation} ${book} ${chapter}- ${error}`);
                this.log(`URL: ${bibleserver_url}`);
                throw error;
            }
        }

        /** Appends a timestamped, instance-tagged entry to the log and echoes it to the console. */
        private log(message: string): void {
            const log_entry = `[Downloader<${this._operation_id}>:${this._translation}][${new Date().toISOString()}] ${message}`;
            this._logs.push(log_entry);
            console.log(log_entry);
        }
    }
}