mirror of
https://gitlab.com/walljm/dynamicbible.git
synced 2025-07-23 15:30:14 -04:00
190 lines
9.1 KiB
C#
190 lines
9.1 KiB
C#
using System.Globalization;
using DynamicBibleUtility.Geolocation;
|
|
|
|
namespace DynamicBible.DataPreparation.Models.Geolocation;
|
|
|
|
/// <summary>
|
|
/// A parser for Biblical location information from openbible.info.
|
|
/// Specifically, this class only handles parsing the tab-delimited
|
|
/// file from https://www.openbible.info/geo/data/merged.txt.
|
|
/// Parsing this specific file was chosen over the KMZ/KML files
|
|
/// because it was a much simpler way to get the relevant data.
|
|
/// The "merged" version of the raw data was chosen over the
|
|
/// "unmerged" version because it seemed to contain more data.
|
|
/// The data parsed by this parser is licensed under the
|
|
/// Creative Commons Attribution license (see
|
|
/// https://www.openbible.info/geo/ and
|
|
/// https://creativecommons.org/licenses/by/4.0/).
|
|
/// Strong's numbers are added using <see cref="LocationNameToStrongsNumberLookup" />.
|
|
/// </summary>
|
|
public class OpenBibleDotInfoLocationParser
|
|
{
|
|
/// <summary>
/// Parses Biblical location information from the specified file.
/// The first two lines of the file (a comment line and a header line) are skipped,
/// as are blank lines.  Every other line must contain at least five tab-separated
/// fields: the Biblical location name, the name of the actual location that the
/// coordinates reference (ignored), the latitude, the longitude, and a CSV list
/// of verse references.
/// </summary>
/// <param name="filepath">The relative or absolute path to the file to parse.</param>
/// <returns>Location references parsed from the file; never null.</returns>
/// <exception cref="FormatException">
/// Thrown if a non-blank data line contains fewer tab-separated fields than required.
/// </exception>
/// <exception cref="Exception">Thrown if any other file access or parsing error occurs.</exception>
public static IEnumerable<BibleLocationReference> Parse(string filepath)
{
    // DEFINE THE LAYOUT OF THE FILE.
    // The first line contains a comment and the second line contains a header,
    // so those two lines can be skipped.
    const int FIRST_GEOLOCATION_LINE_INDEX = 2;
    const char FIELD_SEPARATOR = '\t';
    const int BIBLE_LOCATION_NAME_FIELD_INDEX = 0;
    // Field index 1 holds the name of the actual location that the geographic
    // coordinates reference.  Since the primary purpose of this data is to
    // cross-reference the Biblical text, that second name is intentionally
    // not parsed but could be added later if desired.
    const int LATITUDE_INDEX = 2;
    const int LONGITUDE_INDEX = 3;
    const int VERSE_REFERENCES_INDEX = 4;
    const int MINIMUM_FIELD_COUNT = VERSE_REFERENCES_INDEX + 1;

    // READ THE ENTIRE GEOLOCATION DATA FILE.
    // It is small enough to store completely in memory.
    var geolocation_input_file_lines = File.ReadAllLines(filepath);

    // PARSE EACH LINE OF GEOLOCATION DATA.
    var locations = new List<BibleLocationReference>();
    for (var line_index = FIRST_GEOLOCATION_LINE_INDEX; line_index < geolocation_input_file_lines.Length; ++line_index)
    {
        // SKIP BLANK LINES.
        // A stray blank line (for example at the end of the file) carries no data
        // and should not abort the whole parse.
        var current_geolocation_line = geolocation_input_file_lines[line_index];
        if (string.IsNullOrWhiteSpace(current_geolocation_line))
        {
            continue;
        }

        // SPLIT THE LINE INTO SEPARATE FIELDS.
        // Since empty fields sometimes exist in the actual data, empty entries are still included
        // from the string splitting operation to make indexing into known fields simpler.
        var current_line_fields = current_geolocation_line.Split(FIELD_SEPARATOR);
        if (current_line_fields.Length < MINIMUM_FIELD_COUNT)
        {
            // Fail with the offending line number rather than an opaque index error.
            throw new FormatException(
                $"Line {line_index + 1} of '{filepath}' has {current_line_fields.Length} tab-separated field(s); " +
                $"at least {MINIMUM_FIELD_COUNT} are required.");
        }

        // PARSE THE LOCATION INFORMATION FROM CURRENT LINE.
        // The name is converted to lowercase to make it easier to do case insensitive
        // lookups.  The invariant culture is used so that the resulting lookup key
        // does not depend on the culture of the machine running the parser.
        var location = new BibleLocationReference
        {
            Name = current_line_fields[BIBLE_LOCATION_NAME_FIELD_INDEX].ToLowerInvariant(),
            Latitude = ParseGeographicCoordinate(current_line_fields[LATITUDE_INDEX]),
            Longitude = ParseGeographicCoordinate(current_line_fields[LONGITUDE_INDEX]),
            VerseReferences = ParseVerseReferences(current_line_fields[VERSE_REFERENCES_INDEX]),
        };

        // ADD STRONG'S NUMBERS REFERENCES TO THE LOCATION.
        location.StrongsNumbers = LocationNameToStrongsNumberLookup.GetStrongsNumbers(location.Name);

        // ADD THE LOCATION INFORMATION FOR RETURNING.
        locations.Add(location);
    }

    return locations;
}
|
|
|
|
/// <summary>
/// Attempts to parse a geographic coordinate from the specified string.
/// This method is necessary because not all coordinate values in the file
/// are necessarily completely numeric.
/// </summary>
/// <param name="coordinate_string">The coordinate string to parse.</param>
/// <returns>
/// The geographic coordinate, if successfully parsed.
/// Null only if no geographic coordinate exists (an exception is thrown
/// if an unexpected parsing error occurs in order to provide easier visibility
/// into such errors).
/// </returns>
/// <exception cref="FormatException">
/// Thrown if the text remaining after the marker characters are removed is not a valid number.
/// </exception>
/// <exception cref="Exception">Thrown if any other parsing error occurs.</exception>
private static double? ParseGeographicCoordinate(string coordinate_string)
{
    // REMOVE ANY KNOWN NON-NUMERIC CHARACTERS FROM THE STRING.
    // These characters are used to mark cases where the location isn't known
    // or the location may not be exact.  That exactness isn't super important
    // in this context, so the "marker" characters are ignored.
    var numeric_coordinate_string = coordinate_string.Trim('?', '~', '<', '>');

    // A '-' is used sometimes to indicate no location.  Since a '-' could also
    // be used for a negative geographic coordinate, it can only be safely
    // trimmed from the end.
    numeric_coordinate_string = numeric_coordinate_string.TrimEnd('-');

    // CHECK IF A COORDINATE EXISTS.
    if (string.IsNullOrWhiteSpace(numeric_coordinate_string))
    {
        // Not all locations in this file may have geographic coordinates.
        return null;
    }

    // PARSE THE NUMERIC COORDINATE.
    // The data file always uses '.' as the decimal separator, so the invariant
    // culture must be used.  Parsing with the current culture would silently
    // misread values (e.g. "31.7" as 317) on machines whose culture treats '.'
    // as a digit-group separator.  The number styles match the defaults of
    // double.Parse(string) so that only the culture handling changes.
    return double.Parse(
        numeric_coordinate_string,
        NumberStyles.Float | NumberStyles.AllowThousands,
        CultureInfo.InvariantCulture);
}
|
|
|
|
/// <summary>
/// Attempts to parse Bible verse references from a CSV list.
/// Each reference has the form "&lt;book&gt; &lt;chapter&gt;:&lt;verse&gt;", where the book
/// itself may contain a space (for books with numbers at the start).
/// </summary>
/// <param name="verse_references_csv_list">
/// A CSV list of Bible verse references.
/// Each reference is expected to be separated by a comma; whitespace around
/// each reference is ignored.
/// </param>
/// <returns>The verse references from the string; an empty list if no verse references exist in the string.</returns>
/// <exception cref="FormatException">
/// Thrown if a reference has no space between the book and the chapter/verse numbers,
/// has no chapter/verse pair separated by a colon, or has a non-numeric chapter or verse.
/// </exception>
/// <exception cref="Exception">Thrown if any other parsing error occurs.</exception>
private static IEnumerable<BibleVerseReference> ParseVerseReferences(string verse_references_csv_list)
{
    const char REFERENCE_SEPARATOR = ',';
    const char BOOK_SEPARATOR = ' ';
    const char CHAPTER_VERSE_SEPARATOR = ':';
    const int CHAPTER_INDEX = 0;
    const int VERSE_INDEX = 1;
    const int MINIMUM_NUMBER_COUNT = VERSE_INDEX + 1;

    // GET THE INDIVIDUAL VERSE REFERENCE STRINGS FROM THE LIST.
    // Entries are trimmed so that a stray space before or after a reference
    // cannot be mistaken for the space that separates the book from the numbers.
    var verse_reference_strings = verse_references_csv_list.Split(
        REFERENCE_SEPARATOR,
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

    // PARSE EACH VERSE REFERENCE.
    var verse_references = new List<BibleVerseReference>(verse_reference_strings.Length);
    foreach (var verse_reference_string in verse_reference_strings)
    {
        // PARSE THE BOOK.
        // A single space separates the book from the chapter and verse numbers.
        // Since there might be an additional space before that separator
        // for books with numbers at the start, the LAST space is the separator
        // and a split can't be used directly.
        var index_of_space_after_book = verse_reference_string.LastIndexOf(BOOK_SEPARATOR);
        if (index_of_space_after_book < 0)
        {
            throw new FormatException(
                $"Verse reference '{verse_reference_string}' has no space separating the book from the chapter and verse.");
        }
        var book = new BibleBook(verse_reference_string[..index_of_space_after_book]);

        // SPLIT THE CHAPTER AND VERSE NUMBERS.
        // A single colon separates the chapter and verse numbers.
        var chapter_and_verse_numbers = verse_reference_string[(index_of_space_after_book + 1)..].Split(
            CHAPTER_VERSE_SEPARATOR,
            StringSplitOptions.RemoveEmptyEntries);
        if (chapter_and_verse_numbers.Length < MINIMUM_NUMBER_COUNT)
        {
            throw new FormatException(
                $"Verse reference '{verse_reference_string}' does not contain a chapter and verse separated by '{CHAPTER_VERSE_SEPARATOR}'.");
        }

        // PARSE THE CHAPTER AND VERSE.
        // The invariant culture keeps number parsing independent of the machine's culture.
        var chapter = int.Parse(chapter_and_verse_numbers[CHAPTER_INDEX], CultureInfo.InvariantCulture);
        var verse = int.Parse(chapter_and_verse_numbers[VERSE_INDEX], CultureInfo.InvariantCulture);

        // ADD THE PARSED BIBLE VERSE REFERENCE.
        verse_references.Add(new BibleVerseReference
        {
            Book = book,
            Chapter = chapter,
            Verse = verse,
        });
    }

    return verse_references;
}
|
|
} |