using System.Globalization;
using DynamicBibleUtility.Geolocation;

namespace DynamicBible.DataPreparation.Models.Geolocation;

/// <summary>
/// A parser for Biblical location information from openbible.info.
/// Specifically, this class only handles parsing the tab-delimited
/// file from https://www.openbible.info/geo/data/merged.txt.
/// Parsing this specific file was chosen over the KMZ/KML files
/// because it was a much simpler way to get the relevant data.
/// The "merged" version of the raw data was chosen over the
/// "unmerged" version because it seemed to contain more data.
/// The data parsed by this parser is licensed under the
/// Creative Commons Attribution license (see
/// https://www.openbible.info/geo/ and
/// https://creativecommons.org/licenses/by/4.0/).
/// Strong's numbers are added using <see cref="LocationNameToStrongsNumberLookup"/>.
/// </summary>
public class OpenBibleDotInfoLocationParser
{
    /// <summary>Separates the chapter number from the verse number in a verse reference.</summary>
    private static readonly char[] VerseSeparator = [ ':' ];

    /// <summary>
    /// Separators between individual verse references in a CSV list.
    /// The comma-and-space form is listed first so that it is preferred over the bare comma.
    /// </summary>
    private static readonly string[] VerseReferenceSeparators = [ ", ", "," ];

    /// <summary>
    /// Parses Biblical location information from the specified file.
    /// </summary>
    /// <param name="filepath">The relative or absolute path to the file to parse.</param>
    /// <returns>Location references parsed from the file; never null.</returns>
    /// <exception cref="FormatException">Thrown if a numeric field cannot be parsed.</exception>
    /// <exception cref="IndexOutOfRangeException">Thrown if a data line has fewer fields than expected.</exception>
    public static IEnumerable<BibleLocationReference> Parse(string filepath)
    {
        // READ THE ENTIRE GEOLOCATION DATA FILE.
        // It is small enough to store completely in memory.
        var geolocation_input_file_lines = File.ReadAllLines(filepath);

        // PARSE EACH LINE OF GEOLOCATION DATA.
        // The first line contains a comment and the second line contains a header,
        // so those two lines can be skipped.
        const int FIRST_GEOLOCATION_LINE_INDEX = 2;
        var locations = new List<BibleLocationReference>();
        for (var line_index = FIRST_GEOLOCATION_LINE_INDEX; line_index < geolocation_input_file_lines.Length; ++line_index)
        {
            // SPLIT THE LINE INTO SEPARATE FIELDS.
            // Since empty fields sometimes exist in the actual data, empty entries are still included
            // from the string splitting operation to make indexing into known fields simpler.
            const char FIELD_SEPARATOR = '\t';
            var current_geolocation_line = geolocation_input_file_lines[line_index];
            var current_line_fields = current_geolocation_line.Split(FIELD_SEPARATOR);

            // PARSE THE LOCATION INFORMATION FROM CURRENT LINE.
            var location = new BibleLocationReference();

            // The name is converted to lowercase to make it easier to do
            // case insensitive lookups. The invariant culture is used so that
            // the resulting lookup key does not depend on the culture of the
            // machine running the parser.
            const int BIBLE_LOCATION_NAME_FIELD_INDEX = 0;
            location.Name = current_line_fields[BIBLE_LOCATION_NAME_FIELD_INDEX].ToLowerInvariant();

            // The file contains both the name of the location as mentioned in the Bible (parsed above)
            // and a second name (field index 1) for the actual location that the geographic coordinates
            // reference. Since the geographic coordinates are expected to be close enough to the Biblical
            // name and the primary purpose of this data is to cross-reference the Biblical text,
            // this second name is silently ignored but could be added later if desired.

            const int LATITUDE_INDEX = 2;
            var latitude_string = current_line_fields[LATITUDE_INDEX];
            location.Latitude = ParseGeographicCoordinate(latitude_string);

            const int LONGITUDE_INDEX = 3;
            var longitude_string = current_line_fields[LONGITUDE_INDEX];
            location.Longitude = ParseGeographicCoordinate(longitude_string);

            const int VERSE_REFERENCES_INDEX = 4;
            var verse_references_csv_list = current_line_fields[VERSE_REFERENCES_INDEX];
            location.VerseReferences = ParseVerseReferences(verse_references_csv_list);

            // ADD STRONG'S NUMBERS REFERENCES TO THE LOCATION.
            location.StrongsNumbers = LocationNameToStrongsNumberLookup.GetStrongsNumbers(location.Name);

            // ADD THE LOCATION INFORMATION FOR RETURNING.
            locations.Add(location);
        }

        return locations;
    }

    /// <summary>
    /// Attempts to parse a geographic coordinate from the specified string.
    /// This method is necessary because not all coordinate values in the file
    /// are necessarily completely numeric.
    /// </summary>
    /// <param name="coordinate_string">The coordinate string to parse.</param>
    /// <returns>
    /// The geographic coordinate, if successfully parsed.
    /// Null only if no geographic coordinate exists (an exception is thrown
    /// if an unexpected parsing error occurs in order to provide easier visibility
    /// into such errors).
    /// </returns>
    /// <exception cref="FormatException">Thrown if the remaining text is not a valid number.</exception>
    private static double? ParseGeographicCoordinate(string coordinate_string)
    {
        // REMOVE ANY KNOWN NON-NUMERIC CHARACTERS FROM THE STRING.
        // These characters are used to mark cases where the location isn't known
        // or the location may not be exact. That exactness isn't super important
        // in this context, so the "marker" characters are ignored.
        var numeric_coordinate_string = coordinate_string.Trim('?', '~', '<', '>');
        // A '-' is used sometimes to indicate no location. Since a '-' could also
        // be used for a negative geographic coordinate, it can only be safely
        // trimmed from the end.
        numeric_coordinate_string = numeric_coordinate_string.TrimEnd('-');

        // CHECK IF A COORDINATE EXISTS.
        if (string.IsNullOrWhiteSpace(numeric_coordinate_string))
        {
            // Not all locations in this file may have geographic coordinates.
            return null;
        }

        // PARSE THE NUMERIC COORDINATE.
        // The invariant culture is specified so that the result does not depend
        // on the decimal separator of the current culture.
        return double.Parse(numeric_coordinate_string, CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Attempts to parse Bible verse references from a CSV list.
    /// </summary>
    /// <param name="verse_references_csv_list">
    /// A CSV list of Bible verse references.
    /// Each reference is expected to be separated by a comma OR a comma and single space.
    /// </param>
    /// <returns>The verse references from the string; an empty list if no verse references exist in the string.</returns>
    /// <exception cref="FormatException">Thrown if a chapter or verse number is not a valid integer.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown if a reference contains no space after the book name.</exception>
    /// <exception cref="IndexOutOfRangeException">Thrown if a reference lacks a chapter or verse number.</exception>
    private static IEnumerable<BibleVerseReference> ParseVerseReferences(string verse_references_csv_list)
    {
        // GET THE INDIVIDUAL VERSE REFERENCE STRINGS FROM THE LIST.
        var verse_reference_strings = verse_references_csv_list.Split(
            VerseReferenceSeparators,
            StringSplitOptions.RemoveEmptyEntries
        );

        // PARSE EACH VERSE REFERENCE.
        var verse_references = new List<BibleVerseReference>();
        foreach (var verse_reference_string in verse_reference_strings)
        {
            // PARSE THE BOOK.
            // A single space separates the book from the chapter and verse numbers.
            // Since there might be an additional space before that separator
            // for books with numbers at the start, a split can't be used directly.
            const int BOOK_START_INDEX = 0;
            var index_of_space_after_book = verse_reference_string.LastIndexOf(' ');
            var book_string_length_in_characters = index_of_space_after_book;
            var book_string = verse_reference_string.Substring(BOOK_START_INDEX, book_string_length_in_characters);
            var book = new BibleBook(book_string);

            // PARSE THE CHAPTER.
            // A single colon separates the chapter and verse numbers.
            var chapter_start_index = index_of_space_after_book + 1;
            var chapter_and_verse_string = verse_reference_string.Substring(chapter_start_index);
            var chapter_and_verse_numbers = chapter_and_verse_string.Split(
                VerseSeparator,
                StringSplitOptions.RemoveEmptyEntries
            );
            const int CHAPTER_INDEX = 0;
            var chapter_string = chapter_and_verse_numbers[CHAPTER_INDEX];
            var chapter = int.Parse(chapter_string, CultureInfo.InvariantCulture);

            // PARSE THE VERSE.
            const int VERSE_INDEX = 1;
            var verse_string = chapter_and_verse_numbers[VERSE_INDEX];
            var verse = int.Parse(verse_string, CultureInfo.InvariantCulture);

            // ADD THE PARSED BIBLE VERSE REFERENCE.
            var verse_reference = new BibleVerseReference
            {
                Book = book,
                Chapter = chapter,
                Verse = verse,
            };
            verse_references.Add(verse_reference);
        }

        return verse_references;
    }
}