mirror of
https://gitlab.com/walljm/dynamicbible.git
synced 2025-07-23 07:19:50 -04:00
924 lines
33 KiB
C#
924 lines
33 KiB
C#
using System.Xml;
|
|
using System.Xml.Linq;
|
|
using DynamicBible.DataPreparation.Models;
|
|
using DynamicBible.Schemas;
|
|
using JMW.Extensions.String;
|
|
using Microsoft.Extensions.Logging;
|
|
using Porter2Stemmer;
|
|
using Index = DynamicBible.DataPreparation.Models.Index;
|
|
|
|
namespace DynamicBible.DataPreparation;
|
|
|
|
public class BibleProcessor(ILogger<BibleProcessor>? logger = null)
|
|
{
|
|
// Stems that already have an IndexItem in idx (maintained by AddWordToIndex).
private readonly HashSet<string> stems = [];

// Lower-cased words that already have an entry in wordStemsIndex.
private readonly HashSet<string> words = [];

// Trimmed words, in their original casing, whose first character is in uppers.
private readonly HashSet<string> capitals = [];

// Trimmed words, in their original casing, whose first character is not in uppers.
private readonly HashSet<string> lowercase = [];

// Stem -> verse references; sorted and written out in chunks by CreateIndex.
private readonly Index idx = [];

// Word -> stem pairs; written to index/word_to_stem_idx.json by CreateIndex.
private readonly List<WordToStem> wordStemsIndex = [];

// Strong's number -> (lower-cased translated word -> cross reference), filled by ProcessText
// for books numbered below 40.
private readonly Dictionary<string, Dictionary<string, StrongCrossReference>> hebCrossRefs = new();

// Same shape as hebCrossRefs, filled by ProcessText for all remaining books.
private readonly Dictionary<string, Dictionary<string, StrongCrossReference>> grkCrossRefs = new();

// Lower-cased words that AddWordToIndex never indexes. A HashSet because it is probed once
// per word of the text; membership semantics are the same ordinal equality the list had.
private readonly HashSet<string> exclusions =
[
    "us", "these", "her", "saith", "shalt", "let", "do", "your", "we", "no", "go", "if", "at", "an", "so", "before", "also", "on", "had",
    "you", "there", "then", "up", "by", "upon", "were", "are", "this", "when", "thee", "their", "ye", "will", "as", "thy", "my", "me", "have",
    "from", "was", "but", "which", "thou", "all", "it", "with", "them", "him", "they", "is", "be", "not", "his", "i", "shall", "a", "for",
    "unto", "he", "in", "to", "that", "of", "and", "the",
];

// Punctuation stripped from both ends of a word before it is indexed.
private readonly char[] trims =
[
    '\'', ',', ':', ';', '"', '?', '.', '[', ']', '{', '}', '<', '>', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-', '_', '=', '+',
];

// ASCII capital letters; used to decide whether a word goes into capitals or lowercase.
private readonly HashSet<char> uppers =
    ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];

// Separator between book, chapter and verse in a reference string.
private readonly string referenceDelimiter = ":";

// Characters that end a sentence; CreateText only lower-cases the first word of a verse
// when the previous verse did not end with one of these.
private readonly HashSet<string> endPunc = [".", "?", "!"];
|
|
|
|
// One entry of books.json. CreateText fills Number and Chapters; the other
// properties keep their defaults in this file.
private sealed class BookRecord
{
    // Book number taken from the BIBLEBOOK element.
    public int Number { get; set; }

    public string ShortName { get; set; } = "";

    public string LongName { get; set; } = "";

    public int LastChapter { get; set; }

    // CreateText adds a leading 0 and then the last verse number of each chapter.
    public List<int> Chapters { get; set; } = new();
}
|
|
|
|
// A word paired with its stem, serialized into index/word_to_stem_idx.json.
// The one-letter property names are part of the serialized output, so they stay as they are.
private sealed class WordToStem
{
    // The lower-cased word as it appears in the text.
    public string w { get; set; } = string.Empty;

    // The stem produced for that word.
    public string s { get; set; } = string.Empty;
}
|
|
|
|
// The verses in which one Strong's number is translated by one particular word.
private sealed record StrongCrossReference
{
    // The lower-cased translated word; null until ProcessText assigns it.
    public string? Word { get; set; }

    // Distinct verse references where the translation occurs.
    public HashSet<string> Refs { get; set; } = new();
}
|
|
|
|
/// <summary>
/// Runs the whole preparation pass: builds the Strong's dictionary, the RMAC files and the
/// word index, then writes one JSON file per chapter, <c>books.json</c>, and the Hebrew and
/// Greek Strong's cross-reference sets. All output goes to paths relative to the current
/// working directory.
/// </summary>
/// <param name="dataPath">Directory that contains <c>sf_kjv_strongs_rev1b.xml</c>; also handed to the Strong's and RMAC generators.</param>
public void CreateText(string dataPath)
{
    // CreateDirectory does nothing for an existing directory and creates missing parents,
    // so "bibles" comes for free with "bibles/kjv_strongs".
    foreach (var directory in new[] { "bibles/kjv_strongs", "strongscr", "strongs", "index", "rmac" })
    {
        Directory.CreateDirectory(directory);
    }

    CreateStrongsDict(dataPath);
    CreateRmac(dataPath);
    CreateRmacCrossRefs(dataPath);

    var biblePath = Path.Combine(dataPath, "sf_kjv_strongs_rev1b.xml");
    var xmlBible = XML.GetData<XMLBIBLE>(biblePath);
    ArgumentNullException.ThrowIfNull(xmlBible);
    CreateIndex(xmlBible);

    // Update the lower/upper lists: a word that also occurs in lower case is not treated as
    // always-capitalised. Invariant casing keeps the result independent of the machine culture.
    foreach (var word in lowercase)
    {
        capitals.Remove(word[..1].ToUpperInvariant() + word[1..]);
    }

    // Iterate through the text and output it as JSON.
    var bookRecords = new List<BookRecord>();

    var xmlDoc = XDocument.Load(biblePath);
    ArgumentNullException.ThrowIfNull(xmlDoc.Root);

    foreach (var el in xmlDoc.Root.Elements("BIBLEBOOK"))
    {
        var bk = new Bible
        {
            BookNumber = Convert.ToInt32(el.FirstAttribute?.Value),
        };

        var br = new BookRecord
        {
            Number = bk.BookNumber,
        };

        bookRecords.Add(br);

        // Leading 0, presumably so that Chapters[n] lines up with chapter n — TODO confirm
        // against the consumer of books.json.
        br.Chapters.Add(0);

        // Elements() rather than Nodes(): a comment between chapters or verses would
        // otherwise fail the cast to XElement.
        foreach (var chn in el.Elements())
        {
            var ch = new Chapter { ChapterId = Convert.ToInt32(chn.FirstAttribute?.Value) };
            var last = 0;

            // Text of the last word emitted so far in this chapter; carried across verses.
            var lastVsW = string.Empty;

            foreach (var vs in chn.Elements())
            {
                var v = new Verse
                {
                    VerseId = Convert.ToInt32(vs.FirstAttribute?.Value),
                };
                last = v.VerseId;

                var location = $"{bk.BookNumber}{referenceDelimiter}{ch.ChapterId}{referenceDelimiter}{v.VerseId}";
                var first = true;

                // Nodes() on purpose: text nodes between the elements are part of the verse.
                foreach (var o in vs.Nodes())
                {
                    var wordNodes = ProcessText(o, location);

                    // Lower-case the first word of the verse when the previous verse did not
                    // end a sentence and the word is not known to be always capitalised.
                    if (first
                        && wordNodes.Count > 0
                        && lastVsW.Length > 0
                        && !endPunc.Contains(lastVsW[^1].ToString()))
                    {
                        var leading = wordNodes[0];
                        var fw = leading.Word.ParseToIndexOf(" ");
                        if (fw.Length > 0 && !capitals.Contains(fw))
                        {
                            leading.Word = leading.Word.Substring(0, 1).ToLowerInvariant() + leading.Word.Substring(1);
                        }
                    }

                    first = false;
                    v.Word.AddRange(wordNodes);
                    AttachLeadingPunctuation(v);

                    // A node can contribute no words (for example an empty style element);
                    // keep the previous value instead of failing on an empty list.
                    if (v.Word.Count > 0)
                    {
                        lastVsW = v.Word[v.Word.Count - 1].Word;
                    }
                }

                ch.Verses.Add(v);
            }

            bk.Chapters.Add(ch);
            br.Chapters.Add(last);
            File.WriteAllText(
                $"bibles/kjv_strongs/{bk.BookNumber}-{ch.ChapterId}.json",
                JSON.Serialize(ch).Replace(",\"s\":\"\"", "") // drop empty "s" members from the output
            );
        }

        logger?.LogInformation("Book: {BookNumber}", bk.BookNumber);
    }

    // finished.
    File.WriteAllText("books.json", JSON.Serialize(bookRecords));

    WriteCrossReferences(hebCrossRefs, "H", "heb");
    WriteCrossReferences(grkCrossRefs, "G", "grk");
}

// Moves punctuation that starts the last item of the verse onto the end of the item before it,
// and drops the last item when nothing but whitespace is left in it.
private static void AttachLeadingPunctuation(Verse v)
{
    if (v.Word.Count < 2)
    {
        return;
    }

    var curr = v.Word[v.Word.Count - 1];
    if (curr.Word.Length == 0 || !"?;:.,!-'".Contains(curr.Word[0]))
    {
        return;
    }

    var prev = v.Word[v.Word.Count - 2];
    if (curr.Word[0] is '-' or '\'')
    {
        // A hyphen or apostrophe joins everything up to the first space onto the previous word.
        var trimmed = curr.Word.Trim();
        var space = trimmed.IndexOf(' ');
        if (space >= 0)
        {
            prev.Word += trimmed.Substring(0, space);
            curr.Word = trimmed.Substring(space);
        }
        else
        {
            prev.Word += trimmed;
            curr.Word = "";
        }
    }
    else
    {
        // Any other mark: only the single punctuation character moves.
        prev.Word += curr.Word.Substring(0, 1);
        curr.Word = curr.Word.Substring(1).Trim();
    }

    if (curr.Word.Trim().Length == 0)
    {
        // curr is the last item, so remove by position rather than by equality.
        v.Word.RemoveAt(v.Word.Count - 1);
    }
}

// Writes one testament's cross references to strongscr/cr{testament}{set}.json.
// A set is flushed each time an id reaches a new hundred; whatever is left afterwards
// (possibly nothing) is written as set number last + 1.
private void WriteCrossReferences(
    Dictionary<string, Dictionary<string, StrongCrossReference>> crossRefs,
    string idPrefix,
    string testament)
{
    var batch = new List<Strongs>();
    var lastStrongs = 0;

    foreach (var (id, refs) in crossRefs.OrderBy(kvp => int.Parse(kvp.Key)))
    {
        batch.Add(
            new Strongs
            {
                Id = idPrefix + id,
                Testament = testament,
                StrongsReferences = refs
                    .Values
                    .Select(
                        cr => new StrongRef
                        {
                            Word = cr.Word,
                            BibleReferences = cr.Refs
                                .Select(reference => new BibleRef { Reference = reference })
                                .ToList(),
                        }
                    )
                    .OrderBy(o => o.Word)
                    .ToList(),
            }
        );

        var hundred = int.Parse(id) / 100;
        if (hundred <= lastStrongs)
        {
            continue;
        }

        lastStrongs = hundred;
        File.WriteAllText($"strongscr/cr{testament}{lastStrongs}.json", JSON.Serialize(batch));
        batch = [];

        logger?.LogInformation("Set: {lastStrongs}", lastStrongs);
    }

    File.WriteAllText($"strongscr/cr{testament}{lastStrongs + 1}.json", JSON.Serialize(batch));
    logger?.LogInformation("Set: {lastStrongs}", lastStrongs + 1);
}
|
|
|
|
/// <summary>
/// Converts one child node of a verse into text items and records Strong's cross references
/// for every <c>gr</c> element it encounters.
/// </summary>
/// <param name="o">An <see cref="XText"/>, a <c>gr</c> element, or a <c>style</c> element (whose children are processed recursively).</param>
/// <param name="location">Verse reference; the part before the first <c>referenceDelimiter</c> must be the book number.</param>
/// <returns>The text items produced by the node, in document order.</returns>
/// <exception cref="ArgumentNullException">A <c>gr</c> element has no attribute.</exception>
/// <exception cref="InvalidOperationException">The node is neither text nor a known element.</exception>
private List<Text> ProcessText(object o, string location)
{
    var textItems = new List<Text>();

    switch (o)
    {
        case XElement xmlElement when xmlElement.Name == "gr":
        {
            var strongsNumber = xmlElement.FirstAttribute?.Value;
            ArgumentNullException.ThrowIfNull(strongsNumber);

            var textValue = xmlElement.Value.Trim();
            var tv = textValue.ToLowerInvariant();

            // Doubled or trailing spaces must not produce empty numbers: an empty key would
            // later fail to parse when the cross references are sorted.
            var numbers = strongsNumber.Split(' ', StringSplitOptions.RemoveEmptyEntries);
            if (numbers.Length > 1)
            {
                // One line per element, not one line per number.
                File.AppendAllLines("errata.txt", ["Multiple strongs numbers found: " + location + ", " + tv]);
            }

            // Books numbered below 40 are recorded in the Hebrew table, the rest in the Greek one.
            var bookNumber = int.Parse(location.ParseToIndexOf(referenceDelimiter));
            var crossRefs = bookNumber < 40 ? hebCrossRefs : grkCrossRefs;

            foreach (var val in numbers)
            {
                var sn = val.Trim('*', ' ');
                if (sn.Length == 0)
                {
                    continue;
                }

                if (!crossRefs.TryGetValue(sn, out var byWord))
                {
                    byWord = new Dictionary<string, StrongCrossReference>();
                    crossRefs.Add(sn, byWord);
                }

                if (!byWord.TryGetValue(tv, out var entry))
                {
                    entry = new StrongCrossReference { Word = tv };
                    byWord.Add(tv, entry);
                }

                entry.Refs.Add(location);
            }

            // The item keeps the attribute value exactly as it appeared in the source.
            textItems.Add(new Text(textValue, strongsNumber));
            break;
        }

        case XElement xmlElement when string.Equals(xmlElement.Name.ToString(), "style", StringComparison.OrdinalIgnoreCase):
            foreach (var n in xmlElement.Nodes())
            {
                textItems.AddRange(ProcessText(n, location));
            }

            break;

        case XElement xmlElement:
            throw new InvalidOperationException($"Unknown Element '{xmlElement.Name}' at {location}");

        case XText xmlText:
            textItems.Add(new Text(xmlText.Value.Trim()));
            break;

        default:
            throw new InvalidOperationException($"Unknown Element at {location}");
    }

    return textItems;
}
|
|
|
|
/// <summary>
/// Builds the word index from the deserialized bible and writes it out: the stem list to
/// <c>word_list</c>, the word-to-stem table to <c>index/word_to_stem_idx.json</c>, and the
/// sorted index itself in files of up to 50 entries, each named after its last stem.
/// </summary>
/// <param name="xmlBible">The deserialized bible whose words are indexed.</param>
private void CreateIndex(XMLBIBLE xmlBible)
{
    // Indexing means visiting every word in the bible.
    PopulateIndex(xmlBible);

    File.WriteAllLines("word_list", stems);

    idx.Sort((left, right) => string.Compare(left.Word, right.Word, StringComparison.Ordinal));
    wordStemsIndex.Sort((left, right) => string.Compare(left.w, right.w, StringComparison.Ordinal));

    File.WriteAllText("index/word_to_stem_idx.json", JSON.Serialize(wordStemsIndex));

    var chunk = new Index();
    var lastPosition = idx.Count - 1;
    for (var position = 0; position <= lastPosition; position++)
    {
        var entry = idx[position];
        chunk.Add(entry);

        // Keep collecting until the chunk holds 50 entries or the index is exhausted.
        var chunkComplete = position % 50 == 49 || position == lastPosition;
        if (!chunkComplete)
        {
            continue;
        }

        Console.WriteLine($"words.unshift('{entry.Word}');");
        File.WriteAllText($"index/{entry.Word}idx.json", JSON.Serialize(chunk.ToArray()));
        chunk.Clear();
    }
}
|
|
|
|
/// <summary>
/// Walks every book, chapter and verse of the deserialized bible and passes each
/// space-separated word to <c>AddWordToIndex</c> together with its verse reference.
/// </summary>
/// <param name="b">The deserialized bible.</param>
private void PopulateIndex(XMLBIBLE b)
{
    foreach (var bk in b.BIBLEBOOKS)
    {
        foreach (var ch in bk.CHAPTERS)
        {
            foreach (var vs in ch.VERSES)
            {
                // Indexes every space-separated word of one string under the current verse.
                // Blank fragments are passed through; AddWordToIndex ignores them.
                void IndexWords(string text)
                {
                    foreach (var word in text.Split(' '))
                    {
                        AddWordToIndex(
                            word,
                            bk.bnumber.ToString(),
                            ch.cnumber.ToString(),
                            vs.vnumber.ToString()
                        );
                    }
                }

                // Indexes every string of a text collection; a missing collection is skipped.
                void IndexAll(IEnumerable<string>? texts)
                {
                    if (texts is null)
                    {
                        return;
                    }

                    foreach (var text in texts)
                    {
                        IndexWords(text);
                    }
                }

                if (vs.Items != null)
                {
                    foreach (var w in vs.Items)
                    {
                        // Exact type comparison is kept so that dispatch is unchanged even if
                        // the schema classes turn out to be related by inheritance.
                        if (w.GetType() == typeof(XMLBIBLE_GR))
                        {
                            IndexAll(((XMLBIBLE_GR)w).Text);
                        }
                        else if (w.GetType() == typeof(XMLBIBLE_STYLE_ITEM))
                        {
                            var o = (XMLBIBLE_STYLE_ITEM)w;

                            IndexAll(o.Text);
                            IndexAll(o.gr?.Text);

                            if (o.STYLE == null)
                            {
                                continue;
                            }

                            foreach (var so in o.STYLE)
                            {
                                IndexAll(so.Text);

                                // The nested gr carries its text in a single Value string.
                                if (so.gr?.Value is { } value)
                                {
                                    IndexWords(value);
                                }
                            }
                        }
                    }
                }

                // Loose text directly under the verse.
                IndexAll(vs.Text);
            }
        }

        logger?.LogInformation("Indexing Book: {bnumber}, Word Count: {Count}", bk.bnumber, stems.Count);
    }
}
|
|
|
|
/// <summary>
/// Records one word occurrence: classifies the word as capitalised or lower-case and, unless
/// it is blank or excluded, adds its verse reference to the index entry of its stem.
/// </summary>
/// <param name="s">The raw word; surrounding whitespace and punctuation are stripped here.</param>
/// <param name="bk">Book number of the occurrence.</param>
/// <param name="ch">Chapter number of the occurrence.</param>
/// <param name="vs">Verse number of the occurrence.</param>
private void AddWordToIndex(string s, string bk, string ch, string vs)
{
    // Strip punctuation from both ends, then drop apostrophes inside the word.
    var cased = s.Trim().Trim(trims).Replace("'", "");
    if (cased.Length == 0)
    {
        return;
    }

    // Remember the casing the word was seen in; CreateText uses both sets later.
    if (uppers.Contains(cased[0]))
    {
        capitals.Add(cased);
    }
    else
    {
        lowercase.Add(cased);
    }

    // Invariant casing so the generated index does not depend on the machine culture.
    var word = cased.ToLowerInvariant();
    if (exclusions.Contains(word))
    {
        return;
    }

    var stem = new EnglishPorter2Stemmer().Stem(word).Value;

    // First time this word is seen: remember which stem it maps to.
    if (words.Add(word))
    {
        wordStemsIndex.Add(
            new()
            {
                s = stem, w = word,
            }
        );
    }

    var reference = bk + referenceDelimiter + ch + referenceDelimiter + vs;

    // add the word to the index
    if (stems.Add(stem))
    {
        var created = new IndexItem { Word = stem };
        created.References.Add(reference);
        idx.Add(created);
        return;
    }

    var existing = idx.GetItem(stem);
    ArgumentNullException.ThrowIfNull(existing);

    // A stem is referenced at most once per verse.
    if (!existing.References.Contains(reference))
    {
        existing.References.Add(reference);
    }
}
|
|
|
|
/// <summary>
/// Builds the Strong's dictionary from the <c>grk*</c> and <c>heb*</c> XML files under
/// <c>{dataPath}/xml</c>, overlays the lemmas found in the two JSON dictionaries in
/// <paramref name="dataPath"/>, and writes the result to <c>strongs/grk{n}.json</c> and
/// <c>strongs/heb{n}.json</c> in sets split at every hundredth number.
/// </summary>
/// <param name="dataPath">Directory holding the <c>xml</c> folder and the two <c>strongs-*-dictionary.json</c> files.</param>
/// <exception cref="InvalidOperationException">A dictionary entry element has no attribute.</exception>
public void CreateStrongsDict(string dataPath)
{
    var masterDict = new Dictionary<string, StrongDictEntry>();

    var xmlDirectory = Path.Combine(dataPath, "xml");
    var files = Directory
        .EnumerateFiles(xmlDirectory, "grk*", SearchOption.TopDirectoryOnly)
        .Concat(Directory.EnumerateFiles(xmlDirectory, "heb*", SearchOption.TopDirectoryOnly));

    foreach (var f in files)
    {
        var doc = XDocument.Load(f);

        if (doc.Root is null)
        {
            // Abandons the whole run, as before; nothing has been written at this point.
            logger?.LogError("Unable to process Strongs dictionary file {File}, no nodes.", f);
            return;
        }

        foreach (var el in doc.Root.Elements("i"))
        {
            var key = el.FirstAttribute?.Value
                ?? throw new InvalidOperationException("Strongs dictionary entry has no id attribute");

            var sr = new StrongDictEntry
            {
                Dict = key,
            };

            if (!masterDict.TryAdd(key, sr))
            {
                // duplicate? the first definition wins
                continue;
            }

            // Elements() rather than Nodes(): a text or comment child must not fail a cast.
            foreach (var d in el.Elements())
            {
                switch (d.Name.ToString())
                {
                    case "d":
                        AddDescription(sr, d);
                        break;
                    case "p":
                        sr.Pronounciation = d.Value;
                        break;
                    case "tr":
                        sr.Translation = d.Value;
                        break;
                    case "t":
                        sr.Lemma = d.Value;
                        break;
                }
            }
        }
    }

    // The numeric part of the key drives ordering and set boundaries below.
    foreach (var (key, entry) in masterDict)
    {
        entry.n = Convert.ToInt32(key.Substring(1));
    }

    // combine with other javascript: a lemma from the JSON dictionaries replaces the XML one
    foreach (var f in new[]
             {
                 Path.Combine(dataPath, "strongs-greek-dictionary.json"),
                 Path.Combine(dataPath, "strongs-hebrew-dictionary.json"),
             })
    {
        var doc = JSON.Deserialize<Dictionary<string, StrongDictEntry>>(File.ReadAllText(f));
        ArgumentNullException.ThrowIfNull(doc);

        foreach (var (key, entry) in masterDict)
        {
            if (doc.TryGetValue(key, out var dictValue) && dictValue?.Lemma is { } lemma)
            {
                entry.Lemma = lemma;
            }
        }
    }

    WriteDictionarySets(masterDict.Values.Where(o => o.Dict?.StartsWith('G') ?? false).OrderBy(o => o.n), "grk");
    WriteDictionarySets(masterDict.Values.Where(o => o.Dict?.StartsWith('H') ?? false).OrderBy(o => o.n), "heb");
}

// Splits the serialized markup of a <d> element into plain-text parts and link parts and
// appends them to the entry's description.
private static void AddDescription(StrongDictEntry sr, XElement d)
{
    var s = d.ToString()
        .Replace("<d>", "")
        .Replace("</d>", "")
        .Replace("><", "> <")
        .Replace("<br>", "")
        .Replace("[", "")
        .Replace("]", "")
        .Replace(";", "; ")
        .Replace("<br>", "");

    // The code at hand replaced a single space with a single space five times, which does
    // nothing; collapsing runs of spaces is the evident intent.
    // NOTE(review): confirm against the upstream file that the literal was a double space.
    while (s.Contains("  ", StringComparison.Ordinal))
    {
        s = s.Replace("  ", " ");
    }

    s = s
        .Replace("\r\n", "")
        .Replace("<br />", "")
        .Replace("\n", "");

    foreach (var part in s.Split(["<link", "</link>"], StringSplitOptions.None))
    {
        if (part.Contains("target="))
        {
            // A link: keep only the text after the last '>'.
            sr.Description.Add(
                new()
                {
                    StrongsNumber = part.ParseAfterLastIndexOf_PlusLength(">"),
                }
            );
        }
        else
        {
            sr.Description.Add(
                new()
                {
                    Word = part,
                }
            );
        }
    }
}

// Writes the entries, already ordered by number, to strongs/{testament}{set}.json.
// A set is flushed whenever an entry reaches a new hundred; the remainder becomes the next set.
private void WriteDictionarySets(IEnumerable<StrongDictEntry> entries, string testament)
{
    var last = 0;
    var temp = new List<StrongDictEntry>();

    foreach (var e in entries)
    {
        temp.Add(e);
        if (e.n / 100 <= last)
        {
            continue;
        }

        last = e.n / 100;
        File.WriteAllText($"strongs/{testament}{last}.json", JSON.Serialize(temp));
        temp = [];

        logger?.LogInformation("Set: {last}", last);
    }

    // handle the last set; nothing is left over when the final entry closed a set
    // (or when there were no entries at all).
    if (temp.Count == 0)
    {
        return;
    }

    last = temp[^1].n / 100 + 1;
    File.WriteAllText($"strongs/{testament}{last}.json", JSON.Serialize(temp));

    logger?.LogInformation("Set: {last}", last);
}
|
|
|
|
/// <summary>
/// Converts every <c>r-*</c> file under <c>{dataPath}/xml</c> into
/// <c>rmac/{name without its last four characters}.json</c>, one record per <c>i</c> element.
/// </summary>
/// <param name="dataPath">Directory that contains the <c>xml</c> folder.</param>
/// <exception cref="InvalidOperationException">An <c>i</c> element has no attribute.</exception>
public void CreateRmac(string dataPath)
{
    var xmlDirectory = Path.Combine(dataPath, "xml");

    foreach (var f in Directory.EnumerateFiles(xmlDirectory, "r-*", SearchOption.TopDirectoryOnly))
    {
        var root = XDocument.Load(f).Root;
        if (root is null)
        {
            // Stops processing the remaining files as well.
            logger?.LogError("Unable to process RMAC, no nodes.");
            return;
        }

        var rmacs = new List<RMAC>();
        foreach (var el in root.Elements("i"))
        {
            var r = new RMAC
            {
                Id = el.FirstAttribute?.Value ?? throw new InvalidOperationException("Id cannot be null"),
                Description = [],
            };

            // Every child of an entry is expected to be an element; the cast fails otherwise.
            foreach (var d in el.Nodes().Cast<XElement>())
            {
                r.Description.Add(d.Value);
            }

            rmacs.Add(r);
        }

        // Drop the last four characters of the file name (the extension, presumably ".xml").
        var fileName = new FileInfo(f).Name;
        File.WriteAllText($"rmac/{fileName[..^4]}.json", JSON.Serialize(rmacs));
        logger?.LogInformation("Set: {f}", f);
    }
}
|
|
|
|
/// <summary>
/// Converts every <c>rs*</c> file under <c>{dataPath}/xml</c> into <c>rmac/rs{n + 1}.json</c>,
/// where <c>n</c> is the number that follows "rs" in the file name. Each <c>s</c> element
/// becomes one record built from its first and last attribute.
/// </summary>
/// <param name="dataPath">Directory that contains the <c>xml</c> folder.</param>
/// <exception cref="InvalidOperationException">An <c>s</c> element has no attributes.</exception>
/// <exception cref="FormatException">A file name has no digits after the "rs" prefix.</exception>
public void CreateRmacCrossRefs(string dataPath)
{
    var fileNames = Directory.EnumerateFiles(Path.Combine(dataPath, "xml"), "rs*", SearchOption.TopDirectoryOnly);

    foreach (var f in fileNames)
    {
        var doc = XDocument.Load(f);

        if (doc.Root is null)
        {
            // Stops processing the remaining files as well.
            logger?.LogError("Unable to process RMAC, no nodes.");
            return;
        }

        var rmacs = doc.Root
            .Elements("s")
            .Select(
                el => new RMACCrossRef
                {
                    Id = el.FirstAttribute?.Value ?? throw new InvalidOperationException("Id cannot be null"),
                    Reference = el.LastAttribute?.Value ?? throw new InvalidOperationException("Reference cannot be null"),
                }
            )
            .ToList();

        // Read the whole run of digits after "rs". Taking a single character would map
        // "rs1" and "rs10" to the same output file.
        var fileName = Path.GetFileName(f);
        var digits = new string(fileName.Skip(2).TakeWhile(c => c is >= '0' and <= '9').ToArray());
        if (digits.Length == 0)
        {
            throw new FormatException($"No set number after the 'rs' prefix in file name '{fileName}'");
        }

        var i = int.Parse(digits);
        File.WriteAllText($"rmac/rs{i + 1}.json", JSON.Serialize(rmacs));
        logger?.LogInformation("Set: {f}", f);
    }
}
|
|
} |