2024-03-25 16:14:17 +00:00

189 lines
10 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

namespace DynamicBible.DataPreparation;
/// <summary>
/// Reads a STEPBible tab-separated word file and turns each data row into a <see cref="Definition"/>.
/// </summary>
public class StepBibleProcessor
{
    /// <summary>
    /// Number of tab-separated columns that a data row is mapped onto in <see cref="Definition"/>.
    /// </summary>
    private const int ExpectedFieldCount = 15;

    /// <summary>
    /// Parses <paramref name="fileName"/> line by line and groups the resulting
    /// <see cref="Definition"/> rows by their <see cref="Definition.Reference"/> value.
    /// </summary>
    /// <param name="fileName">Path of the data file to read.</param>
    /// <remarks>
    /// Every line before the column-header line is treated as preamble and ignored. After the header,
    /// blank lines, lines starting with <c>===</c> and lines starting with <c>#</c> are skipped; every
    /// other line is split on tab characters. Rows with fewer than fifteen columns are accepted and
    /// their missing trailing columns are treated as empty strings.
    /// NOTE(review): the grouped result is local to this method and is discarded on return; nothing
    /// is returned or persisted yet.
    /// </remarks>
    public static void ProcessStep(string fileName)
    {
        // FileStream(path, FileMode.Open) asks for read/write access, which fails on read-only
        // files. This method only reads, so request read access explicitly.
        using var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var reader = new StreamReader(fs);
        Dictionary<string, List<Definition>> definitions = new();
        var start = false;
        while (reader.ReadLine() is { } line)
        {
            // The column-header line marks the end of the preamble.
            if (line.StartsWith("Reference + word Word # Greek", StringComparison.Ordinal))
            {
                start = true;
                continue;
            }

            if (!start
                || line.StartsWith("===", StringComparison.Ordinal)
                || line.StartsWith('#') // char overload compares ordinally, like the checks above
                || string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            // From here on the line is expected to be a tab-separated data row.
            var fields = line.Split('\t');
            var reference = GetField(fields, 0);
            var def = new Definition
            {
                Reference = reference,
                WordType = GetField(fields, 1),
                Greek = GetField(fields, 2),
                EnglishTranslation = GetField(fields, 3),
                Strongs = GetField(fields, 4),
                Grammar = GetField(fields, 5),
                DictionaryForm = GetField(fields, 6),
                EnglishGloss = GetField(fields, 7),
                Editions = GetField(fields, 8),
                SpellingVariants = GetField(fields, 9),
                MeaningVariants = GetField(fields, 10),
                SpanishTranslation = GetField(fields, 11),
                SubMeaning = GetField(fields, 12),
                SuperMeaning = GetField(fields, 13),
                ConjoinWord = GetField(fields, ExpectedFieldCount - 1),
            };

            // Single lookup per row; a new list is only allocated for a reference not seen before.
            if (!definitions.TryGetValue(reference, out var group))
            {
                group = [];
                definitions[reference] = group;
            }

            group.Add(def);
        }
    }

    /// <summary>
    /// Returns the column at <paramref name="index"/>, or an empty string when the row is too short
    /// to contain that column (for example when trailing empty columns were trimmed from the file).
    /// </summary>
    private static string GetField(string[] fields, int index) =>
        index < fields.Length ? fields[index] : string.Empty;
}
/*
# Act.1.1 Τὸν μὲν πρῶτον λόγον ἐποιησάμην περὶ πάντων, ὦ Θεόφιλε, ὧν ἤρξατο
#_Translation The indeed first account I composed concerning all the things, O Theophilus, of which began
#_Word=Grammar G3588=T-ASM G3303=PRT G4413=A-ASM G3056=N-ASM G4160=V-AMI-1S G4012=PREP G3956=A-GPN G5599=INJ G2321=N-VSM-P G3739=R-GPN G0757=V-ADI-3S
#_Non-NA variants
#_Act.1.1 cont. ὁ Ἰησοῦς ποιεῖν τε καὶ διδάσκειν
#_Translation <the> Jesus to do both and to teach,
#_Word=Grammar G3588=T-NSM G2424=N-NSM-P G4160=V-PAN G5037=CONJ G2532=CONJ G1321=V-PAN
#_Non-NA variants
#_REFERENCE WORD TYPE GREEK ENGLISH TRANSLATION STRONGS GRAMMAR DICTIONARY FORM GLOSS EDITIONS SPELLING VARIANTS MEANING VARIANTS SPANISH TRANSLATION SUB-MEANING SUPER-MEANING CONJOIN WORD
45_Act.001.001 =NA same TR ~~ Τὸν The G3588 T-ASM ὁ the/this/who NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV τὸν A la the »008:G3056
45_Act.001.001 =NA same TR ~~ μὲν indeed G3303 PRT μέν on one hand NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV μὲν de hecho on one hand
45_Act.001.001 =NA same TR ~~ πρῶτον first G4413 A-ASM πρῶτος first NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV πρῶτον primera first§1_first
45_Act.001.001 =NA same TR ~~ λόγον account G3056 N-ASM λόγος word NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV λόγον palabra word
45_Act.001.001 =NA same TR ~~ ἐποιησάμην I composed G4160 V-AMI-1S ποιέω to do/make NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV ἐποιησάμην hice to do§1_do/work
45_Act.001.001 =NA same TR ~~ περὶ concerning G4012 PREP περί about NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV περὶ acerca de about
45_Act.001.001 =NA same TR ~~ πάντων, all the things, G3956 A-GPN πᾶς all NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV πάντων todas (cosas) all
45_Act.001.001 =NA same TR ~~ ὦ O G5599 INJ ὦ oh! NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV ὦ oh oh!
45_Act.001.001 =NA same TR ~~ Θεόφιλε, Theophilus, G2321 N-VSM-P Θεόφιλος Theophilus NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV θεόφιλε Teófilo Theophilus§Theophilus@Luk.1.3
45_Act.001.001 =NA same TR ~~ ὧν of which G3739 R-GPN ὅς, ἥ which NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV ὧν de cuales which »022:G0757
45_Act.001.001 =NA same TR ~~ ἤρξατο began G0757 V-ADI-3S ἄρχω be first NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV ἤρξατο comenzó be first
45_Act.001.001 =NA same TR ~~ ὁ <the> G3588 T-NSM ὁ the/this/who NA27+NA28+Tyn+SBL+TR+Byz+NIV ὁ el the »026:G2424
45_Act.001.001 =NA same TR ~~ Ἰησοῦς Jesus G2424 N-NSM-P Ἰησοῦς Jesus/Joshua NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV ἰησοῦς Jesús Jesus§Jesus@Mat.1.1
45_Act.001.001 =NA same TR ~~ ποιεῖν to do G4160 V-PAN ποιέω to do/make NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV ποιεῖν estar haciendo to do§1_do/work
45_Act.001.001 =NA same TR ~~ τε both G5037 CONJ τε and/both NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV τε y and
45_Act.001.001 =NA same TR ~~ καὶ and G2532 CONJ καί and NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV καὶ también and
45_Act.001.001 =NA same TR ~~ διδάσκειν to teach, G1321 V-PAN διδάσκω to teach NA27+NA28+Tyn+SBL+WH+Treg+TR+Byz+NIV διδάσκειν estar enseñando to teach
*/
/// <summary>
/// One word row of a STEPBible tagged Greek text file. Each property holds the raw text of one
/// tab-separated column of that row; no value is parsed or validated by this type.
/// </summary>
public class Definition
{
    /// <summary>
    /// Versification used by SBL and all modern texts.
    /// Significantly different from KJV at
    /// Mat.17.14; 20.4; Mrk.3.19; 12.15; Luk.1.74; 6.17; 7.19;
    /// Act.2.10; 3.19; 4.6; 5.40; 9.29; 13.39; 19.41; 24.18; Rom.7.9; 9.11;
    /// 2Co.1.6; 8.13; 10.5; 13.13; 13.14; Gal.2.20; Eph.1.11; 2.15; 5.13;
    /// Php.2.8; Col.1.21; 1Th.1.3; 2.6; 2.11; Heb.3.9; 7.21; 12.23; 1Pe.3.15;
    /// Rev.2.27; 13.1; 17.10;
    /// </summary>
    public string? Reference { get; init; }
    /// <summary>
    /// Source of the word, privileging NA, then TR.
    /// </summary>
    public string? WordType { get; set; }
    /// <summary>
    /// Spelling based on NA28 for NA, then TR if not in NA, then other Eds if in neither. Cases and final accents based on
    /// the punctuation which follows THGHT
    /// </summary>
    public string? Greek { get; set; }
    /// <summary>
    /// English is based on Berean Study Bible, with permission, as at 1-July-2019. This covered only NA words. Others supplied
    /// by Tyndale scholars, and the complete work was unified in many ways.
    /// </summary>
    public string? EnglishTranslation { get; set; }
    /// <summary>
    /// Extended with words not used in the KJV. Backwardly compatible with standard Strongs and NASB tagging, as defined in
    /// the Brief Lexicon at https://STEPBible.github.io/STEPBible-Data/
    /// </summary>
    public string? Strongs { get; set; }
    /// <summary>
    /// Based on James Tauber, with additional details by Tyndale scholars: persons added to Personal, Reflexive and Possessive
    /// pronouns; distinguish 2nd verbal forms (e.g. 2nd Aorist); distinguished between "Passive", "Either middle or Passive",
    /// "Deponent"
    /// - some changes have been made, including:
    /// ἐμοῦ A-GSMN => P-1GS @ Jhn.016.015
    /// ευθυς =G2117 A-MNSM "straight" => G2112 ADV "immediately" @ Mat.003.016; Mat.013.20; Mat.013.21; Mrk.001.012;
    /// Mrk.001.028; Jhn.013.032; Jhn.019.034; Jhn.021.003
    /// V-PAI-3P changed to V-PAP-DPM except for εισι[ν] (G1510) which is V-PAI-3P while οὖσι[ν] (G1510) is V-PAP-DPM
    /// τίς etc changed from I-NSM etc to X-MNSM etc at Act.025.014; 1Co.010.019; Heb.010.025, 27, 28; 11.040; 12.15x2, 16;
    /// 13.002; Jas.001.005, 7, 18, 23, 26; 2.014, 16, 18; 3.002; 5.013x2, 14x2; 5.019x2; 1Pe.002.019
    /// G4302 changed to G4277 when προειπον is used, ie 45_Act.001.016-A, 49_Gal.005.021-A, 53_1Th.004.006-A
    /// G4302 changed to G4280 when προερέω is used, ie 41_Mat.024.025-A, 42_Mrk.013.023-INSTW, 42_Mrk.013.023-B,
    /// 46_Rom.009.029-A, 48_2Co.007.003-A, 48_2Co.013.002-A, 49_Gal.001.009-A, 59_Heb.004.007-IMNSTW, 59_Heb.010.015-B,
    /// 62_2Pe.003.002-A, 66_Jud.001.017-A,
    /// ποῦ =G4225 => G4226 at 42_Mrk.015.047-A, 44_Jhn.001.039-A, 63_1Jn.002.011-A
    /// </summary>
    public string? Grammar { get; set; }
    /// <summary>
    /// Dictionary form, from the TBESG - Tyndale Brief lexicon of Extended Strongs for Greek at
    /// https://STEPBible.github.io/STEPBible-Data/
    /// </summary>
    public string? DictionaryForm { get; set; }
    /// <summary>
    /// English gloss, from the TBESG - Tyndale Brief lexicon of Extended Strongs for Greek at
    /// https://STEPBible.github.io/STEPBible-Data/
    /// </summary>
    public string? EnglishGloss { get; set; }
    /// <summary>
    /// Those which use the same letters, though they may be accented or capitalised differently. Byz=Byzantine based on RP;
    /// NA27=Nestle-Aland 27th ed; NA28 2012 (this is NOT the same as ECM Acts - see eg Act.1.10 ἐσθῆτι in ECM); TR= Textus
    /// Receptus; SBL= SBLGNT; Treg= Tregelles; WH= WH; Tyn= Tyndale House GNT.
    /// </summary>
    public string? Editions { get; set; }
    /// <summary>
    /// Raw text of the column headed "SPELLING VARIANTS" in the source data.
    /// NOTE(review): the source data gives no further description of this column - confirm its exact meaning upstream.
    /// </summary>
    public string? SpellingVariants { get; set; }
    /// <summary>
    /// Raw text of the column headed "MEANING VARIANTS" in the source data.
    /// NOTE(review): the source data gives no further description of this column - confirm its exact meaning upstream.
    /// </summary>
    public string? MeaningVariants { get; set; }
    /// <summary>
    /// Spanish is based on Marvel Bible Project as on 9-Jan-2019 from
    /// https://github.com/eliranwong/OpenGNT/blob/master/OpenGNT_BASE_TEXT.zip &amp; OpenGNT_keyedFeatures.csv.zip. This was
    /// available only for words in NA28. Other words are supplied by Tyndale scholars.
    /// </summary>
    public string? SpanishTranslation { get; set; }
    /// <summary>
    /// Words with more than one meaning are supplied with a context-sensitive sub-meaning. Individuals and Places are
    /// identified as in TIPNR. Others are based on CSG (Context Sensitive Gloss) from Marvel Bible Project as at 9-Jan-2019
    /// from https://github.com/eliranwong/OpenGNT/blob/master/OpenGNT_BASE_TEXT.zip &amp; OpenGNT_keyedFeatures.csv.zip. This was
    /// available only for words in NA28. Other words are supplied by Tyndale House who also edited the CSG for words where
    /// sub-meanings were not sufficiently detailed.
    /// </summary>
    public string? SubMeaning { get; set; }
    /// <summary>
    /// Raw text of the column headed "SUPER-MEANING" in the source data.
    /// NOTE(review): the source data gives no further description of this column - confirm its exact meaning upstream.
    /// </summary>
    public string? SuperMeaning { get; set; }
    /// <summary>
    /// Links words that might not be translated (eg articles and particles) with the word they are connected to, which are
    /// often separated by several other words.
    /// </summary>
    public string? ConjoinWord { get; set; }
}