le traitement par lots fonctionne

develop
gwen 3 years ago
parent ce6e7b9a8c
commit 30c76bba9a

@ -10,7 +10,7 @@
conf/**/*credentials* conf/**/*credentials*
# ignore everything in the following folders # ignore everything in the following folders
data/** # data/**
logs/** logs/**
# except their sub-folders # except their sub-folders
@ -20,8 +20,6 @@ logs/**
# also keep all .gitkeep files # also keep all .gitkeep files
!.gitkeep !.gitkeep
# also keep the example dataset
!data/01_raw/**
########################## ##########################

@ -40,28 +40,32 @@ preprocessed_dataset_test:
load_xml: load_xml:
type: actesdataset.XMLDataSet type: actesdataset.XMLDataSet
filepath: data/01_raw/xml/Anjou/anj_is_i_1441_08_05a.xml filepath: data/01_raw/xml/anjou/anj_is_i_1441_08_05a.xml
preprocess_html: preprocess_html:
type: actesdataset.XMLDataSet type: actesdataset.XMLDataSet
filepath: data/02_intermediate/xml/Anjou/anj_is_i_1441_08_05a.html filepath: data/02_intermediate/xml/anjou/anj_is_i_1441_08_05a.html
# _________________________________________________________________________ # _________________________________________________________________________
# same test with kedro.io.PartitionedDataSet # same test with kedro.io.PartitionedDataSet
load_full_xml_catalog: # warning :
type: PartitionedDataSet # this kind of yaml data is generated programmatically
path: data/01_raw/xml/Anjou/ # in the generic data loader
dataset:
type: actesdataset.XMLDataSet #load_full_xml_catalog:
filename_suffix: '.xml' # type: PartitionedDataSet
# path: data/01_raw/xml/anjou/
preprocess_full_catalog_html: # dataset:
type: PartitionedDataSet # type: actesdataset.XMLDataSet
path: data/02_intermediate/xml/Anjou/ # filename_suffix: '.xml'
dataset:
type: actesdataset.XMLDataSet #preprocess_full_catalog_html:
filename_suffix: '.html' # type: PartitionedDataSet
# path: data/02_intermediate/xml/anjou/
# dataset:
# type: actesdataset.XMLDataSet
# filename_suffix: '.html'
# _________________________________________________________________________ # _________________________________________________________________________

@ -0,0 +1,9 @@
houses:
- name: bourbon
- name: berry
- name: anjou
# - Bretagne
# - Bourgogne
# - Orléans
# - Armagnac

@ -0,0 +1,557 @@
+ actes-princiers/data/01_raw
+ csv
+ actors.csv
+ corpus-agnes-bourgogne.csv
+ corpus-charles-i.csv
+ xml
+ anjou
+ anj_is_i_1441_08_05a.xml
+ anj_lo_i_1360_08a.xml
+ anj_lo_i_1371_07_08a.xml
+ anj_lo_ii_1401_04_28a.xml
+ anj_lo_ii_1402_11_07a.xml
+ anj_lo_ii_1405_05_02a.xml
+ anj_lo_ii_1406_01_26a.xml
+ anj_lo_ii_1406_04_15a.xml
+ anj_lo_ii_1409_08_07a.xml
+ anj_lo_ii_1409_12_12a.xml
+ anj_lo_ii_1413_03_01a.xml
+ anj_lo_iii_1420_11_04a.xml
+ anj_lo_iii_1422_02_09a.xml
+ anj_lo_iii_1424_03_31a.xml
+ anj_lo_iii_1424_03_31b.xml
+ anj_lo_iii_1428_06_07a.xml
+ anj_lo_iii_1428_06_07b.xml
+ anj_lo_iii_1432_10_27a.xml
+ anj_ma_i_1370_12_10a.xml
+ anj_re_i_1437_09_16a.xml
+ anj_re_i_1439_11_22a.xml
+ anj_re_i_1440_01_20a.xml
+ anj_re_i_1445a.xml
+ anj_re_i_1450_11_07a.xml
+ anj_re_i_1454_01_14a.xml
+ anj_re_i_1454_02_09a.xml
+ anj_re_i_1454_06_17a.xml
+ anj_re_i_1454_09_01a.xml
+ anj_re_i_1455_11_13a.xml
+ anj_re_i_1456_11_29a.xml
+ anj_re_i_1457_01_04a.xml
+ anj_re_i_1459_03_17a.xml
+ anj_re_i_1459_04_16a.xml
+ anj_re_i_1463_07_21a.xml
+ anj_re_i_1466_12_16a.xml
+ anj_re_i_1474_02_01a.xml
+ anj_re_i_1475_05_26a.xml
+ anj_yo_i_1418_12_20a.xml
+ anj_yo_i_1421_06_28a.xml
+ anj_yo_i_1442_02_24a.xml
+ berry
+ bry_je_i_1391_07_12a.xml
+ bry_je_i_1404_12_07a.xml
+ bourbon
+ 5-Agnes-Bourgogne
+ agnes_1.xml
+ agnes_10.xml
+ agnes_11.xml
+ agnes_12.xml
+ agnes_13.xml
+ agnes_14.xml
+ agnes_15.xml
+ agnes_16.xml
+ agnes_17.xml
+ agnes_18.xml
+ agnes_19.xml
+ agnes_2.xml
+ agnes_20.xml
+ agnes_21.xml
+ agnes_22.xml
+ agnes_23.xml
+ agnes_24.xml
+ agnes_25.xml
+ agnes_26.xml
+ agnes_27.xml
+ agnes_28.xml
+ agnes_29.xml
+ agnes_3.xml
+ agnes_30.xml
+ agnes_31.xml
+ agnes_32.xml
+ agnes_4.xml
+ agnes_5.xml
+ agnes_6.xml
+ agnes_7.xml
+ agnes_8.xml
+ agnes_9.xml
+ corpus-agnes-bourgogne.xml
+ 5-Charles-Ier
+ charles_ier_1.xml
+ charles_ier_10.xml
+ charles_ier_100.xml
+ charles_ier_101.xml
+ charles_ier_102.xml
+ charles_ier_103.xml
+ charles_ier_104.xml
+ charles_ier_105.xml
+ charles_ier_106.xml
+ charles_ier_107.xml
+ charles_ier_108.xml
+ charles_ier_109.xml
+ charles_ier_11.xml
+ charles_ier_110.xml
+ charles_ier_111.xml
+ charles_ier_112.xml
+ charles_ier_113.xml
+ charles_ier_114.xml
+ charles_ier_115.xml
+ charles_ier_116.xml
+ charles_ier_117.xml
+ charles_ier_118.xml
+ charles_ier_119.xml
+ charles_ier_12.xml
+ charles_ier_120.xml
+ charles_ier_121.xml
+ charles_ier_122.xml
+ charles_ier_123.xml
+ charles_ier_124.xml
+ charles_ier_125.xml
+ charles_ier_126.xml
+ charles_ier_127.xml
+ charles_ier_128.xml
+ charles_ier_129.xml
+ charles_ier_13.xml
+ charles_ier_130.xml
+ charles_ier_131.xml
+ charles_ier_132.xml
+ charles_ier_133.xml
+ charles_ier_134.xml
+ charles_ier_135.xml
+ charles_ier_136.xml
+ charles_ier_137.xml
+ charles_ier_138.xml
+ charles_ier_139.xml
+ charles_ier_14.xml
+ charles_ier_140.xml
+ charles_ier_141.xml
+ charles_ier_142.xml
+ charles_ier_143.xml
+ charles_ier_144.xml
+ charles_ier_145.xml
+ charles_ier_146.xml
+ charles_ier_147.xml
+ charles_ier_148.xml
+ charles_ier_149.xml
+ charles_ier_15.xml
+ charles_ier_150.xml
+ charles_ier_151.xml
+ charles_ier_152.xml
+ charles_ier_153.xml
+ charles_ier_154.xml
+ charles_ier_155.xml
+ charles_ier_156.xml
+ charles_ier_157.xml
+ charles_ier_158.xml
+ charles_ier_159.xml
+ charles_ier_16.xml
+ charles_ier_160.xml
+ charles_ier_161.xml
+ charles_ier_162.xml
+ charles_ier_163.xml
+ charles_ier_164.xml
+ charles_ier_165.xml
+ charles_ier_166.xml
+ charles_ier_167.xml
+ charles_ier_168.xml
+ charles_ier_169.xml
+ charles_ier_17.xml
+ charles_ier_170.xml
+ charles_ier_171.xml
+ charles_ier_172.xml
+ charles_ier_173.xml
+ charles_ier_174.xml
+ charles_ier_175.xml
+ charles_ier_176.xml
+ charles_ier_177.xml
+ charles_ier_178.xml
+ charles_ier_179.xml
+ charles_ier_18.xml
+ charles_ier_180.xml
+ charles_ier_181.xml
+ charles_ier_182.xml
+ charles_ier_183.xml
+ charles_ier_184.xml
+ charles_ier_185.xml
+ charles_ier_186.xml
+ charles_ier_187.xml
+ charles_ier_188.xml
+ charles_ier_189.xml
+ charles_ier_19.xml
+ charles_ier_190.xml
+ charles_ier_191.xml
+ charles_ier_192.xml
+ charles_ier_193.xml
+ charles_ier_194.xml
+ charles_ier_195.xml
+ charles_ier_196.xml
+ charles_ier_197.xml
+ charles_ier_198.xml
+ charles_ier_199.xml
+ charles_ier_2.xml
+ charles_ier_20.xml
+ charles_ier_200.xml
+ charles_ier_201.xml
+ charles_ier_202.xml
+ charles_ier_203.xml
+ charles_ier_204.xml
+ charles_ier_205.xml
+ charles_ier_206.xml
+ charles_ier_207.xml
+ charles_ier_208.xml
+ charles_ier_209.xml
+ charles_ier_21.xml
+ charles_ier_210.xml
+ charles_ier_211.xml
+ charles_ier_212.xml
+ charles_ier_213.xml
+ charles_ier_214.xml
+ charles_ier_215.xml
+ charles_ier_216.xml
+ charles_ier_217.xml
+ charles_ier_218.xml
+ charles_ier_219.xml
+ charles_ier_22.xml
+ charles_ier_220.xml
+ charles_ier_221.xml
+ charles_ier_222.xml
+ charles_ier_223.xml
+ charles_ier_224.xml
+ charles_ier_225.xml
+ charles_ier_226.xml
+ charles_ier_227.xml
+ charles_ier_228.xml
+ charles_ier_229.xml
+ charles_ier_23.xml
+ charles_ier_230.xml
+ charles_ier_231.xml
+ charles_ier_232.xml
+ charles_ier_233.xml
+ charles_ier_234.xml
+ charles_ier_235.xml
+ charles_ier_236.xml
+ charles_ier_237.xml
+ charles_ier_238.xml
+ charles_ier_239.xml
+ charles_ier_24.xml
+ charles_ier_240.xml
+ charles_ier_241.xml
+ charles_ier_242.xml
+ charles_ier_243.xml
+ charles_ier_244.xml
+ charles_ier_245.xml
+ charles_ier_246.xml
+ charles_ier_247.xml
+ charles_ier_248.xml
+ charles_ier_249.xml
+ charles_ier_25.xml
+ charles_ier_250.xml
+ charles_ier_251.xml
+ charles_ier_252.xml
+ charles_ier_253.xml
+ charles_ier_254.xml
+ charles_ier_255.xml
+ charles_ier_256.xml
+ charles_ier_257.xml
+ charles_ier_258.xml
+ charles_ier_259.xml
+ charles_ier_26.xml
+ charles_ier_260.xml
+ charles_ier_261.xml
+ charles_ier_262.xml
+ charles_ier_263.xml
+ charles_ier_264.xml
+ charles_ier_265.xml
+ charles_ier_266.xml
+ charles_ier_267.xml
+ charles_ier_268.xml
+ charles_ier_269.xml
+ charles_ier_27.xml
+ charles_ier_270.xml
+ charles_ier_271.xml
+ charles_ier_272.xml
+ charles_ier_273.xml
+ charles_ier_274.xml
+ charles_ier_275.xml
+ charles_ier_276.xml
+ charles_ier_277.xml
+ charles_ier_278.xml
+ charles_ier_279.xml
+ charles_ier_28.xml
+ charles_ier_280.xml
+ charles_ier_281.xml
+ charles_ier_282.xml
+ charles_ier_283.xml
+ charles_ier_284.xml
+ charles_ier_285.xml
+ charles_ier_286.xml
+ charles_ier_287.xml
+ charles_ier_288.xml
+ charles_ier_289.xml
+ charles_ier_29.xml
+ charles_ier_290.xml
+ charles_ier_291.xml
+ charles_ier_292.xml
+ charles_ier_293.xml
+ charles_ier_294.xml
+ charles_ier_295.xml
+ charles_ier_296.xml
+ charles_ier_297.xml
+ charles_ier_298.xml
+ charles_ier_299.xml
+ charles_ier_3.xml
+ charles_ier_30.xml
+ charles_ier_300.xml
+ charles_ier_301.xml
+ charles_ier_302.xml
+ charles_ier_303.xml
+ charles_ier_304.xml
+ charles_ier_305.xml
+ charles_ier_306.xml
+ charles_ier_307.xml
+ charles_ier_308.xml
+ charles_ier_309.xml
+ charles_ier_31.xml
+ charles_ier_310.xml
+ charles_ier_311.xml
+ charles_ier_312.xml
+ charles_ier_313.xml
+ charles_ier_314.xml
+ charles_ier_315.xml
+ charles_ier_316.xml
+ charles_ier_317.xml
+ charles_ier_318.xml
+ charles_ier_319.xml
+ charles_ier_32.xml
+ charles_ier_320.xml
+ charles_ier_321.xml
+ charles_ier_322.xml
+ charles_ier_323.xml
+ charles_ier_324.xml
+ charles_ier_325.xml
+ charles_ier_326.xml
+ charles_ier_327.xml
+ charles_ier_328.xml
+ charles_ier_329.xml
+ charles_ier_33.xml
+ charles_ier_330.xml
+ charles_ier_331.xml
+ charles_ier_332.xml
+ charles_ier_333.xml
+ charles_ier_334.xml
+ charles_ier_335.xml
+ charles_ier_336.xml
+ charles_ier_337.xml
+ charles_ier_338.xml
+ charles_ier_339.xml
+ charles_ier_34.xml
+ charles_ier_340.xml
+ charles_ier_341.xml
+ charles_ier_342.xml
+ charles_ier_343.xml
+ charles_ier_344.xml
+ charles_ier_345.xml
+ charles_ier_346.xml
+ charles_ier_347.xml
+ charles_ier_348.xml
+ charles_ier_349.xml
+ charles_ier_35.xml
+ charles_ier_350.xml
+ charles_ier_351.xml
+ charles_ier_352.xml
+ charles_ier_353.xml
+ charles_ier_354.xml
+ charles_ier_355.xml
+ charles_ier_356.xml
+ charles_ier_357.xml
+ charles_ier_358.xml
+ charles_ier_359.xml
+ charles_ier_36.xml
+ charles_ier_360.xml
+ charles_ier_361.xml
+ charles_ier_362.xml
+ charles_ier_363.xml
+ charles_ier_364.xml
+ charles_ier_365.xml
+ charles_ier_366.xml
+ charles_ier_367.xml
+ charles_ier_368.xml
+ charles_ier_369.xml
+ charles_ier_37.xml
+ charles_ier_370.xml
+ charles_ier_371.xml
+ charles_ier_372.xml
+ charles_ier_373.xml
+ charles_ier_374.xml
+ charles_ier_38.xml
+ charles_ier_39.xml
+ charles_ier_4.xml
+ charles_ier_40.xml
+ charles_ier_41.xml
+ charles_ier_42.xml
+ charles_ier_43.xml
+ charles_ier_44.xml
+ charles_ier_45.xml
+ charles_ier_46.xml
+ charles_ier_47.xml
+ charles_ier_48.xml
+ charles_ier_49.xml
+ charles_ier_5.xml
+ charles_ier_50.xml
+ charles_ier_51.xml
+ charles_ier_52.xml
+ charles_ier_53.xml
+ charles_ier_54.xml
+ charles_ier_55.xml
+ charles_ier_56.xml
+ charles_ier_57.xml
+ charles_ier_58.xml
+ charles_ier_59.xml
+ charles_ier_6.xml
+ charles_ier_60.xml
+ charles_ier_61.xml
+ charles_ier_62.xml
+ charles_ier_63.xml
+ charles_ier_64.xml
+ charles_ier_65.xml
+ charles_ier_66.xml
+ charles_ier_67.xml
+ charles_ier_68.xml
+ charles_ier_69.xml
+ charles_ier_7.xml
+ charles_ier_70.xml
+ charles_ier_71.xml
+ charles_ier_72.xml
+ charles_ier_73.xml
+ charles_ier_74.xml
+ charles_ier_75.xml
+ charles_ier_76.xml
+ charles_ier_77.xml
+ charles_ier_78.xml
+ charles_ier_79.xml
+ charles_ier_8.xml
+ charles_ier_80.xml
+ charles_ier_81.xml
+ charles_ier_82.xml
+ charles_ier_83.xml
+ charles_ier_84.xml
+ charles_ier_85.xml
+ charles_ier_86.xml
+ charles_ier_87.xml
+ charles_ier_88.xml
+ charles_ier_89.xml
+ charles_ier_9.xml
+ charles_ier_90.xml
+ charles_ier_91.xml
+ charles_ier_92.xml
+ charles_ier_93.xml
+ charles_ier_94.xml
+ charles_ier_95.xml
+ charles_ier_96.xml
+ charles_ier_97.xml
+ charles_ier_98.xml
+ charles_ier_99.xml
+ charles_ier_Numéro.xml
+ corpus-charles-i.xml
+ corpus2.xml
+ brb_ch_i_1421_01_15a.xml
+ brb_ch_i_1421_01_18a.xml
+ brb_ch_i_1425_02_04a.xml
+ brb_ch_i_1425_02_07a.xml
+ brb_ch_i_1425_08_06a.xml
+ brb_ch_i_1425_08_13a.xml
+ brb_ch_i_1425_12_08a.xml
+ brb_ch_i_1425a.xml
+ brb_ch_i_1426_03_13a.xml
+ brb_ch_i_1426_10_09a.xml
+ brb_ch_i_1426_12_16a.xml
+ brb_ch_i_1427_05_01a.xml
+ brb_ch_i_1427_05_14a.xml
+ brb_ch_i_1427_06_27a.xml
+ brb_ch_i_1427_08_04a.xml
+ brb_ch_i_1427_08_24a.xml
+ brb_ch_i_1427_08_24b.xml
+ brb_ch_i_1427_08_28a.xml
+ brb_ch_i_1428_01_28a.xml
+ brb_ch_i_1428_01_31a.xml
+ brb_ch_i_1428_11_04a.xml
+ brb_ch_i_1428_12_12a.xml
+ brb_ch_i_1429_04_20a.xml
+ brb_ch_i_1429_04a.xml
+ brb_ch_i_1429_05_19a.xml
+ brb_ch_i_1430_02_01a.xml
+ brb_ch_i_1430_02_15a.xml
+ brb_ch_i_1430_05_09a.xml
+ brb_ch_i_1430_09_05a.xml
+ brb_ch_i_1431_04_12a.xml
+ brb_ch_i_1431_05_28a.xml
+ brb_ch_i_1431_10_08a.xml
+ brb_ch_i_1432_03_12a.xml
+ brb_ch_i_1432_06_28a.xml
+ brb_ch_i_1433_03_29a.xml
+ brb_ch_i_1433_05_27a.xml
+ brb_ch_i_1433_07_13a.xml
+ brb_ch_i_1433_07_18a.xml
+ brb_ch_i_1433_07a.xml
+ brb_ch_i_1433_10_07a.xml
+ brb_ch_i_1433_10_24a.xml
+ brb_ch_i_1434_04_15a.xml
+ brb_ch_i_1434_08a.xml
+ brb_ch_i_1434_11_21a.xml
+ brb_ch_i_1434_12_04a.xml
+ brb_ch_i_1434_12_04b.xml
+ brb_ch_i_1434_12_04c.xml
+ brb_ch_i_1434_12_18a.xml
+ brb_ch_i_1435_01_15a.xml
+ brb_ch_i_1435_02_04a.xml
+ brb_ch_i_1435_02_06a.xml
+ brb_ch_i_1435_02_06b.xml
+ brb_ch_i_1435_02_10a.xml
+ brb_ch_i_1435_03_03a.xml
+ brb_ch_i_1435_05_01a.xml
+ brb_ch_i_1435_06_14a.xml
+ brb_ch_i_1435_06_27a.xml
+ brb_ch_i_1435_06_29a.xml
+ brb_ch_i_1435_09_07a.xml
+ brb_ch_i_1435_09_18a.xml
+ brb_ch_i_1435_09_21a.xml
+ brb_ch_i_1435_10_01a.xml
+ brb_ch_i_1435_12_26a.xml
+ brb_ch_i_1436_01_26a.xml
+ brb_ch_i_1436_02_06a.xml
+ brb_ch_i_1436_04_03a.xml
+ brb_ch_i_1436_04_25a.xml
+ brb_ch_i_1436_05_01a.xml
+ brb_ch_i_1436_05_21a.xml
+ brb_ch_i_1436_05_24a.xml
+ brb_ch_i_1436_06_11a.xml
+ brb_ch_i_1436_06_21a.xml
+ brb_ch_i_1436_06_22a.xml
+ brb_ch_i_1436_06a.xml
+ brb_ch_i_1436_07_23a.xml
+ brb_ch_i_1436_08_03a.xml
+ brb_ch_i_1436_12a.xml
+ brb_ch_i_1436_12b.xml
+ brb_ch_i_1436a.xml
+ brb_ch_i_1437_02_03a.xml
+ brb_ch_i_1437_02_03b.xml
+ brb_ch_i_1437_02_06a.xml
+ brb_ch_i_1437_05_06a.xml
+ brb_ch_i_1437_07_11a.xml
+ brb_ch_i_1437_07_31a.xml
+ brb_ch_i_1437_08_18a.xml
+ brb_ch_i_1437_09_06a.xml
+ brb_ch_i_1437_11_22a.xml
+ brb_ch_i_1437_11_22b.xml
+ brb_ch_i_1438_04_20a.xml
+ brb_ch_i_1438_04a.xml
+ brb_ch_i_1438_08_12a.xml
+ brb_ch_i_1438_09_10a.xml
+ brb_ch_i_1438_12_23a.xml

@ -0,0 +1,25 @@
# display_dir_tree.py
from pathlib import Path
def tree(directory):
    """Print an indented listing of everything found below *directory*.

    The first line printed is the directory itself, prefixed with "+ ".
    Every entry found recursively underneath follows in sorted path order,
    indented proportionally to its depth relative to *directory*.

    Args:
        directory: a ``pathlib.Path`` (the code calls ``rglob`` on it).

    Returns:
        None; the listing is written to standard output.
    """
    print(f"+ {directory}")
    entries = list(directory.rglob("*"))
    entries.sort()
    for entry in entries:
        # Number of path components between the root and this entry.
        level = len(entry.relative_to(directory).parts)
        # NOTE(review): the indent unit shows up as a single space in this
        # view; its width may have been collapsed by the diff rendering --
        # confirm against the real file.
        indent = " " * level
        print(indent + f"+ {entry.name}")
#tree(Path.cwd())
def tree2(directory):
    """Index every entry below *directory* by its stem, print and return it.

    Each path found recursively under *directory* is stored in a dict as
    ``stem -> path relative to directory`` (as a string). Paths are visited
    in sorted order, so when two entries share a stem the one that sorts
    later overwrites the earlier one. Directories are indexed too, since
    ``rglob("*")`` yields them alongside files.

    Args:
        directory: a ``pathlib.Path`` (the code calls ``rglob`` on it).

    Returns:
        The ``dict`` that was printed. Earlier versions only printed it;
        returning it as well lets callers reuse the index.
    """
    # Named so it no longer shadows the module-level ``tree`` function.
    stem_to_path = {}
    for path in sorted(directory.rglob("*")):
        stem_to_path[path.stem] = str(path.relative_to(directory))
    print(stem_to_path)
    return stem_to_path
# Runs at module level: index the "xml" folder under the current working directory.
tree2(Path.cwd() / "xml")

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save