[feat] add code du travail
This commit is contained in:
parent
b3f6f1988c
commit
81ea50344f
14
code-travail-extract.py
Normal file
14
code-travail-extract.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
from pypdf import PdfReader
|
||||||
|
# Extract the text of every page of the Code du travail PDF and save it as a
# single plain-text file.
reader = PdfReader("/home/alban/code/wokegpt/sources/code-travail-2022.pdf")

# Every page's text is preceded by a newline. Joining once avoids growing a
# string with repeated ``+=`` over all the pages.
full_text = "".join("\n" + page.extract_text() for page in reader.pages)

# "w" rather than "a": re-running the script must replace the previous
# extraction instead of appending a second copy of the whole document.
# The encoding is explicit so the accented French text does not depend on the
# platform's default locale.
with open(
    "/home/alban/code/wokegpt/sources/code-travail-2022.txt",
    "w",
    encoding="utf-8",
) as myfile:
    myfile.write(full_text)
|
||||||
|
|
98
code-travail-to-yaml.py
Normal file
98
code-travail-to-yaml.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
#! /usr/bin/env python3
|
||||||
|
import re
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
# Heading detectors, one per structural level. Each pattern is anchored at the
# start of the line and only looks at the leading keyword.

# Part heading: a single space-free word followed by " partie",
# e.g. "Première partie : Les relations individuelles de travail".
re_partie = re.compile(r"^[^ ]+ partie")

# Book heading, e.g. "Livre Ier : Dispositions préliminaires".
re_livre = re.compile(r"^Livre")

# Title heading,
# e.g. "Titre Ier : Champ d'application et calcul des seuils d'effectifs".
re_titre = re.compile(r"^Titre")

# Chapter heading, e.g. "Chapitre unique.".
re_chapitre = re.compile(r"^Chapitre")

# Article heading, e.g. "Article L1111-1".
re_article = re.compile(r"^Article")
|
||||||
|
|
||||||
|
|
||||||
|
class Doc:
    """Accumulate parsed text as a nested mapping keyed by heading.

    The instance remembers the heading most recently seen at each level
    (partie, livre, titre, chapitre, article). Body text is stored as
    ``content[partie][livre][titre][chapitre][article] = text``.
    """

    content: dict
    partie: str
    livre: str
    titre: str
    chapitre: str
    article: str
    text: str

    def __init__(self) -> None:
        # State lives on the instance: a class-level ``{}`` would be one dict
        # shared (and mutated) by every Doc ever created.
        self.content = {}
        self.partie = ""
        self.livre = ""
        self.titre = ""
        self.chapitre = ""
        self.article = ""
        self.text = ""

    def set_partie(self, arg: str) -> None:
        """Start a new partie and forget every heading below it."""
        self.partie = arg
        # The article is reset too, so text that follows this heading is not
        # filed under the last article of the previous section.
        self.livre = self.titre = self.chapitre = self.article = self.text = ""

    def set_livre(self, arg: str) -> None:
        """Start a new livre and forget every heading below it."""
        self.livre = arg
        self.titre = self.chapitre = self.article = self.text = ""

    def set_titre(self, arg: str) -> None:
        """Start a new titre and forget every heading below it."""
        self.titre = arg
        self.chapitre = self.article = self.text = ""

    def set_chapitre(self, arg: str) -> None:
        """Start a new chapitre and forget the current article and text."""
        self.chapitre = arg
        self.article = self.text = ""

    def set_article(self, arg: str) -> None:
        """Start a new article with an empty text buffer."""
        self.article = arg
        self.text = ""

    def set_text(self, arg: str) -> None:
        """Append ``arg`` to the current text and store it in ``content``.

        Each call appends ``arg`` followed by a single space, then writes the
        accumulated text under the current heading path, creating any missing
        intermediate mapping.
        """
        self.text += arg + " "
        chapitre_node = (
            self.content.setdefault(self.partie, {})
            .setdefault(self.livre, {})
            .setdefault(self.titre, {})
            .setdefault(self.chapitre, {})
        )
        chapitre_node[self.article] = self.text
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE(review): this module-level instance is not referenced anywhere else in
# the visible file (the script below builds its own `doc`) — confirm whether
# it can be removed.
Document = Doc()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_line(line: str, doc: Doc) -> None:
    """Route one line of text to the matching ``Doc`` setter.

    The line is tested against the heading patterns from the outermost level
    (partie) to the innermost (article); the first pattern that matches
    decides which setter receives the line. A line matching no heading
    pattern is treated as body text.

    Args:
        line: A single line of the extracted text.
        doc: The document accumulator to update in place.
    """
    # Order matters: it reproduces the precedence partie > livre > titre >
    # chapitre > article.
    handlers = (
        (re_partie, doc.set_partie),
        (re_livre, doc.set_livre),
        (re_titre, doc.set_titre),
        (re_chapitre, doc.set_chapitre),
        (re_article, doc.set_article),
    )
    for pattern, setter in handlers:
        if pattern.match(line):
            setter(line)
            return
    doc.set_text(line)
|
||||||
|
|
||||||
|
|
||||||
|
# Parse the extracted text line by line and build the nested document.
doc = Doc()
# Explicit encoding: the source is French text and must not depend on the
# platform's default locale.
with open("./sources/code-travail-2022.txt", encoding="utf-8") as fh:
    for line in fh:
        line = line.rstrip()
        # Blank lines carry no heading and no text.
        if not line:
            continue
        parse_line(line, doc)

# Write the nested mapping as YAML.
# allow_unicode keeps accented characters readable instead of escaping them;
# sort_keys=False keeps headings in document order rather than alphabetical.
with open("./sources/code-travail.yaml", "w", encoding="utf-8") as fh:
    yaml.safe_dump(doc.content, fh, allow_unicode=True, sort_keys=False)
|
BIN
sources/code-travail-2022.pdf
Normal file
BIN
sources/code-travail-2022.pdf
Normal file
Binary file not shown.
81280
sources/code-travail-2022.txt
Normal file
81280
sources/code-travail-2022.txt
Normal file
File diff suppressed because it is too large
Load Diff
102349
sources/code-travail.yaml
Normal file
102349
sources/code-travail.yaml
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user