From 6ad9dcad54aee175cab4cb8c66b7e25404a469c1 Mon Sep 17 00:00:00 2001 From: gwen Date: Wed, 1 Apr 2026 15:32:44 +0200 Subject: [PATCH] dvc --- dvc/dvc-count/.dvcignore | 3 + dvc/dvc-count/.gitignore | 146 ++++++++ dvc/dvc-count/LICENSE | 319 ++++++++++++++++++ dvc/dvc-count/Makefile | 3 + dvc/dvc-count/README.md | 42 +++ dvc/dvc-count/dvc.yaml | 15 + dvc/dvc-count/hello.py | 13 + dvc/dvc-count/test.txt | 11 + dvc/dvc-count/titi.py | 10 + dvc/dvc-params/.dvc/.gitignore | 3 + dvc/dvc-params/.dvc/config | 0 dvc/dvc-params/.dvcignore | 3 + dvc/dvc-params/.gitignore | 1 + dvc/dvc-params/LICENSE | 319 ++++++++++++++++++ dvc/dvc-params/Makefile | 4 + dvc/dvc-params/README.md | 2 + dvc/dvc-params/Readme.txt | 23 ++ dvc/dvc-params/data/.gitignore | 2 + dvc/dvc-params/deep.py | 25 ++ dvc/dvc-params/dvc.lock | 16 + dvc/dvc-params/dvc.yaml | 8 + dvc/dvc-params/params.yaml | 11 + dvc/pipeline_appusers/.dvc/.gitignore | 3 + dvc/pipeline_appusers/.dvc/config | 6 + dvc/pipeline_appusers/.dvcignore | 3 + dvc/pipeline_appusers/data/.empty | 0 dvc/pipeline_appusers/data/.gitignore | 5 + dvc/pipeline_appusers/dvc.yaml | 35 ++ dvc/pipeline_appusers/index.txt | 98 ++++++ .../src/appinfra/__init__.py | 0 dvc/pipeline_appusers/src/appinfra/config.py | 5 + .../src/appinfra/load_json_file.py | 24 ++ .../src/appinfra/load_json_file_test.py | 21 ++ .../src/appinfra/password.py | 37 ++ dvc/pipeline_appusers/src/appinfra/user.py | 22 ++ dvc/pipeline_appusers/src/clean_csvdata.py | 38 +++ dvc/pipeline_appusers/src/datatolist.py | 28 ++ .../src/make_json_with_password.py | 33 ++ dvc/pipeline_appusers/src/prepare.py | 35 ++ dvc/pipeline_appusers/src/requirements.txt | 3 + dvc/pipeline_appusers/src/visualize_data.py | 28 ++ 41 files changed, 1403 insertions(+) create mode 100755 dvc/dvc-count/.dvcignore create mode 100644 dvc/dvc-count/.gitignore create mode 100644 dvc/dvc-count/LICENSE create mode 100644 dvc/dvc-count/Makefile create mode 100644 dvc/dvc-count/README.md create mode 100755 
dvc/dvc-count/dvc.yaml create mode 100755 dvc/dvc-count/hello.py create mode 100755 dvc/dvc-count/test.txt create mode 100755 dvc/dvc-count/titi.py create mode 100755 dvc/dvc-params/.dvc/.gitignore create mode 100755 dvc/dvc-params/.dvc/config create mode 100755 dvc/dvc-params/.dvcignore create mode 100755 dvc/dvc-params/.gitignore create mode 100644 dvc/dvc-params/LICENSE create mode 100644 dvc/dvc-params/Makefile create mode 100644 dvc/dvc-params/README.md create mode 100755 dvc/dvc-params/Readme.txt create mode 100755 dvc/dvc-params/data/.gitignore create mode 100755 dvc/dvc-params/deep.py create mode 100644 dvc/dvc-params/dvc.lock create mode 100755 dvc/dvc-params/dvc.yaml create mode 100755 dvc/dvc-params/params.yaml create mode 100755 dvc/pipeline_appusers/.dvc/.gitignore create mode 100755 dvc/pipeline_appusers/.dvc/config create mode 100755 dvc/pipeline_appusers/.dvcignore create mode 100755 dvc/pipeline_appusers/data/.empty create mode 100755 dvc/pipeline_appusers/data/.gitignore create mode 100755 dvc/pipeline_appusers/dvc.yaml create mode 100755 dvc/pipeline_appusers/index.txt create mode 100755 dvc/pipeline_appusers/src/appinfra/__init__.py create mode 100755 dvc/pipeline_appusers/src/appinfra/config.py create mode 100755 dvc/pipeline_appusers/src/appinfra/load_json_file.py create mode 100755 dvc/pipeline_appusers/src/appinfra/load_json_file_test.py create mode 100755 dvc/pipeline_appusers/src/appinfra/password.py create mode 100755 dvc/pipeline_appusers/src/appinfra/user.py create mode 100755 dvc/pipeline_appusers/src/clean_csvdata.py create mode 100755 dvc/pipeline_appusers/src/datatolist.py create mode 100755 dvc/pipeline_appusers/src/make_json_with_password.py create mode 100755 dvc/pipeline_appusers/src/prepare.py create mode 100755 dvc/pipeline_appusers/src/requirements.txt create mode 100755 dvc/pipeline_appusers/src/visualize_data.py diff --git a/dvc/dvc-count/.dvcignore b/dvc/dvc-count/.dvcignore new file mode 100755 index 0000000..5197305 --- 
/dev/null +++ b/dvc/dvc-count/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/dvc/dvc-count/.gitignore b/dvc/dvc-count/.gitignore new file mode 100644 index 0000000..5d879b6 --- /dev/null +++ b/dvc/dvc-count/.gitignore @@ -0,0 +1,143 @@ +# ---> Python +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +/lines +/toto +/calcul diff --git a/dvc/dvc-count/LICENSE b/dvc/dvc-count/LICENSE new file mode 100644 index 0000000..1d80ac3 --- /dev/null +++ b/dvc/dvc-count/LICENSE @@ -0,0 +1,319 @@ +GNU GENERAL PUBLIC LICENSE + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. + +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share +and change it. By contrast, the GNU General Public License is intended to +guarantee your freedom to share and change free software--to make sure the +software is free for all its users. This General Public License applies to +most of the Free Software Foundation's software and to any other program whose +authors commit to using it. (Some other Free Software Foundation software +is covered by the GNU Lesser General Public License instead.) You can apply +it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. 
Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for this service if you +wish), that you receive source code or can get it if you want it, that you +can change the software or use pieces of it in new free programs; and that +you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to +deny you these rights or to ask you to surrender the rights. These restrictions +translate to certain responsibilities for you if you distribute copies of +the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or +for a fee, you must give the recipients all the rights that you have. You +must make sure that they, too, receive or can get the source code. And you +must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If +the software is modified by someone else and passed on, we want its recipients +to know that what they have is not the original, so that any problems introduced +by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will individually +obtain patent licenses, in effect making the program proprietary. To prevent +this, we have made it clear that any patent must be licensed for everyone's +free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. 
This License applies to any program or other work which contains a notice +placed by the copyright holder saying it may be distributed under the terms +of this General Public License. The "Program", below, refers to any such program +or work, and a "work based on the Program" means either the Program or any +derivative work under copyright law: that is to say, a work containing the +Program or a portion of it, either verbatim or with modifications and/or translated +into another language. (Hereinafter, translation is included without limitation +in the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not covered +by this License; they are outside its scope. The act of running the Program +is not restricted, and the output from the Program is covered only if its +contents constitute a work based on the Program (independent of having been +made by running the Program). Whether that is true depends on what the Program +does. + +1. You may copy and distribute verbatim copies of the Program's source code +as you receive it, in any medium, provided that you conspicuously and appropriately +publish on each copy an appropriate copyright notice and disclaimer of warranty; +keep intact all the notices that refer to this License and to the absence +of any warranty; and give any other recipients of the Program a copy of this +License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you +may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, +thus forming a work based on the Program, and copy and distribute such modifications +or work under the terms of Section 1 above, provided that you also meet all +of these conditions: + +a) You must cause the modified files to carry prominent notices stating that +you changed the files and the date of any change. 
+ +b) You must cause any work that you distribute or publish, that in whole or +in part contains or is derived from the Program or any part thereof, to be +licensed as a whole at no charge to all third parties under the terms of this +License. + +c) If the modified program normally reads commands interactively when run, +you must cause it, when started running for such interactive use in the most +ordinary way, to print or display an announcement including an appropriate +copyright notice and a notice that there is no warranty (or else, saying that +you provide a warranty) and that users may redistribute the program under +these conditions, and telling the user how to view a copy of this License. +(Exception: if the Program itself is interactive but does not normally print +such an announcement, your work based on the Program is not required to print +an announcement.) + +These requirements apply to the modified work as a whole. If identifiable +sections of that work are not derived from the Program, and can be reasonably +considered independent and separate works in themselves, then this License, +and its terms, do not apply to those sections when you distribute them as +separate works. But when you distribute the same sections as part of a whole +which is a work based on the Program, the distribution of the whole must be +on the terms of this License, whose permissions for other licensees extend +to the entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest your +rights to work written entirely by you; rather, the intent is to exercise +the right to control the distribution of derivative or collective works based +on the Program. + +In addition, mere aggregation of another work not based on the Program with +the Program (or with a work based on the Program) on a volume of a storage +or distribution medium does not bring the other work under the scope of this +License. 
+ +3. You may copy and distribute the Program (or a work based on it, under Section +2) in object code or executable form under the terms of Sections 1 and 2 above +provided that you also do one of the following: + +a) Accompany it with the complete corresponding machine-readable source code, +which must be distributed under the terms of Sections 1 and 2 above on a medium +customarily used for software interchange; or, + +b) Accompany it with a written offer, valid for at least three years, to give +any third party, for a charge no more than your cost of physically performing +source distribution, a complete machine-readable copy of the corresponding +source code, to be distributed under the terms of Sections 1 and 2 above on +a medium customarily used for software interchange; or, + +c) Accompany it with the information you received as to the offer to distribute +corresponding source code. (This alternative is allowed only for noncommercial +distribution and only if you received the program in object code or executable +form with such an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for making +modifications to it. For an executable work, complete source code means all +the source code for all modules it contains, plus any associated interface +definition files, plus the scripts used to control compilation and installation +of the executable. However, as a special exception, the source code distributed +need not include anything that is normally distributed (in either source or +binary form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component itself +accompanies the executable. 
+ +If distribution of executable or object code is made by offering access to +copy from a designated place, then offering equivalent access to copy the +source code from the same place counts as distribution of the source code, +even though third parties are not compelled to copy the source along with +the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except +as expressly provided under this License. Any attempt otherwise to copy, modify, +sublicense or distribute the Program is void, and will automatically terminate +your rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses terminated +so long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed +it. However, nothing else grants you permission to modify or distribute the +Program or its derivative works. These actions are prohibited by law if you +do not accept this License. Therefore, by modifying or distributing the Program +(or any work based on the Program), you indicate your acceptance of this License +to do so, and all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), +the recipient automatically receives a license from the original licensor +to copy, distribute or modify the Program subject to these terms and conditions. +You may not impose any further restrictions on the recipients' exercise of +the rights granted herein. You are not responsible for enforcing compliance +by third parties to this License. + +7. 
If, as a consequence of a court judgment or allegation of patent infringement +or for any other reason (not limited to patent issues), conditions are imposed +on you (whether by court order, agreement or otherwise) that contradict the +conditions of this License, they do not excuse you from the conditions of +this License. If you cannot distribute so as to satisfy simultaneously your +obligations under this License and any other pertinent obligations, then as +a consequence you may not distribute the Program at all. For example, if a +patent license would not permit royalty-free redistribution of the Program +by all those who receive copies directly or indirectly through you, then the +only way you could satisfy both it and this License would be to refrain entirely +from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply and +the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents +or other property right claims or to contest validity of any such claims; +this section has the sole purpose of protecting the integrity of the free +software distribution system, which is implemented by public license practices. +Many people have made generous contributions to the wide range of software +distributed through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing to +distribute software through any other system and a licensee cannot impose +that choice. + +This section is intended to make thoroughly clear what is believed to be a +consequence of the rest of this License. + +8. 
If the distribution and/or use of the Program is restricted in certain +countries either by patents or by copyrighted interfaces, the original copyright +holder who places the Program under this License may add an explicit geographical +distribution limitation excluding those countries, so that distribution is +permitted only in or among countries not thus excluded. In such case, this +License incorporates the limitation as if written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of +the General Public License from time to time. Such new versions will be similar +in spirit to the present version, but may differ in detail to address new +problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies +a version number of this License which applies to it and "any later version", +you have the option of following the terms and conditions either of that version +or of any later version published by the Free Software Foundation. If the +Program does not specify a version number of this License, you may choose +any version ever published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs +whose distribution conditions are different, write to the author to ask for +permission. For software which is copyrighted by the Free Software Foundation, +write to the Free Software Foundation; we sometimes make exceptions for this. +Our decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing and reuse +of software generally. + + NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR +THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE +STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM +"AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE +OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE +OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA +OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES +OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH +HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible +use to the public, the best way to achieve this is to make it free software +which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach +them to the start of each source file to most effectively convey the exclusion +of warranty; and each file should have at least the "copyright" line and a +pointer to where the full notice is found. + + + +Copyright (C) + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 51 Franklin +Street, Fifth Floor, Boston, MA 02110-1301, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when +it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes +with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, +and you are welcome to redistribute it under certain conditions; type `show +c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than `show w' and `show c'; they could even be mouse-clicks +or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, +if any, to sign a "copyright disclaimer" for the program, if necessary. Here +is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' +(which makes passes at compilers) written by James Hacker. + +, 1 April 1989 Ty Coon, President of Vice This General +Public License does not permit incorporating your program into proprietary +programs. If your program is a subroutine library, you may consider it more +useful to permit linking proprietary applications with the library. If this +is what you want to do, use the GNU Lesser General Public License instead +of this License. 
diff --git a/dvc/dvc-count/Makefile b/dvc/dvc-count/Makefile
new file mode 100644
index 0000000..c645e2e
--- /dev/null
+++ b/dvc/dvc-count/Makefile
@@ -0,0 +1,7 @@
+# `clean` names an action, not a file: declare it phony so the recipe
+# still runs if a file called `clean` ever exists in this directory.
+.PHONY: clean
+# Remove the DVC cache, the pipeline outputs and the lock file.
+clean:
+	rm -rf .dvc/cache/
+	rm -f lines calcul dvc.lock
diff --git a/dvc/dvc-count/README.md b/dvc/dvc-count/README.md
new file mode 100644
index 0000000..407fa29
--- /dev/null
+++ b/dvc/dvc-count/README.md
@@ -0,0 +1,42 @@
+# dvc-count
+
+premier exemple dvc
+
+dvc stage list
+count Outputs lines
+titi Outputs calcul
+dvc-count (master *=) $ d
+
+dvc-count (master *%=) $ dvc dag
++-------+
+| count |
++-------+
+ *
+ *
+ *
++------+
+| titi |
++------+
+
+dvc repro
+Running stage 'count':
+> python hello.py
+écriture du fichier lines
+Generating lock file 'dvc.lock'
+Updating lock file 'dvc.lock'
+
+Running stage 'titi':
+> python titi.py
+écriture du fichier `calcul`
+Updating lock file 'dvc.lock'
+
+To track the changes with git, run:
+
+ git add dvc.lock
+
+To enable auto staging, run:
+
+ dvc config core.autostage true
+Use `dvc push` to send your updates to remote storage.
+dvc-count (master *%=) $
+
diff --git a/dvc/dvc-count/dvc.yaml b/dvc/dvc-count/dvc.yaml
new file mode 100755
index 0000000..415fe2a
--- /dev/null
+++ b/dvc/dvc-count/dvc.yaml
@@ -0,0 +1,15 @@
+stages:
+  count:
+    cmd: python hello.py
+    deps:
+    - test.txt
+    - hello.py
+    outs:
+    - lines
+  titi:
+    cmd: python titi.py
+    deps:
+    - lines
+    - titi.py
+    outs:
+    - calcul
diff --git a/dvc/dvc-count/hello.py b/dvc/dvc-count/hello.py
new file mode 100755
index 0000000..ecd2ed1
--- /dev/null
+++ b/dvc/dvc-count/hello.py
@@ -0,0 +1,12 @@
+"""Count the lines of `test.txt` and store the count in the file `lines`.
+
+Runs as the `count` stage of the DVC pipeline declared in dvc.yaml.
+"""
+with open('test.txt', 'r') as fh:
+    lines = len(fh.readlines())
+
+# Written without a trailing newline: the file holds only the decimal count.
+with open('lines', 'w') as fh:
+    fh.write(str(lines))
+
+print("écriture du fichier lines")
diff --git a/dvc/dvc-count/test.txt b/dvc/dvc-count/test.txt
new file mode 100755
index 0000000..81a6afa
--- /dev/null
+++ b/dvc/dvc-count/test.txt
@@ -0,0 +1,11 @@
+sdfsdsdf
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
+sfsdfsdfsdsdfsdfsd
diff --git a/dvc/dvc-count/titi.py b/dvc/dvc-count/titi.py
new file mode 100755
index 0000000..4858744
--- /dev/null
+++ b/dvc/dvc-count/titi.py
@@ -0,0 +1,12 @@
+"""Add 145 to the line count stored in `lines` and write the sum to `calcul`.
+
+Runs as the `titi` stage of the DVC pipeline declared in dvc.yaml.
+"""
+# `lines` holds the decimal count written by hello.py; parse that value
+# rather than counting the lines of the file, which is always a single line.
+with open('lines', 'r') as fh:
+    lines = int(fh.read().strip())
+
+with open('calcul', 'w') as fh:
+    fh.write(str(lines + 145))
+print("écriture du fichier `calcul`")
diff --git a/dvc/dvc-params/.dvc/.gitignore b/dvc/dvc-params/.dvc/.gitignore
new file mode 100755
index 0000000..528f30c
--- /dev/null
+++ b/dvc/dvc-params/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/dvc/dvc-params/.dvc/config b/dvc/dvc-params/.dvc/config
new file mode 100755
index 0000000..e69de29
diff --git a/dvc/dvc-params/.dvcignore b/dvc/dvc-params/.dvcignore
new file mode 100755
index 0000000..5197305
--- /dev/null
+++ b/dvc/dvc-params/.dvcignore
@@ -0,0 +1,3 @@
+# 
Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/dvc/dvc-params/.gitignore b/dvc/dvc-params/.gitignore new file mode 100755 index 0000000..d01a780 --- /dev/null +++ b/dvc/dvc-params/.gitignore @@ -0,0 +1 @@ +/output.txt diff --git a/dvc/dvc-params/LICENSE b/dvc/dvc-params/LICENSE new file mode 100644 index 0000000..1d80ac3 --- /dev/null +++ b/dvc/dvc-params/LICENSE @@ -0,0 +1,319 @@ +GNU GENERAL PUBLIC LICENSE + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. + +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share +and change it. By contrast, the GNU General Public License is intended to +guarantee your freedom to share and change free software--to make sure the +software is free for all its users. This General Public License applies to +most of the Free Software Foundation's software and to any other program whose +authors commit to using it. (Some other Free Software Foundation software +is covered by the GNU Lesser General Public License instead.) You can apply +it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for this service if you +wish), that you receive source code or can get it if you want it, that you +can change the software or use pieces of it in new free programs; and that +you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to +deny you these rights or to ask you to surrender the rights. 
These restrictions +translate to certain responsibilities for you if you distribute copies of +the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or +for a fee, you must give the recipients all the rights that you have. You +must make sure that they, too, receive or can get the source code. And you +must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If +the software is modified by someone else and passed on, we want its recipients +to know that what they have is not the original, so that any problems introduced +by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will individually +obtain patent licenses, in effect making the program proprietary. To prevent +this, we have made it clear that any patent must be licensed for everyone's +free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice +placed by the copyright holder saying it may be distributed under the terms +of this General Public License. The "Program", below, refers to any such program +or work, and a "work based on the Program" means either the Program or any +derivative work under copyright law: that is to say, a work containing the +Program or a portion of it, either verbatim or with modifications and/or translated +into another language. 
(Hereinafter, translation is included without limitation +in the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not covered +by this License; they are outside its scope. The act of running the Program +is not restricted, and the output from the Program is covered only if its +contents constitute a work based on the Program (independent of having been +made by running the Program). Whether that is true depends on what the Program +does. + +1. You may copy and distribute verbatim copies of the Program's source code +as you receive it, in any medium, provided that you conspicuously and appropriately +publish on each copy an appropriate copyright notice and disclaimer of warranty; +keep intact all the notices that refer to this License and to the absence +of any warranty; and give any other recipients of the Program a copy of this +License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you +may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, +thus forming a work based on the Program, and copy and distribute such modifications +or work under the terms of Section 1 above, provided that you also meet all +of these conditions: + +a) You must cause the modified files to carry prominent notices stating that +you changed the files and the date of any change. + +b) You must cause any work that you distribute or publish, that in whole or +in part contains or is derived from the Program or any part thereof, to be +licensed as a whole at no charge to all third parties under the terms of this +License. 
+ +c) If the modified program normally reads commands interactively when run, +you must cause it, when started running for such interactive use in the most +ordinary way, to print or display an announcement including an appropriate +copyright notice and a notice that there is no warranty (or else, saying that +you provide a warranty) and that users may redistribute the program under +these conditions, and telling the user how to view a copy of this License. +(Exception: if the Program itself is interactive but does not normally print +such an announcement, your work based on the Program is not required to print +an announcement.) + +These requirements apply to the modified work as a whole. If identifiable +sections of that work are not derived from the Program, and can be reasonably +considered independent and separate works in themselves, then this License, +and its terms, do not apply to those sections when you distribute them as +separate works. But when you distribute the same sections as part of a whole +which is a work based on the Program, the distribution of the whole must be +on the terms of this License, whose permissions for other licensees extend +to the entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest your +rights to work written entirely by you; rather, the intent is to exercise +the right to control the distribution of derivative or collective works based +on the Program. + +In addition, mere aggregation of another work not based on the Program with +the Program (or with a work based on the Program) on a volume of a storage +or distribution medium does not bring the other work under the scope of this +License. + +3. 
You may copy and distribute the Program (or a work based on it, under Section +2) in object code or executable form under the terms of Sections 1 and 2 above +provided that you also do one of the following: + +a) Accompany it with the complete corresponding machine-readable source code, +which must be distributed under the terms of Sections 1 and 2 above on a medium +customarily used for software interchange; or, + +b) Accompany it with a written offer, valid for at least three years, to give +any third party, for a charge no more than your cost of physically performing +source distribution, a complete machine-readable copy of the corresponding +source code, to be distributed under the terms of Sections 1 and 2 above on +a medium customarily used for software interchange; or, + +c) Accompany it with the information you received as to the offer to distribute +corresponding source code. (This alternative is allowed only for noncommercial +distribution and only if you received the program in object code or executable +form with such an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for making +modifications to it. For an executable work, complete source code means all +the source code for all modules it contains, plus any associated interface +definition files, plus the scripts used to control compilation and installation +of the executable. However, as a special exception, the source code distributed +need not include anything that is normally distributed (in either source or +binary form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component itself +accompanies the executable. 
+ +If distribution of executable or object code is made by offering access to +copy from a designated place, then offering equivalent access to copy the +source code from the same place counts as distribution of the source code, +even though third parties are not compelled to copy the source along with +the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except +as expressly provided under this License. Any attempt otherwise to copy, modify, +sublicense or distribute the Program is void, and will automatically terminate +your rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses terminated +so long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed +it. However, nothing else grants you permission to modify or distribute the +Program or its derivative works. These actions are prohibited by law if you +do not accept this License. Therefore, by modifying or distributing the Program +(or any work based on the Program), you indicate your acceptance of this License +to do so, and all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), +the recipient automatically receives a license from the original licensor +to copy, distribute or modify the Program subject to these terms and conditions. +You may not impose any further restrictions on the recipients' exercise of +the rights granted herein. You are not responsible for enforcing compliance +by third parties to this License. + +7. 
If, as a consequence of a court judgment or allegation of patent infringement +or for any other reason (not limited to patent issues), conditions are imposed +on you (whether by court order, agreement or otherwise) that contradict the +conditions of this License, they do not excuse you from the conditions of +this License. If you cannot distribute so as to satisfy simultaneously your +obligations under this License and any other pertinent obligations, then as +a consequence you may not distribute the Program at all. For example, if a +patent license would not permit royalty-free redistribution of the Program +by all those who receive copies directly or indirectly through you, then the +only way you could satisfy both it and this License would be to refrain entirely +from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply and +the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents +or other property right claims or to contest validity of any such claims; +this section has the sole purpose of protecting the integrity of the free +software distribution system, which is implemented by public license practices. +Many people have made generous contributions to the wide range of software +distributed through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing to +distribute software through any other system and a licensee cannot impose +that choice. + +This section is intended to make thoroughly clear what is believed to be a +consequence of the rest of this License. + +8. 
If the distribution and/or use of the Program is restricted in certain +countries either by patents or by copyrighted interfaces, the original copyright +holder who places the Program under this License may add an explicit geographical +distribution limitation excluding those countries, so that distribution is +permitted only in or among countries not thus excluded. In such case, this +License incorporates the limitation as if written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of +the General Public License from time to time. Such new versions will be similar +in spirit to the present version, but may differ in detail to address new +problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies +a version number of this License which applies to it and "any later version", +you have the option of following the terms and conditions either of that version +or of any later version published by the Free Software Foundation. If the +Program does not specify a version number of this License, you may choose +any version ever published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs +whose distribution conditions are different, write to the author to ask for +permission. For software which is copyrighted by the Free Software Foundation, +write to the Free Software Foundation; we sometimes make exceptions for this. +Our decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing and reuse +of software generally. + + NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR +THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE +STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM +"AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE +OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE +OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA +OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES +OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH +HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible +use to the public, the best way to achieve this is to make it free software +which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach +them to the start of each source file to most effectively convey the exclusion +of warranty; and each file should have at least the "copyright" line and a +pointer to where the full notice is found. + +<one line to give the program's name and a brief idea of what it does.> + +Copyright (C) <year> <name of author> + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version.
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 51 Franklin +Street, Fifth Floor, Boston, MA 02110-1301, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when +it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes +with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, +and you are welcome to redistribute it under certain conditions; type `show +c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than `show w' and `show c'; they could even be mouse-clicks +or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, +if any, to sign a "copyright disclaimer" for the program, if necessary. Here +is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' +(which makes passes at compilers) written by James Hacker. + +<signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General +Public License does not permit incorporating your program into proprietary +programs. If your program is a subroutine library, you may consider it more +useful to permit linking proprietary applications with the library. If this +is what you want to do, use the GNU Lesser General Public License instead +of this License.
diff --git a/dvc/dvc-params/Makefile b/dvc/dvc-params/Makefile new file mode 100644 index 0000000..9833752 --- /dev/null +++ b/dvc/dvc-params/Makefile @@ -0,0 +1,4 @@ +clean: + rm -rf .dvc/cache + rm dvc.lock + rm data/output.txt diff --git a/dvc/dvc-params/README.md b/dvc/dvc-params/README.md new file mode 100644 index 0000000..71e151e --- /dev/null +++ b/dvc/dvc-params/README.md @@ -0,0 +1,2 @@ +# dvc-params + diff --git a/dvc/dvc-params/Readme.txt b/dvc/dvc-params/Readme.txt new file mode 100755 index 0000000..616038b --- /dev/null +++ b/dvc/dvc-params/Readme.txt @@ -0,0 +1,23 @@ +Testeur de paramètres +====================== + +**prérequis** : git et dvc installés + +lancer la commande : dvc repro + + +dvc repro +Running stage 'learn': +> python deep.py +Generating lock file 'dvc.lock' +Updating lock file 'dvc.lock' + +To track the changes with git, run: + + git add dvc.lock + +To enable auto staging, run: + + dvc config core.autostage true +Use `dvc push` to send your updates to remote storage. + diff --git a/dvc/dvc-params/data/.gitignore b/dvc/dvc-params/data/.gitignore new file mode 100755 index 0000000..fbcbbf4 --- /dev/null +++ b/dvc/dvc-params/data/.gitignore @@ -0,0 +1,2 @@ +/output.txt +/prepared diff --git a/dvc/dvc-params/deep.py b/dvc/dvc-params/deep.py new file mode 100755 index 0000000..6f80e95 --- /dev/null +++ b/dvc/dvc-params/deep.py @@ -0,0 +1,25 @@ +"""This script shows the retrieved parameters +""" + +import yaml +import pprint + +with open("params.yaml", 'r') as fd: + params = yaml.safe_load(fd) + +epoch = params['train']['epoch'] +learning_rate = params['train']['learning_rate'] +batch_size = params['train']['batch_size'] +threshold = params['process']['threshold'] + +# ...
do some deep learning stuff with these parameters +# here we just write the parameters to an output file, that's all + +params = {"epoch": epoch, "learning_rate": learning_rate, "batch_size": batch_size, "threshold": threshold} + +with open('data/output.txt', 'w') as fhandle: + fhandle.write("parameters arguments of the script are: \n") + + ppp = pprint.PrettyPrinter(indent=4, stream=fhandle) + ppp.pprint(params) + diff --git a/dvc/dvc-params/dvc.lock b/dvc/dvc-params/dvc.lock new file mode 100644 index 0000000..a8afbe8 --- /dev/null +++ b/dvc/dvc-params/dvc.lock @@ -0,0 +1,16 @@ +schema: '2.0' +stages: + learn: + cmd: python deep.py + params: + params.yaml: + process.threshold: 0.98 + train: + epoch: 1650263152.981932 + learning_rate: 0.00054 + batch_size: 3.2 + outs: + - path: data/output.txt + hash: md5 + md5: cd667337e13e9ee240d1102e04adc9fc + size: 149 diff --git a/dvc/dvc-params/dvc.yaml b/dvc/dvc-params/dvc.yaml new file mode 100755 index 0000000..8ca7ff0 --- /dev/null +++ b/dvc/dvc-params/dvc.yaml @@ -0,0 +1,8 @@ +stages: + learn: + cmd: python deep.py + params: + - process.threshold + - train + outs: + - data/output.txt diff --git a/dvc/dvc-params/params.yaml b/dvc/dvc-params/params.yaml new file mode 100755 index 0000000..0ea32f6 --- /dev/null +++ b/dvc/dvc-params/params.yaml @@ -0,0 +1,11 @@ +lr: 0.0041 + +train: + epoch: 1650263152.981932 + learning_rate: 0.00054 + batch_size: 3.2 + +process: + threshold: 0.98 + bow: 15000 + layers: 9 diff --git a/dvc/pipeline_appusers/.dvc/.gitignore b/dvc/pipeline_appusers/.dvc/.gitignore new file mode 100755 index 0000000..528f30c --- /dev/null +++ b/dvc/pipeline_appusers/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/dvc/pipeline_appusers/.dvc/config b/dvc/pipeline_appusers/.dvc/config new file mode 100755 index 0000000..eb59774 --- /dev/null +++ b/dvc/pipeline_appusers/.dvc/config @@ -0,0 +1,6 @@ +[core] + remote = storage +['remote "storage"'] + url = ../../storage/user +['remote
"webstorage"'] + url = http://localhost:8000 diff --git a/dvc/pipeline_appusers/.dvcignore b/dvc/pipeline_appusers/.dvcignore new file mode 100755 index 0000000..5197305 --- /dev/null +++ b/dvc/pipeline_appusers/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/dvc/pipeline_appusers/data/.empty b/dvc/pipeline_appusers/data/.empty new file mode 100755 index 0000000..e69de29 diff --git a/dvc/pipeline_appusers/data/.gitignore b/dvc/pipeline_appusers/data/.gitignore new file mode 100755 index 0000000..2f5e284 --- /dev/null +++ b/dvc/pipeline_appusers/data/.gitignore @@ -0,0 +1,5 @@ +/original_data.csv +/prepared_data.csv +/data.py +/userdata.json +/visualize.txt diff --git a/dvc/pipeline_appusers/dvc.yaml b/dvc/pipeline_appusers/dvc.yaml new file mode 100755 index 0000000..1ec187e --- /dev/null +++ b/dvc/pipeline_appusers/dvc.yaml @@ -0,0 +1,35 @@ +stages: + prepare: + cmd: python src/prepare.py + deps: + - src/prepare.py + outs: + - data/original_data.csv + cleancsv: + cmd: python src/clean_csvdata.py + deps: + - data/original_data.csv + - src/clean_csvdata.py + outs: + - data/prepared_data.csv + datatolist: + cmd: python src/datatolist.py + deps: + - data/prepared_data.csv + - src/datatolist.py + outs: + - data/data.py + jsonfile: + cmd: python src/make_json_with_password.py + deps: + - data/prepared_data.csv + - src/make_json_with_password.py + outs: + - data/userdata.json + visualize: + cmd: python src/visualize_data.py + deps: + - data/prepared_data.csv + - src/visualize_data.py + outs: + - data/visualize.txt diff --git a/dvc/pipeline_appusers/index.txt b/dvc/pipeline_appusers/index.txt new file mode 100755 index 0000000..7c9050b --- /dev/null +++ b/dvc/pipeline_appusers/index.txt @@ -0,0 +1,98 @@ +.. 
meta:: + :description: pipeline ML + :keywords: faker, fake users, pipeline, data version control + +Pipeline de création des utilisateurs d'une application +========================================================== + +Générer des données utilisateur +-------------------------------- + +Nous allons générer aléatoirement ces données utilisateurs avec la librairie +python `faker`. + +- Installer d'abord `faker` + +:: + + pip install -r src/requirements.txt + + +.. important:: Installez `faker <https://faker.readthedocs.io/en/master/>`_ + + +- Lancer le script : + +:: + + python src/prepare.py + + +.. note:: Le fichier `csv` généré est : `data/original_data.csv` + + +stage dvc:: + + dvc stage add -n prepare -d src/prepare.py -o data/original_data.csv python src/prepare.py + +Nettoyer les données +---------------------- + +- Les données, brutes, ont souvent besoin d'être retravaillées. +- exemple : faire un tri, ou bien enlever un champ ou une colonne. + Ou tout exemple de manipulations comme si elles étaient faites dans un tableur. + + +commande dvc:: + + dvc stage add -n cleancsv -d src/clean_csvdata.py -d data/original_data.csv -o data/prepared_data.csv python src/clean_csvdata.py + +Visualisation des données générées +------------------------------------ + +.. admonition:: Prérequis : installer pandas + + Utilisez la librairie `pandas <https://pandas.pydata.org/>`_ plutôt qu'un simple tableur. + La librairie pandas doit être installée au préalable + +.. rubric:: Vous devez obtenir un résultat de ce type : + +.. code-block:: shell + + name last name mail company phone + 0 Frédéric Launay Marty zoeboutin@faure.net Lebrun Gerard S.A.R.L.
+33 (0)1 16 92 23 53 + 1 Lorraine Boyer Lemaire rodriguesanouk@morin.com Ruiz 0172426520 + 2 Frédérique Le Royer Lebrun nguyennoel@tiscali.fr Carre SA +33 4 29 92 99 00 + 3 Thierry Gomez Fischer alain91@gomez.org Wagner 02 83 77 23 36 + 4 Thibault-Charles Lecoq Leconte guilbertdenis@tele2.fr Labbe Leger et Fils +33 (0)3 19 61 82 07 + +commande dvc:: + + dvc stage add -n visualize -d src/visualize_data.py -d data/prepared_data.csv -o data/visualize.txt python src/visualize_data.py + + +Exporter les données en liste python +--------------------------------------- + +commande dvc:: + + dvc stage add -n datatolist -d src/datatolist.py -d data/prepared_data.csv -o data/data.py python src/datatolist.py + +Générer le json +---------------- + +- création du json +- ajout d'un mot de passe + +commande dvc:: + + dvc stage add -n jsonfile -d src/make_json_with_password.py -d data/prepared_data.csv -o data/userdata.json python src/make_json_with_password.py + + +Manipulations dvc +----------------------- + +:: + + dvc status + Data and pipelines are up to date.
diff --git a/dvc/pipeline_appusers/src/appinfra/__init__.py b/dvc/pipeline_appusers/src/appinfra/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/dvc/pipeline_appusers/src/appinfra/config.py b/dvc/pipeline_appusers/src/appinfra/config.py new file mode 100755 index 0000000..305c848 --- /dev/null +++ b/dvc/pipeline_appusers/src/appinfra/config.py @@ -0,0 +1,5 @@ +from pathlib import Path + +rootdirectory = Path(Path.cwd()) +csvfilename = rootdirectory / 'data' / "prepared_data.csv" +jsonfilename = rootdirectory / 'data' / 'userdata.json' diff --git a/dvc/pipeline_appusers/src/appinfra/load_json_file.py b/dvc/pipeline_appusers/src/appinfra/load_json_file.py new file mode 100755 index 0000000..093ea28 --- /dev/null +++ b/dvc/pipeline_appusers/src/appinfra/load_json_file.py @@ -0,0 +1,24 @@ +""" +factory that builds the user object space +from json data file +""" + +import csv +from json import load + +from appinfra.user import OriginalUser + +def populate_from_json(jsonfilename): + with open(jsonfilename, 'r') as fhandle: + for user in load(fhandle): + yield OriginalUser(user['first_name'], user['last_name'], user['mail'], + password = user['password']) + +def get_users(jsonfilename): + return list(populate_from_json(jsonfilename)) + +__all__ = ['get_users'] + +#from config import jsonfilename +#print([vars(user) for user in list(populate_from_json(jsonfilename))]) + diff --git a/dvc/pipeline_appusers/src/appinfra/load_json_file_test.py b/dvc/pipeline_appusers/src/appinfra/load_json_file_test.py new file mode 100755 index 0000000..687114b --- /dev/null +++ b/dvc/pipeline_appusers/src/appinfra/load_json_file_test.py @@ -0,0 +1,21 @@ +""" +factory that builds the user object space +from json data file +""" + +import csv + +from appinfra.user import OriginalUser + +user = dict(first_name="toto", last_name="dupond", mail="toto.dupond@free.fr", password="mon_beau_password") + +def populate(): + return OriginalUser(user['first_name'], user['last_name'], 
user['mail'], + password = user['password']) + +def get_users(): + return [populate()] + +__all__ = ['get_users'] + +#print(get_users()) diff --git a/dvc/pipeline_appusers/src/appinfra/password.py b/dvc/pipeline_appusers/src/appinfra/password.py new file mode 100755 index 0000000..ac75e80 --- /dev/null +++ b/dvc/pipeline_appusers/src/appinfra/password.py @@ -0,0 +1,37 @@ +""" +simple password provider + +>>> from password import password +>>> password() +'XERHSiGuIaSjxjaEzJgQ%;14' +>>> +""" + +import random +from faker import Faker +from faker.providers import BaseProvider + +fake = Faker(['fr_FR']) + +class PasswordProvider(BaseProvider): + def password(self): + randint1 = random.randint(0, 10) + randint2 = random.randint(0, 10) + randint = randint1 + randint2 + + randchars = ['~', '%', '&', ';'] + randchar1 = random.choice(randchars) + randchar2 = random.choice(randchars) + randchar = randchar1 + randchar2 + + randstr = fake.pystr() + + return randstr + randchar + str(randint) + +fake.add_provider(PasswordProvider) + +__all__ = ['password'] + +password = fake.password + + diff --git a/dvc/pipeline_appusers/src/appinfra/user.py b/dvc/pipeline_appusers/src/appinfra/user.py new file mode 100755 index 0000000..c5c38a1 --- /dev/null +++ b/dvc/pipeline_appusers/src/appinfra/user.py @@ -0,0 +1,22 @@ +class OriginalUser: + + def __init__(self, first_name, last_name, mail, password=None): + self.first_name = first_name + self.last_name = last_name + self.mail = mail + self.password = password + + def pydata(self): + return vars(self) + + @property + def login_name(self): + return self.first_name.lower() + + @property + def user_name(self): + return self.login_name + + @property + def full_name(self): + return self.first_name + " " + self.last_name diff --git a/dvc/pipeline_appusers/src/clean_csvdata.py b/dvc/pipeline_appusers/src/clean_csvdata.py new file mode 100755 index 0000000..d352e21 --- /dev/null +++ b/dvc/pipeline_appusers/src/clean_csvdata.py @@ -0,0 +1,38 @@ 
+""" +**original csv** : + name last name mail company phone +0 Frédéric Launay Marty zoeboutin@faure.net Lebrun Gerard S.A.R.L. +33 (0)1 16 92 23 53 +1 Lorraine Boyer Lemaire rodriguesanouk@morin.com Ruiz 0172426520 +2 Frédérique Le Royer Lebrun nguyennoel@tiscali.fr Carre SA +33 4 29 92 99 00 +3 Thierry Gomez Fischer alain91@gomez.org Wagner 02 83 77 23 36 +4 Thibault-Charles Lecoq Leconte guilbertdenis@tele2.fr Labbe Leger et Fils +33 (0)3 19 61 82 07 + +**cleaned csv** : + +print(dataframe.head()) + name last name mail +0 Frédéric Launay Marty zoeboutin@faure.net +1 Lorraine Boyer Lemaire rodriguesanouk@morin.com +2 Frédérique Le Royer Lebrun nguyennoel@tiscali.fr +3 Thierry Gomez Fischer alain91@gomez.org +4 Thibault-Charles Lecoq Leconte guilbertdenis@tele2.fr + +""" +from pathlib import Path + +import pandas as pd + +# ____ config section ____ + +rootdirectory = Path(Path.cwd()) +csvdatafilename = "original_data.csv" +datafile = rootdirectory / 'data' / csvdatafilename +csv_cleaned_datafilename = "prepared_data.csv" +cleaned_datafile = rootdirectory / 'data' / csv_cleaned_datafilename + +# ____ end section _______ + +dataframe = pd.read_csv(datafile, sep=',') + +dataframe.drop(columns=['company', 'phone'], inplace=True, axis=1) +dataframe.to_csv(cleaned_datafile, index=False) diff --git a/dvc/pipeline_appusers/src/datatolist.py b/dvc/pipeline_appusers/src/datatolist.py new file mode 100755 index 0000000..7d342fd --- /dev/null +++ b/dvc/pipeline_appusers/src/datatolist.py @@ -0,0 +1,28 @@ +""" +print(dataframe.head()) + name last name mail +0 Frédéric Launay Marty zoeboutin@faure.net +1 Lorraine Boyer Lemaire rodriguesanouk@morin.com +2 Frédérique Le Royer Lebrun nguyennoel@tiscali.fr +3 Thierry Gomez Fischer alain91@gomez.org +4 Thibault-Charles Lecoq Leconte guilbertdenis@tele2.fr + +""" +from pathlib import Path + +import pandas as pd + +# ____ config section ____ + +rootdirectory = Path(Path.cwd()) +csv_cleaned_datafilename = "prepared_data.csv" 
+cleaned_datafile = rootdirectory / 'data' / csv_cleaned_datafilename +pylist_datafilename = "data.py" +pylist_datafile = rootdirectory / 'data' / pylist_datafilename + +# ____ end section _______ + +dataframe = pd.read_csv(cleaned_datafile, sep=',') +with open(pylist_datafile, 'w') as fhandle: + fhandle.write(str(dataframe.values.tolist())) + diff --git a/dvc/pipeline_appusers/src/make_json_with_password.py b/dvc/pipeline_appusers/src/make_json_with_password.py new file mode 100755 index 0000000..402cef5 --- /dev/null +++ b/dvc/pipeline_appusers/src/make_json_with_password.py @@ -0,0 +1,33 @@ +""" +reads user from csv datafile +""" + +import csv +from json import dump +from appinfra.password import password + +from src.appinfra.config import csvfilename, jsonfilename +from src.appinfra.user import OriginalUser + + +def from_csv_to_obj(csvfilename): + with open(csvfilename, 'r') as fhandle: + reader = csv.reader(fhandle) + next(reader) # Skip header row + for prenom, nom, mail in reader: + user = OriginalUser(prenom, nom, mail) + user.password = password() + yield user + + +def from_csv_to_pydata(csvfilename): + for user in from_csv_to_obj(csvfilename): + yield user.pydata() + + +def from_csv_to_json(csvfilename, jsonfilename): + users = list(from_csv_to_pydata(csvfilename)) + with open(jsonfilename, 'w') as fhandle: + dump(users, fhandle, indent=2) + +from_csv_to_json(csvfilename, jsonfilename) diff --git a/dvc/pipeline_appusers/src/prepare.py b/dvc/pipeline_appusers/src/prepare.py new file mode 100755 index 0000000..79c6357 --- /dev/null +++ b/dvc/pipeline_appusers/src/prepare.py @@ -0,0 +1,35 @@ +""" +Prepares the original csv data file + +- generates fake user datas +- creates the file 'original_data.csv' + +""" +from pathlib import Path + +from faker import Faker +from faker.providers import company +from faker.providers import phone_number + +# ____ config section ____ + +rootdirectory = Path(Path.cwd()) +csvdatafilename = "original_data.csv" +datafile = 
rootdirectory / 'data' / csvdatafilename + +# ____ end section _______ + +fake = Faker(['fr_FR']) + +# header schema +original_data_structure = ['name', 'last name', 'mail', 'company', 'phone'] + +with open(datafile, 'w') as fhandle: + # writes the csv header + fhandle.write(','.join(original_data_structure)+'\n') + # populates the csv with fake datas + for _ in range(40): + fhandle.write('"{}","{}","{}","{}","{}"\n'.format(fake.name(), fake.last_name(), fake.email(), + fake.company(), fake.phone_number())) + + diff --git a/dvc/pipeline_appusers/src/requirements.txt b/dvc/pipeline_appusers/src/requirements.txt new file mode 100755 index 0000000..d500421 --- /dev/null +++ b/dvc/pipeline_appusers/src/requirements.txt @@ -0,0 +1,3 @@ +dvc>=2.10.1 # data version control https://dvc.org +Faker>=10.0.0 # fake generator library https://faker.readthedocs.io/en/master/ +pandas>=1.4.2 # python data analysis library diff --git a/dvc/pipeline_appusers/src/visualize_data.py b/dvc/pipeline_appusers/src/visualize_data.py new file mode 100755 index 0000000..83f181c --- /dev/null +++ b/dvc/pipeline_appusers/src/visualize_data.py @@ -0,0 +1,28 @@ +""" +print(dataframe.head()) + name last name mail +0 Frédéric Launay Marty zoeboutin@faure.net +1 Lorraine Boyer Lemaire rodriguesanouk@morin.com +2 Frédérique Le Royer Lebrun nguyennoel@tiscali.fr +3 Thierry Gomez Fischer alain91@gomez.org +4 Thibault-Charles Lecoq Leconte guilbertdenis@tele2.fr + +""" +from pathlib import Path + +import pandas as pd + +# ____ config section ____ + +rootdirectory = Path(Path.cwd()) +csv_cleaned_datafilename = "prepared_data.csv" +cleaned_datafile = rootdirectory / 'data' / csv_cleaned_datafilename +pylist_datafilename = "visualize.txt" +pylist_datafile = rootdirectory / 'data' / pylist_datafilename + +# ____ end section _______ + +dataframe = pd.read_csv(cleaned_datafile, sep=',') +with open(pylist_datafile, 'w') as fhandle: + fhandle.write(dataframe.to_string()) +