瀏覽代碼

Added first thougths for paper.

Kristian Schultz 3 年之前
父節點
當前提交
89dc5de652

+ 4 - 0
.gitignore

@@ -3,3 +3,7 @@ __pycache__
 *.swp
 temp
 docker/container.ok
+*.aux
+*.log
+*.o
+*.hi

+ 7 - 0
documentation/paper/.obsidian/app.json

@@ -0,0 +1,7 @@
+{
+  "legacyEditor": false,
+  "livePreview": false,
+  "autoPairBrackets": false,
+  "autoPairMarkdown": false,
+  "autoConvertHtml": false
+}

+ 3 - 0
documentation/paper/.obsidian/appearance.json

@@ -0,0 +1,3 @@
+{
+  "textFontFamily": "Courier New"
+}

+ 14 - 0
documentation/paper/.obsidian/core-plugins.json

@@ -0,0 +1,14 @@
+[
+  "file-explorer",
+  "global-search",
+  "switcher",
+  "graph",
+  "backlink",
+  "page-preview",
+  "note-composer",
+  "command-palette",
+  "editor-status",
+  "word-count",
+  "open-with-default-app",
+  "file-recovery"
+]

+ 22 - 0
documentation/paper/.obsidian/graph.json

@@ -0,0 +1,22 @@
+{
+  "collapse-filter": true,
+  "search": "",
+  "showTags": false,
+  "showAttachments": false,
+  "hideUnresolved": false,
+  "showOrphans": true,
+  "collapse-color-groups": true,
+  "colorGroups": [],
+  "collapse-display": true,
+  "showArrow": false,
+  "textFadeMultiplier": 0,
+  "nodeSizeMultiplier": 1,
+  "lineSizeMultiplier": 1.02136752136752,
+  "collapse-forces": true,
+  "centerStrength": 0.230769230769231,
+  "repelStrength": 10,
+  "linkStrength": 1,
+  "linkDistance": 250,
+  "scale": 1,
+  "close": false
+}

+ 1 - 0
documentation/paper/.obsidian/hotkeys.json

@@ -0,0 +1 @@
+{}

+ 3 - 0
documentation/paper/.obsidian/page-preview.json

@@ -0,0 +1,3 @@
+{
+  "editor": false
+}

+ 103 - 0
documentation/paper/.obsidian/workspace

@@ -0,0 +1,103 @@
+{
+  "main": {
+    "id": "4138b4a72deb772e",
+    "type": "split",
+    "children": [
+      {
+        "id": "ab1c8cd4d8e329f8",
+        "type": "leaf",
+        "state": {
+          "type": "markdown",
+          "state": {
+            "file": "Benchmark/Results.md",
+            "mode": "source",
+            "source": true
+          }
+        }
+      }
+    ],
+    "direction": "vertical"
+  },
+  "left": {
+    "id": "fcd194c05020ce0e",
+    "type": "split",
+    "children": [
+      {
+        "id": "324e719d5b4cb9a3",
+        "type": "tabs",
+        "children": [
+          {
+            "id": "c96a0994c9f1afeb",
+            "type": "leaf",
+            "state": {
+              "type": "file-explorer",
+              "state": {}
+            }
+          },
+          {
+            "id": "8204ccfb337d43c5",
+            "type": "leaf",
+            "state": {
+              "type": "search",
+              "state": {
+                "query": "",
+                "matchingCase": false,
+                "explainSearch": false,
+                "collapseAll": false,
+                "extraContext": false,
+                "sortOrder": "alphabetical"
+              }
+            }
+          }
+        ]
+      }
+    ],
+    "direction": "horizontal",
+    "width": 171
+  },
+  "right": {
+    "id": "c7b05898ea0989d2",
+    "type": "split",
+    "children": [
+      {
+        "id": "77e3a12dc1d1a2da",
+        "type": "tabs",
+        "children": [
+          {
+            "id": "920104c41ba00f95",
+            "type": "leaf",
+            "state": {
+              "type": "backlink",
+              "state": {
+                "file": "Benchmark/Results.md",
+                "collapseAll": false,
+                "extraContext": false,
+                "sortOrder": "alphabetical",
+                "showSearch": false,
+                "searchQuery": "",
+                "backlinkCollapsed": false,
+                "unlinkedCollapsed": true
+              }
+            }
+          }
+        ]
+      }
+    ],
+    "direction": "horizontal",
+    "width": 300,
+    "collapsed": true
+  },
+  "active": "ab1c8cd4d8e329f8",
+  "lastOpenFiles": [
+    "Benchmark/Results.md",
+    "Benchmark/Scores.md",
+    "Benchmark/Algorithms/convGAN.md",
+    "Benchmark/Algorithms/SimpleGAN.md",
+    "Benchmark/Algorithms/Repeater.md",
+    "Benchmark/Algorithms/ProWRAS.md",
+    "Benchmark/Algorithms.md",
+    "Benchmark/Classifier.md",
+    "Benchmark/Datasets.md",
+    "convGAN algotithm.md"
+  ]
+}

+ 5 - 0
documentation/paper/Benchmark.md

@@ -0,0 +1,5 @@
+# Benchmark
+[[Benchmark/Algorithms]]
+[[Benchmark/Classifier]]
+[[Benchmark/Scores]]
+[[Benchmark/Datasets]]

+ 7 - 0
documentation/paper/Benchmark/Algorithms.md

@@ -0,0 +1,7 @@
+## Compared algorithms
+[[Benchmark/Algorithms/convGAN]]
+[[Benchmark/Algorithms/Repeater]]
+[[Benchmark/Algorithms/SimpleGAN]]
+[[Benchmark/Algorithms/ProWRAS]]
+[[Benchmark/Algorithms/CTGAN]]
+[[Benchmark/Algorithms/CTAB-GAN]]

+ 4 - 0
documentation/paper/Benchmark/Algorithms/CTAB-GAN.md

@@ -0,0 +1,4 @@
+### CTAB-GAN
+CTAB-GAN is a synthetic point generation library using a generative neural network.
+
+(Git link: <https://github.com/Team-TUD/CTAB-GAN>)

+ 4 - 0
documentation/paper/Benchmark/Algorithms/CTGAN.md

@@ -0,0 +1,4 @@
+### CTGAN
+CTGAN is a synthetic point generation library using a generative neural network.
+
+(Git link: <https://github.com/sdv-dev/CTGAN>)

+ 5 - 0
documentation/paper/Benchmark/Algorithms/ProWRAS.md

@@ -0,0 +1,5 @@
+### ProWRAS
+ProWRAS is a point synthesizer using affine projection.
+
+(Git link: <https://github.com/COSPOV/ProWRAS.git>)
+ 

+ 2 - 0
documentation/paper/Benchmark/Algorithms/Repeater.md

@@ -0,0 +1,2 @@
+### Repeater
+This generator copies the minority dataset as often as needed.

+ 2 - 0
documentation/paper/Benchmark/Algorithms/SimpleGAN.md

@@ -0,0 +1,2 @@
+### SimpleGAN
+This generator is an implementation of the basic GAN principle.

+ 9 - 0
documentation/paper/Benchmark/Algorithms/convGAN.md

@@ -0,0 +1,9 @@
+### convGAN
+convGAN is our approach to synthetic point generation for imbalanced datasets.
+
+In our benchmark we use the following settings:
+
+- Proximity in majority to minority / full majority class
+- Neighborhood size: 5 / all points in the majority data set
+
+(Git link: <https://github.com/kristian10007/convGAN>)

+ 6 - 0
documentation/paper/Benchmark/Classifier.md

@@ -0,0 +1,6 @@
+## Used classifiers
+- LR: Logistic regression
+- GB: Gradient boosting
+- KNN: k-nearest neighbours
+- Discriminator: Uses the discriminator of the trained GAN.
+  This classifier was only used for convGAN, as this algorithm is the only one that trains its discriminator to classify between the majority and the minority dataset.

+ 29 - 0
documentation/paper/Benchmark/Datasets.md

@@ -0,0 +1,29 @@
+## Used datasets
+
+The column "feat." is the number of values for each point in the dataset. The value "points total" is the sum of the number of points in the minority set and the number of points in the majority set. The column "minority" contains the number of points in the minority set. The value "common" is the sum of the number of points in the majority set that are also in the minority set and the number of points in the minority set that are also in the majority set. If the value "common" is not zero, then even a perfect classifier will have false positive or false negative predictions.
+
+ dataset            | feat. | points total | minority | common 
+--------------------|------:|-------------:|---------:|-------:|
+ **folding**                         |     |          |      |
+ $~$ abalone (17 vs. 7, 8, 9, 10)    | 8   | 2338     | 58   | 0      
+ $~$ abalone (9-18)                  | 8   | 731      | 42   | 0      
+ $~$ car good                        | 6   | 1728     | 69   | 0      
+ $~$ car vgood                       | 6   | 1728     | 65   | 0      
+ $~$ flare-F                         | 11  | 1066     | 43   | 93     
+ $~$ hypothyroid                     | 25  | 3163     | 151  | 2      
+ $~$ kddcup (guess passwd vs. satan) | 38  | 1642     | 53   | 0      
+ $~$ kr-vs-k (3 vs. 11)              | 6   | 2935     | 81   | 0      
+ $~$ kr-vs-k (zero-one vs. draw)     | 6   | 2901     | 105  | 0      
+ $~$ shuttle (2 vs. 5)               | 9   | 3316     | 49   | 0      
+ $~$ winequality (red 4)             | 11  | 1599     | 53   | 0      
+ $~$ yeast4                          | 10  | 1484     | 51   | 0      
+ $~$ yeast5                          | 10  | 1484     | 44   | 0      
+ $~$ yeast6                          | 10  | 1484     | 35   | 0      
+ **imblearn**                        |     |          |      |
+ $~$ webpage                         | 300 | 34780    | 981  | 152    
+ $~$ mammography                     | 6   | 11183    | 260  | 3329   
+ $~$ protein homo                    | 74  | 145751   | 1296 | 0      
+ $~$ ozone level                     | 72  | 2536     | 73   | 0      
+ **kaggle**                          |     |          |      |
+ $~$ creditcard                      | 30  | 284807   | 492  | 0  
+

+ 2 - 0
documentation/paper/Benchmark/Results.md

@@ -0,0 +1,2 @@
+## Results
+...

+ 17 - 0
documentation/paper/Benchmark/Scores.md

@@ -0,0 +1,17 @@
+## Used scores
+- $F_1$ score: the harmonic mean of precision and recall, where precision is the quotient of true positives and all predicted positives, and recall is the quotient of true positives and all elements that should be recognized as positive.
+
+  $F_1 = 2 {P R \over P + R}$
+  \hfill $P = {TP \over TP + FP}$
+  \hfill $R = {TP \over TP + FN}$
+- Cohen's kappa score:
+
+  $\kappa = {p_0 - p_c \over 1 - p_c}$
+  \hfill $p_0 = {TP + TN \over N}$
+  \hfill $p_c = {1 \over N^2} \sum_{x \in \{TP, TN\}} (x + FN)(x + FP)$
+  \hfill $N = TP + TN + FP + FN$
+
+Where
+- TP is the number of correctly predicted positive values.
+- TN is the number of correctly predicted negative values.
+- FN is the number of values predicted as negative that should be positive.
+- FP is the number of values predicted as positive that should be negative.

+ 14 - 0
documentation/paper/all.md

@@ -0,0 +1,14 @@
+---
+title: convGAN
+author:
+- Kristian
+- Saptarshi
+keywords: [GAN]
+abstract: |
+  benchmark about synthetic point generation with GANs
+date: April 2022
+...
+
+[[Benchmark]]
+
+[[convGAN algotithm]]

+ 2 - 0
documentation/paper/convGAN algotithm.md

@@ -0,0 +1,2 @@
+# The convGAN algorithm
+TODO: details on the convGAN algorithm.

+ 17 - 0
documentation/paperTex/Makefile

@@ -0,0 +1,17 @@
# Builds the paper: the markdown notes are merged into one markdown file,
# pandoc converts that file to LaTeX and xelatex turns it into a PDF.
src = ../paper/
# Note sources in $(src) and up to two directory levels below it. Each depth
# is listed explicitly (NOTE(review): make's "**" is not expected to recurse).
srcFiles = $(src)*.md $(src)**/*.md $(src)**/**/*.md

# These targets never create a file of the same name; without .PHONY a stray
# file called "all" or "nothing" would stop them from running.
.PHONY: nothing all

# Default target: a bare "make" builds nothing.
nothing:

all: paper.tex

# Merge every note reachable from all.md into a single markdown file.
paper.md: $(srcFiles)
	python ../../tools/mdImport.py $(src)all.md > $@


# Convert a markdown file into a standalone LaTeX document using the
# local pandoc template.
%.tex: %.md template.tex
	pandoc --standalone -f markdown -t latex --template=template.tex $< -o $@


paper.pdf: paper.tex
	xelatex paper.tex

+ 498 - 0
documentation/paperTex/template.tex

@@ -0,0 +1,498 @@
+% Options for packages loaded elsewhere
+\PassOptionsToPackage{unicode$for(hyperrefoptions)$,$hyperrefoptions$$endfor$}{hyperref}
+\PassOptionsToPackage{hyphens}{url}
+$if(colorlinks)$
+\PassOptionsToPackage{dvipsnames,svgnames*,x11names*}{xcolor}
+$endif$
+$if(dir)$
+$if(latex-dir-rtl)$
+\PassOptionsToPackage{RTLdocument}{bidi}
+$endif$
+$endif$
+$if(CJKmainfont)$
+\PassOptionsToPackage{space}{xeCJK}
+$endif$
+%
+\documentclass[
+$if(fontsize)$
+  $fontsize$,
+$endif$
+$if(lang)$
+  $babel-lang$,
+$endif$
+$if(papersize)$
+  $papersize$paper,
+$endif$
+$if(beamer)$
+  ignorenonframetext,
+$if(handout)$
+  handout,
+$endif$
+$if(aspectratio)$
+  aspectratio=$aspectratio$,
+$endif$
+$endif$
+$for(classoption)$
+  $classoption$$sep$,
+$endfor$
+]{$documentclass$}
+$if(beamer)$
+$if(background-image)$
+\usebackgroundtemplate{%
+  \includegraphics[width=\paperwidth]{$background-image$}%
+}
+$endif$
+\usepackage{pgfpages}
+\setbeamertemplate{caption}[numbered]
+\setbeamertemplate{caption label separator}{: }
+\setbeamercolor{caption name}{fg=normal text.fg}
+\beamertemplatenavigationsymbols$if(navigation)$$navigation$$else$empty$endif$
+$for(beameroption)$
+\setbeameroption{$beameroption$}
+$endfor$
+% Prevent slide breaks in the middle of a paragraph
+\widowpenalties 1 10000
+\raggedbottom
+$if(section-titles)$
+\setbeamertemplate{part page}{
+  \centering
+  \begin{beamercolorbox}[sep=16pt,center]{part title}
+    \usebeamerfont{part title}\insertpart\par
+  \end{beamercolorbox}
+}
+\setbeamertemplate{section page}{
+  \centering
+  \begin{beamercolorbox}[sep=12pt,center]{part title}
+    \usebeamerfont{section title}\insertsection\par
+  \end{beamercolorbox}
+}
+\setbeamertemplate{subsection page}{
+  \centering
+  \begin{beamercolorbox}[sep=8pt,center]{part title}
+    \usebeamerfont{subsection title}\insertsubsection\par
+  \end{beamercolorbox}
+}
+\AtBeginPart{
+  \frame{\partpage}
+}
+\AtBeginSection{
+  \ifbibliography
+  \else
+    \frame{\sectionpage}
+  \fi
+}
+\AtBeginSubsection{
+  \frame{\subsectionpage}
+}
+$endif$
+$endif$
+$if(beamerarticle)$
+\usepackage{beamerarticle} % needs to be loaded first
+$endif$
+$if(fontfamily)$
+\usepackage[$for(fontfamilyoptions)$$fontfamilyoptions$$sep$,$endfor$]{$fontfamily$}
+$else$
+\usepackage{lmodern}
+$endif$
+$if(linestretch)$
+\usepackage{setspace}
+$endif$
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \usepackage[$if(fontenc)$$fontenc$$else$T1$endif$]{fontenc}
+  \usepackage[utf8]{inputenc}
+  \usepackage{textcomp} % provide euro and other symbols
+\else % if luatex or xetex
+$if(mathspec)$
+  \ifxetex
+    \usepackage{mathspec}
+  \else
+    \usepackage{unicode-math}
+  \fi
+$else$
+  \usepackage{unicode-math}
+$endif$
+  \defaultfontfeatures{Scale=MatchLowercase}
+  \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
+$if(mainfont)$
+  \setmainfont[$for(mainfontoptions)$$mainfontoptions$$sep$,$endfor$]{$mainfont$}
+$endif$
+$if(sansfont)$
+  \setsansfont[$for(sansfontoptions)$$sansfontoptions$$sep$,$endfor$]{$sansfont$}
+$endif$
+$if(monofont)$
+  \setmonofont[$for(monofontoptions)$$monofontoptions$$sep$,$endfor$]{$monofont$}
+$endif$
+$for(fontfamilies)$
+  \newfontfamily{$fontfamilies.name$}[$for(fontfamilies.options)$$fontfamilies.options$$sep$,$endfor$]{$fontfamilies.font$}
+$endfor$
+$if(mathfont)$
+$if(mathspec)$
+  \ifxetex
+    \setmathfont(Digits,Latin,Greek)[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$}
+  \else
+    \setmathfont[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$}
+  \fi
+$else$
+  \setmathfont[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$}
+$endif$
+$endif$
+$if(CJKmainfont)$
+  \ifxetex
+    \usepackage{xeCJK}
+    \setCJKmainfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmainfont$}
+  \fi
+$endif$
+$if(luatexjapresetoptions)$
+  \ifluatex
+    \usepackage[$for(luatexjapresetoptions)$$luatexjapresetoptions$$sep$,$endfor$]{luatexja-preset}
+  \fi
+$endif$
+$if(CJKmainfont)$
+  \ifluatex
+    \usepackage[$for(luatexjafontspecoptions)$$luatexjafontspecoptions$$sep$,$endfor$]{luatexja-fontspec}
+    \setmainjfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmainfont$}
+  \fi
+$endif$
+\fi
+$if(beamer)$
+$if(theme)$
+\usetheme[$for(themeoptions)$$themeoptions$$sep$,$endfor$]{$theme$}
+$endif$
+$if(colortheme)$
+\usecolortheme{$colortheme$}
+$endif$
+$if(fonttheme)$
+\usefonttheme{$fonttheme$}
+$endif$
+$if(mainfont)$
+\usefonttheme{serif} % use mainfont rather than sansfont for slide text
+$endif$
+$if(innertheme)$
+\useinnertheme{$innertheme$}
+$endif$
+$if(outertheme)$
+\useoutertheme{$outertheme$}
+$endif$
+$endif$
+% Use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+\IfFileExists{microtype.sty}{% use microtype if available
+  \usepackage[$for(microtypeoptions)$$microtypeoptions$$sep$,$endfor$]{microtype}
+  \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+$if(indent)$
+$else$
+\makeatletter
+\@ifundefined{KOMAClassName}{% if non-KOMA class
+  \IfFileExists{parskip.sty}{%
+    \usepackage{parskip}
+  }{% else
+    \setlength{\parindent}{0pt}
+    \setlength{\parskip}{6pt plus 2pt minus 1pt}}
+}{% if KOMA class
+  \KOMAoptions{parskip=half}}
+\makeatother
+$endif$
+$if(verbatim-in-note)$
+\usepackage{fancyvrb}
+$endif$
+\usepackage{xcolor}
+\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
+\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
+\hypersetup{
+$if(title-meta)$
+  pdftitle={$title-meta$},
+$endif$
+$if(author-meta)$
+  pdfauthor={$author-meta$},
+$endif$
+$if(lang)$
+  pdflang={$lang$},
+$endif$
+$if(subject)$
+  pdfsubject={$subject$},
+$endif$
+$if(keywords)$
+  pdfkeywords={$for(keywords)$$keywords$$sep$, $endfor$},
+$endif$
+$if(colorlinks)$
+  colorlinks=true,
+  linkcolor=$if(linkcolor)$$linkcolor$$else$Maroon$endif$,
+  filecolor=$if(filecolor)$$filecolor$$else$Maroon$endif$,
+  citecolor=$if(citecolor)$$citecolor$$else$Blue$endif$,
+  urlcolor=$if(urlcolor)$$urlcolor$$else$Blue$endif$,
+$else$
+  hidelinks,
+$endif$
+  pdfcreator={LaTeX via pandoc}}
+\urlstyle{same} % disable monospaced font for URLs
+$if(verbatim-in-note)$
+\VerbatimFootnotes % allow verbatim text in footnotes
+$endif$
+$if(geometry)$
+$if(beamer)$
+\geometry{$for(geometry)$$geometry$$sep$,$endfor$}
+$else$
+\usepackage[$for(geometry)$$geometry$$sep$,$endfor$]{geometry}
+$endif$
+$endif$
+$if(beamer)$
+\newif\ifbibliography
+$endif$
+$if(listings)$
+\usepackage{listings}
+\newcommand{\passthrough}[1]{#1}
+\lstset{defaultdialect=[5.3]Lua}
+\lstset{defaultdialect=[x86masm]Assembler}
+$endif$
+$if(lhs)$
+\lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{}
+$endif$
+$if(highlighting-macros)$
+$highlighting-macros$
+$endif$
+$if(tables)$
+\usepackage{longtable,booktabs}
+$if(beamer)$
+\usepackage{caption}
+% Make caption package work with longtable
+\makeatletter
+\def\fnum@table{\tablename~\thetable}
+\makeatother
+$else$
+% Correct order of tables after \paragraph or \subparagraph
+\usepackage{etoolbox}
+\makeatletter
+\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{}
+\makeatother
+% Allow footnotes in longtable head/foot
+\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
+\makesavenoteenv{longtable}
+$endif$
+$endif$
+$if(graphics)$
+\usepackage{graphicx}
+\makeatletter
+\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
+\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
+\makeatother
+% Scale images if necessary, so that they will not overflow the page
+% margins by default, and it is still possible to overwrite the defaults
+% using explicit options in \includegraphics[width, height, ...]{}
+\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
+% Set default figure placement to htbp
+\makeatletter
+\def\fps@figure{htbp}
+\makeatother
+$endif$
+$if(links-as-notes)$
+% Make links footnotes instead of hotlinks:
+\DeclareRobustCommand{\href}[2]{#2\footnote{\url{#1}}}
+$endif$
+$if(strikeout)$
+\usepackage[normalem]{ulem}
+% Avoid problems with \sout in headers with hyperref
+\pdfstringdefDisableCommands{\renewcommand{\sout}{}}
+$endif$
+\setlength{\emergencystretch}{3em} % prevent overfull lines
+\providecommand{\tightlist}{%
+  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+$if(numbersections)$
+\setcounter{secnumdepth}{$if(secnumdepth)$$secnumdepth$$else$5$endif$}
+$else$
+\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
+$endif$
+$if(beamer)$
+$else$
+$if(block-headings)$
+% Make \paragraph and \subparagraph free-standing
+\ifx\paragraph\undefined\else
+  \let\oldparagraph\paragraph
+  \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+  \let\oldsubparagraph\subparagraph
+  \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+$endif$
+$endif$
+$if(pagestyle)$
+\pagestyle{$pagestyle$}
+$endif$
+$for(header-includes)$
+$header-includes$
+$endfor$
+$if(lang)$
+\ifxetex
+  % Load polyglossia as late as possible: uses bidi with RTL languages (e.g. Hebrew, Arabic)
+  \usepackage{polyglossia}
+  \setmainlanguage[$polyglossia-lang.options$]{$polyglossia-lang.name$}
+$for(polyglossia-otherlangs)$
+  \setotherlanguage[$polyglossia-otherlangs.options$]{$polyglossia-otherlangs.name$}
+$endfor$
+\else
+  \usepackage[shorthands=off,$for(babel-otherlangs)$$babel-otherlangs$,$endfor$main=$babel-lang$]{babel}
+$if(babel-newcommands)$
+  $babel-newcommands$
+$endif$
+\fi
+$endif$
+$if(dir)$
+\ifxetex
+  % Load bidi as late as possible as it modifies e.g. graphicx
+  \usepackage{bidi}
+\fi
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \TeXXeTstate=1
+  \newcommand{\RL}[1]{\beginR #1\endR}
+  \newcommand{\LR}[1]{\beginL #1\endL}
+  \newenvironment{RTL}{\beginR}{\endR}
+  \newenvironment{LTR}{\beginL}{\endL}
+\fi
+$endif$
+$if(natbib)$
+\usepackage[$natbiboptions$]{natbib}
+\bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$}
+$endif$
+$if(biblatex)$
+\usepackage[$if(biblio-style)$style=$biblio-style$,$endif$$for(biblatexoptions)$$biblatexoptions$$sep$,$endfor$]{biblatex}
+$for(bibliography)$
+\addbibresource{$bibliography$}
+$endfor$
+$endif$
+$if(csl-refs)$
+\newlength{\cslhangindent}
+\setlength{\cslhangindent}{1.5em}
+\newenvironment{cslreferences}%
+  {$if(csl-hanging-indent)$\setlength{\parindent}{0pt}%
+  \everypar{\setlength{\hangindent}{\cslhangindent}}\ignorespaces$endif$}%
+  {\par}
+$endif$
+
+$if(title)$
+\title{$title$$if(thanks)$\thanks{$thanks$}$endif$}
+$endif$
+$if(subtitle)$
+$if(beamer)$
+$else$
+\usepackage{etoolbox}
+\makeatletter
+\providecommand{\subtitle}[1]{% add subtitle to \maketitle
+  \apptocmd{\@title}{\par {\large #1 \par}}{}{}
+}
+\makeatother
+$endif$
+\subtitle{$subtitle$}
+$endif$
+\author{$for(author)$$author$$sep$ \and $endfor$}
+\date{$date$}
+$if(beamer)$
+$if(institute)$
+\institute{$for(institute)$$institute$$sep$ \and $endfor$}
+$endif$
+$if(titlegraphic)$
+\titlegraphic{\includegraphics{$titlegraphic$}}
+$endif$
+$if(logo)$
+\logo{\includegraphics{$logo$}}
+$endif$
+$endif$
+
+\begin{document}
+$if(has-frontmatter)$
+\frontmatter
+$endif$
+$if(title)$
+$if(beamer)$
+\frame{\titlepage}
+$else$
+\maketitle
+$endif$
+$if(abstract)$
+\begin{abstract}
+$abstract$
+\end{abstract}
+$endif$
+$endif$
+
+$for(include-before)$
+$include-before$
+
+$endfor$
+$if(toc)$
+$if(toc-title)$
+\renewcommand*\contentsname{$toc-title$}
+$endif$
+$if(beamer)$
+\begin{frame}[allowframebreaks]
+$if(toc-title)$
+  \frametitle{$toc-title$}
+$endif$
+  \tableofcontents[hideallsubsections]
+\end{frame}
+$else$
+{
+$if(colorlinks)$
+\hypersetup{linkcolor=$if(toccolor)$$toccolor$$else$$endif$}
+$endif$
+\setcounter{tocdepth}{$toc-depth$}
+\tableofcontents
+}
+$endif$
+$endif$
+$if(lot)$
+\listoftables
+$endif$
+$if(lof)$
+\listoffigures
+$endif$
+$if(linestretch)$
+\setstretch{$linestretch$}
+$endif$
+$if(has-frontmatter)$
+\mainmatter
+$endif$
+$body$
+
+$if(has-frontmatter)$
+\backmatter
+$endif$
+$if(natbib)$
+$if(bibliography)$
+$if(biblio-title)$
+$if(has-chapters)$
+\renewcommand\bibname{$biblio-title$}
+$else$
+\renewcommand\refname{$biblio-title$}
+$endif$
+$endif$
+$if(beamer)$
+\begin{frame}[allowframebreaks]{$biblio-title$}
+  \bibliographytrue
+$endif$
+  \bibliography{$for(bibliography)$$bibliography$$sep$,$endfor$}
+$if(beamer)$
+\end{frame}
+$endif$
+
+$endif$
+$endif$
+$if(biblatex)$
+$if(beamer)$
+\begin{frame}[allowframebreaks]{$biblio-title$}
+  \bibliographytrue
+  \printbibliography[heading=none]
+\end{frame}
+$else$
+\printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$
+$endif$
+
+$endif$
+$for(include-after)$
+$include-after$
+
+$endfor$
+
+\end{document}

+ 105 - 0
ds_statistic.py

@@ -0,0 +1,105 @@
+from library.analysis import *
+
+
def padLeft(n, text, p=" "):
    """Pad *text* on the right with *p* until it has at least *n* characters.

    Despite the name, the padding is appended, so the text ends up
    left-aligned.  Only whole copies of *p* are appended, so a pad string
    longer than one character can make the result longer than *n*.

    Args:
        n: Minimum length of the returned string.
        text: String to pad.
        p: Pad string that is appended repeatedly (a single space by default).

    Returns:
        *text* unchanged if it already has *n* or more characters, otherwise
        *text* followed by as many copies of *p* as are needed.

    Raises:
        ValueError: If padding is needed but *p* is empty; appending an empty
            string can never reach the requested length.
    """
    missing = n - len(text)
    if missing <= 0:
        return text
    if not p:
        raise ValueError("pad string must not be empty")
    # Ceiling division: number of whole pad strings needed to close the gap.
    repeats = -(-missing // len(p))
    return text + p * repeats
+
+
class Table:
    """Plain-text table with left-aligned columns separated by "|".

    Every cell is printed with one leading space and padded with spaces so
    that all cells of a column share the same width.

    Attributes are plain and public:
        rows: list of rows; each row is a list of strings.
        cols: the column titles passed to the constructor.
        colSize: widest text seen so far for each column.
    """

    def __init__(self, cols):
        """Create an empty table with the column titles *cols* (strings)."""
        self.rows = []
        self.cols = cols
        self.colSize = [len(c) for c in cols]

    def _cell(self, i, text):
        """Return *text* formatted as a cell of column *i*."""
        # Cells beyond the known columns are simply not padded.
        width = self.colSize[i] if i < len(self.colSize) else len(text)
        return (" " + text).ljust(2 + width)

    def showHead(self):
        """Print the title line followed by a line of dashes."""
        titles = [self._cell(i, c) for (i, c) in enumerate(self.cols)]
        rules = ["-" * (2 + self.colSize[i]) for i in range(len(self.cols))]
        print("|".join(titles))
        print("|".join(rules))

    def showRow(self, row):
        """Print one row; its cells are converted to strings if necessary."""
        print("|".join(self._cell(i, str(c)) for (i, c) in enumerate(row)))

    def show(self):
        """Print the head and then all rows in insertion order."""
        self.showHead()
        for r in self.rows:
            self.showRow(r)

    def addRow(self, row):
        """Append *row* and widen the columns to fit its cells.

        The cells are stored as strings in a new list, so the sequence
        passed in by the caller is left untouched.  A row with more cells
        than there are column titles gets additional, untitled columns
        instead of raising an IndexError.
        """
        cells = [str(value) for value in row]
        for (i, cell) in enumerate(cells):
            if i < len(self.colSize):
                self.colSize[i] = max(self.colSize[i], len(cell))
            else:
                self.colSize.append(len(cell))
        self.rows.append(cells)
+
+
class CheckTree:
    """Set of sequences stored as a tree of nested dictionaries.

    Every element of a sequence selects the dictionary of the next level,
    so sequences with a common beginning share the corresponding nodes.
    The elements must be hashable.
    """

    # Private key marking that a stored sequence ends at a node.  Without it
    # a mere prefix of a stored sequence would be reported as contained.
    _END = object()

    def __init__(self, data=None):
        """Create the tree and add every sequence of *data*, if given."""
        self.tree = {}
        if data is not None:
            for x in data:
                self.add(x)

    def add(self, xs):
        """Add the sequence *xs* to the set."""
        node = self.tree
        for x in xs:
            node = node.setdefault(x, {})
        node[self._END] = True

    def isIn(self, xs):
        """Return True if exactly the sequence *xs* was added before."""
        node = self.tree
        for x in xs:
            if x not in node:
                return False
            node = node[x]
        return self._END in node
+
def isSame(xs, ys):
    """Return True if *xs* and *ys* yield equal elements in the same order.

    Both arguments may be any iterables.  Sequences of different length are
    never the same, even if the shorter one is a prefix of the longer one.
    """
    from itertools import zip_longest

    # Unique filler that marks the position where one iterable ran out.
    missing = object()
    for (x, y) in zip_longest(xs, ys, fillvalue=missing):
        if x is missing or y is missing or x != y:
            return False
    return True
+
def countCommon(setA, setB):
    """Count the points that occur in both collections, in both directions.

    The result is the number of points of *setA* that are also in *setB*
    plus the number of points of *setB* that are also in *setA*.  Duplicate
    points are counted once per occurrence.  Two points are the same only if
    they have the same length and equal elements.

    Args:
        setA: Iterable of points; each point is an iterable of hashable values.
        setB: Iterable of points of the same kind.

    Returns:
        The summed count as an int.

    The markers "->" and "<-" are printed before each direction is counted.
    """
    def countMatches(points, others):
        # A set of tuples gives constant-time membership tests.
        known = {tuple(p) for p in others}
        return sum(1 for p in points if tuple(p) in known)

    print("->")
    n = countMatches(setA, setB)
    print("<-")
    n += countMatches(setB, setA)
    return n
+
# Summary table with one row per dataset.  It is filled and printed only
# when the file is run as a script, so importing the module prints nothing.
table = Table(["dataset", "features", "points total", "majority", "minority", "common"])
if __name__ == "__main__":
    for ds in testSets:
        d = loadDataset("data_input/" + ds)
        # Progress output: sizes of the two classes of the dataset just loaded.
        print((d.data0.shape[0], d.data1.shape[0]))
        table.addRow(
            [ ds
            , d.data0.shape[1]
            , d.data0.shape[0] + d.data1.shape[0]
            , d.data0.shape[0]
            , d.data1.shape[0]
            , countCommon(d.data0, d.data1)
            ])

    table.show()

+ 43 - 0
tools/mdImport.py

@@ -0,0 +1,43 @@
+import sys
+import os
+import re
+
# Cache of imported files: file name -> file text with all links expanded.
# (A "global" statement has no effect at module level, so none is needed.)
fileData = {}
+
def readFile(name):
    """Return the complete text of the file *name*.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding, so the notes read the same on every machine.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(name, encoding="utf-8") as f:
        return f.read()
+
def basePath(name):
    """Return the directory part of *name*, ending with "/".

    The result can be prepended to a file name directly.  It is the empty
    string when *name* has no directory part, and a directory that already
    ends with "/" (the root directory) does not get a second slash.
    """
    p = os.path.dirname(name)
    if p and not p.endswith("/"):
        p += "/"
    return p
+
def getFile(name, path=None):
    """Return the text of the file *name* with all its links expanded.

    Args:
        name: File to load.
        path: Prefix used to resolve the links found in the file; when it is
            None the directory of *name* is used.

    Every file is loaded and expanded only once; later calls are served
    from the module-level cache ``fileData``.
    """
    root = basePath(name) if path is None else path
    if name in fileData:
        return fileData[name]

    # Register the raw text before expanding it: a file that links back to
    # itself (directly or through other files) then receives the unexpanded
    # text from the cache instead of recursing without end.
    raw = readFile(name)
    fileData[name] = raw
    expanded = parse(raw, root)
    fileData[name] = expanded
    return expanded
+
def parse(text, path=None):
    """Replace every wiki link in *text* by the content of the linked file.

    Both the embedding form ``![[target]]`` and the plain form
    ``[[target]]`` are replaced by a newline followed by the expanded
    content of ``<path><target>.md``.

    Args:
        text: Markdown text to process.
        path: Prefix put in front of every link target; None means no prefix.
            The value is also handed on unchanged to ``getFile``.

    Returns:
        The text with all links replaced.
    """
    prefix = path or ""
    # Embedding links first: otherwise the plain pattern would match their
    # "[[...]]" part and leave the "!" behind.
    linkForms = (
        (r"!\[\[([^]]*)]]", "![[{}]]"),
        (r"\[\[([^]]*)]]", "[[{}]]"),
    )
    for (pattern, template) in linkForms:
        for target in re.findall(pattern, text):
            content = getFile(prefix + target + ".md", path)
            text = text.replace(template.format(target), "\n" + content)
    return text
+
if __name__ == "__main__":
    # Start with an empty cache, then print every file named on the command
    # line with all its links expanded relative to the file's own directory.
    fileData = {}
    for fileName in sys.argv[1:]:
        root = basePath(fileName)
        print(getFile(fileName, root))