Bladeren bron

Renamed convGAN to ConvGeN.

Kristian Schultz 3 jaren geleden
bovenliggende
commit
3d7bcba41e
8 gewijzigde bestanden met toevoegingen van 239 en 418 verwijderingen
  1. 75 110
      CreateTables.ipynb
  2. 5 5
      Makefile
  3. 0 132
      convGAN-predict.ipynb
  4. 23 103
      convGeN-Example.ipynb
  5. 127 0
      convGeN-predict.ipynb
  6. 5 62
      library/analysis.py
  7. 3 5
      library/generators/ConvGeN.py
  8. 1 1
      library/generators/__init__.py

File diff suppressed because it is too large
+ 75 - 110
CreateTables.ipynb


+ 5 - 5
Makefile

@@ -1,15 +1,15 @@
 all: benchmark
 
 benchmark: docker/container.ok
-	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark python3 /benchmark/data/run_all_exercises.py
-	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
+	docker container run --rm -it -v `(pwd)`:/benchmark/data convgenbenchmark python3 /benchmark/data/run_all_exercises.py
+	docker container run --rm -it -v `(pwd)`:/benchmark/data convgenbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
 
 benchmark-gpu: docker/container.ok
-	docker container run --rm --gpus all -it -v `(pwd)`:/benchmark/data convganbenchmark python3 /benchmark/data/run_all_exercises.py
-	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
+	docker container run --rm --gpus all -it -v `(pwd)`:/benchmark/data convgenbenchmark python3 /benchmark/data/run_all_exercises.py
+	docker container run --rm -it -v `(pwd)`:/benchmark/data convgenbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
 
 
 docker/container.ok: docker/Dockerfile docker/run.sh docker/requirements.txt
-	docker build -t convganbenchmark docker/.
+	docker build -t convgenbenchmark docker/.
 	date > $@
 

File diff suppressed because it is too large
+ 0 - 132
convGAN-predict.ipynb


+ 23 - 103
convGeN-Example.ipynb

@@ -2,99 +2,59 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "c993b281",
+   "execution_count": null,
+   "id": "d12db561",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2022-06-13 12:00:08.186819: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
-      "2022-06-13 12:00:08.186840: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from library.analysis import loadDataset, testSets\n",
-    "from library.generators.convGAN import ConvGAN"
+    "from library.generators.convGeN import ConvGeN"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "5c3b99d1",
+   "execution_count": null,
+   "id": "63b800ff",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Load 'data_input/folding_flare-F'\n",
-      "from pickle file\n",
-      "non empty cut in data_input/folding_flare-F! (23 points)\n",
-      "Data loaded.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "data = loadDataset(\"data_input/\" + testSets[4])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "3b4a0637",
+   "execution_count": null,
+   "id": "2b0329e5",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(1066, 1023, 43)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print((len(data.data), len(data.data0), len(data.data1)))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "97dfa225",
+   "execution_count": null,
+   "id": "a4853614",
    "metadata": {},
    "outputs": [],
    "source": [
-    "gen = ConvGAN(data.data0.shape[1], neb=5)"
+    "gen = ConvGeN(data.data0.shape[1], neb=5)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "c681bcee",
+   "execution_count": null,
+   "id": "842f6772",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2022-06-13 12:00:09.935294: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n",
-      "2022-06-13 12:00:09.935314: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)\n",
-      "2022-06-13 12:00:09.935330: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (sbi-klabautermann): /proc/driver/nvidia/version does not exist\n",
-      "2022-06-13 12:00:09.935515: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "gen.reset(data)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "6ac6fc34",
+   "execution_count": null,
+   "id": "333d62b9",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -103,8 +63,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "523e4106",
+   "execution_count": null,
+   "id": "f6f58a17",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -113,50 +73,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "13ee7dd4",
+   "execution_count": null,
+   "id": "82ae5f98",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([[1.9999993 , 0.6453496 , 0.2968364 , 1.3546498 , 0.        ,\n",
-       "        0.99999964, 0.99999964, 0.99999964, 0.2968364 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.7537636 , 0.9999999 , 0.7530866 , 1.3768817 , 0.        ,\n",
-       "        0.9999999 , 0.9999999 , 0.3768818 , 3.2469125 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.        , 0.9999995 , 0.5992881 , 0.9999995 , 0.        ,\n",
-       "        0.9999995 , 0.9999995 , 0.        , 0.40071142, 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.        , 0.9999996 , 0.64247733, 1.3485842 , 0.        ,\n",
-       "        0.9999996 , 0.9999996 , 0.        , 0.        , 0.        ,\n",
-       "        0.        ],\n",
-       "       [1.9999995 , 0.3972343 , 0.99999976, 1.6027652 , 0.        ,\n",
-       "        0.99999976, 0.99999976, 0.99999976, 0.3972343 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [2.304937  , 0.7317089 , 0.2787703 , 1.2682906 , 0.        ,\n",
-       "        0.99999976, 0.99999976, 0.6950623 , 0.3154169 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.        , 0.99999964, 0.33072543, 0.99999964, 0.        ,\n",
-       "        0.99999964, 0.99999964, 0.2920961 , 1.1246573 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [1.2489054 , 0.9999997 , 0.7045903 , 1.0157089 , 0.        ,\n",
-       "        0.9999997 , 0.9999997 , 0.32904333, 2.6687403 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [3.6535707 , 0.9999997 , 0.29958388, 1.3539872 , 0.        ,\n",
-       "        0.9999997 , 0.9999997 , 0.        , 0.34642833, 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.        , 0.9999995 , 0.24637261, 0.97887444, 0.        ,\n",
-       "        0.9999995 , 0.9999995 , 0.        , 0.35920745, 0.        ,\n",
-       "        0.        ]], dtype=float32)"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "syntheticPoints"
    ]

+ 127 - 0
convGeN-predict.ipynb

@@ -0,0 +1,127 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d69d2abf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import numpy as np\n",
+    "import library.analysis as A \n",
+    "from library.exercise import plotCloud\n",
+    "from library.generators import *\n",
+    "from library.dataset import DataSet\n",
+    "from library.timing import timing\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3364eda",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataSetName = \"imblearn_ozone_level\"\n",
+    "dataSetName = \"folding_abalone_17_vs_7_8_9_10\"\n",
+    "dataSetName = \"folding_abalone9-18\"\n",
+    "#dataSetName = \"folding_yeast4\"\n",
+    "#dataSetName = \"folding_car_good\"\n",
+    "data = A.loadDataset(\"data_input/\" + dataSetName)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0ea209e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def avg(x):\n",
+    "    return sum(x) / len(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a11a121e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def testHisto(descTrainCount):\n",
+    "    print(f\"======[ {descTrainCount} ]======\")\n",
+    "    t = timing(f\"train with {descTrainCount} extra rounds\")\n",
+    "    t.start()\n",
+    "    g = ConvGeN(data.data1.shape[1], neb_epochs=10, withMajorhoodNbSearch=True)\n",
+    "    g.reset(data)\n",
+    "    g.train(data, descTrainCount)\n",
+    "    t.stop()\n",
+    "    print(t)\n",
+    "    t0 = g.predictReal(data.data0)\n",
+    "    print(\"majority \" + str((min(t0), avg(t0), max(t0))))\n",
+    "    t1 = g.predictReal(data.data1)\n",
+    "    print(\"minority \" + str((min(t1), avg(t1), max(t1))))\n",
+    "    t2 = g.predictReal(g.generateData(data.data0.shape[0]))\n",
+    "    print(\"synthetic \" + str((min(t2), avg(t2), max(t2))))\n",
+    "    plt.hist(t0, label=\"majority\")\n",
+    "    #plt.hist(t1, label=\"minority\")\n",
+    "    plt.hist(t2, label=\"synthetic\")\n",
+    "    plt.show()\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "662817c4",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "for n in range(10):\n",
+    "    testHisto(n)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19babca7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "05929c92",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

+ 5 - 62
library/analysis.py

@@ -1,6 +1,6 @@
 from library.exercise import Exercise
 from library.dataset import DataSet, TrainTestData
-from library.generators import ProWRAS, SimpleGan, Repeater, SpheredNoise, ConvGAN, StupidToyListGan, CtGAN, CtabGan
+from library.generators import ProWRAS, SimpleGan, Repeater, SpheredNoise, ConvGeN, StupidToyListGan, CtGAN, CtabGan
 
 import pickle
 import numpy as np
@@ -179,54 +179,6 @@ def runExercise(datasetName, resultList, ganName, ganCreator, skipIfCsvExists=Tr
 
     print(f"  wall time: {showTime(twEnd - twStart)}s, process time: {showTime(tpEnd - tpStart)}")
 
-
-def runExerciseForSimpleGAN(datasetName, resultList=None):
-    runExercise(datasetName, resultList, "SimpleGAN", lambda data: SimpleGan(numOfFeatures=data.data0.shape[1]))
-
-
-def runExerciseForRepeater(datasetName, resultList=None):
-    runExercise(datasetName, resultList, "Repeater", lambda _data: Repeater())
-
-
-def runExerciseForSpheredNoise(datasetName, resultList=None):
-    runExercise(datasetName, resultList, "SpheredNoise", lambda _data: SpheredNoise())
-
-
-def runExerciseForCtGAN(datasetName, resultList=None, debug=False):
-    runExercise(datasetName, resultList, "ctGAN", lambda data: CtGAN(data.data0.shape[1], debug=debug))
-
-
-def runExerciseForConvGAN(datasetName, resultList=None, neb=5, debug=False):
-    runExercise(datasetName, resultList, "convGAN", lambda data: ConvGAN(data.data0.shape[1], neb=neb, gen=neb, debug=debug))
-
-def runExerciseForConvGANfull(datasetName, resultList=None, debug=False):
-    runExercise(datasetName, resultList, "convGAN-full", lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1], debug=debug))
-
-
-def runSpeedTestForConvGan(datasetName, ganGenerator):
-    ganName = "convGAN"
-    print()
-    print()
-    print("///////////////////////////////////////////")
-    print(f"// Running speed test for {ganName} on {datasetName}")
-    print("///////////////////////////////////////////")
-    print()
-    d = []
-    t1 = time.time()
-    data = loadDataset(f"data_input/{datasetName}")
-    gan = ganGenerator(data.data0.shape[1])
-    random.seed(2021)
-    shuffler = genShuffler()
-    exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=3, numOfSlices=3)
-    exercise.debug = (lambda _x: None)
-    t2 = time.time()
-    exercise.run(gan, data)
-    t3 = time.time()
-    d = (t3 - t1, t2 - t1, t3 - t2)
-    print(f"Total Time: {d[0]}")
-    print(f"Preparation Time: {d[1]}")
-    print(f"Test Time: {d[2]}")
-    return d, gan
     
 testSets = [
     "folding_abalone_17_vs_7_8_9_10",
@@ -250,23 +202,14 @@ testSets = [
     #"kaggle_creditcard"
     ]
 
-def runAllTestSets(dataSetList):
-    for dataset in testSets:
-        runExerciseForRepeater(dataset)
-        runExerciseForSpheredNoise(dataset)
-        runExerciseForSimpleGAN(dataset)
-        runExerciseForConvGAN(dataset)
-        runExerciseForConvGANfull(dataset)
-
-
 
 generators = { "Repeater":                lambda _data: Repeater()
              , "ProWRAS":                 lambda _data: ProWRAS()
              , "GAN":                     lambda data: SimpleGan(numOfFeatures=data.data0.shape[1])
              , "ctGAN":                   lambda data: CtGAN(data.data0.shape[1])
              , "CTAB-GAN":                lambda _data: CtabGan()
-             , "convGAN-majority-5":      lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5)
-             , "convGAN-majority-full":   lambda data: ConvGAN(data.data0.shape[1], neb=None)
-             , "convGAN-proximity-5":     lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5, withMajorhoodNbSearch=True)
-             , "convGAN-proximity-full":  lambda data: ConvGAN(data.data0.shape[1], neb=None, withMajorhoodNbSearch=True)
+             , "ConvGeN-majority-5":      lambda data: ConvGeN(data.data0.shape[1], neb=5, gen=5)
+             , "ConvGeN-majority-full":   lambda data: ConvGeN(data.data0.shape[1], neb=None)
+             , "ConvGeN-proximity-5":     lambda data: ConvGeN(data.data0.shape[1], neb=5, gen=5, withMajorhoodNbSearch=True)
+             , "ConvGeN-proximity-full":  lambda data: ConvGeN(data.data0.shape[1], neb=None, withMajorhoodNbSearch=True)
              }

+ 3 - 5
library/generators/convGAN.py → library/generators/ConvGeN.py

@@ -30,7 +30,7 @@ def create01Labels(totalSize, sizeFirstHalf):
     labels.extend(repeat(np.array([0,1]), totalSize - sizeFirstHalf))
     return np.array(labels)
 
-class ConvGAN(GanBaseClass):
+class ConvGeN(GanBaseClass):
     """
     This is a toy example of a GAN.
     It repeats the first point of the training-data-set.
@@ -79,7 +79,7 @@ class ConvGAN(GanBaseClass):
         self.maj_min_discriminator = self._maj_min_disc()
 
        ## instantiate network and visualize architecture
-        self.cg = self._convGAN(self.conv_sample_generator, self.maj_min_discriminator)
+        self.cg = self._convGeN(self.conv_sample_generator, self.maj_min_discriminator)
 
         if self.debug:
             print(f"neb={self.neb}, gen={self.gen}")
@@ -233,7 +233,7 @@ class ConvGAN(GanBaseClass):
         model.compile(loss='binary_crossentropy', optimizer=opt)
         return model
 
-    def _convGAN(self, generator, discriminator):
+    def _convGeN(self, generator, discriminator):
         """
         for joining the generator and the discriminator
         conv_coeff_generator-> generator network instance
@@ -383,8 +383,6 @@ class ConvGAN(GanBaseClass):
         self.loss_history = loss_history
 
 
-
-    ## convGAN
     def _BMB(self, data_maj, min_idxs):
 
         ## Generate a borderline majority batch

+ 1 - 1
library/generators/__init__.py

@@ -1,6 +1,6 @@
 from library.generators.autoencoder import Autoencoder
 from library.generators.SimpleGan import SimpleGan
-from library.generators.convGAN import ConvGAN
+from library.generators.ConvGeN import ConvGeN
 from library.generators.LoRAS_ProWRAS import ProWRAS
 from library.generators.Repeater import Repeater
 from library.generators.SpheredNoise import SpheredNoise

Some files were not shown because too many files changed in this diff