Переглянути джерело

Renamed convGAN to ConvGeN.

Kristian Schultz 3 роки тому
батько
коміт
3d7bcba41e

Різницю між файлами не показано, бо вона завелика
+ 75 - 110
CreateTables.ipynb


+ 5 - 5
Makefile

@@ -1,15 +1,15 @@
 all: benchmark
 all: benchmark
 
 
 benchmark: docker/container.ok
 benchmark: docker/container.ok
-	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark python3 /benchmark/data/run_all_exercises.py
-	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
+	docker container run --rm -it -v `(pwd)`:/benchmark/data convgenbenchmark python3 /benchmark/data/run_all_exercises.py
+	docker container run --rm -it -v `(pwd)`:/benchmark/data convgenbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
 
 
 benchmark-gpu: docker/container.ok
 benchmark-gpu: docker/container.ok
-	docker container run --rm --gpus all -it -v `(pwd)`:/benchmark/data convganbenchmark python3 /benchmark/data/run_all_exercises.py
-	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
+	docker container run --rm --gpus all -it -v `(pwd)`:/benchmark/data convgenbenchmark python3 /benchmark/data/run_all_exercises.py
+	docker container run --rm -it -v `(pwd)`:/benchmark/data convgenbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
 
 
 
 
 docker/container.ok: docker/Dockerfile docker/run.sh docker/requirements.txt
 docker/container.ok: docker/Dockerfile docker/run.sh docker/requirements.txt
-	docker build -t convganbenchmark docker/.
+	docker build -t convgenbenchmark docker/.
 	date > $@
 	date > $@
 
 

Різницю між файлами не показано, бо вона завелика
+ 0 - 132
convGAN-predict.ipynb


+ 23 - 103
convGeN-Example.ipynb

@@ -2,99 +2,59 @@
  "cells": [
  "cells": [
   {
   {
    "cell_type": "code",
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "c993b281",
+   "execution_count": null,
+   "id": "d12db561",
    "metadata": {},
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2022-06-13 12:00:08.186819: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
-      "2022-06-13 12:00:08.186840: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "source": [
     "from library.analysis import loadDataset, testSets\n",
     "from library.analysis import loadDataset, testSets\n",
-    "from library.generators.convGAN import ConvGAN"
+    "from library.generators.convGeN import ConvGeN"
    ]
    ]
   },
   },
   {
   {
    "cell_type": "code",
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "5c3b99d1",
+   "execution_count": null,
+   "id": "63b800ff",
    "metadata": {},
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Load 'data_input/folding_flare-F'\n",
-      "from pickle file\n",
-      "non empty cut in data_input/folding_flare-F! (23 points)\n",
-      "Data loaded.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "source": [
     "data = loadDataset(\"data_input/\" + testSets[4])"
     "data = loadDataset(\"data_input/\" + testSets[4])"
    ]
    ]
   },
   },
   {
   {
    "cell_type": "code",
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "3b4a0637",
+   "execution_count": null,
+   "id": "2b0329e5",
    "metadata": {},
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(1066, 1023, 43)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "source": [
     "print((len(data.data), len(data.data0), len(data.data1)))"
     "print((len(data.data), len(data.data0), len(data.data1)))"
    ]
    ]
   },
   },
   {
   {
    "cell_type": "code",
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "97dfa225",
+   "execution_count": null,
+   "id": "a4853614",
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
-    "gen = ConvGAN(data.data0.shape[1], neb=5)"
+    "gen = ConvGeN(data.data0.shape[1], neb=5)"
    ]
    ]
   },
   },
   {
   {
    "cell_type": "code",
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "c681bcee",
+   "execution_count": null,
+   "id": "842f6772",
    "metadata": {},
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2022-06-13 12:00:09.935294: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n",
-      "2022-06-13 12:00:09.935314: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)\n",
-      "2022-06-13 12:00:09.935330: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (sbi-klabautermann): /proc/driver/nvidia/version does not exist\n",
-      "2022-06-13 12:00:09.935515: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "source": [
     "gen.reset(data)"
     "gen.reset(data)"
    ]
    ]
   },
   },
   {
   {
    "cell_type": "code",
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "6ac6fc34",
+   "execution_count": null,
+   "id": "333d62b9",
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
@@ -103,8 +63,8 @@
   },
   },
   {
   {
    "cell_type": "code",
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "523e4106",
+   "execution_count": null,
+   "id": "f6f58a17",
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
@@ -113,50 +73,10 @@
   },
   },
   {
   {
    "cell_type": "code",
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "13ee7dd4",
+   "execution_count": null,
+   "id": "82ae5f98",
    "metadata": {},
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([[1.9999993 , 0.6453496 , 0.2968364 , 1.3546498 , 0.        ,\n",
-       "        0.99999964, 0.99999964, 0.99999964, 0.2968364 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.7537636 , 0.9999999 , 0.7530866 , 1.3768817 , 0.        ,\n",
-       "        0.9999999 , 0.9999999 , 0.3768818 , 3.2469125 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.        , 0.9999995 , 0.5992881 , 0.9999995 , 0.        ,\n",
-       "        0.9999995 , 0.9999995 , 0.        , 0.40071142, 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.        , 0.9999996 , 0.64247733, 1.3485842 , 0.        ,\n",
-       "        0.9999996 , 0.9999996 , 0.        , 0.        , 0.        ,\n",
-       "        0.        ],\n",
-       "       [1.9999995 , 0.3972343 , 0.99999976, 1.6027652 , 0.        ,\n",
-       "        0.99999976, 0.99999976, 0.99999976, 0.3972343 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [2.304937  , 0.7317089 , 0.2787703 , 1.2682906 , 0.        ,\n",
-       "        0.99999976, 0.99999976, 0.6950623 , 0.3154169 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.        , 0.99999964, 0.33072543, 0.99999964, 0.        ,\n",
-       "        0.99999964, 0.99999964, 0.2920961 , 1.1246573 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [1.2489054 , 0.9999997 , 0.7045903 , 1.0157089 , 0.        ,\n",
-       "        0.9999997 , 0.9999997 , 0.32904333, 2.6687403 , 0.        ,\n",
-       "        0.        ],\n",
-       "       [3.6535707 , 0.9999997 , 0.29958388, 1.3539872 , 0.        ,\n",
-       "        0.9999997 , 0.9999997 , 0.        , 0.34642833, 0.        ,\n",
-       "        0.        ],\n",
-       "       [0.        , 0.9999995 , 0.24637261, 0.97887444, 0.        ,\n",
-       "        0.9999995 , 0.9999995 , 0.        , 0.35920745, 0.        ,\n",
-       "        0.        ]], dtype=float32)"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
    "source": [
     "syntheticPoints"
     "syntheticPoints"
    ]
    ]

+ 127 - 0
convGeN-predict.ipynb

@@ -0,0 +1,127 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d69d2abf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import numpy as np\n",
+    "import library.analysis as A \n",
+    "from library.exercise import plotCloud\n",
+    "from library.generators import *\n",
+    "from library.dataset import DataSet\n",
+    "from library.timing import timing\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3364eda",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataSetName = \"imblearn_ozone_level\"\n",
+    "dataSetName = \"folding_abalone_17_vs_7_8_9_10\"\n",
+    "dataSetName = \"folding_abalone9-18\"\n",
+    "#dataSetName = \"folding_yeast4\"\n",
+    "#dataSetName = \"folding_car_good\"\n",
+    "data = A.loadDataset(\"data_input/\" + dataSetName)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0ea209e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def avg(x):\n",
+    "    return sum(x) / len(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a11a121e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def testHisto(descTrainCount):\n",
+    "    print(f\"======[ {descTrainCount} ]======\")\n",
+    "    t = timing(f\"train with {descTrainCount} extra rounds\")\n",
+    "    t.start()\n",
+    "    g = ConvGeN(data.data1.shape[1], neb_epochs=10, withMajorhoodNbSearch=True)\n",
+    "    g.reset(data)\n",
+    "    g.train(data, descTrainCount)\n",
+    "    t.stop()\n",
+    "    print(t)\n",
+    "    t0 = g.predictReal(data.data0)\n",
+    "    print(\"majority \" + str((min(t0), avg(t0), max(t0))))\n",
+    "    t1 = g.predictReal(data.data1)\n",
+    "    print(\"minority \" + str((min(t1), avg(t1), max(t1))))\n",
+    "    t2 = g.predictReal(g.generateData(data.data0.shape[0]))\n",
+    "    print(\"synthetic \" + str((min(t2), avg(t2), max(t2))))\n",
+    "    plt.hist(t0, label=\"majority\")\n",
+    "    #plt.hist(t1, label=\"minority\")\n",
+    "    plt.hist(t2, label=\"synthetic\")\n",
+    "    plt.show()\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "662817c4",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "for n in range(10):\n",
+    "    testHisto(n)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19babca7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "05929c92",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

+ 5 - 62
library/analysis.py

@@ -1,6 +1,6 @@
 from library.exercise import Exercise
 from library.exercise import Exercise
 from library.dataset import DataSet, TrainTestData
 from library.dataset import DataSet, TrainTestData
-from library.generators import ProWRAS, SimpleGan, Repeater, SpheredNoise, ConvGAN, StupidToyListGan, CtGAN, CtabGan
+from library.generators import ProWRAS, SimpleGan, Repeater, SpheredNoise, ConvGeN, StupidToyListGan, CtGAN, CtabGan
 
 
 import pickle
 import pickle
 import numpy as np
 import numpy as np
@@ -179,54 +179,6 @@ def runExercise(datasetName, resultList, ganName, ganCreator, skipIfCsvExists=Tr
 
 
     print(f"  wall time: {showTime(twEnd - twStart)}s, process time: {showTime(tpEnd - tpStart)}")
     print(f"  wall time: {showTime(twEnd - twStart)}s, process time: {showTime(tpEnd - tpStart)}")
 
 
-
-def runExerciseForSimpleGAN(datasetName, resultList=None):
-    runExercise(datasetName, resultList, "SimpleGAN", lambda data: SimpleGan(numOfFeatures=data.data0.shape[1]))
-
-
-def runExerciseForRepeater(datasetName, resultList=None):
-    runExercise(datasetName, resultList, "Repeater", lambda _data: Repeater())
-
-
-def runExerciseForSpheredNoise(datasetName, resultList=None):
-    runExercise(datasetName, resultList, "SpheredNoise", lambda _data: SpheredNoise())
-
-
-def runExerciseForCtGAN(datasetName, resultList=None, debug=False):
-    runExercise(datasetName, resultList, "ctGAN", lambda data: CtGAN(data.data0.shape[1], debug=debug))
-
-
-def runExerciseForConvGAN(datasetName, resultList=None, neb=5, debug=False):
-    runExercise(datasetName, resultList, "convGAN", lambda data: ConvGAN(data.data0.shape[1], neb=neb, gen=neb, debug=debug))
-
-def runExerciseForConvGANfull(datasetName, resultList=None, debug=False):
-    runExercise(datasetName, resultList, "convGAN-full", lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1], debug=debug))
-
-
-def runSpeedTestForConvGan(datasetName, ganGenerator):
-    ganName = "convGAN"
-    print()
-    print()
-    print("///////////////////////////////////////////")
-    print(f"// Running speed test for {ganName} on {datasetName}")
-    print("///////////////////////////////////////////")
-    print()
-    d = []
-    t1 = time.time()
-    data = loadDataset(f"data_input/{datasetName}")
-    gan = ganGenerator(data.data0.shape[1])
-    random.seed(2021)
-    shuffler = genShuffler()
-    exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=3, numOfSlices=3)
-    exercise.debug = (lambda _x: None)
-    t2 = time.time()
-    exercise.run(gan, data)
-    t3 = time.time()
-    d = (t3 - t1, t2 - t1, t3 - t2)
-    print(f"Total Time: {d[0]}")
-    print(f"Preparation Time: {d[1]}")
-    print(f"Test Time: {d[2]}")
-    return d, gan
     
     
 testSets = [
 testSets = [
     "folding_abalone_17_vs_7_8_9_10",
     "folding_abalone_17_vs_7_8_9_10",
@@ -250,23 +202,14 @@ testSets = [
     #"kaggle_creditcard"
     #"kaggle_creditcard"
     ]
     ]
 
 
-def runAllTestSets(dataSetList):
-    for dataset in testSets:
-        runExerciseForRepeater(dataset)
-        runExerciseForSpheredNoise(dataset)
-        runExerciseForSimpleGAN(dataset)
-        runExerciseForConvGAN(dataset)
-        runExerciseForConvGANfull(dataset)
-
-
 
 
 generators = { "Repeater":                lambda _data: Repeater()
 generators = { "Repeater":                lambda _data: Repeater()
              , "ProWRAS":                 lambda _data: ProWRAS()
              , "ProWRAS":                 lambda _data: ProWRAS()
              , "GAN":                     lambda data: SimpleGan(numOfFeatures=data.data0.shape[1])
              , "GAN":                     lambda data: SimpleGan(numOfFeatures=data.data0.shape[1])
              , "ctGAN":                   lambda data: CtGAN(data.data0.shape[1])
              , "ctGAN":                   lambda data: CtGAN(data.data0.shape[1])
              , "CTAB-GAN":                lambda _data: CtabGan()
              , "CTAB-GAN":                lambda _data: CtabGan()
-             , "convGAN-majority-5":      lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5)
-             , "convGAN-majority-full":   lambda data: ConvGAN(data.data0.shape[1], neb=None)
-             , "convGAN-proximity-5":     lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5, withMajorhoodNbSearch=True)
-             , "convGAN-proximity-full":  lambda data: ConvGAN(data.data0.shape[1], neb=None, withMajorhoodNbSearch=True)
+             , "ConvGeN-majority-5":      lambda data: ConvGeN(data.data0.shape[1], neb=5, gen=5)
+             , "ConvGeN-majority-full":   lambda data: ConvGeN(data.data0.shape[1], neb=None)
+             , "ConvGeN-proximity-5":     lambda data: ConvGeN(data.data0.shape[1], neb=5, gen=5, withMajorhoodNbSearch=True)
+             , "ConvGeN-proximity-full":  lambda data: ConvGeN(data.data0.shape[1], neb=None, withMajorhoodNbSearch=True)
              }
              }

+ 3 - 5
library/generators/convGAN.py → library/generators/ConvGeN.py

@@ -30,7 +30,7 @@ def create01Labels(totalSize, sizeFirstHalf):
     labels.extend(repeat(np.array([0,1]), totalSize - sizeFirstHalf))
     labels.extend(repeat(np.array([0,1]), totalSize - sizeFirstHalf))
     return np.array(labels)
     return np.array(labels)
 
 
-class ConvGAN(GanBaseClass):
+class ConvGeN(GanBaseClass):
     """
     """
     This is a toy example of a GAN.
     This is a toy example of a GAN.
     It repeats the first point of the training-data-set.
     It repeats the first point of the training-data-set.
@@ -79,7 +79,7 @@ class ConvGAN(GanBaseClass):
         self.maj_min_discriminator = self._maj_min_disc()
         self.maj_min_discriminator = self._maj_min_disc()
 
 
         ## instantiate network and visualize architecture
         ## instantiate network and visualize architecture
-        self.cg = self._convGAN(self.conv_sample_generator, self.maj_min_discriminator)
+        self.cg = self._convGeN(self.conv_sample_generator, self.maj_min_discriminator)
 
 
         if self.debug:
         if self.debug:
             print(f"neb={self.neb}, gen={self.gen}")
             print(f"neb={self.neb}, gen={self.gen}")
@@ -233,7 +233,7 @@ class ConvGAN(GanBaseClass):
         model.compile(loss='binary_crossentropy', optimizer=opt)
         model.compile(loss='binary_crossentropy', optimizer=opt)
         return model
         return model
 
 
-    def _convGAN(self, generator, discriminator):
+    def _convGeN(self, generator, discriminator):
         """
         """
         for joining the generator and the discriminator
         for joining the generator and the discriminator
         conv_coeff_generator-> generator network instance
         conv_coeff_generator-> generator network instance
@@ -383,8 +383,6 @@ class ConvGAN(GanBaseClass):
         self.loss_history = loss_history
         self.loss_history = loss_history
 
 
 
 
-
-    ## convGAN
     def _BMB(self, data_maj, min_idxs):
     def _BMB(self, data_maj, min_idxs):
 
 
         ## Generate a borderline majority batch
         ## Generate a borderline majority batch

+ 1 - 1
library/generators/__init__.py

@@ -1,6 +1,6 @@
 from library.generators.autoencoder import Autoencoder
 from library.generators.autoencoder import Autoencoder
 from library.generators.SimpleGan import SimpleGan
 from library.generators.SimpleGan import SimpleGan
-from library.generators.convGAN import ConvGAN
+from library.generators.ConvGeN import ConvGeN
 from library.generators.LoRAS_ProWRAS import ProWRAS
 from library.generators.LoRAS_ProWRAS import ProWRAS
 from library.generators.Repeater import Repeater
 from library.generators.Repeater import Repeater
 from library.generators.SpheredNoise import SpheredNoise
 from library.generators.SpheredNoise import SpheredNoise

Деякі файли не було показано, через те що забагато файлів було змінено