Преглед на файлове

Simplified data loading.

Kristian Schultz преди 3 години
родител
ревизия
b140b413e6
променени са 3 файла, в които са добавени 37 реда и са изтрити 70 реда
  1. +27 -48
      convGeN-Example.ipynb
  2. +3 -3
      convGeN-predict.ipynb
  3. +7 -19
      library/analysis.py

+ 27 - 48
convGeN-Example.ipynb

@@ -5,16 +5,7 @@
    "execution_count": 1,
    "id": "b9b5254c",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2022-06-13 18:29:33.355426: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
-      "2022-06-13 18:29:33.355447: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from library.analysis import loadDataset, testSets\n",
     "from library.generators.ConvGeN import ConvGeN"
@@ -30,15 +21,15 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Load 'data_input/folding_flare-F'\n",
+      "Load 'folding_flare-F'\n",
       "from pickle file\n",
-      "non empty cut in data_input/folding_flare-F! (23 points)\n",
+      "non empty cut in folding_flare-F! (70 points)\n",
       "Data loaded.\n"
      ]
     }
    ],
    "source": [
-    "data = loadDataset(\"data_input/\" + testSets[4])"
+    "data = loadDataset(testSets[4])"
    ]
   },
   {
@@ -74,19 +65,7 @@
    "execution_count": 5,
    "id": "ad01be2b",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2022-06-13 18:29:35.134261: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n",
-      "2022-06-13 18:29:35.134290: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)\n",
-      "2022-06-13 18:29:35.134310: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (sbi-klabautermann): /proc/driver/nvidia/version does not exist\n",
-      "2022-06-13 18:29:35.134557: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "gen.reset(data)"
    ]
@@ -120,35 +99,35 @@
     {
      "data": {
       "text/plain": [
-       "array([[1.9999962 , 0.8138561 , 0.18614197, 1.1861401 , 0.        ,\n",
-       "        0.9999981 , 0.9999981 , 0.9999981 , 0.        , 0.        ,\n",
+       "array([[1.9999971 , 0.82202756, 0.49161404, 1.1779697 , 0.        ,\n",
+       "        0.99999857, 0.99999857, 0.99999857, 0.313643  , 0.        ,\n",
        "        0.        ],\n",
-       "       [1.2640123 , 0.4237353 , 0.9442571 , 0.9999993 , 0.        ,\n",
-       "        0.9999993 , 0.9999993 , 0.        , 3.632004  , 0.        ,\n",
+       "       [0.9483413 , 0.99999917, 0.        , 0.99999917, 0.        ,\n",
+       "        0.99999917, 0.99999917, 0.        , 3.9999967 , 0.        ,\n",
        "        0.        ],\n",
-       "       [0.        , 0.123703  , 0.99999845, 0.99999845, 0.        ,\n",
-       "        0.99999845, 0.99999845, 0.        , 0.99999845, 0.        ,\n",
+       "       [0.        , 0.3485548 , 0.84628767, 0.9999995 , 0.        ,\n",
+       "        0.9999995 , 0.9999995 , 0.        , 0.8051566 , 0.        ,\n",
        "        0.        ],\n",
-       "       [0.        , 0.9999981 , 0.958097  , 0.9999981 , 0.        ,\n",
-       "        0.9999981 , 0.9999981 , 0.        , 0.        , 0.        ,\n",
+       "       [0.        , 0.99999964, 0.90224695, 1.1576508 , 0.        ,\n",
+       "        0.99999964, 0.99999964, 0.        , 0.        , 0.        ,\n",
        "        0.        ],\n",
-       "       [1.9999985 , 0.67955816, 0.9999992 , 1.3204403 , 1.1185625 ,\n",
-       "        0.9999992 , 0.9999992 , 0.6477282 , 0.5342972 , 0.        ,\n",
+       "       [1.9999989 , 0.48536825, 0.99999946, 1.5146307 , 0.70072913,\n",
+       "        0.99999946, 0.99999946, 0.99999946, 0.8357328 , 0.        ,\n",
        "        0.        ],\n",
-       "       [2.9999838 , 0.99999464, 0.        , 0.99999464, 0.        ,\n",
-       "        0.99999464, 0.99999464, 0.        , 0.99999464, 0.        ,\n",
+       "       [1.9999907 , 0.        , 0.99999535, 1.9999907 , 0.        ,\n",
+       "        0.99999535, 0.99999535, 0.99999535, 0.        , 0.        ,\n",
        "        0.        ],\n",
-       "       [0.        , 0.99999905, 0.        , 0.99999905, 0.        ,\n",
-       "        0.99999905, 0.99999905, 0.        , 1.9999981 , 0.        ,\n",
+       "       [0.        , 0.9999988 , 0.        , 0.9999988 , 0.        ,\n",
+       "        0.9999988 , 0.9999988 , 0.        , 1.7140688 , 0.        ,\n",
        "        0.        ],\n",
-       "       [1.041093  , 0.85719097, 0.99999857, 0.99999857, 0.        ,\n",
-       "        0.99999857, 0.99999857, 0.37773895, 2.3873253 , 0.        ,\n",
+       "       [1.9999973 , 0.        , 0.9999986 , 0.9999986 , 0.        ,\n",
+       "        0.9999986 , 0.9999986 , 0.        , 3.9999945 , 0.        ,\n",
        "        0.        ],\n",
-       "       [3.999983  , 1.9004215 , 0.99999577, 0.99999577, 0.        ,\n",
-       "        0.0995701 , 0.99999577, 0.        , 0.        , 0.        ,\n",
+       "       [3.291949  , 0.9999985 , 0.29195344, 0.9999985 , 0.        ,\n",
+       "        0.9999985 , 0.9999985 , 0.        , 0.70804507, 0.        ,\n",
        "        0.        ],\n",
-       "       [0.        , 0.9999983 , 0.        , 0.        , 0.        ,\n",
-       "        0.9999983 , 0.9999983 , 0.        , 0.        , 0.        ,\n",
+       "       [0.        , 0.9999995 , 0.05166833, 0.32977083, 0.        ,\n",
+       "        0.9999995 , 0.9999995 , 0.        , 0.08435939, 0.        ,\n",
        "        0.        ]], dtype=float32)"
       ]
      },
@@ -164,7 +143,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -178,7 +157,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,

+ 3 - 3
convGeN-predict.ipynb

@@ -30,7 +30,7 @@
     "dataSetName = \"folding_abalone9-18\"\n",
     "#dataSetName = \"folding_yeast4\"\n",
     "#dataSetName = \"folding_car_good\"\n",
-    "data = A.loadDataset(\"data_input/\" + dataSetName)"
+    "data = A.loadDataset(dataSetName)"
    ]
   },
   {
@@ -105,7 +105,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -119,7 +119,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,

+ 7 - 19
library/analysis.py

@@ -28,27 +28,19 @@ def loadDataset(datasetName):
             return False
         return f
 
-    def isNotIn(ys):
-        def f(x):
-            for y in ys:
-                if isSame(x,y):
-                    return False
-            return True
-        return f
-
     print(f"Load '{datasetName}'")
-    if datasetName.startswith("data_input/imblearn_"):
+    if datasetName.startswith("imblearn_"):
         print("from imblearn")
         ds = fetch_datasets()
-        myData = ds[datasetName[20:]]
+        myData = ds[datasetName[9:]]
         ds = None
 
         features = myData["data"]
         labels = myData["target"]
-    elif datasetName.startswith("data_input/kaggle_"):
+    elif datasetName.startswith("kaggle_"):
         features = []
         labels = []
-        c = csv.reader(gzip.open(f"{datasetName}.csv.gz", "rt")) 
+        c = csv.reader(gzip.open(f"data_input/{datasetName}.csv.gz", "rt")) 
         for (n, row) in enumerate(c):
             # Skip heading
             if n > 0:
@@ -60,7 +52,7 @@ def loadDataset(datasetName):
 
     else:
         print("from pickle file")
-        pickle_in = open(f"{datasetName}.pickle", "rb")
+        pickle_in = open(f"data_input/{datasetName}.pickle", "rb")
         pickle_dict = pickle.load(pickle_in)
 
         myData = pickle_dict["folding"]
@@ -73,13 +65,9 @@ def loadDataset(datasetName):
     label_0 = list(np.where(labels != 1)[0])
     features_1 = features[label_1]
     features_0 = features[label_0]
-    cut = np.array(list(filter(isIn(features_0), features_1)))
+    cut = np.array(list(filter(isIn(features_1), features_0)))
     if len(cut) > 0:
         print(f"non empty cut in {datasetName}! ({len(cut)} points)")
-    #    print(f"{len(features_0)}/{len(features_1)} point before")
-    #    features_0 = np.array(list(filter(isNotIn(cut), features_0)))
-    #    features_1 = np.array(list(filter(isNotIn(cut), features_1)))
-    #    print(f"{len(features_0)}/{len(features_1)} points after")
     
     ds = DataSet(data0=features_0, data1=features_1)
     print("Data loaded.")
@@ -160,7 +148,7 @@ def runExercise(datasetName, resultList, ganName, ganCreator, skipIfCsvExists=Tr
     print(f"// Running {ganName} on {datasetName}")
     print("///////////////////////////////////////////")
     print()
-    data = loadDataset(f"data_input/{datasetName}")
+    data = loadDataset(f"{datasetName}")
     gan = ganCreator(data)
     random.seed(2021)
     shuffler = genShuffler()