Просмотр исходного кода

Added log files and image saving. Reorganized results folder structure.

Kristian Schultz 4 года назад
Родитель
Коммит
f99825dffb
2 изменённых файла: 95 добавлено и 67 удалено
  1. 71 61
      library/analysis.py
  2. 24 6
      library/exercise.py

+ 71 - 61
library/analysis.py

@@ -12,6 +12,8 @@ import time
 import random
 import csv
 import gzip
+import sys
+import os
 from imblearn.datasets import fetch_datasets
 
 
@@ -110,26 +112,45 @@ def genShuffler():
     return shuffler
 
 
-def runExerciseForSimpleGAN(datasetName):
-    ganName = "SimpleGAN"
-    print()
-    print()
-    print("///////////////////////////////////////////")
-    print(f"// Running {ganName} on {datasetName}")
-    print("///////////////////////////////////////////")
-    print()
-    data = loadDataset(f"data_input/{datasetName}")
-    gan = SimpleGan(numOfFeatures=data.data0.shape[1])
-    random.seed(2021)
-    shuffler = genShuffler()
-    exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
-    exercise.run(gan, data)
-    exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
-    exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
-    
-    
-def runExerciseForRepeater(datasetName):
-    ganName = "Repeater"
+def showTime(t):
+    """Format a duration of t seconds as "HH:MM:SS", prefixed with "<d> days " from 24 hours on."""
+    # divmod carries the overflow upward so seconds/minutes stay below 60 and hours below 24.
+    m, s = divmod(int(t), 60)
+    h, m = divmod(m, 60)
+    d, h = divmod(h, 24)
+    if d > 0:
+        return f"{d} days {h:02d}:{m:02d}:{s:02d}"
+    return f"{h:02d}:{m:02d}:{s:02d}"
+
+
+def runExercise(datasetName, resultList, ganName, ganCreator, skipIfCsvExists=True):
+    print(f"* Running {ganName} on {datasetName}")
+    oldStdOut = sys.stdout
+    oldStdErr = sys.stderr
+    resultsFileName = f"data_result/{ganName}"
+
+    # Prepare Folder for result data
+    try:
+        os.mkdir(resultsFileName)
+    except FileExistsError as e:
+        pass
+
+    resultsFileName += f"/{datasetName}"
+
+    try:
+        os.stat(f"{resultsFileName}.csv")
+        if skipIfCsvExists and resultList is None:
+            print("  Resultfile exists => skip calculation.")
+            return
+    except FileNotFoundError as e:
+        pass
+
+    sys.stdout = open(resultsFileName + ".log", "w")
+    sys.stderr = sys.stdout
+
+
+    twStart = time.time()
+    tpStart = time.process_time()
     print()
     print()
     print("///////////////////////////////////////////")
@@ -137,52 +158,39 @@ def runExerciseForRepeater(datasetName):
     print("///////////////////////////////////////////")
     print()
     data = loadDataset(f"data_input/{datasetName}")
-    gan = Repeater()
+    gan = ganCreator(data)
     random.seed(2021)
     shuffler = genShuffler()
+
     exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
-    exercise.run(gan, data)
-    exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
-    exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
+    avg = exercise.run(gan, data, resultsFileName=resultsFileName)
+
+    tpEnd = time.process_time()
+    twEnd = time.time()
     
-def runExerciseForSpheredNoise(datasetName, resultList=None):
-    ganName = "SpheredNoise"
-    print()
-    print()
-    print("///////////////////////////////////////////")
-    print(f"// Running {ganName} on {datasetName}")
-    print("///////////////////////////////////////////")
-    print()
-    data = loadDataset(f"data_input/{datasetName}")
-    gan = SpheredNoise()
-    random.seed(2021)
-    shuffler = genShuffler()
-    exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
-    exercise.run(gan, data)
-    avg = exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
-    exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
     if resultList is not None:
         resultList[datasetName] = avg
 
+    sys.stdout = oldStdOut
+    sys.stderr = oldStdErr
 
-def runExerciseForConvGAN(datasetName, resultList=None):
-    ganName = "convGAN"
-    print()
-    print()
-    print("///////////////////////////////////////////")
-    print(f"// Running {ganName} on {datasetName}")
-    print("///////////////////////////////////////////")
-    print()
-    data = loadDataset(f"data_input/{datasetName}")
-    gan = ConvGAN(data.data0.shape[1])
-    random.seed(2021)
-    shuffler = genShuffler()
-    exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
-    exercise.run(gan, data)
-    avg = exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
-    exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
-    if resultList is not None:
-        resultList[datasetName] = avg
+    print(f"  wall time: {showTime(twEnd - twStart)}s, process time: {showTime(tpEnd - tpStart)}")
+
+
+def runExerciseForSimpleGAN(datasetName, resultList=None):
+    """Run the exercise labelled "SimpleGAN" on the dataset named datasetName via runExercise."""
+    def createGan(data):
+        # The feature count is the second dimension of data.data0.
+        return SimpleGan(numOfFeatures=data.data0.shape[1])
+
+    runExercise(datasetName, resultList, "SimpleGAN", createGan)
+
+
+def runExerciseForRepeater(datasetName, resultList=None):
+    """Run the exercise labelled "Repeater" on the dataset named datasetName via runExercise."""
+    def createGan(_data):
+        # Repeater is constructed without arguments; the loaded data is ignored.
+        return Repeater()
+
+    runExercise(datasetName, resultList, "Repeater", createGan)
+
+
+def runExerciseForSpheredNoise(datasetName, resultList=None):
+    """Run the exercise labelled "SpheredNoise" on the dataset named datasetName via runExercise."""
+    def createGan(_data):
+        # SpheredNoise is constructed without arguments; the loaded data is ignored.
+        return SpheredNoise()
+
+    runExercise(datasetName, resultList, "SpheredNoise", createGan)
+
+
+def runExerciseForConvGAN(datasetName, resultList=None, debug=False):
+    """Run the exercise labelled "convGAN" on the dataset named datasetName via runExercise."""
+    def createGan(data):
+        # The second dimension of data.data0 is passed positionally; debug is forwarded as given.
+        return ConvGAN(data.data0.shape[1], debug=debug)
+
+    runExercise(datasetName, resultList, "convGAN", createGan)
 
 
 def runSpeedTestForConvGan(datasetName, ganGenerator):
@@ -233,6 +241,8 @@ testSets = [
     ]
 
 def runAllTestSets(dataSetList):
-    for dsFileName in dataSetList:
-        runExerciseForSimpleGAN(dataSetList)
-        runExerciseForRepeater(dataSetList)
+    """Run the Repeater, SpheredNoise, SimpleGAN and convGAN exercises on each dataset name in dataSetList."""
+    # Iterate the caller's list: looping over the module-level testSets ignored the parameter.
+    for dataset in dataSetList:
+        runExerciseForRepeater(dataset)
+        runExerciseForSpheredNoise(dataset)
+        runExerciseForSimpleGAN(dataset)
+        runExerciseForConvGAN(dataset)

+ 24 - 6
library/exercise.py

@@ -4,6 +4,7 @@ in generating synthetic samples for datasets with a minority class.
 """
 
 
+import os
 import numpy as np
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
@@ -57,7 +58,7 @@ class Exercise:
         if self.numOfShuffles < 0:
             raise AttributeError(f"Expected numOfShuffles to be > 0 but got {self.numOfShuffles}")
 
-    def run(self, gan, dataset):
+    def run(self, gan, dataset, resultsFileName=None):
         """
         Exercise all tests for a given GAN.
 
@@ -74,6 +75,13 @@ class Exercise:
             raise AttributeError(
                 "Expected class 1 to be the minority class but class 1 is bigger than class 0.")
 
+        # Prepare Folder for Images
+        if resultsFileName is not None:
+            try:
+                os.mkdir(resultsFileName)
+            except FileExistsError as e:
+                pass
+
         # Reset results array.
         self.results = { name: [] for name in self.testFunctions }
 
@@ -110,7 +118,10 @@ class Exercise:
             for (sliceNr, sliceData) in enumerate(dataSlices):
                 sliceTitle = f"Slice {sliceNr + 1}/{self.numOfSlices}"
                 self.debug(f"\n------ {stepTitle}: {sliceTitle} -------")
-                self._exerciseWithDataSlice(gan, sliceData)
+                imageFileName = None
+                if resultsFileName is not None:
+                    imageFileName = f"{resultsFileName}/Step{shuffleStep + 1}_Slice{sliceNr + 1}.png"
+                self._exerciseWithDataSlice(gan, sliceData, imageFileName=imageFileName)
 
         self.debug("### Exercise is done.")
 
@@ -131,7 +142,12 @@ class Exercise:
             self.debug("minimum:")
             self.debug(str(mi))
 
-    def _exerciseWithDataSlice(self, gan, dataSlice):
+        if resultsFileName is not None:
+            return self.saveResultsTo(resultsFileName + ".csv")
+
+        return {}
+
+    def _exerciseWithDataSlice(self, gan, dataSlice, imageFileName=None):
         """
         Runs one test for the given gan and dataSlice.
 
@@ -159,7 +175,7 @@ class Exercise:
             newSamples = gan.generateData(numOfNeededSamples)
 
             # Print out an overview of the new dataset.
-            plotCloud(dataSlice.train.data0, dataSlice.train.data1, newSamples)
+            plotCloud(dataSlice.train.data0, dataSlice.train.data1, newSamples, outputFile=imageFileName, doShow=False)
 
             dataSlice.train = DataSet(
                 data0=dataSlice.train.data0,
@@ -202,7 +218,7 @@ class Exercise:
         return avgResults
 
 
-def plotCloud(data0, data1, dataNew=None, outputFile=None, title=""):
+def plotCloud(data0, data1, dataNew=None, outputFile=None, title="", doShow=True):
     """
     Does a PCA analysis of the given data and plot the both important axis.
     """
@@ -246,6 +262,8 @@ def plotCloud(data0, data1, dataNew=None, outputFile=None, title=""):
     ax.legend(title="", loc='upper left', labels=['majority', 'minority', 'synthetic minority'])
     ax.set_xlabel("PCA0")
     ax.set_ylabel("PCA1")
-    plt.show()
+    if doShow: 
+        plt.show()
+
     if outputFile is not None:
         fig.savefig(outputFile)