-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathKDDCupFull.json
More file actions
1 lines (1 loc) · 55.1 KB
/
KDDCupFull.json
File metadata and controls
1 lines (1 loc) · 55.1 KB
1
{"paragraphs":[{"text":"%md # Load Data","authenticationInfo":{},"dateUpdated":"2016-12-30T01:42:42+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483054835627_1211440216","id":"20161230-014035_1411649165","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Load Data</h1>\n"},"dateCreated":"2016-12-30T01:40:35+0200","dateStarted":"2016-12-30T01:42:42+0200","dateFinished":"2016-12-30T01:42:45+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:266"},{"text":"import org.apache.spark.sql.SQLContext\nval df = sqlContext.load(\"com.databricks.spark.csv\", Map(\"path\" -> \"/Users/mrbank/Documents/Projects/Bitirme/KDDCup/kddcup.data\", \"header\" -> \"false\"))","authenticationInfo":{},"dateUpdated":"2017-01-01T19:28:38+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483054962318_447129686","id":"20161230-014242_292416781","result":{"code":"SUCCESS","type":"TEXT","msg":"import org.apache.spark.sql.SQLContext\nwarning: there were 1 deprecation warning(s); re-run with -deprecation for details\ndf: org.apache.spark.sql.DataFrame = [C0: string, C1: string, C2: string, C3: string, C4: string, C5: string, C6: string, C7: string, C8: string, C9: string, C10: string, C11: string, C12: string, C13: string, C14: string, C15: string, C16: string, C17: string, C18: string, C19: string, C20: string, C21: string, C22: string, C23: string, C24: string, C25: string, C26: string, C27: string, C28: string, C29: string, C30: string, C31: string, C32: string, C33: string, C34: string, C35: string, C36: string, C37: string, C38: string, C39: string, C40: string, C41: string]\n"},"dateCreated":"2016-12-30T01:42:42+0200","dateStarted":"2017-01-01T19:28:39+0200","dateFinished":"2017-01-01T19:28:43+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:267"},{"text":"%md # Check out Schema","authenticationInfo":{},"dateUpdated":"2016-12-30T01:44:41+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055023025_-1193916318","id":"20161230-014343_462001531","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Check out Schema</h1>\n"},"dateCreated":"2016-12-30T01:43:43+0200","dateStarted":"2016-12-30T01:44:41+0200","dateFinished":"2016-12-30T01:44:42+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:268"},{"text":"df.printSchema\ndf.groupBy($\"C41\").count().orderBy($\"count\".desc).registerTempTable(\"KDDCupDefault\")","authenticationInfo":{},"dateUpdated":"2016-12-30T01:45:02+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055081928_-1783892912","id":"20161230-014441_1304629248","result":{"code":"SUCCESS","type":"TEXT","msg":"root\n |-- C0: string (nullable = true)\n |-- C1: string (nullable = true)\n |-- C2: string (nullable = true)\n |-- C3: string (nullable = true)\n |-- C4: string (nullable = true)\n |-- C5: string (nullable = true)\n |-- C6: string (nullable = true)\n |-- C7: string (nullable = true)\n |-- C8: string (nullable = true)\n |-- C9: string (nullable = true)\n |-- C10: string (nullable = true)\n |-- C11: string (nullable = true)\n |-- C12: string (nullable = true)\n |-- C13: string (nullable = true)\n |-- C14: string (nullable = true)\n |-- C15: string (nullable = true)\n |-- C16: string (nullable = true)\n |-- C17: string (nullable = true)\n |-- C18: string (nullable = true)\n |-- C19: string (nullable = true)\n |-- C20: string (nullable = true)\n |-- C21: string (nullable = true)\n |-- C22: string (nullable = true)\n |-- C23: string (nullable = true)\n |-- C24: string (nullable = true)\n |-- C25: string (nullable = true)\n |-- C26: string (nullable = true)\n |-- C27: string (nullable = true)\n |-- C28: string (nullable = true)\n |-- C29: string (nullable = true)\n |-- C30: string (nullable = true)\n |-- C31: string (nullable = true)\n |-- C32: string (nullable = true)\n |-- C33: string (nullable = true)\n |-- C34: string (nullable = true)\n |-- C35: string (nullable = true)\n |-- C36: string (nullable = true)\n |-- C37: string (nullable = true)\n |-- C38: string (nullable = true)\n |-- C39: string (nullable = true)\n |-- C40: string (nullable = true)\n |-- C41: string (nullable = true)\n\n"},"dateCreated":"2016-12-30T01:44:41+0200","dateStarted":"2016-12-30T01:45:02+0200","dateFinished":"2016-12-30T01:45:03+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:269"},{"text":"%sql SELECT * FROM KDDCupDefault","authenticationInfo":{},"dateUpdated":"2017-01-11T12:28:47+0200","config":{"colWidth":12,"graph":{"mode":"pieChart","height":300,"optionOpen":false,"keys":[{"name":"C41","index":0,"aggr":"sum"}],"values":[{"name":"count","index":1,"aggr":"sum"}],"groups":[],"scatter":{"xAxis":{"name":"C41","index":0,"aggr":"sum"},"yAxis":{"name":"count","index":1,"aggr":"sum"}}},"enabled":true,"editorMode":"ace/mode/sql"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055102267_2047852032","id":"20161230-014502_1702496742","result":{"code":"SUCCESS","type":"TABLE","msg":"C41\tcount\nsmurf.\t2807886\nneptune.\t1072017\nnormal.\t972781\nsatan.\t15892\nipsweep.\t12481\nportsweep.\t10413\nnmap.\t2316\nback.\t2203\nwarezclient.\t1020\nteardrop.\t979\npod.\t264\nguess_passwd.\t53\nbuffer_overflow.\t30\nland.\t21\nwarezmaster.\t20\nimap.\t12\nrootkit.\t10\nloadmodule.\t9\nftp_write.\t8\nmultihop.\t7\nphf.\t4\nperl.\t3\nspy.\t2\n","comment":"","msgTable":[[{"key":"count","value":"smurf."},{"key":"count","value":"2807886"}],[{"value":"neptune."},{"value":"1072017"}],[{"value":"normal."},{"value":"972781"}],[{"value":"satan."},{"value":"15892"}],[{"value":"ipsweep."},{"value":"12481"}],[{"value":"portsweep."},{"value":"10413"}],[{"value":"nmap."},{"value":"2316"}],[{"value":"back."},{"value":"2203"}],[{"value":"warezclient."},{"value":"1020"}],[{"value":"teardrop."},{"value":"979"}],[{"value":"pod."},{"value":"264"}],[{"value":"guess_passwd."},{"value":"53"}],[{"value":"buffer_overflow."},{"value":"30"}],[{"value":"land."},{"value":"21"}],[{"value":"warezmaster."},{"value":"20"}],[{"value":"imap."},{"value":"12"}],[{"value":"rootkit."},{"value":"10"}],[{"value":"loadmodule."},{"value":"9"}],[{"value":"ftp_write."},{"value":"8"}],[{"value":"multihop."},{"value":"7"}],[{"value":"phf."},{"value":"4"}],[{"value":"perl."},{"value":"3"}],[{"value":"spy."},{"value":"2"}]],"columnNames":[{"name":"C41","index":0,"aggr":"sum"},{"name":"count","index":1,"aggr":"sum"}],"rows":[["smurf.","2807886"],["neptune.","1072017"],["normal.","972781"],["satan.","15892"],["ipsweep.","12481"],["portsweep.","10413"],["nmap.","2316"],["back.","2203"],["warezclient.","1020"],["teardrop.","979"],["pod.","264"],["guess_passwd.","53"],["buffer_overflow.","30"],["land.","21"],["warezmaster.","20"],["imap.","12"],["rootkit.","10"],["loadmodule.","9"],["ftp_write.","8"],["multihop.","7"],["phf.","4"],["perl.","3"],["spy.","2"]]},"dateCreated":"2016-12-30T01:45:02+0200","dateStarted":"2016-12-30T01:45:42+0200","dateFinished":"2016-12-30T01:46:35+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:270"},{"text":"val transformed1 = df.where($\"C41\".notEqual(\"normal.\")).groupBy($\"C41\").count().orderBy($\"count\".desc).registerTempTable(\"anomalies\")\nval transformed2 = df.where($\"C41\"===\"normal.\").groupBy($\"C41\").count().orderBy($\"count\".desc).registerTempTable(\"justNormals\")","authenticationInfo":{},"dateUpdated":"2016-12-30T01:47:19+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055119051_-1335413612","id":"20161230-014519_166137813","result":{"code":"SUCCESS","type":"TEXT","msg":"transformed1: Unit = ()\ntransformed2: Unit = ()\n"},"dateCreated":"2016-12-30T01:45:19+0200","dateStarted":"2016-12-30T01:47:19+0200","dateFinished":"2016-12-30T01:47:20+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:271"},{"text":"%sql select SUM(COUNT) from anomalies","authenticationInfo":{},"dateUpdated":"2016-12-30T01:49:47+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{"xAxis":null,"yAxis":null}},"enabled":true,"editorMode":"ace/mode/sql"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055237577_-574564178","id":"20161230-014717_795236545","result":{"code":"SUCCESS","type":"TABLE","msg":"_c0\n3925650\n","comment":"","msgTable":[[{"value":"3925650"}]],"columnNames":[{"name":"_c0","index":0,"aggr":"sum"}],"rows":[["3925650"]]},"dateCreated":"2016-12-30T01:47:17+0200","dateStarted":"2016-12-30T01:49:47+0200","dateFinished":"2016-12-30T01:50:34+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:272"},{"text":"%sql select * from justNormals","authenticationInfo":{},"dateUpdated":"2016-12-30T01:50:54+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/sql"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055318235_27457292","id":"20161230-014838_1129168533","result":{"code":"SUCCESS","type":"TABLE","msg":"C41\tcount\nnormal.\t972781\n","comment":"","msgTable":[[{"key":"count","value":"normal."},{"key":"count","value":"972781"}]],"columnNames":[{"name":"C41","index":0,"aggr":"sum"},{"name":"count","index":1,"aggr":"sum"}],"rows":[["normal.","972781"]]},"dateCreated":"2016-12-30T01:48:38+0200","dateStarted":"2016-12-30T01:50:54+0200","dateFinished":"2016-12-30T01:51:40+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:273"},{"text":"%md # Semi-Supervised Learning ","authenticationInfo":{},"dateUpdated":"2017-01-02T02:18:10+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055454453_1663995223","id":"20161230-015054_1599825872","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Semi-Supervised Learning</h1>\n"},"dateCreated":"2016-12-30T01:50:54+0200","dateStarted":"2017-01-02T02:18:10+0200","dateFinished":"2017-01-02T02:18:12+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:274"},{"text":"val rawData = sc.textFile(\"/Users/mrbank/Documents/Projects/Bitirme/KDDCup/kddcup.data\")\n\n\n//rawData.saveAsTextFile(\"/Users/mrbank/Documents/Projects/Bitirme/FullFiles\")","authenticationInfo":{},"dateUpdated":"2017-01-10T00:31:49+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055526314_1580812840","id":"20161230-015206_407540211","result":{"code":"SUCCESS","type":"TEXT","msg":"rawData: org.apache.spark.rdd.RDD[String] = /Users/mrbank/Documents/Projects/Bitirme/KDDCup/kddcup.data MapPartitionsRDD[111] at textFile at <console>:30\n"},"dateCreated":"2016-12-30T01:52:06+0200","dateStarted":"2017-01-10T00:31:51+0200","dateFinished":"2017-01-10T00:31:52+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:275"},{"text":"rawData.count","authenticationInfo":{},"dateUpdated":"2017-01-10T00:31:55+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1484001087467_955208046","id":"20170110-003127_1149049209","result":{"code":"SUCCESS","type":"TEXT","msg":"res45: Long = 4898431\n"},"dateCreated":"2017-01-10T00:31:27+0200","dateStarted":"2017-01-10T00:31:55+0200","dateFinished":"2017-01-10T00:32:08+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:276"},{"text":"%md # Split raw data as Training and Testing","authenticationInfo":{},"dateUpdated":"2017-01-08T19:49:29+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483897739727_2010445759","id":"20170108-194859_1224134641","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Split raw data as Training and Testing</h1>\n"},"dateCreated":"2017-01-08T19:48:59+0200","dateStarted":"2017-01-08T19:49:29+0200","dateFinished":"2017-01-08T19:49:29+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:277"},{"text":"val Array(training, test) = rawData.randomSplit(Array(0.6, 0.4))\ntraining.saveAsTextFile(\"/Users/mrbank/Documents/Projects/Bitirme/Training\")\ntest.saveAsTextFile(\"/Users/mrbank/Documents/Projects/Bitirme/Test\")\n\n\ntraining.count\ntest.count","authenticationInfo":{},"dateUpdated":"2017-01-02T02:37:57+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055553088_1148385725","id":"20161230-015233_2003439852","result":{"code":"SUCCESS","type":"TEXT","msg":"training: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[10] at randomSplit at <console>:31\ntest: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[11] at randomSplit at <console>:31\nres12: Long = 2937770\nres13: Long = 1960661\n"},"dateCreated":"2016-12-30T01:52:33+0200","dateStarted":"2017-01-02T02:37:57+0200","dateFinished":"2017-01-02T02:38:42+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:278"},{"text":"val training = sc.textFile(\"/Users/mrbank/Documents/Projects/Bitirme/Training\")\nval test = sc.textFile(\"/Users/mrbank/Documents/Projects/Bitirme/Test\")\n\ntraining.count\ntest.count","authenticationInfo":{},"dateUpdated":"2017-01-02T02:41:14+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483317512180_473006648","id":"20170102-023832_256362118","result":{"code":"SUCCESS","type":"TEXT","msg":"training: org.apache.spark.rdd.RDD[String] = /Users/mrbank/Documents/Projects/Bitirme/Training MapPartitionsRDD[1] at textFile at <console>:29\ntest: org.apache.spark.rdd.RDD[String] = /Users/mrbank/Documents/Projects/Bitirme/Test MapPartitionsRDD[3] at textFile at <console>:29\nres0: Long = 2937770\nres1: Long = 1960661\n"},"dateCreated":"2017-01-02T02:38:32+0200","dateStarted":"2017-01-02T02:41:15+0200","dateFinished":"2017-01-02T02:42:10+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:279"},{"text":"%md # Preprocessing of Training","authenticationInfo":{},"dateUpdated":"2017-01-08T19:50:02+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483897779120_1358943278","id":"20170108-194939_910754208","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Preprocessing of Training</h1>\n"},"dateCreated":"2017-01-08T19:49:39+0200","dateStarted":"2017-01-08T19:50:02+0200","dateFinished":"2017-01-08T19:50:02+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:280"},{"text":"import org.apache.spark.mllib.linalg._\n\n\n\nval splitTraining = training.map (line => line.split(',') )\n\n\nval trainRDD = splitTraining.flatMap ( arr => {\n \n val buffer = arr.toBuffer\n buffer.remove(1, 3)\n\n val label = buffer.remove(buffer.length-1) \n\n if(label ==\"normal.\" ) {\n val vector = Vectors.dense(buffer.map(_.toDouble).toArray)\n Some((label,vector))\n } \n else\n None\n}\n)\n\ntrainRDD.first\ntrainRDD.count\n","authenticationInfo":{},"dateUpdated":"2017-01-02T02:43:10+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055583049_-1478573846","id":"20161230-015303_1213769897","result":{"code":"SUCCESS","type":"TEXT","msg":"import org.apache.spark.mllib.linalg._\nsplitTraining: org.apache.spark.rdd.RDD[Array[String]] = MapPartitionsRDD[4] at map at <console>:34\ntrainRDD: org.apache.spark.rdd.RDD[(String, org.apache.spark.mllib.linalg.Vector)] = MapPartitionsRDD[5] at flatMap at <console>:36\nres3: (String, org.apache.spark.mllib.linalg.Vector) = (normal.,[0.0,236.0,1228.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,1.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0])\nres4: Long = 583628\n"},"dateCreated":"2016-12-30T01:53:03+0200","dateStarted":"2017-01-02T02:43:10+0200","dateFinished":"2017-01-02T02:43:26+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:281"},{"text":"%md # Euclidean Distance Calculation","authenticationInfo":{},"dateUpdated":"2017-01-08T19:50:26+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483897811498_1480185154","id":"20170108-195011_837190927","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Euclidean Distance Calculation</h1>\n"},"dateCreated":"2017-01-08T19:50:11+0200","dateStarted":"2017-01-08T19:50:26+0200","dateFinished":"2017-01-08T19:50:26+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:282"},{"text":"import org.apache.spark.mllib.clustering._\n\ndef distance(a: Vector, b:Vector) = math.sqrt(a.toArray.zip(b.toArray).map(p => p._1 - p._2).map(d => d * d).sum )\ndef disToCentroid(datum: Vector, model: KMeansModel) = {\n val cluster = model.predict(datum)\n val centroid = model.clusterCenters(cluster)\n distance(centroid, datum)\n}","authenticationInfo":{},"dateUpdated":"2017-01-02T02:44:20+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055609507_-1589350881","id":"20161230-015329_287278497","result":{"code":"SUCCESS","type":"TEXT","msg":"import org.apache.spark.mllib.clustering._\ndistance: (a: org.apache.spark.mllib.linalg.Vector, b: org.apache.spark.mllib.linalg.Vector)Double\ndisToCentroid: (datum: org.apache.spark.mllib.linalg.Vector, model: org.apache.spark.mllib.clustering.KMeansModel)Double\n"},"dateCreated":"2016-12-30T01:53:29+0200","dateStarted":"2017-01-02T02:44:20+0200","dateFinished":"2017-01-02T02:44:22+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:283"},{"text":"%md # Normalization","authenticationInfo":{},"dateUpdated":"2017-01-08T19:50:41+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483897833725_-913925006","id":"20170108-195033_422258809","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Normalization</h1>\n"},"dateCreated":"2017-01-08T19:50:33+0200","dateStarted":"2017-01-08T19:50:41+0200","dateFinished":"2017-01-08T19:50:41+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:284"},{"text":"import org.apache.spark.rdd._\n\nval dataArray = trainRDD.values.map(_.toArray).cache()\ndataArray.first\nval numCols = dataArray.first().length\nval n = dataArray.count()\n\nval sums = dataArray.reduce((a,b) => a.zip(b).map(t => t._1 + t._2 ))\n\nval means = sums.map ( _ / n)\n\n val devs = dataArray.map( \n (_,means).zipped.map(\n (value,mean) => (value -mean) * (value -mean)\n )\n ).cache()\n \n\nval devs2 = devs.reduce((a,b) => a.zip(b).map(t => t._1 + t._2 ))\n\nval stdDevs = devs2.map ( t => Math.sqrt(t/n ))\n\ndef normalize(v: Vector) = {\n val normed = (v.toArray, means, stdDevs).zipped.map(\n (value, mean, stdev) => \n if ( stdev <= 0 ) (value -mean) else\n (value - mean)/stdev)\n Vectors.dense(normed) \n \n}","authenticationInfo":{},"dateUpdated":"2017-01-02T02:44:24+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055650879_52634831","id":"20161230-015410_1082994814","result":{"code":"SUCCESS","type":"TEXT","msg":"import org.apache.spark.rdd._\ndataArray: org.apache.spark.rdd.RDD[Array[Double]] = MapPartitionsRDD[7] at map at <console>:44\nres7: Array[Double] = Array(0.0, 236.0, 1228.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0)\nnumCols: Int = 38\nn: Long = 583628\nsums: Array[Double] = Array(1.27215952E8, 9.37000558E8, 1.889253106E9, 5.0, 0.0, 16.0, 28519.0, 58.0, 419985.0, 20316.0, 185.0, 112.0, 36069.0, 3299.0, 200.0, 2975.0, 0.0, 2.0, 2243.0, 4764611.0, 6370205.0, 860.3600000000006, 1003.6600000000013, 32681.23, 32836.28000000002, 575030.0300000025, 10818.070000000027, 77302.560000002, 8.6608631E7, 1.17934623E8, 493219.6500000083, 32884.36000000609, 78605.87000002165, 14213.589999998769, 1195.5199999999813, 604.1000000000188, 33774.990000000966, 32745.45000000044)\nmeans: Array[Double] = Array(217.97438094128452, 1605.47567628695, 3237.084420212875, 8.567100961571412E-6, 0.0, 2.741472307702852E-5, 0.04886503046461102, 9.937837115422837E-5, 0.7196107794691139, 0.03480984462705696, 3.1698273557814223E-4, 1.9190306153919962E-4, 0.061801352916583854, 0.005652573214444818, 3.426840384628565E-4, 0.00509742507213499, 0.0, 3.426840384628565E-6, 0.0038432014913609355, 8.163780695922746, 10.914837876181403, 0.001474158196659517, 0.001719691310218155, 0.055996679391667296, 0.05626234519248566, 0.9852680645890918, 0.018535899579869414, 0.1324517672215898, 148.39697718409673, 202.07156442117238, 0.8450925075561972, 0.05634472643534253, 0.13468488489246858, 0.024353852111274252, 0.002048428108315539, 0.0010350771381770901, 0.05787074986121462, 0.056106715236418...devs: org.apache.spark.rdd.RDD[Array[Double]] = MapPartitionsRDD[8] at map at <console>:52\ndevs2: Array[Double] = Array(1.0697721533451562E12, 1.1094741016326394E16, 6.06939406269752E14, 4.999957164496516, 0.0, 45.99956136444523, 495771.41819635703, 97.9942360545458, 117759.26678472527, 1.2823124803182634E7, 184.9413581937966, 197.97850685701388, 1.497796788699854E7, 41082.352160939845, 203.9314631922391, 3743.8351604127947, 0.0, 1.9999931463212133, 2234.3796990570895, 1.8530439569465542E8, 2.772836861869501E8, 429.4620932544536, 399.9038146197836, 30637.81594158224, 30310.732279802985, 5044.76736129528, 8070.531840833573, 44524.15611721849, 6.241954164548197E9, 4.412449541965405E9, 54376.23410550537, 18850.920331799367, 46370.348747197124, 1509.618031169437, 475.8304632282771, 132.12590990078573, 29612.728102152116, 28089.21286154543)\nstdDevs: Array[Double] = Array(1353.8719321697756, 137876.58715630847, 32248.135360262844, 0.002926948507636374, 0.0, 0.00887787008687559, 0.9216641246733684, 0.012957827857564361, 0.44918938716433726, 4.687366101163769, 0.01780118696950514, 0.018417936130116188, 5.0659207077705215, 0.26531364785913414, 0.018692787028203667, 0.08009221410691218, 0.0, 0.0018511695334023138, 0.061874318530885875, 17.818650153920707, 21.79686828221103, 0.027126537235993467, 0.02617638766050512, 0.2291188610366144, 0.22789256401218264, 0.09297207248615777, 0.11759341911348441, 0.27620388870722956, 103.4170696985614, 86.95044647827538, 0.30523652713654936, 0.17972074877490601, 0.2818721197069743, 0.050858726068271166, 0.028553415274071656, 0.015046169012473036, 0.2252532936046442, 0.21938237053871487)\nnormalize: (v: org.apache.spark.mllib.linalg.Vector)org.apache.spark.mllib.linalg.Vector\n"},"dateCreated":"2016-12-30T01:54:10+0200","dateStarted":"2017-01-02T02:44:24+0200","dateFinished":"2017-01-02T02:44:43+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:285"},{"text":"val normalizedLabelsAndData = trainRDD.map(ld => (ld._1, normalize(ld._2))).cache()","authenticationInfo":{},"dateUpdated":"2017-01-02T02:44:46+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055664691_176554625","id":"20161230-015424_490935794","result":{"code":"SUCCESS","type":"TEXT","msg":"normalizedLabelsAndData: org.apache.spark.rdd.RDD[(String, org.apache.spark.mllib.linalg.Vector)] = MapPartitionsRDD[9] at map at <console>:60\n"},"dateCreated":"2016-12-30T01:54:24+0200","dateStarted":"2017-01-02T02:44:46+0200","dateFinished":"2017-01-02T02:44:47+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:286"},{"text":"%md # To find best K number as possible as 10% data was used","authenticationInfo":{},"dateUpdated":"2017-01-08T19:52:18+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483897886064_2097966005","id":"20170108-195126_785404432","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>To find best K number as possible as 10% data was used</h1>\n"},"dateCreated":"2017-01-08T19:51:26+0200","dateStarted":"2017-01-08T19:52:18+0200","dateFinished":"2017-01-08T19:52:18+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:287"},{"text":"%md # Model Creation","authenticationInfo":{},"dateUpdated":"2017-01-08T19:52:32+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483897941084_-478804648","id":"20170108-195221_257479022","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Model Creation</h1>\n"},"dateCreated":"2017-01-08T19:52:21+0200","dateStarted":"2017-01-08T19:52:32+0200","dateFinished":"2017-01-08T19:52:32+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:288"},{"text":"val kmeans = new KMeans()\nkmeans.setRuns(10)\nkmeans.setEpsilon(1.0e-6)\nkmeans.setK(60)\nval model = kmeans.run(normalizedLabelsAndData.values)\n\n","authenticationInfo":{},"dateUpdated":"2017-01-02T02:55:23+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055726107_63715928","id":"20161230-015526_337061166","result":{"code":"SUCCESS","type":"TEXT","msg":"kmeans: org.apache.spark.mllib.clustering.KMeans = org.apache.spark.mllib.clustering.KMeans@26010455\nwarning: there were 1 deprecation warning(s); re-run with -deprecation for details\nres10: kmeans.type = org.apache.spark.mllib.clustering.KMeans@26010455\nres11: kmeans.type = org.apache.spark.mllib.clustering.KMeans@26010455\nres12: kmeans.type = org.apache.spark.mllib.clustering.KMeans@26010455\nmodel: org.apache.spark.mllib.clustering.KMeansModel = org.apache.spark.mllib.clustering.KMeansModel@349294f7\n"},"dateCreated":"2016-12-30T01:55:26+0200","dateStarted":"2017-01-02T02:54:45+0200","dateFinished":"2017-01-02T02:55:01+0200","status":"ABORT","progressUpdateIntervalMs":500,"$$hashKey":"object:289"},{"text":"%md # Testing Step","authenticationInfo":{},"dateUpdated":"2016-12-30T02:00:14+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483055748688_-1611834883","id":"20161230-015548_244024603","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Testing Step</h1>\n"},"dateCreated":"2016-12-30T01:55:48+0200","dateStarted":"2016-12-30T02:00:14+0200","dateFinished":"2016-12-30T02:00:14+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:290"},{"text":"\n\nval labelsAndDataTest = test.flatMap { line =>\n val buffer = line.split(',').toBuffer\n buffer.remove(1, 3)\n val label = buffer.remove(buffer.length-1)\n val vector = Vectors.dense(buffer.map(_.toDouble).toArray)\n Some((label,vector))\n }\n","authenticationInfo":{},"dateUpdated":"2017-01-02T02:49:44+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483056014667_461575295","id":"20161230-020014_2026765309","result":{"code":"SUCCESS","type":"TEXT","msg":"labelsAndDataTest: org.apache.spark.rdd.RDD[(String, org.apache.spark.mllib.linalg.Vector)] = MapPartitionsRDD[78] at flatMap at <console>:42\n"},"dateCreated":"2016-12-30T02:00:14+0200","dateStarted":"2017-01-02T02:49:44+0200","dateFinished":"2017-01-02T02:49:45+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:291"},{"text":"%md # Situation of Testing Data","authenticationInfo":{},"dateUpdated":"2017-01-08T19:53:01+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483897967333_-1435828404","id":"20170108-195247_88613577","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Situation of Testing Data</h1>\n"},"dateCreated":"2017-01-08T19:52:47+0200","dateStarted":"2017-01-08T19:53:01+0200","dateFinished":"2017-01-08T19:53:01+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:292"},{"text":"//val sqlContext = new SQLContext(sc)\nimport sqlContext.implicits._\n\nval tester = labelsAndDataTest.filter {\n case (label, data) => true\n}\ntester.toDF.registerTempTable(\"test\")\n\nval my_df = sqlContext.sql(\"SELECT CASE WHEN `_1` LIKE 'normal.' THEN 'NORMAL' ELSE 'ANOMALY' END AS category,COUNT(*) AS count FROM test GROUP BY CASE WHEN `_1` LIKE 'normal.' THEN 'NORMAL' ELSE 'ANOMALY' END \")\nmy_df.collect().foreach(println)","authenticationInfo":{},"dateUpdated":"2017-01-02T02:49:49+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483056025667_-1925590771","id":"20161230-020025_218674661","result":{"code":"SUCCESS","type":"TEXT","msg":"import sqlContext.implicits._\ntester: org.apache.spark.rdd.RDD[(String, org.apache.spark.mllib.linalg.Vector)] = MapPartitionsRDD[79] at filter at <console>:45\nmy_df: org.apache.spark.sql.DataFrame = [category: string, count: bigint]\n[NORMAL,389153]\n[ANOMALY,1571508]\n"},"dateCreated":"2016-12-30T02:00:25+0200","dateStarted":"2017-01-02T02:49:50+0200","dateFinished":"2017-01-02T02:50:19+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:293"},{"text":"%sql select count(`_1`) as count,`_1` from test group by `_1` order by count","authenticationInfo":{},"dateUpdated":"2017-01-08T17:43:57+0200","config":{"colWidth":8,"graph":{"mode":"pieChart","height":424,"optionOpen":true,"keys":[{"name":"count","index":0,"aggr":"sum","$$hashKey":"object:3266"},{"name":"_1","index":1,"aggr":"sum","$$hashKey":"object:3267"}],"values":[{"name":"_1","index":1,"aggr":"sum","$$hashKey":"object:3278"}],"groups":[],"scatter":{"xAxis":{"name":"count","index":0,"aggr":"sum"},"yAxis":{"name":"_1","index":1,"aggr":"sum"}}},"enabled":true,"editorMode":"ace/mode/sql"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483294644536_-233934741","id":"20170101-201724_1289828383","result":{"code":"SUCCESS","type":"TABLE","msg":"count\t_1\n1\tspy.\n1\tperl.\n2\tmultihop.\n3\tphf.\n3\tloadmodule.\n5\trootkit.\n6\tftp_write.\n6\tland.\n7\timap.\n8\twarezmaster.\n9\tbuffer_overflow.\n27\tguess_passwd.\n115\tpod.\n404\tteardrop.\n407\twarezclient.\n870\tback.\n919\tnmap.\n4090\tportsweep.\n5028\tipsweep.\n6397\tsatan.\n389153\tnormal.\n429328\tneptune.\n1123872\tsmurf.\n","comment":"","msgTable":[[{"key":"_1","value":"1"},{"key":"_1","value":"spy."}],[{"value":"1"},{"value":"perl."}],[{"value":"2"},{"value":"multihop."}],[{"value":"3"},{"value":"phf."}],[{"value":"3"},{"value":"loadmodule."}],[{"value":"5"},{"value":"rootkit."}],[{"value":"6"},{"value":"ftp_write."}],[{"value":"6"},{"value":"land."}],[{"value":"7"},{"value":"imap."}],[{"value":"8"},{"value":"warezmaster."}],[{"value":"9"},{"value":"buffer_overflow."}],[{"value":"27"},{"value":"guess_passwd."}],[{"value":"115"},{"value":"pod."}],[{"value":"404"},{"value":"teardrop."}],[{"value":"407"},{"value":"warezclient."}],[{"value":"870"},{"value":"back."}],[{"value":"919"},{"value":"nmap."}],[{"value":"4090"},{"value":"portsweep."}],[{"value":"5028"},{"value":"ipsweep."}],[{"value":"6397"},{"value":"satan."}],[{"value":"389153"},{"value":"normal."}],[{"value":"429328"},{"value":"neptune."}],[{"value":"1123872"},{"value":"smurf."}]],"columnNames":[{"name":"count","index":0,"aggr":"sum","$$hashKey":"object:3251"},{"name":"_1","index":1,"aggr":"sum","$$hashKey":"object:3252"}],"rows":[["1","spy."],["1","perl."],["2","multihop."],["3","phf."],["3","loadmodule."],["5","rootkit."],["6","ftp_write."],["6","land."],["7","imap."],["8","warezmaster."],["9","buffer_overflow."],["27","guess_passwd."],["115","pod."],["404","teardrop."],["407","warezclient."],["870","back."],["919","nmap."],["4090","portsweep."],["5028","ipsweep."],["6397","satan."],["389153","normal."],["429328","neptune."],["1123872","smurf."]]},"dateCreated":"2017-01-01T20:17:24+0200","dateStarted":"2017-01-02T03:04:52+0200","dateFinished":"2017-01-02T03:05:11+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:294"},{"text":"val normalizedLabelsAndDataTest = labelsAndDataTest.map(ld => (ld._1, normalize(ld._2))).cache()","authenticationInfo":{},"dateUpdated":"2017-01-02T02:51:42+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483056046586_66685006","id":"20161230-020046_635617560","result":{"code":"SUCCESS","type":"TEXT","msg":"normalizedLabelsAndDataTest: org.apache.spark.rdd.RDD[(String, org.apache.spark.mllib.linalg.Vector)] = MapPartitionsRDD[87] at map at <console>:67\n"},"dateCreated":"2016-12-30T02:00:46+0200","dateStarted":"2017-01-02T02:51:42+0200","dateFinished":"2017-01-02T02:51:43+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:295"},{"text":"%md # Anomalies","authenticationInfo":{},"dateUpdated":"2016-12-30T02:01:41+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483056090294_843321180","id":"20161230-020130_1443583280","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Anomalies</h1>\n"},"dateCreated":"2016-12-30T02:01:30+0200","dateStarted":"2016-12-30T02:01:41+0200","dateFinished":"2016-12-30T02:01:41+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:296"},{"text":"val distances = normalizedLabelsAndDataTest.map {\n case (label, data) => disToCentroid(data, model)\n }\nval threshold = distances.top(1550000).last\n\n\n\n\nval anomalies = normalizedLabelsAndDataTest.filter {\n case (label, data) =>\n disToCentroid(data, model) > threshold\n}\nanomalies.toDF.registerTempTable(\"anomalies\")","authenticationInfo":{},"dateUpdated":"2017-01-02T02:51:46+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483056101805_-250519943","id":"20161230-020141_895689513","result":{"code":"SUCCESS","type":"TEXT","msg":"distances: org.apache.spark.rdd.RDD[Double] = MapPartitionsRDD[88] at map at <console>:79\nthreshold: Double = 11.134285508812084\nanomalies: org.apache.spark.rdd.RDD[(String, org.apache.spark.mllib.linalg.Vector)] = MapPartitionsRDD[90] at filter at <console>:83\n"},"dateCreated":"2016-12-30T02:01:41+0200","dateStarted":"2017-01-02T02:51:46+0200","dateFinished":"2017-01-02T02:55:30+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:297"},{"text":"val test = sqlContext.sql(\"SELECT CASE WHEN `_1` LIKE 'normal.' THEN 'NORMAL' ELSE 'ANOMALY' END AS category,COUNT(*) AS count FROM anomalies GROUP BY CASE WHEN `_1` LIKE 'normal.' THEN 'NORMAL' ELSE 'ANOMALY' END \")\ntest.collect().foreach(println)","authenticationInfo":{},"dateUpdated":"2017-01-02T02:56:32+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483056116397_952235767","id":"20161230-020156_97801457","result":{"code":"SUCCESS","type":"TEXT","msg":"test: org.apache.spark.sql.DataFrame = [category: string, count: bigint]\n[NORMAL,856]\n[ANOMALY,1549133]\n"},"dateCreated":"2016-12-30T02:01:56+0200","dateStarted":"2017-01-02T02:56:32+0200","dateFinished":"2017-01-02T02:57:37+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:298"},{"text":"%sql select count(`_1`) as count,`_1` from anomalies group by `_1` order by count","authenticationInfo":{},"dateUpdated":"2017-02-02T11:14:05+0200","config":{"colWidth":8,"graph":{"mode":"table","height":300,"optionOpen":true,"keys":[{"name":"count","index":0,"aggr":"sum"},{"name":"_1","index":1,"aggr":"sum"}],"values":[{"name":"_1","index":1,"aggr":"sum"}],"groups":[],"scatter":{"xAxis":{"name":"count","index":0,"aggr":"sum"},"yAxis":{"name":"_1","index":1,"aggr":"sum"}}},"enabled":true,"editorMode":"ace/mode/sql"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483056487143_-600088127","id":"20161230-020807_2091635397","result":{"code":"SUCCESS","type":"TABLE","msg":"count\t_1\n1\tperl.\n1\tmultihop.\n1\tbuffer_overflow.\n2\tpod.\n2\trootkit.\n3\tphf.\n3\tftp_write.\n6\tland.\n7\timap.\n7\twarezmaster.\n27\tguess_passwd.\n34\twarezclient.\n39\tteardrop.\n413\tnmap.\n474\tportsweep.\n856\tnormal.\n1294\tipsweep.\n5511\tsatan.\n419698\tneptune.\n1121610\tsmurf.\n","comment":"","msgTable":[[{"key":"_1","value":"1"},{"key":"_1","value":"perl."}],[{"value":"1"},{"value":"multihop."}],[{"value":"1"},{"value":"buffer_overflow."}],[{"value":"2"},{"value":"pod."}],[{"value":"2"},{"value":"rootkit."}],[{"value":"3"},{"value":"phf."}],[{"value":"3"},{"value":"ftp_write."}],[{"value":"6"},{"value":"land."}],[{"value":"7"},{"value":"imap."}],[{"value":"7"},{"value":"warezmaster."}],[{"value":"27"},{"value":"guess_passwd."}],[{"value":"34"},{"value":"warezclient."}],[{"value":"39"},{"value":"teardrop."}],[{"value":"413"},{"value":"nmap."}],[{"value":"474"},{"value":"portsweep."}],[{"value":"856"},{"value":"normal."}],[{"value":"1294"},{"value":"ipsweep."}],[{"value":"5511"},{"value":"satan."}],[{"value":"419698"},{"value":"neptune."}],[{"value":"1121610"},{"value":"smurf."}]],"columnNames":[{"name":"count","index":0,"aggr":"sum"},{"name":"_1","index":1,"aggr":"sum"}],"rows":[["1","perl."],["1","multihop."],["1","buffer_overflow."],["2","pod."],["2","rootkit."],["3","phf."],["3","ftp_write."],["6","land."],["7","imap."],["7","warezmaster."],["27","guess_passwd."],["34","warezclient."],["39","teardrop."],["413","nmap."],["474","portsweep."],["856","normal."],["1294","ipsweep."],["5511","satan."],["419698","neptune."],["1121610","smurf."]]},"dateCreated":"2016-12-30T02:08:07+0200","dateStarted":"2017-01-02T03:15:46+0200","dateFinished":"2017-01-02T03:16:19+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:299"},{"text":"%md # Different thresholds ","authenticationInfo":{},"dateUpdated":"2017-01-02T01:55:18+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483295526772_-337544398","id":"20170101-203206_475643642","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Different thresholds</h1>\n"},"dateCreated":"2017-01-01T20:32:06+0200","dateStarted":"2017-01-02T01:55:19+0200","dateFinished":"2017-01-02T01:55:22+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:300"},{"text":"\nval threshold2 = distances.top(1650000).last\n\n\n","authenticationInfo":{},"dateUpdated":"2017-01-10T22:48:49+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483314918500_505217763","id":"20170102-015518_1772801129","result":{"code":"SUCCESS","type":"TEXT","msg":"threshold2: Double = 1.6356444903644292\n"},"dateCreated":"2017-01-02T01:55:18+0200","dateStarted":"2017-01-02T03:39:20+0200","dateFinished":"2017-01-02T03:46:15+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:301"},{"text":"val threshold3 = distances.top(1560000).last\n","authenticationInfo":{},"dateUpdated":"2017-01-02T03:46:56+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483314980512_-1747194243","id":"20170102-015620_1445543379","result":{"code":"SUCCESS","type":"TEXT","msg":"threshold3: Double = 10.027966256994015\n"},"dateCreated":"2017-01-02T01:56:20+0200","dateStarted":"2017-01-02T03:46:56+0200","dateFinished":"2017-01-02T03:53:24+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:302"},{"text":"val anomalies3 = normalizedLabelsAndDataTest.filter {\n case (label, data) =>\n disToCentroid(data, model) > threshold3\n}\nanomalies3.toDF.registerTempTable(\"anomalies3\")","authenticationInfo":{},"dateUpdated":"2017-01-02T04:01:00+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483321616368_-280446667","id":"20170102-034656_218015476","result":{"code":"SUCCESS","type":"TEXT","msg":"anomalies3: org.apache.spark.rdd.RDD[(String, org.apache.spark.mllib.linalg.Vector)] = MapPartitionsRDD[126] at filter at <console>:83\n"},"dateCreated":"2017-01-02T03:46:56+0200","dateStarted":"2017-01-02T04:01:01+0200","dateFinished":"2017-01-02T04:01:02+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:303"},{"text":"val test = sqlContext.sql(\"SELECT CASE WHEN `_1` LIKE 'normal.' THEN 'NORMAL' ELSE 'ANOMALY' END AS category,COUNT(*) AS count FROM anomalies3 GROUP BY CASE WHEN `_1` LIKE 'normal.' THEN 'NORMAL' ELSE 'ANOMALY' END \")\ntest.collect().foreach(println)","authenticationInfo":{},"dateUpdated":"2017-01-02T04:01:05+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483322062850_1548079845","id":"20170102-035422_188594783","result":{"code":"SUCCESS","type":"TEXT","msg":"test: org.apache.spark.sql.DataFrame = [category: string, count: bigint]\n[NORMAL,1030]\n[ANOMALY,1558969]\n"},"dateCreated":"2017-01-02T03:54:22+0200","dateStarted":"2017-01-02T04:01:05+0200","dateFinished":"2017-01-02T04:02:03+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:304"},{"text":"%md # Back still not detected","authenticationInfo":{},"dateUpdated":"2017-01-02T04:05:13+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483322696866_14557740","id":"20170102-040456_304905361","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>Back still not detected</h1>\n"},"dateCreated":"2017-01-02T04:04:56+0200","dateStarted":"2017-01-02T04:05:13+0200","dateFinished":"2017-01-02T04:05:17+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:305"},{"text":"%sql select count(`_1`) as count,`_1` from anomalies3 group by `_1` order by count","authenticationInfo":{},"dateUpdated":"2017-01-02T04:17:01+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483322577039_-194512510","id":"20170102-040257_841810305","result":{"code":"SUCCESS","type":"TABLE","msg":"count\t_1\n1\tperl.\n1\tloadmodule.\n1\tmultihop.\n2\tpod.\n2\trootkit.\n2\tbuffer_overflow.\n3\tphf.\n3\tftp_write.\n6\tland.\n7\twarezmaster.\n7\timap.\n27\tguess_passwd.\n37\twarezclient.\n39\tteardrop.\n413\tnmap.\n529\tportsweep.\n1030\tnormal.\n1294\tipsweep.\n5554\tsatan.\n428998\tneptune.\n1122043\tsmurf.\n","comment":"","msgTable":[[{"key":"_1","value":"1"},{"key":"_1","value":"perl."}],[{"value":"1"},{"value":"loadmodule."}],[{"value":"1"},{"value":"multihop."}],[{"value":"2"},{"value":"pod."}],[{"value":"2"},{"value":"rootkit."}],[{"value":"2"},{"value":"buffer_overflow."}],[{"value":"3"},{"value":"phf."}],[{"value":"3"},{"value":"ftp_write."}],[{"value":"6"},{"value":"land."}],[{"value":"7"},{"value":"warezmaster."}],[{"value":"7"},{"value":"imap."}],[{"value":"27"},{"value":"guess_passwd."}],[{"value":"37"},{"value":"warezclient."}],[{"value":"39"},{"value":"teardrop."}],[{"value":"413"},{"value":"nmap."}],[{"value":"529"},{"value":"portsweep."}],[{"value":"1030"},{"value":"normal."}],[{"value":"1294"},{"value":"ipsweep."}],[{"value":"5554"},{"value":"satan."}],[{"value":"428998"},{"value":"neptune."}],[{"value":"1122043"},{"value":"smurf."}]],"columnNames":[{"name":"count","index":0,"aggr":"sum"},{"name":"_1","index":1,"aggr":"sum"}],"rows":[["1","perl."],["1","loadmodule."],["1","multihop."],["2","pod."],["2","rootkit."],["2","buffer_overflow."],["3","phf."],["3","ftp_write."],["6","land."],["7","warezmaster."],["7","imap."],["27","guess_passwd."],["37","warezclient."],["39","teardrop."],["413","nmap."],["529","portsweep."],["1030","normal."],["1294","ipsweep."],["5554","satan."],["428998","neptune."],["1122043","smurf."]]},"dateCreated":"2017-01-02T04:02:57+0200","dateStarted":"2017-01-02T04:17:02+0200","dateFinished":"2017-01-02T04:17:28+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:306"},{"text":"%md # one more changing in threshold","authenticationInfo":{},"dateUpdated":"2017-01-08T18:48:04+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/markdown"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483322885118_1774123374","id":"20170102-040805_734117161","result":{"code":"SUCCESS","type":"HTML","msg":"<h1>one more changing in threshold</h1>\n"},"dateCreated":"2017-01-02T04:08:05+0200","dateStarted":"2017-01-08T18:48:05+0200","dateFinished":"2017-01-08T18:48:08+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:307"},{"text":"val threshold4 = distances.top(1565000).last","authenticationInfo":{},"dateUpdated":"2017-01-02T04:06:11+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483322122888_-1522308323","id":"20170102-035522_1559568424","result":{"code":"SUCCESS","type":"TEXT","msg":"threshold4: Double = 6.753445813949076\n"},"dateCreated":"2017-01-02T03:55:22+0200","dateStarted":"2017-01-02T04:06:11+0200","dateFinished":"2017-01-02T04:10:08+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:308"},{"text":"val threshold5 = distances.top(1562000).last","authenticationInfo":{},"dateUpdated":"2017-01-02T04:11:26+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483323044466_798779455","id":"20170102-041044_946173744","result":{"code":"SUCCESS","type":"TEXT","msg":"threshold5: Double = 8.288765966952257\n"},"dateCreated":"2017-01-02T04:10:44+0200","dateStarted":"2017-01-02T04:11:26+0200","dateFinished":"2017-01-02T04:16:22+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:309"},{"text":"val anomalies4 = normalizedLabelsAndDataTest.filter {\n case (label, data) =>\n disToCentroid(data, model) > threshold5\n}\nanomalies4.toDF.registerTempTable(\"anomalies4\")","authenticationInfo":{},"dateUpdated":"2017-01-02T04:20:05+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483322245455_884447644","id":"20170102-035725_531394876","result":{"code":"SUCCESS","type":"TEXT","msg":"anomalies4: org.apache.spark.rdd.RDD[(String, org.apache.spark.mllib.linalg.Vector)] = MapPartitionsRDD[160] at filter at <console>:83\n"},"dateCreated":"2017-01-02T03:57:25+0200","dateStarted":"2017-01-02T04:20:05+0200","dateFinished":"2017-01-02T04:20:07+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:310"},{"text":"val test = sqlContext.sql(\"SELECT CASE WHEN `_1` LIKE 'normal.' THEN 'NORMAL' ELSE 'ANOMALY' END AS category,COUNT(*) AS count FROM anomalies4 GROUP BY CASE WHEN `_1` LIKE 'normal.' THEN 'NORMAL' ELSE 'ANOMALY' END \")\ntest.collect().foreach(println)","authenticationInfo":{},"dateUpdated":"2017-01-02T04:20:09+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483323522657_-124175148","id":"20170102-041842_1191452160","result":{"code":"SUCCESS","type":"TEXT","msg":"test: org.apache.spark.sql.DataFrame = [category: string, count: bigint]\n[NORMAL,1647]\n[ANOMALY,1560350]\n"},"dateCreated":"2017-01-02T04:18:42+0200","dateStarted":"2017-01-02T04:20:10+0200","dateFinished":"2017-01-02T04:20:58+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:311"},{"text":"%sql select count(`_1`) as count,`_1` from anomalies4 group by `_1` order by count","authenticationInfo":{},"dateUpdated":"2017-01-02T04:24:03+0200","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[{"name":"count","index":0,"aggr":"sum"}],"values":[{"name":"_1","index":1,"aggr":"sum"}],"groups":[],"scatter":{"xAxis":{"name":"count","index":0,"aggr":"sum"},"yAxis":{"name":"_1","index":1,"aggr":"sum"}}},"enabled":true,"editorMode":"ace/mode/sql"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483323539090_-2039439849","id":"20170102-041859_303766365","result":{"code":"SUCCESS","type":"TABLE","msg":"count\t_1\n1\tperl.\n1\tmultihop.\n1\tloadmodule.\n2\trootkit.\n2\tbuffer_overflow.\n3\tftp_write.\n3\tphf.\n4\tback.\n6\tland.\n7\twarezmaster.\n7\timap.\n8\tpod.\n27\tguess_passwd.\n39\tteardrop.\n42\twarezclient.\n413\tnmap.\n1296\tipsweep.\n1427\tportsweep.\n1647\tnormal.\n5636\tsatan.\n429305\tneptune.\n1122120\tsmurf.\n","comment":"","msgTable":[[{"key":"_1","value":"1"},{"key":"_1","value":"perl."}],[{"value":"1"},{"value":"multihop."}],[{"value":"1"},{"value":"loadmodule."}],[{"value":"2"},{"value":"rootkit."}],[{"value":"2"},{"value":"buffer_overflow."}],[{"value":"3"},{"value":"ftp_write."}],[{"value":"3"},{"value":"phf."}],[{"value":"4"},{"value":"back."}],[{"value":"6"},{"value":"land."}],[{"value":"7"},{"value":"warezmaster."}],[{"value":"7"},{"value":"imap."}],[{"value":"8"},{"value":"pod."}],[{"value":"27"},{"value":"guess_passwd."}],[{"value":"39"},{"value":"teardrop."}],[{"value":"42"},{"value":"warezclient."}],[{"value":"413"},{"value":"nmap."}],[{"value":"1296"},{"value":"ipsweep."}],[{"value":"1427"},{"value":"portsweep."}],[{"value":"1647"},{"value":"normal."}],[{"value":"5636"},{"value":"satan."}],[{"value":"429305"},{"value":"neptune."}],[{"value":"1122120"},{"value":"smurf."}]],"columnNames":[{"name":"count","index":0,"aggr":"sum"},{"name":"_1","index":1,"aggr":"sum"}],"rows":[["1","perl."],["1","multihop."],["1","loadmodule."],["2","rootkit."],["2","buffer_overflow."],["3","ftp_write."],["3","phf."],["4","back."],["6","land."],["7","warezmaster."],["7","imap."],["8","pod."],["27","guess_passwd."],["39","teardrop."],["42","warezclient."],["413","nmap."],["1296","ipsweep."],["1427","portsweep."],["1647","normal."],["5636","satan."],["429305","neptune."],["1122120","smurf."]]},"dateCreated":"2017-01-02T04:18:59+0200","dateStarted":"2017-01-02T04:21:32+0200","dateFinished":"2017-01-02T04:23:09+0200","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:312"},{"config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1483323692364_-1407619214","id":"20170102-042132_475678100","dateCreated":"2017-01-02T04:21:32+0200","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:313"}],"name":"KDDCupFull","id":"2C7QWGRM3","angularObjects":{"2BNE85FA8:shared_process":[],"2BKTND94M:shared_process":[],"2BPYGYPMY:shared_process":[],"2BME26P9X:shared_process":[],"2BKSR69R2:shared_process":[],"2BM98NRPX:shared_process":[],"2BPM8BVK6:shared_process":[],"2BMWNAMZ2:shared_process":[],"2BPRJX5VM:shared_process":[],"2BMDDYWS6:shared_process":[],"2BKFJ7HEB:shared_process":[],"2BP5Y1161:shared_process":[],"2BPQ81A2Y:shared_process":[],"2BNFHSPUF:shared_process":[],"2BK7Q5NDS:shared_process":[],"2BPF3D36C:shared_process":[],"2BKUJTY95:shared_process":[],"2BMMBV8W3:shared_process":[],"2BPXB6Z76:shared_process":[],"2BKHK2XZB:shared_process":[]},"config":{"looknfeel":"default"},"info":{}}