diff --git a/bmarket/Screencast from 26-11-21 18:52:49.webm b/bmarket/Screencast from 26-11-21 18:52:49.webm
new file mode 100644
index 0000000..01fe03a
Binary files /dev/null and b/bmarket/Screencast from 26-11-21 18:52:49.webm differ
diff --git a/bmarket/Virtusa_BM.pdf b/bmarket/Virtusa_BM.pdf
new file mode 100644
index 0000000..c7f7b29
Binary files /dev/null and b/bmarket/Virtusa_BM.pdf differ
diff --git a/bmarket/dataSet.csv b/bmarket/dataSet.csv
new file mode 100644
index 0000000..5d3d225
--- /dev/null
+++ b/bmarket/dataSet.csv
@@ -0,0 +1,15 @@
+Subject,Category
+Could you please ,SR
+let me know,SR
+Missing,Incident
+Need to ,Incident
+not accessible,Incident
+Send me ,SR
+Provide,SR
+Not working,Incident
+dispointed,Incident
+please,SR
+Slowness,Incident
+Request to fix,SR
+Solution,SR
+Still,Incident
diff --git a/bmarket/incore-develop.zip b/bmarket/incore-develop.zip
new file mode 100644
index 0000000..bb07e2a
Binary files /dev/null and b/bmarket/incore-develop.zip differ
diff --git a/bmarket/test.py b/bmarket/test.py
new file mode 100644
index 0000000..1d1248a
--- /dev/null
+++ b/bmarket/test.py
@@ -0,0 +1,60 @@
+"""Train a tiny SR-vs-Incident text classifier and predict one subject line.
+
+The model is fitted on the ``Subject``/``Category`` columns of ``dataSet.csv``
+(read relative to the current working directory), tuned with a grid search,
+and then used to label a single line typed by the user.
+"""
+import numpy as np
+import pandas as pd
+
+from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
+from sklearn.linear_model import SGDClassifier
+from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import Pipeline
+
+DATASET_PATH = 'dataSet.csv'
+
+# Hyper-parameter grid; keys are ``<pipeline step>__<parameter>``.
+PARAMETERS = {
+    'vect__max_df': (0.5, 1.0),
+    'vect__max_features': (None, 1000, 5000),
+    'vect__ngram_range': ((1, 1), (1, 2)),
+    'tfidf__use_idf': (True, False),
+    'tfidf__norm': ('l1', 'l2'),
+    'clf__alpha': (0.1, 0.01, 0.001),
+    'clf__penalty': ('l2', 'elasticnet'),
+}
+
+
+def build_grid_search():
+    """Return an unfitted grid search over the count -> TF-IDF -> SGD pipeline."""
+    pipeline = Pipeline([
+        ('vect', CountVectorizer()),
+        ('tfidf', TfidfTransformer()),
+        ('clf', SGDClassifier()),
+    ])
+    # refit=True retrains the best configuration on all rows, which is what
+    # makes best_estimator_ available for prediction afterwards.
+    return GridSearchCV(pipeline, PARAMETERS, n_jobs=-1, verbose=1, refit=True)
+
+
+def main():
+    """Fit the classifier on the CSV data and classify one line of user input."""
+    # Rows with a missing value in any column are dropped before training.
+    emails = pd.read_csv(DATASET_PATH).dropna(axis=0)
+
+    grid_search = build_grid_search()
+    grid_search.fit(np.array(emails['Subject']), np.array(emails['Category']))
+
+    input_test = input("Please enter the new content here: ")
+
+    # Nothing to classify when the user enters only whitespace (or nothing).
+    if input_test.strip():
+        prediction = grid_search.best_estimator_.predict(np.array([input_test]))
+        print("Prediction:", *prediction)
+
+
+# n_jobs=-1 lets joblib run the search in parallel workers; guarding the entry
+# point keeps the training from running again if this module gets imported.
+if __name__ == '__main__':
+    main()