libUtil/Util.cpp at master · mesaleh/libUtil · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
/*
 * Util.cpp
 *
 *  Created on: January 4, 2015
 *  Author: Moustafa
 *  Version: 1.0
 *
 * This file has some utility functions
 */

#include <algorithm>
#include <regex>
#include <string>
#include <sstream>
#include <fstream>
#include <iostream>
#include <vector>
#include <string.h>
#include <stdio.h>
#ifdef __linux__
#include <sys/stat.h>
#include <sys/resource.h>
#endif
#include "Util.h"

using namespace std;


// private functions
#ifndef __linux__
/*
 * Converts a multibyte path to a wide string and queries it with GetFileAttributesW().
 *
 * path: NUL-terminated multibyte path; at most MAX_PATH wide characters are converted.
 * Returns the value reported by GetFileAttributesW(), or INVALID_FILE_ATTRIBUTES when
 * path is null, empty, or cannot be converted.
 */
DWORD _getFileAttributes(char* path)
{
	if(path == NULL)
		return INVALID_FILE_ATTRIBUTES;

	// one wide character per input byte is the worst case, plus room for the terminator
	std::wstring widePath(strlen(path) + 1, L'\0');
	const size_t converted = std::mbstowcs(&widePath[0], path, MAX_PATH);

	// (size_t)-1 signals an invalid multibyte sequence; 0 means the path was empty
	if(converted == (size_t)-1 || converted == 0)
		return INVALID_FILE_ATTRIBUTES;

	// mbstowcs() does not write a terminator when it stops at the MAX_PATH limit
	widePath[converted] = L'\0';
	return GetFileAttributesW(widePath.c_str());
}
#endif

// public functions

/*
 * Returns the hexadecimal text of n as produced by the std::hex stream manipulator.
 */
string int2HexStr(int n)
{
	std::ostringstream hexStream;
	hexStream << std::hex << n;
	return hexStream.str();
}


/*
 * Returns the part of path that follows the last PATHSEPARATOR.
 *
 * - No separator in path: the whole path is returned unchanged.
 * - Path ends with a separator: an empty string is returned.
 * - Empty path: an empty string is returned (no out-of-range access).
 * - Separator at index 0 (e.g. a file directly under the root): the leading
 *   separator is stripped like any other.
 */
string getFileName(string path)
{
	const string::size_type lastSeparator = path.rfind(PATHSEPARATOR);
	if(lastSeparator == string::npos)
		return path;

	return path.substr(lastSeparator + 1);
}

// untested, check the other getLineFromMem() which is tested.
/*
 * Extracts the first line found in the memory range [ReadAddr, Bound).
 *
 * Leading bytes that are line breaks, NUL, EOF or outside 0x20-0x7E are skipped first.
 * The line then runs until the next '\r', '\n', NUL or EOF byte, or until Bound.
 *
 * ReadAddr: start of the range to scan (taken by value, not advanced).
 * Bound:    one past the last readable byte.
 * Line:     receives a new[]-allocated, NUL-terminated copy of the line; this
 *           function does not free it, so the caller must delete[] it.
 * Returns the offset from ReadAddr of the byte that ended the line, or the size of
 * the range when Bound was reached first.
 */
DWORD getLineFromMem(LPVOID ReadAddr, LPVOID Bound, char* &Line)
{
	const BYTE* const Base = (const BYTE *)ReadAddr;
	const BYTE* const End = (const BYTE *)Bound;
	// use a pointer difference: casting the pointers themselves to DWORD truncates 64-bit addresses
	const DWORD LimitSize = (End > Base) ? (DWORD)(End - Base) : 0;
	DWORD i = 0;

	// skip newlines and non printable characters, never reading at or past Bound
	while(i < LimitSize)
	{
		const BYTE x = Base[i];
		// NOTE(review): if BYTE is an unsigned type the EOF comparison can never be true - confirm
		if(!(x == '\r' || x == '\n' || x == '\0' || x == EOF || x < 0x20 || x > 0x7E))
			break;
		i++;
	}
	const DWORD Start = i;

	// advance to the byte that terminates the line
	for(; i < LimitSize; i++)
	{
		const BYTE x = Base[i];
		if(x == '\r' || x == '\n' || x == '\0' || x == EOF)
			break;
	}

	// always copy from the original start of the range, whatever byte ended the line
	const DWORD StrLen = i - Start;
	Line = new char[StrLen+1];
	memcpy(Line, Base + Start, StrLen);
	Line[StrLen] = '\0';
	return i;
}

/*
 * Reads the next line from the memory range [ReadAddr, Bound) and advances ReadAddr.
 *
 * Leading bytes that are line breaks, NUL, EOF or outside 0x20-0x7E are skipped first.
 * The line then runs until the next '\r', '\n', NUL or EOF byte, or until Bound.
 *
 * ReadAddr: in/out read position. After the call it points just past a '\r' or '\n'
 *           terminator, or equals Bound when the range is exhausted or the line was
 *           ended by NUL/EOF (treated as end of file).
 * Bound:    one past the last readable byte.
 * Returns the line without its terminator, or "" when nothing is left to read.
 */
string getLineFromMem(LPVOID &ReadAddr, LPVOID Bound)
{
	const BYTE* const Base = (const BYTE *)ReadAddr;
	const BYTE* const End = (const BYTE *)Bound;

	// empty (or inverted) range: nothing may be read, not even the first byte
	if(Base >= End) {
		ReadAddr = Bound;
		return "";
	}

	// use a pointer difference: casting the pointers themselves to DWORD truncates 64-bit addresses
	const size_t LimitSize = (size_t)(End - Base);
	size_t pos = 0;

	// skip newlines and non printable characters
	for(; pos < LimitSize; pos++)
	{
		const BYTE x = Base[pos];
		// NOTE(review): if BYTE is an unsigned type the EOF comparison can never be true - confirm
		if(!(x == '\r' || x == '\n' || x == '\0' || x == EOF || x < 0x20 || x > 0x7E))
			break;
	}
	const size_t Start = pos;

	// characters outside 0x20-0x7E are allowed inside the line, for unicode tool names
	for(; pos < LimitSize; pos++)
	{
		const BYTE x = Base[pos];
		if(x == '\r' || x == '\n' || x == '\0' || x == EOF)
			break;
	}

	// copy before touching ReadAddr so the source address is always inside the range
	const string Line((const char *)(Base + Start), pos - Start);

	if(pos < LimitSize && (Base[pos] == '\r' || Base[pos] == '\n'))
		ReadAddr = (LPVOID)(Base + pos + 1);		// step over the line break
	else
		ReadAddr = Bound;							// end of range, or NUL/EOF: end of file

	return Line;
}

/*
 * Escapes a string for embedding in JSON. Three passes run in this order:
 *  1. every odd-length run of backslashes gets one more backslash, making it even;
 *  2. every character that is not printable is removed;
 *  3. every double quote is replaced by a backslash followed by a double quote.
 * The order matters: the quote pass adds backslashes the first pass must not see.
 */
string escapeForJson(string s)
{
	const std::regex oddBackslashRun("(^|[^\\\\])(\\\\(\\\\\\\\)*)(?!\\\\)");
	const std::regex nonPrintable("[^[:print:]]");
	const std::regex doubleQuote("\\\"");

	const string balanced = std::regex_replace(s, oddBackslashRun, "$1$2\\");
	const string printableOnly = std::regex_replace(balanced, nonPrintable, "");
	return std::regex_replace(printableOnly, doubleQuote, "\\\"");
}

/*
 * Returns a copy of s with every whitespace character (as classified by isspace())
 * removed; all other characters keep their original order.
 */
string removeSpaces(string s)
{
	string compact;
	compact.reserve(s.length());
	for(const char ch : s)
	{
		// isspace() is undefined for negative values, so bytes >= 0x80 must go through unsigned char
		if(!isspace(static_cast<unsigned char>(ch)))
			compact.push_back(ch);
	}
	return compact;
}

#ifdef __linux__

// Returns true when stat() succeeds on path and reports a regular file.
// Returns false when stat() fails (e.g. the path does not exist).
bool isFile(char* path) {
    struct stat info;
    // the buffer is only meaningful when stat() succeeds; never inspect it otherwise
    if (stat(path, &info) != 0)
        return false;
    return S_ISREG(info.st_mode);
}

// Returns true when stat() succeeds on path and reports a directory.
// Returns false when stat() fails (e.g. the path does not exist).
bool isDir(char* path) {
    struct stat info;
    // the buffer is only meaningful when stat() succeeds; never inspect it otherwise
    if (stat(path, &info) != 0)
        return false;
    return S_ISDIR(info.st_mode);
}

// Returns true when stat() succeeds on the given path, false otherwise.
// NOTE(review): this is also true for directories, whereas the non-Linux
// implementation in this file rejects them - confirm which behaviour callers expect.
bool isFileExists(char* file) {
  struct stat info;
  const int status = stat(file, &info);
  return status == 0;
}

/*
 * Raises the soft RLIMIT_STACK limit of the process to Size bytes when it is
 * currently lower; a limit that is already large enough is left alone.
 *
 * Returns 0 on success (including when no change was needed), otherwise the
 * non-zero result of the failing getrlimit()/setrlimit() call. A setrlimit()
 * failure is also reported on stderr.
 */
int increaseStackSize(unsigned int Size)
{
	struct rlimit limits;
	const int queryStatus = getrlimit(RLIMIT_STACK, &limits);
	if (queryStatus != 0)
		return queryStatus;

	const rlim_t requested = Size;
	if (limits.rlim_cur >= requested)
		return queryStatus;		// already large enough, nothing to do

	limits.rlim_cur = requested;
	const int updateStatus = setrlimit(RLIMIT_STACK, &limits);
	if (updateStatus != 0)
		fprintf(stderr, "setrlimit returned result = %d\n", updateStatus);

	return updateStatus;
}
#else

bool isFile(char* path) {
	DWORD res = _getFileAttributes(path);
	return !(res & FILE_ATTRIBUTE_DIRECTORY);
}

bool isDir(char* path) {
	DWORD res = _getFileAttributes(path);
	return ( (res != INVALID_FILE_ATTRIBUTES) && (res & FILE_ATTRIBUTE_DIRECTORY) );
}

// Checks if file exists. This is a faster implementation than fstream way of C++
bool isFileExists(char* path)
{
	DWORD res = _getFileAttributes(path);
	return (res != INVALID_FILE_ATTRIBUTES && !(res & FILE_ATTRIBUTE_DIRECTORY));
}
#endif

/*
 * Performs basic syntactic checks on a path. The path is rejected when it
 *  - contains any of < > " | *
 *  - contains a non-printable character
 *  - ends with one or more spaces
 * Returns true when none of these apply.
 */
bool isValidPath(string x)
{
	// A file name cannot contain any of < > : " / \ | ? * (regex "[<>:\"/\\\\\\|\\*\\?]"),
	// but a path may legitimately contain :, ?, \ and / as in c:\ or \\?\, so only the
	// remaining characters are treated as forbidden.
	const std::regex forbiddenChars("[<>\"\\|\\*]");
	const std::regex nonPrintable("[^[:print:]]");
	const std::regex trailingSpace("(.*[ ]+)");

	if(std::regex_search(x, forbiddenChars))
		return false;
	if(std::regex_search(x, nonPrintable))
		return false;

	// more checks to be added
	return !std::regex_match(x, trailingSpace);
}


/*
 * Computes the Levenshtein (edit) distance between s and t: the minimum number of
 * single-character insertions, deletions and substitutions that turn s into t.
 *
 * Only two rows of the distance matrix are kept at a time. The rows are owned by
 * std::vector, so they are released on every return path.
 */
int LevenshteinDistance(string s, string t)
{
	// degenerate cases
	if (s == t) return 0;
	if (s.empty()) return static_cast<int>(t.length());
	if (t.empty()) return static_cast<int>(s.length());

	const size_t columns = t.length() + 1;
	std::vector<int> previousRow(columns);
	std::vector<int> currentRow(columns);

	// row for an empty prefix of s: the distance is the number of characters of t to insert
	for (size_t j = 0; j < columns; j++)
		previousRow[j] = static_cast<int>(j);

	for (size_t i = 0; i < s.length(); i++)
	{
		// first column: delete (i+1) characters of s to match an empty t
		currentRow[0] = static_cast<int>(i) + 1;

		for (size_t j = 0; j < t.length(); j++)
		{
			const int substitutionCost = (s[i] == t[j]) ? 0 : 1;
			const int insertion = currentRow[j] + 1;
			const int deletion = previousRow[j + 1] + 1;
			const int substitution = previousRow[j] + substitutionCost;
			currentRow[j + 1] = std::min(std::min(insertion, deletion), substitution);
		}

		// the finished row becomes the previous row; swapping avoids copying it
		previousRow.swap(currentRow);
	}

	// after the final swap the last computed row is in previousRow
	return previousRow[t.length()];
}