-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsichel.m
More file actions
executable file
·71 lines (65 loc) · 2.85 KB
/
sichel.m
File metadata and controls
executable file
·71 lines (65 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env wolframscript
(* Load a two-column "value count" histogram file, keep only the rows
   whose first column is an integer no larger than max, and normalize
   the second column into relative frequencies.
   Returns {values, frequencies}. *)
ReadStats[filename_, max_: 100] :=
 Module[{columns, values, weights, keep},
  columns = Transpose[
    Map[ToExpression, StringSplit /@ ReadList[filename, String], 2]];
  {values, weights} = columns;
  (* Boolean mask: True where the value is an in-range integer. *)
  keep = MatchQ[#, _Integer?(# <= max &)] & /@ values;
  values = Pick[values, keep];
  weights = Pick[weights, keep];
  weights = weights/N[Total[weights]];
  {values, weights}
 ]
(* Render a number as a plain string, using "1.5e3"-style exponent
   notation instead of the default "1.5x10^3" typeset form. *)
PrintF[x_] :=
 ToString[
  NumberForm[x,
   NumberFormat ->
    Function[{mantissa, base, exponent},
     If[exponent == "", mantissa, SequenceForm[mantissa, "e", exponent]]]]]
(* Stringify a value for tab-separated output: strings pass through
   untouched, everything else is formatted with PrintF. *)
MyPrint[s_String] := s
MyPrint[x_] := PrintF[x]
(* Read a saved model expression back from disk.  Any message raised
   while reading or parsing (e.g. missing file) is caught by Check and
   turned into an empty list. *)
LoadModel[filename_] := Check[ToExpression @ ReadString @ filename, {}]
(* Write x to stream as one line: lists become tab-separated fields
   (each formatted via MyPrint), scalars are printed directly.
   Defaults to stdout. *)
PrintLine[x_, stream_: Streams["stdout"][[1]]] :=
 Module[{line},
  line = If[ListQ[x], StringRiffle[MyPrint /@ x, "\t"], MyPrint[x]];
  WriteLine[stream, line]
 ]
(* Regularized Newton step: invert the Hessian through its eigenvalues,
   clipping every eigenvalue below epsilon to epsilon so the inverse
   stays bounded, then apply the result to the gradient. *)
HesseStep[hessian_, grad_, epsilon_: 0.1] :=
 Module[{clippedInverse},
  clippedInverse =
   MatrixFunction[Function[ev, If[ev < epsilon, 1/epsilon, 1/ev]], hessian];
  clippedInverse . grad
 ]
(* Probability of count r under a Sichel distribution with parameters
   a, b and order g (default -1/2, the inverse-Gaussian--Poisson case). *)
Sichel[a_, b_, r_, g_: -1/2] :=
 Module[{normalizer},
  normalizer = BesselK[g, a*Sqrt[1 - b]]*r!;
  (1 - b)^(g/2)*(a*b/2)^r*BesselK[g + r, a]/normalizer
 ]
(* Fit the two free parameters {a, b} of a Sichel distribution (order
   gamma held fixed) to the count histogram in `dataset`, by minimizing
   the KL divergence between the empirical frequencies and the model PMF.
   dataset    : path to a whitespace-separated "count frequency" file
   max        : drop counts above this value (passed to ReadStats)
   tol        : stop when Max[Abs[grad]] falls below this
   eta        : step-size multiplier (also sets HesseStep's clip as 1/eta)
   maxiter    : iteration cap on the optimization loop
   usehessian : 1 = Newton step via LinearSolve, 2 = eigenvalue-clipped
                Newton step via HesseStep, anything else = gradient descent
   gamma      : fixed order parameter of the Sichel distribution
   Returns 0 on success (fitted {a, b} saved to "<dataset>.g<gamma>.learned"),
   1 if the objective stops being a machine real or the iterate leaves
   the region 0 < a < 100, 0 < b < 1. *)
Optimize[dataset_, max_: 100, tol_: 10^-3, eta_: 1.0, maxiter_: 1000, usehessian_: False, gamma_: -1/2] :=
Module[{learned=dataset <> ".g" <> ToString[N[gamma]] <> ".learned",
x0, x, y, i, f, df, ddf, obj, grad, hessian, constant},
(* Warm-start from a previously saved fit if the .learned file parses
   to a pair; otherwise fall back to a generic starting point. *)
x0 = LoadModel[learned];
If[Head[x0] =!= List || Length[x0] != 2, x0={10.0, 0.95}];
{x, y} = ReadStats[dataset, max];
(*x = x-1;*)
(* Data-only part of the objective: y.Log[y] is the empirical entropy
   term and y.Log[x!] supplies the r! factor omitted from the model
   term below, making f[{a,b}] the KL divergence to the Sichel PMF. *)
constant = y.Log[y] + y.Log[x!];
f[{a_, b_}] := constant - Sum[
y[[i]] Log[((1 - b)^(gamma/2)*(a*b/2)^x[[i]]*BesselK[gamma + x[[i]], a])/(BesselK[gamma, a*Sqrt[1 - b]])]
, {i, Length[x]}];
(* Symbolic gradient and Hessian; Derivative[{n1, n2}] is the spec for
   differentiating a function that takes a single list argument. *)
df = {Derivative[{1, 0}][f], Derivative[{0, 1}][f]};
ddf = {{Derivative[{1, 0}][df[[1]]], Derivative[{0, 1}][df[[1]]]},
{Derivative[{1, 0}][df[[2]]], Derivative[{0, 1}][df[[2]]]}};
PrintLine["\titer\tobjective\terror", Streams["stderr"][[1]]];
For[i = 1, i <= maxiter , i++,
obj = f[x0];
grad = {df[[1]][x0], df[[2]][x0]};
(* Step selection: 1 = exact Newton step, 2 = clipped Newton step with
   eigenvalue floor 1/eta, default = plain gradient descent. *)
Switch[usehessian,
1, hessian = {{ddf[[1, 1]][x0], ddf[[1, 2]][x0]},
{ddf[[2, 1]][x0], ddf[[2, 2]][x0]}};
x0 -= eta*LinearSolve[hessian, grad],
2, hessian = {{ddf[[1, 1]][x0], ddf[[1, 2]][x0]},
{ddf[[2, 1]][x0], ddf[[2, 2]][x0]}};
x0 -= HesseStep[hessian, grad, 1/eta],
_, x0 -= eta*grad
];
PrintLine[{"", i, obj, Max[Abs[grad]]}, Streams["stderr"][[1]]];
(* Bail out (exit code 1) on divergence: non-real objective or the
   iterate leaving the valid parameter region. *)
If[Not[Head[obj] === Real && 0<x0[[1]]<100 && 0<x0[[2]]<1], Return[1]];
If[Max[Abs[grad]] < tol, Break[]];
];
hessian = {{ddf[[1, 1]][x0], ddf[[1, 2]][x0]},
{ddf[[2, 1]][x0], ddf[[2, 2]][x0]}};
(* Final objective re-evaluated at 19-digit precision, presumably to
   avoid machine-precision loss in the BesselK ratios -- TODO confirm. *)
obj = f[N[x0, 19]];
(* NOTE(review): fixed-format stdout record (objective, log-det Hessian,
   plus constant columns) that looks consumed by a downstream scoring
   pipeline; verify the column layout against that consumer. *)
PrintLine[{obj, " ", 0, " ", Log[100.0], " ", 0, " ", Log[Det[hessian]], " ", 0, " ", 2}];
(* NOTE(review): `f` is reused here as a file stream, shadowing the
   objective defined above; harmless since f is not called again, but
   rename if this block is ever extended. *)
f = OpenWrite[learned];
WriteString[f, ToString[x0]];
Close[f];
Return[0]
]
(* Command-line entry point: first argument is the dataset path, the
   remaining arguments are parsed as Wolfram expressions and forwarded
   to Optimize in order.  Exits with Optimize's return code. *)
If[Length[$ScriptCommandLine] < 2,
Print["Usage: datasetname x_max tol eta maxiter usehessian gamma"];
Exit[1]
]
Module[{dataset, numericArgs},
 dataset = $ScriptCommandLine[[2]];
 numericArgs = ToExpression[$ScriptCommandLine[[3 ;;]]];
 Exit[Optimize @@ Prepend[numericArgs, dataset]]
]