; verbose (-v: yes -v-: no)
-v -
; keep intermediary files (-x: yes -x-: no)
-x -
; flex rules (input file, binary format)
;-b (not specified, not doing actions with already created flex rules.)
; word list (optional if -b is specified, otherwise N/A) (-I filename)
;-I (N/A)
; output, lemmas of the words in the input (-I option)
;-O (N/A)
; word/lemma list
-i dict_de_without_doubles.ph
; extra file name affix
-e ziggurat
; suffix only (-s: yes -s-: no)
-s -
; make rules with infixes less prevalent (-A: yes -A-: no)
-A -
; columns (1 or F or W=word, 2 or B or L=lemma, 3 or T=tags, 0 or O=other)
-n FBO
; max recursion depth when attempting to create a candidate rule
-Q 1
; flex rules (output, binary format, can be left unspecified)
;-o (not specified, autogenerated)
; temp dir (including separator at end!)
-j tmp/
; penalties to decide which rule survives (4 or 6 floating point numbers:
; R=>R;W=>R;R=>W;W=>W[;R=>N/A;W=>N/A], where R=#right cases, W=#wrong cases,
; N/A=#not applicable cases, and each pair reads "previous success state =>
; success state after rule application"; see the sketch after this parameter listing)
-D 0.072141;-0.562967;0.815173;-0.043071;0.073552;0.078053;
; compute parms (-p: yes -p-: no)
-p
; expected optimal pruning threshold (only effective in combination with -XW)
-C -1
; tree penalty (-XC: constant -XD: more support is better -XE: higher entropy is better -XW: fewer pattern characters other than wildcards is better)
-X C
; current parameters (-P filename)
-P parms.txt
; best parameters (-B filename)
-B best_ziggurat.txt
; start training with minimal fraction of training pairs (-Ln: 0.0 < n <= 1.0)
-L 0.062830
; end training with maximal fraction of training pairs (-Hn: 0.0 < n <= 1.0)
-H 1.000000
; number of differently sized fractions of training data (natural number)
-K 20
; number of iterations of training with the same fraction of training data when the fraction is minimal (positive number)
-N 100.000000
; number of iterations of training with the same fraction of training data when the fraction is maximal (positive number)
-M 10.000000
; competition function (deprecated)
;-f (N/A)
; redo training after homographs for the next round are removed (-R: yes -R-: no)
;-R - (N/A)
; max. pruning threshold to evaluate
-c 5
; test with the training data (-T: yes -T-: no)
-T
; test with data not used for training (-t: yes -t-: no)
-t
; create flex rules using the full training set (-F: yes -F-: no)
-F
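;
; The following is a minimal sketch, not part of this output, of how the six -D
; penalty weights listed above could be folded into a single score for a candidate
; rule. The transition order comes from the -D comment; the plain weighted sum and
; the example counts are assumptions made purely for illustration.

# Illustrative sketch only (not the trainer's actual code): combining a 6-element
# -D penalty vector into one score for a candidate rule.
# Transition order follows the -D comment: R=>R, W=>R, R=>W, W=>W, R=>N/A, W=>N/A.

# Weights copied from the -D line above.
D = [0.072141, -0.562967, 0.815173, -0.043071, 0.073552, 0.078053]

TRANSITIONS = ["R=>R", "W=>R", "R=>W", "W=>W", "R=>N/A", "W=>N/A"]

# Hypothetical tallies for one candidate rule: how many training pairs ended up
# in each transition (previous success state => state after applying the rule).
counts = {
    "R=>R": 120,    # stayed correctly lemmatized
    "W=>R": 35,     # repaired by the rule
    "R=>W": 4,      # broken by the rule
    "W=>W": 10,     # still wrong
    "R=>N/A": 2,    # previously right, rule no longer applicable
    "W=>N/A": 7,    # previously wrong, rule no longer applicable
}

score = sum(weight * counts[t] for weight, t in zip(D, TRANSITIONS))
print(f"penalty-weighted score: {score:.6f}")
# Whether a lower or higher score wins is a convention of the trainer and is not
# spelled out in this file.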
; Number of clusters found in word/lemma list: 49938
; Number of lines found in word/lemma list: 318319
;
; Evaluation:
; -----------
; Lemmatization results for all data in the training set.
; For pruning threshold 0 there can be no errors (diff% = 0).
;
; prun. thrshld.         0            1            2            3            4            5
; rules           33033.000000 17698.000000  7428.000000  5085.000000  4058.000000  3460.000000
; rules%             10.383491     5.563135     2.334895     1.598403     1.275579     1.087606
; same%              98.261403    94.622953    91.974036    90.729262    89.853519    89.189011
; ambi1%              0.856882     0.784585     0.486594     0.420583     0.414610     0.393864
; ambi2%              0.856882     0.724861     0.414296     0.330997     0.304907     0.295791
; ambi3%              0.024833     0.009116     0.000000     0.000000     0.000000     0.000000
; diff%               0.000000     3.858486     7.125075     8.519159     9.426964    10.121334
; same%stdev          0.000000     0.000000     0.000000     0.000000     0.000000     0.000000
; ambi1%stdev         0.000000     0.000000     0.000000     0.000000     0.000000     0.000000
; ambi2%stdev         0.000000     0.000000     0.000000     0.000000     0.000000     0.000000
; ambi3%stdev         0.000000     0.000000     0.000000     0.000000     0.000000     0.000000
; diff%stdev          0.000000     0.000000     0.000000     0.000000     0.000000     0.000000
;
; Evaluation of prediction of ambiguity (whether a word has more than one possible lemma)
; ---------------------------------------------------------------------------------------
; amb.rules%          1.738597     1.581743     1.024110     0.877629     0.858454     0.844623
; false_amb%          0.000000     0.676139     0.726747     0.696885     0.698771     0.698457
; false_not_amb%      0.000000     0.832993     1.441235     1.557854     1.578914     1.592431
; true_amb%           1.738597     0.905605     0.297363     0.180744     0.159683     0.146167
; true_not_amb%      98.261403    97.585264    97.534656    97.564518    97.562632    97.562946
; precision           1.000000     0.401086     0.169838     0.114793     0.102543     0.094724
; recall              1.000000     0.520882     0.171036     0.103960     0.091846     0.084072
;
; Evaluation:
; -----------
; Lemmatization results for data that is not part of the training data.
;
; prun. thrshld.         0            1            2            3            4            5
; rules           32719.428571 17545.428571  7359.285714  5035.428571  4018.000000  3424.857143
; rules%             10.438922     5.597755     2.347932     1.606521     1.281917     1.092678
; same%              90.371971    90.213686    89.720565    89.193961    88.658225    88.107269
; ambi1%              0.648362     0.523560     0.441373     0.374406     0.368319     0.383538
; ambi2%              0.420066     0.395714     0.313527     0.234385     0.258736     0.277000
; ambi3%              0.000000     0.003044     0.003044     0.003044     0.003044     0.003044
; diff%               8.559601     8.863996     9.521490    10.194204    10.711677    11.229149
; same%stdev          1.164450     1.134153     1.091868     1.107046     1.119933     1.154140
; ambi1%stdev         0.198522     0.189906     0.138120     0.149730     0.151857     0.163871
; ambi2%stdev         0.131795     0.122593     0.079563     0.089025     0.101452     0.076885
; ambi3%stdev         0.000000     0.007507     0.007763     0.007763     0.007763     0.007763
; diff%stdev          1.131287     1.066137     1.030327     1.010569     1.021147     1.106600
;
; Evaluation of prediction of ambiguity (whether a word has more than one possible lemma)
; ---------------------------------------------------------------------------------------
; amb.rules%          1.257153     1.111043     0.910142     0.760989     0.782296     0.824912
; false_amb%          0.167418     0.143066     0.121758     0.121758     0.133934     0.143066
; false_not_amb%      0.258736     0.258736     0.252648     0.264824     0.264824     0.264824
; true_amb%           0.030440     0.030440     0.036527     0.024352     0.024352     0.024352
; true_not_amb%      14.364422    14.388774    14.410082    14.410082    14.397906    14.388774
; precision           0.083333     0.096154     0.130435     0.090909     0.083333     0.078431
; recall              0.105263     0.105263     0.126316     0.084211     0.084211     0.084211
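;
; The following sketch, not part of this output, reproduces the precision and
; recall rows of the ambiguity tables. The formulas are inferred from the figures
; themselves (they are not stated in this file) and match the tabulated values to
; rounding; note that false positives appear to be counted twice in the precision
; denominator.

# Inferred sanity check, using values copied from the held-out table at pruning
# threshold 0:
#     recall    = true_amb / (true_amb + false_not_amb)
#     precision = true_amb / (true_amb + 2 * false_amb)

true_amb      = 0.030440
false_amb     = 0.167418
false_not_amb = 0.258736

recall    = true_amb / (true_amb + false_not_amb)
precision = true_amb / (true_amb + 2 * false_amb)
print(f"recall    = {recall:.6f}")     # table value: 0.105263
print(f"precision = {precision:.6f}")  # table value: 0.083333

# The five lemmatization outcomes partition the test words: per column,
# same% + ambi1% + ambi2% + ambi3% + diff% sums to 100 (held-out, threshold 0).
outcomes = [90.371971, 0.648362, 0.420066, 0.000000, 8.559601]
assert abs(sum(outcomes) - 100.0) < 1e-3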
;
; Power law relating the number of rules in the decision tree to the number of examples in the training data
; -----------------------------------------------------------------------------------------------------------
; prun. thrshld.        0             1             2             3             4             5
; #rules =        2.014*N^0.768 1.286*N^0.754 0.198*N^0.839 0.086*N^0.876 0.051*N^0.898 0.035*N^0.915
;
; Postscriptum
; The number of rules can be estimated from the number of training examples by
; a power law. See the last line in the table above, which is based on 7
; different samples from the total available training data mass, varying in size
; from 1.54 % to 98.56 %.
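;
; As an illustration of the postscriptum, the sketch below plugs the reported
; number of word/lemma lines into the six fitted power laws to estimate the
; number of rules per pruning threshold. Only the coefficients from the table
; above are used; the rest is plain arithmetic.

# Estimate #rules = a * N**b for each pruning threshold, using the fitted
# coefficients from the power-law table and the size of the word/lemma list.

N = 318319  # lines found in the word/lemma list

fits = {
    0: (2.014, 0.768),
    1: (1.286, 0.754),
    2: (0.198, 0.839),
    3: (0.086, 0.876),
    4: (0.051, 0.898),
    5: (0.035, 0.915),
}

for threshold, (a, b) in fits.items():
    print(f"pruning threshold {threshold}: ~{a * N ** b:,.0f} rules")

# For threshold 0 this predicts roughly 34,000 rules; the full-training-set run
# above reported 33,033, so the fit gives the right order of magnitude rather
# than an exact count.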