So, here is the exhaustive exploration algorithm.

Note that this is:

- a mix of c and c++ idioms
- untested, just written in an editor and a sheet of paper

and thus is bound to be buggy if not entirely algorithmically faulty.

Code: Select all

```
struct S_Element {
int id; // uniquely identifies this element
int freq;
int size;
};
struct S_Examined {
int id; // comes from S_Element.id, is used to identify S_Element-s.
int gain;
};
struct S_Chosen {
int totalgain;
std::list<S_Examined> elements;
};
// Ugly globals.
int DictionarySizes[] = { 4, 6, 9, 14, 21, 32, 48, 72 }; // replace with real values.
int DictCount = 8; // replace with real values.
int UsedDictEntries = 0;
int main(argc, argv)
{
int maxWidth = atoi( argv[1] ); // run as "carrot.exe <width>"
std::vector<S_Element> candidates;
candidates.push_back( { /* A */ 0, 800, 50, /*bytes...*/ } ); // invalid syntax but you get the idea
candidates.push_back( { /* B */ 1, 390, 180, /*bytes...*/ } );
candidates.push_back( { /* C */ 2, 280, 160, /*bytes...*/ } );
candidates.push_back( { /* D */ 3, 430, 10, /*bytes...*/ } );
// and so on...
if (maxWidth > candidates.size())
{
maxWidth = candidates.size(); // Do not explore more choices than exist.
}
if (maxWidth > DictCount)
{
maxWidth = DictCount; // No need to return more results than dictionary entries.
}
S_Chosen chosen = ChooseElements( DictionarySizes, &candidates[0], candidates.size(), maxWidth );
double examinedCount = Factorial(maxWidth);
examinedCount *= examinedCount; // Because we are re-examining all entries at each step.
double totalCount = Factorial(candidates.size());
totalCount *= totalCount;
printf("Examined %.2f combinations out of a total of %.2f.\n", examinedCount);
printf("Size reduction obtained: %d\n", chosen.totalgain);
printf("Fill your dictionary with (in order):\n");
for (auto it = chosen.elements.begin() ; *it ; ++it )
{
printf("id %d\n", (*it).id);
}
}
long Factorial(int n)
{
// Note: this might overflow, do not use too large values of width.
double top = 1.f;
for (int i = 2 ; i <= n ; i++ )
{
top *= (double)i;
}
return top;
}
bool CompareElements(S_Examined const& a, S_Examined const& b)
{
return a.gain < b.gain; // Sort by increasing gain.
}
S_Chosen ChooseElements( const int* const dict, const S_Element* const candidates, int count, int width )
{
S_Chosen chosen; // Will be returned with RVO so declared first is better.
chosen.totalgain = ~0; // largest negative number, can also use std::limits<int>::Min() or something like that.
if (count == 0 || width == 0)
{
chosen.totalgain = 0; // No candidates or dictionary entries, return an empty list.
return chosen;
}
std::vector<S_Examined> examined;
for (int c = 0 ; c < count ; c++)
{
// Compute the gain obtained by encoding this candidate in the first spot of the dictionary.
S_candidate& cand = *candidates[i];
int gain = ComputeGain(dict, cand);
examined.push_back( { c, gain } ); // Add to vector for sorting.
}
// We sort by gain, this allows us to make the greedy choice of "best immediate candidate"
// when we are asked to examine the minimum number of choices (width = 1).
std::sort(examined, CompareElements); // Sort by gain, highest last.
// Below is the loop where we examine "width" possible branches starting from the
// current best gain candidate. If width is 1, then we only examine the first best
// candidate, if width is 2, then we examine the two best candidates,
// if width == count, then we examine all possible candidates.
int index = count - 1;
S_Examined current = examined.last();
examined.pop_back(); // Remove from the array (since we are choosing it for the first iteration).
do
{
// Obtain the rest of the entries to put in the dictionary (recursive call).
std::vector<S_Element>&
S_Chosen rest = ComputeGains( dict + 1, &examined[0], count - 1, width - 1 );
// If this returned list has better or equal total gain than the current one, we choose it.
if (current.gain + rest.totalgain >= bestTotalGain)
{
chosen.totalgain = current.gain + rest.totalgain;
chosen.elements.swap(rest);
chosen.elements.push_front(current);
}
// Get the "next current" and place the "current current" back into the vector in its place.
S_Examined next = examined[--index];
examined[index] = current;
current = next;
} while (width-- >= 0); // We do at least one run of that loop.
return chosen;
}
int ComputeGain( int* dictionary, S_Element& element)
{
// Note: this could be negative
int gain = element.freq * (element.size - dictionary[0]);
return gain;
}
```