Skip to content

Commit b139576

Browse files
More experiments with generating input data for syntax tree parsers
1 parent 2f0edae commit b139576

File tree

3 files changed

+240
-1
lines changed

3 files changed

+240
-1
lines changed

blobstamper/galley.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,19 @@ GalleySetBase::extract_internal(Blob &blob)
347347
return res;
348348
}
349349

350+
void
351+
GalleySetBase::LoadAll(Blob &blob)
352+
{
353+
std::vector<Blob> blobs = extract_internal(blob);
354+
for(int i=0; i<blobs.size(); i++)
355+
{
356+
Blob blob = blobs[i];
357+
StampBase & stamp = stamps[i];
358+
stamp.Load(blob);
359+
}
360+
}
361+
362+
350363
std::vector<std::string>
351364
GalleySetStr::ExtractStrSet(Blob &blob)
352365
{
@@ -356,7 +369,7 @@ GalleySetStr::ExtractStrSet(Blob &blob)
356369
{
357370
Blob blob = blobs[i];
358371
StampBaseStr & stamp = s_stamps[i];
359-
std::string str= stamp.ExtractStr(blob);
372+
std::string str = stamp.ExtractStr(blob);
360373
res.push_back(str);
361374
}
362375
return res;

blobstamper/galley.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class GalleySetBase : public GalleyBase
9999
public:
100100
GalleySetBase(std::vector<std::reference_wrapper<StampBase>> arg) : stamps(arg) {};
101101
std::vector<Blob> extract_internal(Blob &blob);
102+
void LoadAll(Blob &blob);
102103

103104
int minSize() override;
104105
int maxSize() override;

examples/exampleZZ.cpp

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
#include<stdio.h>
#include<string.h>

#include<string>
#include<iostream>
#include<cstdlib>
#include<memory>

#include<blobstamper/blobstamper.h>
9+
10+
// Alias for a vector of std::reference_wrapper<T>, i.e. a vector that refers to
// objects without owning them.
// NOTE(review): adding new declarations to namespace std is undefined behavior
// according to the C++ standard; consider moving this alias to a project
// namespace (all users of std::ref_vector would have to be updated together).
namespace std
11+
{
12+
template<class T> using ref_vector = vector<reference_wrapper<T>>;
13+
}
14+
15+
template<class StampT> class StampLottery: public StampT
16+
{
17+
protected:
18+
std::ref_vector<StampT> stamps;
19+
int oracle_size;
20+
int init_oracle_size(std::ref_vector<StampT> stamps_arg);
21+
22+
int stored_min;
23+
int init_stored_min(std::ref_vector<StampT> stamps_arg);
24+
25+
public:
26+
StampLottery(std::ref_vector<StampT> stamps_arg): stamps(stamps_arg), oracle_size(init_oracle_size(stamps_arg)), stored_min(init_stored_min(stamps_arg)) {};
27+
StampLottery(): stored_min(-1) {};
28+
29+
virtual int minSize() override;
30+
virtual int maxSize() override;
31+
virtual std::string ExtractStr(Blob &blob) override;
32+
void Append(StampT & stamp);
33+
};
34+
35+
36+
template<class StampT> int
37+
StampLottery<StampT>::
38+
init_stored_min(std::ref_vector<StampT> stamps_arg)
39+
{
40+
int min = std::numeric_limits<int>::max();
41+
42+
for(StampT & stamp : stamps)
43+
{
44+
45+
if (min > stamp.minSize())
46+
min = stamp.minSize();
47+
}
48+
return min;
49+
}
50+
51+
template<class StampT> int
52+
StampLottery<StampT>::init_oracle_size(std::ref_vector<StampT> stamps_arg)
53+
{
54+
unsigned long size = stamps_arg.size();
55+
if (size < std::numeric_limits<unsigned char>::max())
56+
return 1;
57+
if (size < std::numeric_limits<unsigned short int>::max())
58+
return 2;
59+
if (size < std::numeric_limits<unsigned int>::max())
60+
return 4;
61+
return 8;
62+
}
63+
64+
65+
template<class StampT> int
66+
StampLottery<StampT>::minSize()
67+
{
68+
return stored_min + oracle_size;
69+
}
70+
71+
template<class StampT> int
72+
StampLottery<StampT>::maxSize()
73+
{
74+
return -1; // FIXME this is true only for recurion case. Should fix it somehow if Lottery is used in other cases
75+
}
76+
77+
78+
template<class StampT> std::string
79+
StampLottery<StampT>::ExtractStr(Blob &blob)
80+
{
81+
unsigned long oracle;
82+
unsigned long oracle_max;
83+
84+
switch (oracle_size)
85+
{
86+
case 1:
87+
{
88+
StampArithm<unsigned char> stamp;
89+
oracle = stamp.ExtractValue(blob);
90+
oracle_max = std::numeric_limits<unsigned char>::max();
91+
break;
92+
}
93+
case 2:
94+
{
95+
StampArithm<unsigned short> stamp;
96+
oracle = stamp.ExtractValue(blob);
97+
oracle_max = std::numeric_limits<unsigned short>::max();
98+
break;
99+
}
100+
case 4:
101+
{
102+
StampArithm<unsigned int> stamp;
103+
oracle = stamp.ExtractValue(blob);
104+
oracle_max = std::numeric_limits<unsigned int>::max();
105+
break;
106+
}
107+
case 8:
108+
{
109+
StampArithm<unsigned long> stamp;
110+
oracle = stamp.ExtractValue(blob);
111+
oracle_max = std::numeric_limits<unsigned long>::max();
112+
break;
113+
}
114+
default:
115+
abort(); // Should never get here
116+
}
117+
118+
/* Actually we use only stamps that short enogh to consume blob's available data*/
119+
std::ref_vector<StampT> actual_stamps;
120+
for(StampT & stamp : stamps)
121+
{
122+
if(blob.Size() < stamp.minSize()) // Skip all stamps that dose not fit
123+
continue;
124+
if ( stamp.isUnbounded() || // Unbounded is always ok
125+
stamp.maxSize() > blob.Size() || // Variated that can consume all data is ok
126+
stamp.minSize() * 2 > blob.Size() // Fixed or variated stamp that lefts less data then it's min size will also do
127+
)
128+
{
129+
actual_stamps.push_back(stamp);
130+
}
131+
}
132+
if (actual_stamps.empty())
133+
{
134+
// Add just everything that fits
135+
for(StampT & stamp : stamps)
136+
{
137+
if(blob.Size() < stamp.minSize()) // Skip all stamps that dose not fit
138+
continue;
139+
actual_stamps.push_back(stamp);
140+
}
141+
}
142+
143+
if (actual_stamps.empty())
144+
throw OutOfData(); // This should not happen
145+
146+
long long index = ((double) oracle) / oracle_max * actual_stamps.size();
147+
if ( index == actual_stamps.size()) index--; /* If we hit the boundary step inside a bit*/
148+
149+
StampT& stamp = actual_stamps[index];
150+
return stamp.ExtractStr(blob);
151+
}
152+
153+
154+
template<class StampT> void
155+
StampLottery<StampT>::Append(StampT & stamp)
156+
{
157+
if (stamp.minSize()<stored_min)
158+
{
159+
stored_min = stamp.minSize();
160+
}
161+
stamps.push_back(stamp);
162+
oracle_size = init_oracle_size(stamps);
163+
}
164+
165+
166+
class BinaryOp: public StampBaseStr, public GalleySetBase
167+
{
168+
protected:
169+
std::string op_name;
170+
StampBaseStr &stamp1;
171+
StampBaseStr &stamp2;
172+
public:
173+
virtual std::string ExtractStr(Blob &blob) override;
174+
BinaryOp(std::string arg_op_name, StampBaseStr& arg_stamp1, StampBaseStr& arg_stamp2) :
175+
GalleySetBase({arg_stamp1, arg_stamp2}),
176+
op_name(arg_op_name),
177+
stamp1(arg_stamp1),
178+
stamp2(arg_stamp2) {};
179+
};
180+
181+
std::string
182+
BinaryOp::ExtractStr(Blob &blob)
183+
{
184+
std::vector<Blob> blobs = extract_internal(blob);
185+
return (std::string)"(" + stamp1.ExtractStr(blobs[0]) + " "+ op_name + " " + stamp2.ExtractStr(blobs[1]) + ")";
186+
}
187+
188+
// Operator names; main() creates one BinaryOp stamp for each entry.
std::vector<std::string> ops = {"+","-","*","/","^"};
189+
190+
int main()
191+
{
192+
// char data[] = "abcdef" "abcdef" "ABCDEF" "012345" "sdfaskdlfjalsfjdlasjfaksdjfgkwuergkwhfdaksjdfgaskuyrgfaelkrgfsaldjfgakyefgrkweugyfaksjskdfsd";
193+
194+
char data[] =
195+
"\x051\x04E\x05A\x018\x043\x00C\x039\x0DC\x069\x0AC\x009\x014\x05A\x0B2\x07F\x078\x021\x09F\x08B\x0B1\x07E\x060\x01F\x04A\x0D1\x071\x05C\x04F\x011\x0D0\x061\x0FB\x037\x077\x081\x00C\x059\x00A\x037\x02F\x061\x04A\x065\x06D"
196+
"\x003\x04A\x0BC\x099\x0F8\x00B\x0F7\x020\x0C9\x074\x065\x008\x0B4\x010\x008\x0B4\x08B\x070\x0E1\x0EF\x026\x04F\x0F9\x0AB\x01C\x06C\x035\x018\x086\x037\x0E7\x02F\x044\x057\x001\x020\x006\x0DD\x0C4\x059\x0D1\x0C5\x0A9\x005"
197+
"\x038\x078\x0E2\x053\x01D\x0F0\x06E\x0E6\x018\x0B6\x048\x0F1\x0DC\x061\x092\x0FB\x0D3\x010\x0B8\x042\x0CA\x0C1\x0E3\x075\x077\x099\x093\x0CC\x063\x0F0\x09E\x044\x03D\x070\x01A\x089\x035\x032\x04A\x0BD\x082\x0BF\x0EA\x002"
198+
"\x043\x071\x079\x0A0\x068\x0B3\x0D9\x029\x0E9\x045\x0A2\x027\x003\x02E\x0E2\x01F\x007\x0BD\x0CF\x00A\x03E\x00D\x044\x024\x0FA\x0DB\x03D\x033\x036\x011\x081\x070\x0B6\x04A\x083\x061\x05F\x0AE\x0F0\x0C5\x0A1\x010\x05B\x003"
199+
"\x061\x0C3\x0D2\x078\x0BD\x0F8\x0E1\x04B\x02F\x0D9\x093\x09F\x00E\x0D6\x03A\x070\x0F8\x052\x013\x0EE\x062\x0C0\x027\x0E5\x07B\x07B\x09E\x05D\x074\x068\x0C6\x0CD\x04E\x022\x03B\x04E\x0E7\x0E7\x0EE\x0EC\x015\x02C\x0FA\x050"
200+
"\x033\x042\x0E6\x0BF\x028\x002\x052\x096\x033\x057\x0D8\x082\x053\x06E\x0BD\x0C6\x0ED\x015\x036\x09E\x03B\x0BE\x0F3\x068\x0BD\x0EC\x0D3\x0E9\x023\x029\x081\x0CF\x0F8\x02D\x081\x049\x007\x0CC\x005\x004\x062\x040\x0E0\x0D0"
201+
"\x0CD\x062\x0D4\x09B\x007\x001\x037\x020\x059\x0AC\x0FC\x0A4\x095\x049\x05F\x04C\x0DA\x02B\x0E8\x0E9\x0BF\x029\x01F\x0D0\x06B\x06E\x0F5\x005\x075\x07B\x036\x0D2\x054\x078\x0D3\x059\x077\x09A\x0D5\x079\x0AC\x034\x030\x0FD"
202+
"\x006\x079\x022\x0F4\x0ED\x059\x080\x081\x08F\x0A6\x08F\x042\x08A\x0CC\x030\x019\x094\x0F3\x062\x00B\x08A\x0D4\x0F8\x0F3\x03B\x049\x0D1\x06D\x0C6\x067\x006\x0D3\x023\x035\x053\x0C1\x0F8\x068\x0EF\x0AD\x0C7\x053\x004\x02C"
203+
"\x092\x087\x075\x0B0\x0F0\x0F7\x0D9\x04C\x0C7\x0A2\x095\x02B\x038\x02E\x0F2\x005\x0BE\x0CD\x02E\x093\x08A\x088\x063\x07D\x0F1\x08A\x002\x0D0\x0B9\x05C\x008\x066\x002\x044\x0B0\x08F\x041\x009\x06F\x0E5\x08B\x068\x0EB\x05A";
204+
205+
Blob blob(data, strlen(data));
206+
207+
StampArithm<unsigned char> stampс;
208+
209+
DictLCAlphaSmall dict;
210+
StampDict stamp_dict(dict);
211+
212+
StampLottery<StampBaseStr> stamp_lot({stampс, stamp_dict});
213+
214+
for(std::string op_name : ops)
215+
{
216+
BinaryOp *stamp_bi = new BinaryOp(op_name, stamp_lot, stamp_lot);
217+
stamp_lot.Append(*stamp_bi);
218+
}
219+
220+
for(int i=stamp_lot.minSize(); i<=strlen(data);i++)
221+
{
222+
Blob blob2(data, i);
223+
std::cout << i << " " << stamp_lot.ExtractStr(blob2) <<"\n";
224+
}
225+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy