ZenHAX
http://zenhax.com/

BPE Compression Code converted to Python error
http://zenhax.com/viewtopic.php?f=11&t=4148
Page 1 of 1

Author:  eatrawmeat391 [ Sat Apr 29, 2017 2:59 pm ]
Post subject:  BPE Compression Code converted to Python error

I tried to convert aluigi's yuke_bpe.c code to python for decompression BPE files in memory.The code works fine for most of the files,however there is one file that it fails to decompress properly.This is not the bug from his source since quickbms unpacks them properly.
Can anyone tell me what is wrong with this converted code?

Code:
from binascii import hexlify
from io import BytesIO
import numpy

def read_int(content,offset,size):
    string   = content[offset:offset+size]
    string   = string[::-1]
    hex_data = hexlify(string)   
    return int(hex_data, 16)
   
def xgetc(input_string):
    try:
        # ord return an int of a ASCII character
        return ord(input_string.read(1))
    except:
        # If reaches EOF then ord will take a string size of 0 which raises Exception
        return -1   
   
def memset(bytes, char ,time):
    # chr converts an int to a ASCII character
    # 'ac' * 4 = 'acacacac'
    replace_string = chr(char) * time
    # bytes is a file like object
    offset = bytes.tell()
    bytes.seek(0)
    bytes.write(replace_string)
    bytes.seek(offset)
    return
   
def yuke_bpe(input_buffer,unbpe_size,fillout_size):
    in_buf = BytesIO(input_buffer)
    in_buf.seek(0)
    out_buf = BytesIO()
    stack = numpy.empty((512+4096), dtype=numpy.uint8) # stack can also be a list if you don't like numpy
    count = 0
    while True:
        i = 0
        while True:
            c = xgetc(in_buf)
            if c < 0:
                break
            if c > 127:
                c -= 127
                while c > 0 and i < 256:
                    stack[i * 2] = i
                    c -= 1
                    i += 1
            c += 1
            while c > 0 and i < 256:
                n = xgetc(in_buf)
                if n < 0:
                    break
                stack[i * 2] = n
                if i != n:
                    n = xgetc(in_buf)
                    if n < 0:
                        break
                    stack[(i * 2) + 1] = n
                c -= 1
                i += 1
            if not i < 256:
                break
               
        n = xgetc(in_buf)
        if n < 0:
            break
        size = n
        n = xgetc(in_buf)
        if n < 0:
            break
        size |= (n << 8)
       
        while (size | count) != 0:
            if count != 0:
                count -= 1
                n = stack[count + 512]
            else:
                n = xgetc(in_buf)
                if n < 0:
                    break
                size -= 1
            c = stack[n * 2]
            if n == c:
                if len(out_buf.getvalue()) >= unbpe_size:
                    return out_buf.getvalue()
                out_buf.write(chr(n))
            else:
                if (count + 512 + 2) > (512 + 4096): # (512 + 4096 is the sizeof(stack))
                    return out_buf.getvalue()
                stack[count + 512] = stack[(n * 2) + 1];
                stack[count + 512 + 1] = c
                count += 2
    if fillout_size != 0: # this is what is wanted by the format
        memset(out_buf,0,(unbpe_size-len(out_buf.getvalue())))
    return out_buf.getvalue() # getvalue returns all of the bytes in a BytesIO without changing the cursor
   
def extract_bpe(input_buffer):
    unbpe_size = read_int(input_buffer, 0x0C, 4)
    unbpe_data = yuke_bpe(input_buffer[0x10:len(input_buffer)], unbpe_size, 1)
    return unbpe_data


Original Code
Code:
/*
  by Luigi Auriemma

reversed from asmodean's unrrbpe.exe
*/

#include <string.h>

static int xgetc(unsigned char **in, unsigned char *inl) {
    int     ret;
    if(*in >= inl) return(-1);
    ret = **in;
    (*in)++;
    return(ret);
}

int yuke_bpe(unsigned char *in, int insz, unsigned char *out, int outsz, int fill_outsz) {
    unsigned char   stack[512 + 4096];
    int             c,
                    count,
                    i,
                    size,
                    n;

    unsigned char   *inl,
                    *o,
                    *outl;

    inl  = in + insz;
    o    = out;
    outl = out + outsz;

    count = 0;
    for(;;) {
        i = 0;
        do {
            if((c = xgetc(&in, inl)) < 0) break;
            if(c > 127) {
                c -= 127;
                while((c > 0) && (i < 256)) {
                    stack[i * 2] = i;
                    c--;
                    i++;
                }
            }
            c++;
            while((c > 0) && (i < 256)) {
                if((n = xgetc(&in, inl)) < 0) break;
                stack[i * 2] = n;
                if(i != n) {
                    if((n = xgetc(&in, inl)) < 0) break;
                    stack[(i * 2) + 1] = n;
                }
                c--;
                i++;
            }
        } while(i < 256);

        if((n = xgetc(&in, inl)) < 0) break;
        size = n;
        if((n = xgetc(&in, inl)) < 0) break;
        size |= (n << 8);

        while(size || count) {
            if(count) {
                count--;
                n = stack[count + 512];
            } else {
                if((n = xgetc(&in, inl)) < 0) break;
                size--;
            }
            c = stack[n * 2];
            if(n == c) {
                if(o >= outl) return(-1);
                *o++ = n;
            } else {
                if((count + 512 + 2) > sizeof(stack)) return(-1);
                stack[count + 512] = stack[(n * 2) + 1];
                stack[count + 512 + 1] = c;
                count += 2;
            }
        }
    }
    if(fill_outsz) {    // this is what is wanted by the format
        memset(o, 0, outl - o);
        o = outl;
    }
    return(o - out);
}


Attachments:
problematic_bpe.7z [302.84 KiB]
Downloaded 14 times

Author:  eatrawmeat391 [ Thu May 04, 2017 12:54 pm ]
Post subject:  Re: BPE Compression Code converted to Python error

I have found out where the problem is.I forgot that o is an increasing pointer.The code worked when I corrected the memset function.
Code:
def memset(bytes, char ,time):
    # chr converts an int to a ASCII character
    # 'ac' * 4 = 'acacacac'
    replace_string = chr(char) * time
    bytes.write(replace_string)
    return

Thanks for the people who have looked at my files

Page 1 of 1 All times are UTC
Powered by phpBB® Forum Software © phpBB Limited
https://www.phpbb.com/