Update:
睡前簡單弄個小更新
把我回文中說的code cache的hashtable快速弄出來了
第一次呼叫時會emit code,第二次因為hashtable查找成功則直接呼叫
另外就是把mmap要記憶體時的可讀可寫可執行,改成僅可讀可寫
後面再透過mprotect改成可讀可執行
這樣可以防止後來有人竄改JIT emit的code cache
patch放在這邊:
https://paste.plurk.com/show/2636452/
P.S. 我不用github放的原因是因為我不想暴露我的主ID
還請擔待
= = = = = = = = = = = =
中午吃飽飯有點脹氣
看到某篇回文的推文中有人想要看JIT範例
所以簡單寫了個很粗糙的版本
前後花不到15分鐘
所以覺得coding style很爛、沒效率是很正常的,別打我QQ
(結果真的被轉出去了Orz
我晚點忙完會來修code
真的assembly code gen會找時間補上的)
大抵上的思路就是你吃到虛擬機的bytecode,
就把它轉成host平台的native code
接著把它塞進你跟系統動態要的、可以執行的記憶體區段
然後就很快樂的開始執行它
黃色上色的地方,每個作業系統給的API不一樣
Linux、OSX、FreeBSD ... (most of *NIX OSes):
mprotect()、mmap() with proper permissions.
Windows:
VirtualAlloc()
= = = = = = = = = = =
/**
*
* This piece of code is to demo simple VM/JIT knowledge
* and is released under 2-clause BSD license.
*
* Copyright 2018/05/16 snaketsai
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* U2FsdGVkX19w8Ikk1T7xBlbh4vDhIEvZzshUhXft6XMFugC9M27uV9LDszf7/8gP
* OtF2AZwYaUQqzLLY5vXhCQ==
*
**/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#ifdef _NOJIT
/* Interpreter-only add primitive: announces that the non-JIT path ran,
 * then hands back inp1 + inp2. */
static inline int IAdd(int inp1, int inp2) {
    int sum;

    puts("Non-JIT version iadd.");
    sum = inp1 + inp2;
    return sum;
}
/* Interpreter-only subtract primitive: announces that the non-JIT path ran,
 * then hands back inp1 - inp2. */
static inline int ISub(int inp1, int inp2) {
    int difference;

    puts("Non-JIT version isub.");
    difference = inp1 - inp2;
    return difference;
}
#else
// No assembler is implemented here; the machine code below is supplied
// pre-assembled, as if it had been generated on the fly.
//
// x86-64 routine taking two 32-bit integers in %edi and %esi and returning
// their sum in %eax.
const unsigned char _add[] = {
    0x55,             /* push %rbp                 */
    0x48, 0x89, 0xe5, /* mov  %rsp,%rbp            */
    0x89, 0x7d, 0xfc, /* mov  %edi,-0x4(%rbp)      */
    0x89, 0x75, 0xf8, /* mov  %esi,-0x8(%rbp)      */
    0x8b, 0x55, 0xfc, /* mov  -0x4(%rbp),%edx      */
    0x8b, 0x45, 0xf8, /* mov  -0x8(%rbp),%eax      */
    0x01, 0xd0,       /* add  %edx,%eax            */
    0x5d,             /* pop  %rbp                 */
    0xc3,             /* ret                       */
    0x00, 0x00        /* zero bytes after the ret; never reached */
};
// x86-64 routine taking two 32-bit integers in %edi and %esi and returning
// (%edi - %esi) in %eax.
const unsigned char _sub[] = {
    0x55,             /* push %rbp                 */
    0x48, 0x89, 0xe5, /* mov  %rsp,%rbp            */
    0x89, 0x7d, 0xfc, /* mov  %edi,-0x4(%rbp)      */
    0x89, 0x75, 0xf8, /* mov  %esi,-0x8(%rbp)      */
    0x8b, 0x45, 0xfc, /* mov  -0x4(%rbp),%eax      */
    0x2b, 0x45, 0xf8, /* sub  -0x8(%rbp),%eax      */
    0x5d,             /* pop  %rbp                 */
    0xc3,             /* ret                       */
    0x00, 0x00        /* zero bytes after the ret; never reached */
};
// Size in bytes that main() passes to AllocExeMem() when creating the code cache.
#define MAXCODECAHE_SIZE 4096
/**
 * Map `size` bytes of private, anonymous memory that is readable, writable
 * and executable at the same time.
 *
 * @param size  number of bytes to request from mmap().
 * @return      base address of the new mapping, or NULL if mmap() failed
 *              (the failure is reported through perror()).
 */
void* AllocExeMem(size_t size) {
    const int protection = PROT_READ | PROT_WRITE | PROT_EXEC;
    const int mapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
    void *region = mmap(NULL, size, protection, mapFlags, -1, 0);

    if (region != MAP_FAILED) {
        return region;
    }

    perror("mmap() failed.");
    return NULL;
}
/* Signature shared by the JIT-backed integer primitives. */
typedef int (*JitBinaryFn)(int, int);

/* Base address of the code cache; main() fills it in from AllocExeMem(). */
void *CodeCachePool;

/* Entry points used by runVM(). main() aims both at CodeCachePool, and
 * runVM() copies the matching machine code there before each call. */
JitBinaryFn IAdd;
JitBinaryFn ISub;
#endif
/**
Very Simple VM spec - -
Every insn. is 3 bytes long.
opcodes are listed below:
0x00 HCF, halt and catch fire.
0x01 IADD, humbly add two signed integers and print the result.
0x02 ISUB, humbly subtract two signed integers and print the result.
The bytecode stream should end with a 0xff byte mark.
**/
/* Demo program for the VM, one 3-byte instruction per row. */
const unsigned char BytecodeStream[] = {
    0x01, 0x02, 0x03, /* IADD 2, 3 */
    0x02, 0x05, 0x04, /* ISUB 5, 4 */
    0x00, 0x00, 0x00, /* HCF */
    0xff,             /* end-of-stream marker */
    0x00              /* zero byte after the marker; keeps the array at 11 bytes */
};
/**
 * Execute a bytecode stream on the toy VM.
 *
 * Each instruction is 3 bytes: an opcode followed by two operand bytes.
 * The stream is walked in 3-byte steps until an opcode position holds 0xff.
 *
 *   0x00  HCF   print a message and stop.
 *   0x01  IADD  print operand1 + operand2.
 *   0x02  ISUB  print operand1 - operand2.
 *
 * In the JIT build (no _NOJIT) the matching machine code is copied into
 * CodeCachePool before each IAdd/ISub call, because both function pointers
 * share that single buffer.
 *
 * @param bstream  bytecode to run. The caller must make sure every
 *                 instruction is a full 3 bytes and that the stream ends with
 *                 a 0xff marker (or an HCF); the length cannot be checked here.
 * @return 0 when the stream ends normally (0xff marker or HCF),
 *         -1 on an unknown opcode, a NULL stream, or (JIT build) a missing
 *         code cache.
 */
int runVM(const unsigned char* bstream) {
    if (bstream == NULL) {
        fputs("runVM: no bytecode stream given.\n", stderr);
        return -1;
    }
#ifndef _NOJIT
    // Emitting into a NULL code cache would crash in memcpy(); refuse early.
    if (CodeCachePool == NULL) {
        fputs("runVM: code cache is not allocated.\n", stderr);
        return -1;
    }
#endif
    for (; bstream[0] != 0xff; bstream += 3) {
        switch (bstream[0]) {
        case 0x00:
            puts("Dave, stop, will you ?");
            return 0;
        case 0x01:
#ifndef _NOJIT
            // emit code to code cache.
            memcpy(CodeCachePool, _add, sizeof(_add));
#endif
            printf("iadd: %d\n", IAdd((int)bstream[1], (int)bstream[2]));
            break;
        case 0x02:
#ifndef _NOJIT
            // emit code to code cache.
            memcpy(CodeCachePool, _sub, sizeof(_sub));
#endif
            printf("isub: %d\n", ISub((int)bstream[1], (int)bstream[2]));
            break;
        default:
            // Unrecognized insn. Not an errno failure, so perror() would
            // append an unrelated system error text; write the message as is.
            fputs("Sorry Dave, I can't do that.\n", stderr);
            return -1;
        }
    }
    // Reached the 0xff end marker: normal termination. Without this return
    // the caller would read an indeterminate value.
    return 0;
}
/**
 * Program entry point: set up the code cache (JIT build only), run the
 * built-in BytecodeStream through runVM(), and report the outcome.
 *
 * @param ac  argument count (unused).
 * @param av  argument vector (unused).
 * @return EXIT_SUCCESS if the VM finished normally, EXIT_FAILURE if the code
 *         cache could not be allocated or runVM() reported an error.
 */
int main(int ac, char* av[]) {
    int ret = -1;

    (void)ac;
    (void)av;
#ifndef _NOJIT
    CodeCachePool = AllocExeMem(MAXCODECAHE_SIZE);
    if (CodeCachePool == NULL) {
        // AllocExeMem() already reported the mmap() failure; running the VM
        // now would copy machine code to a NULL pointer.
        return EXIT_FAILURE;
    }
    // Both primitives share the single code cache buffer.
    IAdd = CodeCachePool;
    ISub = CodeCachePool;
#endif
    ret = runVM(BytecodeStream);
#ifndef _NOJIT
    // Release the mapping and drop the now-dangling entry points.
    if (munmap(CodeCachePool, MAXCODECAHE_SIZE) != 0) {
        perror("munmap() failed.");
    }
    CodeCachePool = NULL;
    IAdd = NULL;
    ISub = NULL;
#endif
    // These are plain status messages, not errno reports, so perror() is
    // avoided: it would append an unrelated system error string.
    if (ret != 0) {
        fputs("Unexpected error occured.\n", stderr);
        return EXIT_FAILURE;
    }
    fputs("VM exited successfully.\n", stderr);
    return EXIT_SUCCESS;
}