diff --git a/.travis.yml b/.travis.yml index f403b54..9b505d6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,7 @@ rust: - beta - nightly before_script: - - wget https://github.com/python/cpython/archive/3.5.zip -O cpython.zip + - wget https://github.com/python/cpython/archive/3.6.zip -O cpython.zip - unzip cpython.zip - cd cpython-* - ./configure --prefix=$HOME/.local/ diff --git a/README.md b/README.md index 76ed500..e28f808 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ A Python virtual machine, written in Rust. ## Dependencies -* CPython 3.6 (used as a parser and bytecode compiler). Older versions down to 3.4 should work, but their support is not tested. +* CPython 3.6 (used as a parser and bytecode compiler). * [Rust](https://www.rust-lang.org/downloads.html) * [Cargo](https://crates.io/install) diff --git a/src/marshal/mod.rs b/src/marshal/mod.rs index 11cd6ae..24155a8 100644 --- a/src/marshal/mod.rs +++ b/src/marshal/mod.rs @@ -18,6 +18,6 @@ pub fn check_magic(buf: &[u8]) -> bool { false } else { - 3310 <= version /* ≥ 3.4rc2 */ && version < 3390 /* < 3.7 */ + 3379 <= version /* ≥ 3.6rc1 */ && version < 3390 /* < 3.7 */ } } diff --git a/src/primitives/mod.rs b/src/primitives/mod.rs index 538db21..50da2c9 100644 --- a/src/primitives/mod.rs +++ b/src/primitives/mod.rs @@ -89,7 +89,9 @@ fn build_class(state: &mut State, call_stack: &mut Vec, let mut instructions: Vec = InstructionDecoder::new(code.code.iter()).collect(); // Hack to made the class' code return the class instead of None - assert_eq!(instructions.pop(), Some(Instruction::ReturnValue)); + let mut last_instruction; + while {last_instruction = instructions.pop(); last_instruction == Some(Instruction::Nop)} {}; + assert_eq!(last_instruction, Some(Instruction::ReturnValue)); instructions.pop(); // LoadConst None instructions.push(Instruction::PushImmediate(cls_ref.clone())); instructions.push(Instruction::ReturnValue); diff --git a/src/processor/instructions.rs b/src/processor/instructions.rs index 218e9a2..4d50da3 100644 --- a/src/processor/instructions.rs +++ b/src/processor/instructions.rs @@ -18,7 +18,7 @@ pub enum CmpOperator { } impl CmpOperator { - pub fn from_bytecode(n: u32) -> Self { + pub fn from_bytecode(n: usize) -> Self { match n { 0 => CmpOperator::Lt, 1 => CmpOperator::Leq, @@ -69,9 +69,10 @@ pub enum Instruction { LoadFast(usize), StoreFast(usize), LoadGlobal(usize), - CallFunction(usize, usize), // nb_args, nb_kwargs - RaiseVarargs(u16), - MakeFunction(usize, usize, usize), // nb_default_args, nb_default_kwargs, nb_annot + CallFunction(usize, bool), // nb_args + nb_kwargs, has_kwargs + RaiseVarargs(usize), + MakeFunction { has_defaults: bool, has_kwdefaults: bool, has_annotations: bool, has_closure: bool }, + BuildConstKeyMap(usize), } #[derive(Debug)] @@ -123,55 +124,65 @@ impl<'a, I> Iterator for InstructionDecoder where I: Iterator { self.pending_nops -= 1; return Some(Instruction::Nop) }; - self.bytestream.next().map(|opcode| { - match *opcode { - 1 => Instruction::PopTop, - 4 => Instruction::DupTop, - 25 => Instruction::BinarySubscr, - 68 => Instruction::GetIter, - 71 => Instruction::LoadBuildClass, - 83 => Instruction::ReturnValue, - 87 => Instruction::PopBlock, - 88 => Instruction::EndFinally, - 89 => Instruction::PopExcept, - 90 => Instruction::StoreName(self.read_argument() as usize), - 93 => Instruction::ForIter(self.read_argument() as usize), - 95 => Instruction::StoreAttr(self.read_argument() as usize), - 97 => Instruction::StoreGlobal(self.read_argument() as usize), - 100 => Instruction::LoadConst(self.read_argument() as usize), - 101 => Instruction::LoadName(self.read_argument() as usize), - 102 => Instruction::BuildTuple(self.read_argument() as usize), - 106 => Instruction::LoadAttr(self.read_argument() as usize), - 107 => Instruction::CompareOp(CmpOperator::from_bytecode(self.read_argument())), - 110 => Instruction::JumpForward(self.read_argument() as usize + 2), // +2, because JumpForward takes 3 bytes, and the relative address is computed from the next instruction. - 113 => Instruction::JumpAbsolute(self.read_argument() as usize), - 114 => Instruction::PopJumpIfFalse(self.read_argument() as usize), - 116 => Instruction::LoadGlobal(self.read_argument() as usize), - 120 => Instruction::SetupLoop(self.read_argument() as usize + 2), - 121 => Instruction::SetupExcept(self.read_argument() as usize + 2), - 124 => Instruction::LoadFast(self.read_argument() as usize), - 125 => Instruction::StoreFast(self.read_argument() as usize), - 130 => Instruction::RaiseVarargs(self.read_argument() as u16), - 131 => Instruction::CallFunction(self.read_byte() as usize, self.read_byte() as usize), - 132 => { - let arg = self.read_argument(); - let nb_pos = arg & 0xFF; - let nb_kw = (arg >> 8) & 0xFF; - //let nb_annot = (arg >> 16) & 0x7FF; // TODO - let nb_annot = 0; - Instruction::MakeFunction(nb_pos as usize, nb_kw as usize, nb_annot as usize) - }, - 144 => { self.arg_prefix = Some(self.read_argument()); Instruction::Nop }, - _ => panic!(format!("Opcode not supported: {}", opcode)), + let mut opcode = 144; + let mut oparg: usize = 0; + while opcode == 144 { + match self.bytestream.next() { + Some(op) => { opcode = *op }, + None => return None, } - }) + oparg = (oparg << 8) | (*self.bytestream.next().unwrap() as usize); + self.pending_nops += 1; + } + self.pending_nops -= 1; + let inst = match opcode { + 1 => Instruction::PopTop, + 4 => Instruction::DupTop, + 25 => Instruction::BinarySubscr, + 68 => Instruction::GetIter, + 71 => Instruction::LoadBuildClass, + 83 => Instruction::ReturnValue, + 87 => Instruction::PopBlock, + 88 => Instruction::EndFinally, + 89 => Instruction::PopExcept, + 90 => Instruction::StoreName(oparg), + 93 => Instruction::ForIter(oparg), + 95 => Instruction::StoreAttr(oparg), + 97 => Instruction::StoreGlobal(oparg), + 100 => Instruction::LoadConst(oparg), + 101 => Instruction::LoadName(oparg), + 102 => Instruction::BuildTuple(oparg), + 106 => Instruction::LoadAttr(oparg), + 107 => Instruction::CompareOp(CmpOperator::from_bytecode(oparg)), + 110 => Instruction::JumpForward(oparg), + 113 => Instruction::JumpAbsolute(oparg), + 114 => Instruction::PopJumpIfFalse(oparg), + 116 => Instruction::LoadGlobal(oparg), + 120 => Instruction::SetupLoop(oparg + 1), + 121 => Instruction::SetupExcept(oparg + 1), + 124 => Instruction::LoadFast(oparg), + 125 => Instruction::StoreFast(oparg), + 130 => Instruction::RaiseVarargs(oparg), + 131 => Instruction::CallFunction(oparg, false), + 132 => Instruction::MakeFunction { + has_defaults: oparg & 0x01 != 0, + has_kwdefaults: oparg & 0x02 != 0, + has_annotations: oparg & 0x04 != 0, + has_closure: oparg & 0x08 != 0, + }, + 141 => Instruction::CallFunction(oparg, true), + 156 => Instruction::BuildConstKeyMap(oparg), + 144 => panic!("The impossible happened."), + _ => panic!(format!("Opcode not supported: {:?}", (opcode, oparg))), + }; + Some(inst) } } #[test] fn test_load_read() { - let bytes: Vec = vec![124, 1, 0, 83]; + let bytes: Vec = vec![124, 1, 83, 0]; let reader = InstructionDecoder::new(bytes.iter()); let instructions: Vec = reader.collect(); - assert_eq!(vec![Instruction::LoadFast(1), Instruction::Nop, Instruction::Nop, Instruction::ReturnValue], instructions); + assert_eq!(vec![Instruction::LoadFast(1), Instruction::ReturnValue], instructions); } diff --git a/src/processor/mod.rs b/src/processor/mod.rs index d419599..0cfa4fa 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -15,6 +15,8 @@ use super::state::{State, PyResult, unwind, raise, return_value}; use super::sandbox::EnvProxy; use super::primitives; +const WORD_SIZE: usize = 2; + #[derive(Debug)] pub enum ProcessorError { CircularReference, @@ -233,11 +235,12 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> let instruction = py_unwrap!(state, frame.instructions.get(frame.program_counter), ProcessorError::InvalidProgramCounter); // Useful for debugging: /* - println!(""); + println!("======"); for r in frame.var_stack.iter() { println!("{}", r.repr(&state.store)); } - println!("{} {:?}", frame.program_counter, instruction); + println!("{} {:?}", frame.program_counter*WORD_SIZE, instruction); + println!("======"); */ frame.program_counter += 1; instruction.clone() @@ -362,7 +365,7 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> Instruction::ForIter(i) => { let iterator = { let frame = call_stack.last_mut().unwrap(); - frame.block_stack.push(Block::ExceptPopGoto(state.primitive_objects.stopiteration.clone(), 1, frame.program_counter+i)); + frame.block_stack.push(Block::ExceptPopGoto(state.primitive_objects.stopiteration.clone(), 1, frame.program_counter+i/WORD_SIZE)); let iterator = top_stack!(state, frame.var_stack); iterator.clone() }; @@ -436,7 +439,7 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> } Instruction::SetupExcept(i) => { let frame = call_stack.last_mut().unwrap(); - frame.block_stack.push(Block::TryExcept(frame.program_counter, frame.program_counter+i)) + frame.block_stack.push(Block::TryExcept(frame.program_counter, frame.program_counter+i/WORD_SIZE)) } Instruction::CompareOp(CmpOperator::Eq) => { let frame = call_stack.last_mut().unwrap(); @@ -465,11 +468,11 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> } Instruction::JumpAbsolute(target) => { let frame = call_stack.last_mut().unwrap(); - frame.program_counter = target + frame.program_counter = target / WORD_SIZE } Instruction::JumpForward(delta) => { let frame = call_stack.last_mut().unwrap(); - frame.program_counter += delta + frame.program_counter += delta / WORD_SIZE } Instruction::LoadFast(i) => { let frame = call_stack.last_mut().unwrap(); @@ -487,7 +490,7 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> let obj = state.store.deref(&pop_stack!(state, frame.var_stack)); match obj.content { ObjectContent::True => (), - ObjectContent::False => frame.program_counter = target, + ObjectContent::False => frame.program_counter = target / WORD_SIZE, _ => unimplemented!(), } } @@ -509,21 +512,32 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> panic!("Bad RaiseVarargs argument") // TODO: Raise an exception instead } - Instruction::CallFunction(nb_args, nb_kwargs) => { + Instruction::CallFunction(nb_args, has_kwargs) => { // See “Call constructs” at: // http://security.coverity.com/blog/2014/Nov/understanding-python-bytecode.html - let kwargs; + let kwargs: Vec<(ObjectRef, ObjectRef)>; let args; let func; { let frame = call_stack.last_mut().unwrap(); - kwargs = py_unwrap!(state, frame.var_stack.pop_n_pairs(nb_kwargs), ProcessorError::StackTooSmall); - args = py_unwrap!(state, frame.var_stack.pop_many(nb_args), ProcessorError::StackTooSmall); + if has_kwargs { + let ref obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content; + let names: Vec = match obj { + &ObjectContent::Tuple(ref v) => v.into_iter().cloned().collect(), + _ => panic!("Bad CallFunctionKw argument"), + }; + let values: Vec = frame.var_stack.pop_many(names.len()).unwrap(); + kwargs = names.into_iter().zip(values).collect(); + } + else { + kwargs = Vec::new(); + } + args = py_unwrap!(state, frame.var_stack.pop_many(nb_args - kwargs.len()), ProcessorError::StackTooSmall); func = pop_stack!(state, frame.var_stack); } call_function(state, call_stack, &func, args, kwargs) }, - Instruction::MakeFunction(0, nb_default_kwargs, 0) => { + Instruction::MakeFunction { has_defaults: false, has_kwdefaults, has_annotations: false, has_closure: false } => { // TODO: consume default arguments and annotations let obj = { let frame = call_stack.last_mut().unwrap(); @@ -540,18 +554,35 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> }; let frame = call_stack.last_mut().unwrap(); let code = pop_stack!(state, frame.var_stack); - let raw_kwdefaults = py_unwrap!(state, frame.var_stack.pop_n_pairs(nb_default_kwargs), ProcessorError::StackTooSmall); let mut kwdefaults: HashMap = HashMap::new(); - kwdefaults.reserve(nb_default_kwargs); - for (key, value) in raw_kwdefaults { - match state.store.deref(&key).content { - ObjectContent::String(ref s) => { kwdefaults.insert(s.clone(), value); }, - _ => panic!("Defaults' keys must be strings."), + if has_kwdefaults { + let obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content.clone(); // TODO: clone only if necessary + let raw_kwdefaults = match obj { + ObjectContent::Dict(ref d) => d, + _ => panic!("bad type for default kwd"), + }; + kwdefaults.reserve(raw_kwdefaults.len()); + for &(ref key, ref value) in raw_kwdefaults { + match state.store.deref(&key).content { + ObjectContent::String(ref s) => { kwdefaults.insert(s.clone(), value.clone()); }, + _ => panic!("Defaults' keys must be strings."), + } } } let func = state.primitive_objects.new_function(func_name, frame.object.module(&state.store), code, kwdefaults); frame.var_stack.push(state.store.allocate(func)) }, + Instruction::BuildConstKeyMap(size) => { + let frame = call_stack.last_mut().unwrap(); + let obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content.clone(); // TODO: clone only if necessary + let keys: Vec = match obj { + ObjectContent::Tuple(ref v) => v.clone(), + _ => panic!("bad BuildConstKeyMap keys argument."), + }; + let values: Vec = frame.var_stack.peek(size).unwrap().iter().map(|r| (*r).clone()).collect(); + let dict = state.primitive_objects.new_dict(keys.into_iter().zip(values).collect()); + frame.var_stack.push(state.store.allocate(dict)) + } _ => panic!(format!("todo: instruction {:?}", instruction)), } }; diff --git a/src/varstack.rs b/src/varstack.rs index 7bdff47..0e73843 100644 --- a/src/varstack.rs +++ b/src/varstack.rs @@ -9,6 +9,7 @@ pub trait VarStack : Debug { fn push(&mut self, value: Self::Item); fn pop_all_and_get_n_last(&mut self, nb: usize) -> Option>; fn pop_n_pairs(&mut self, nb: usize) -> Option>; + fn peek(&self, nb: usize) -> Option>; } #[derive(Debug)] @@ -75,4 +76,13 @@ impl VarStack for VectorVarStack where Item: Debug { pairs }) } + fn peek(&self, nb: usize) -> Option> { + if nb > self.vector.len() { + None + } + else { + let length = self.vector.len(); + Some(self.vector[(length-nb)..length].iter().collect()) + } + } } diff --git a/tests/test_basic.rs b/tests/test_basic.rs index 9150ad7..951dc51 100644 --- a/tests/test_basic.rs +++ b/tests/test_basic.rs @@ -6,14 +6,14 @@ use pythonvm::{MockEnvProxy, PyResult, run_file}; #[test] fn test_hello_world() { - let mut reader: &[u8] = b"\xee\x0c\r\n\xb0\x92\x0fW\x15\x00\x00\x00\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0e\x00\x00\x00e\x00\x00d\x00\x00\x83\x01\x00\x01d\x01\x00S)\x02z\x0bHello worldN)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x16examples/helloworld.py\xda\x08\x01\x00\x00\x00s\x00\x00\x00\x00"; + let mut reader: &[u8] = b"3\r\r\n\xe1\xc8\xf4Y\x15\x00\x00\x00\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0c\x00\x00\x00e\x00d\x00\x83\x01\x01\x00d\x01S\x00)\x02z\x0bhello worldN)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x16examples/helloworld.py\xda\x08\x01\x00\x00\x00s\x00\x00\x00\x00"; let mut path = PathBuf::new(); path.push(env::current_dir().unwrap()); path.push("pythonlib/"); let envproxy = MockEnvProxy::new(path); let (processor, result) = run_file(&mut reader, envproxy).unwrap(); if let PyResult::Return(_) = result { - assert_eq!(*processor.envproxy.stdout_content.lock().unwrap(), b"Hello world\n"); + assert_eq!(*processor.envproxy.stdout_content.lock().unwrap(), b"hello world\n"); } else { panic!(format!("Exited with: {:?}", result)) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy