Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 155 additions & 33 deletions src/Data/Ruby/Marshal/Get.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,25 +54,38 @@ getRubyObject = getMarshalVersion >> go
where
go :: Marshal RubyObject
go = liftMarshal getWord8 >>= \case
NilChar -> return RNil
TrueChar -> return $ RBool True
FalseChar -> return $ RBool False
FixnumChar -> RFixnum <$> getFixnum
FloatChar -> RFloat <$> getFloat
StringChar -> RString <$> getString
SymbolChar -> RSymbol <$> getSymbol
ObjectLinkChar -> getObjectLink
SymlinkChar -> RSymbol <$> getSymlink
ArrayChar -> do
NilChar -> return RNil
TrueChar -> return $ RBool True
FalseChar -> return $ RBool False
FixnumChar -> RFixnum <$> getFixnum
FloatChar -> RFloat <$> getFloat
StringChar -> RString <$> getString
SymbolChar -> RSymbol <$> getSymbol
ObjectLinkChar -> getObjectLink
SymlinkChar -> RSymbol <$> getSymlink
ArrayChar -> do
result <- RArray <$> getArray go
writeCache result
pure result
HashChar -> do
HashChar -> do
result <- RHash <$> getHash go go
writeCache result
pure result
IVarChar -> RIVar <$> getIVar go
_ -> return Unsupported
HashDefChar -> getHashWithDefault go
IVarChar -> getIVar go
BignumChar -> getBignum
RegexpChar -> getRegexp
ObjectChar -> getObjectOrStruct RObject "Object" go
StructChar -> getObjectOrStruct RStruct "Struct" go
ClassChar -> getNamedRef RClass "Class"
ModuleChar -> getNamedRef RModule "Module"
OldModuleChar -> getNamedRef RModule "OldModule"
UserDefChar -> getUserDef go
UserMarshalChar -> getUserMarshalOrData RUserMarshal "UserMarshal" go
DataChar -> getUserMarshalOrData RData "Data" go
ExtendedChar -> getWrapper "Extended" go
UClassChar -> getWrapper "UClass" go
c -> fail $ "unknown marshal tag: " <> show c

--------------------------------------------------------------------
-- Ancillary functions.
Expand Down Expand Up @@ -122,29 +135,138 @@ getHash k v = marshalLabel "Hash" $ do
V.replicateM n (liftM2 (,) k v)

-- | Parses <http://docs.ruby-lang.org/en/2.1.0/marshal_rdoc.html#label-Instance+Variables Instance Variables>.
getIVar :: Marshal RubyObject -> Marshal (RubyObject, RubyStringEncoding)
--
-- IVar wraps an arbitrary object together with a list of @(symbol, value)@
-- instance-variable pairs. When the wrapped object is a string and the IVs
-- carry encoding info (@:E@ or @:encoding@), the result is an 'RIVar'. For
-- any other shape we still consume every byte but surface just the inner
-- object — the IV metadata is dropped, but the surrounding stream keeps
-- parsing correctly.
getIVar :: Marshal RubyObject -> Marshal RubyObject
getIVar g = marshalLabel "IVar" $ do
str <- g
inner <- g
len <- getFixnum
if | len /= 1 -> fail "expected single character"
| otherwise -> do
symbol <- g
denote <- g
case symbol of
RSymbol "E" ->
case denote of
RBool True -> return' (str, UTF_8)
RBool False -> return' (str, US_ASCII)
_ -> fail "expected bool"
RSymbol "encoding" ->
case denote of
RString enc -> return' (str, toEnc enc)
_ -> fail "expected string"
_ -> fail "invalid ivar"
where
return' result = do
writeCache $ RIVar result
ivars <- V.replicateM len (liftM2 (,) g g)
let maybeEnc = V.foldl' (\acc pair -> acc <|> extractEncoding pair) Nothing ivars
case (inner, maybeEnc) of
(RString _, Just enc) -> do
let result = RIVar (inner, enc)
writeCache result
return result
_ -> return inner
where
extractEncoding :: (RubyObject, RubyObject) -> Maybe RubyStringEncoding
extractEncoding (RSymbol "E", RBool True) = Just UTF_8
extractEncoding (RSymbol "E", RBool False) = Just US_ASCII
extractEncoding (RSymbol "encoding", RString enc) = Just (toEnc enc)
extractEncoding _ = Nothing

-- | Parses <http://ruby-doc.org/core-2.2.0/Bignum.html Bignum>.
--
-- Wire format: one sign byte (@\'+\'@ or @\'-\'@), then a packed-int count of
-- 16-bit little-endian digits, then that many digits.
getBignum :: Marshal RubyObject
getBignum = marshalLabel "Bignum" $ do
sign <- liftMarshal getWord8
n <- getFixnum
bytes <- liftMarshal $ getBytes (n * 2)
let magnitude = BS.foldr (\b acc -> acc * 256 + fromIntegral b) 0 bytes
value = if sign == 0x2D then negate magnitude else magnitude
result = RBignum value
writeCache result
return result

-- | Parses <http://ruby-doc.org/core-2.2.0/Regexp.html Regexp>.
--
-- Wire format: a raw byte sequence for the pattern, then a single byte of
-- options flags. The pattern's source encoding is typically carried by a
-- surrounding 'RIVar' wrapper, which is parsed independently.
getRegexp :: Marshal RubyObject
getRegexp = marshalLabel "Regexp" $ do
pat <- getString
opts <- liftMarshal getWord8
let result = RRegexp pat opts
writeCache result
return result

-- | Parses Hash with a default value (@}@). Wire format matches a regular
-- Hash followed by one additional object (the default).
getHashWithDefault :: Marshal RubyObject -> Marshal RubyObject
getHashWithDefault g = marshalLabel "HashWithDefault" $ do
n <- getFixnum
pairs <- V.replicateM n (liftM2 (,) g g)
def <- g
let result = RHashWithDefault pairs def
writeCache result
return result

-- | Parses Object (@o@) and Struct (@S@). Both share the wire shape:
-- class symbol, count of pairs, then count many @(symbol, value)@ pairs.
getObjectOrStruct
:: (BS.ByteString -> V.Vector (RubyObject, RubyObject) -> RubyObject)
-> String
-> Marshal RubyObject
-> Marshal RubyObject
getObjectOrStruct con name g = marshalLabel name $ do
classSym <- g
n <- getFixnum
pairs <- V.replicateM n (liftM2 (,) g g)
let result = case classSym of
RSymbol cls -> con cls pairs
_ -> Unsupported
writeCache result
return result

-- | Parses a Class/Module name reference (@c@, @m@, @M@). Wire format is a
-- bare byte sequence — note that this is not a Symbol; the bytes are the
-- fully-qualified class or module name.
getNamedRef
:: (BS.ByteString -> RubyObject)
-> String
-> Marshal RubyObject
getNamedRef con name = marshalLabel name $ do
s <- getString
let result = con s
writeCache result
return result

-- | Parses an object dumped via @_dump@ (@u@). Wire format: class symbol,
-- then a raw byte sequence carrying the user-defined payload.
getUserDef :: Marshal RubyObject -> Marshal RubyObject
getUserDef g = marshalLabel "UserDef" $ do
classSym <- g
payload <- getString
let result = case classSym of
RSymbol cls -> RUserDef cls payload
_ -> Unsupported
writeCache result
return result

-- | Parses an object dumped via @marshal_dump@ (@U@) or @_dump_data@ (@d@).
-- Both share the wire shape: class symbol then one arbitrary Marshal object.
getUserMarshalOrData
:: (BS.ByteString -> RubyObject -> RubyObject)
-> String
-> Marshal RubyObject
-> Marshal RubyObject
getUserMarshalOrData con name g = marshalLabel name $ do
classSym <- g
payload <- g
let result = case classSym of
RSymbol cls -> con cls payload
_ -> Unsupported
writeCache result
return result

-- | Parses a wrapper tag — @e@ (object extended with a module) or @C@
-- (object whose class is a user subclass of a builtin). Both read a symbol
-- and then an object, and Ruby does not give the wrapper its own slot in
-- the object table — the inner object owns it. We discard the modifier
-- symbol and pass the inner through unchanged.
getWrapper :: String -> Marshal RubyObject -> Marshal RubyObject
getWrapper name g = marshalLabel name $ do
_ <- g -- modifier symbol (module name or subclass name)
g

-- | Pulls an Instance Variable out of the object cache.
getObjectLink :: Marshal RubyObject
Expand Down
33 changes: 24 additions & 9 deletions src/Data/Ruby/Marshal/Monad.hs
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,31 @@ readSymbol :: Int -> Marshal (Maybe RubyObject)
readSymbol index = readCache index symbols

-- | Write an object to the appropriate cache.
--
-- Symbols feed the symbol table that 'Symlink' draws from; everything else
-- that can be the target of an 'Object link' (@\@@) feeds the object table.
-- 'RFixnum', 'RNil', 'RBool', and 'RString' are intentionally not cached:
-- the first three are immediate values and bare 'RString' values never appear
-- on the wire outside of an 'RIVar' wrapper, which is itself cached.
writeCache :: RubyObject -> Marshal ()
writeCache object = do
cache <- get
let putObj = put $ cache { objects = V.snoc (objects cache) object }
putSym = put $ cache { symbols = V.snoc (symbols cache) object }
case object of
RSymbol _ -> do
put $ cache { symbols = V.snoc (symbols cache) object }
RIVar _ -> do
put $ cache { objects = V.snoc (objects cache) object }
RArray _ -> do
put $ cache { objects = V.snoc (objects cache) object }
RHash _ -> do
put $ cache { objects = V.snoc (objects cache) object }
_ -> return ()
RSymbol _ -> putSym
RIVar _ -> putObj
RArray _ -> putObj
RHash _ -> putObj
RHashWithDefault _ _ -> putObj
RBignum _ -> putObj
RRegexp _ _ -> putObj
RObject _ _ -> putObj
RStruct _ _ -> putObj
RClass _ -> putObj
RModule _ -> putObj
RUserDef _ _ -> putObj
RUserMarshal _ _ -> putObj
RData _ _ -> putObj
Unsupported -> putObj
_ -> return ()
24 changes: 23 additions & 1 deletion src/Data/Ruby/Marshal/RubyObject.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import qualified Data.ByteString as BS
import qualified Data.Map.Strict as DM
import Data.Ruby.Marshal.Encoding (RubyStringEncoding (..))
import qualified Data.Vector as V
import Data.Word (Word8)
import Prelude

-- | Representation of a Ruby object.
Expand All @@ -34,10 +35,14 @@ data RubyObject
-- ^ represents @true@ or @false@
| RFixnum {-# UNPACK #-} !Int
-- ^ represents a @Fixnum@
| RBignum !Integer
-- ^ represents a @Bignum@
| RArray !(V.Vector RubyObject)
-- ^ represents an @Array@
| RHash !(V.Vector (RubyObject, RubyObject))
-- ^ represents an @Hash@
| RHashWithDefault !(V.Vector (RubyObject, RubyObject)) !RubyObject
-- ^ represents a @Hash@ with a default value (pairs, then default)
| RIVar !(RubyObject, RubyStringEncoding)
-- ^ represents an @IVar@
| RString !BS.ByteString
Expand All @@ -46,8 +51,25 @@ data RubyObject
-- ^ represents a @Float@
| RSymbol !BS.ByteString
-- ^ represents a @Symbol@
| RRegexp !BS.ByteString !Word8
-- ^ represents a @Regexp@ (pattern bytes, options flags)
| RObject !BS.ByteString !(V.Vector (RubyObject, RubyObject))
-- ^ represents a generic @Object@ (class name, instance-variable pairs)
| RStruct !BS.ByteString !(V.Vector (RubyObject, RubyObject))
-- ^ represents a @Struct@ (class name, member pairs)
| RClass !BS.ByteString
-- ^ represents a @Class@ reference (class name)
| RModule !BS.ByteString
-- ^ represents a @Module@ reference (module name)
| RUserDef !BS.ByteString !BS.ByteString
-- ^ represents an object dumped via @_dump@ (class name, opaque payload)
| RUserMarshal !BS.ByteString !RubyObject
-- ^ represents an object dumped via @marshal_dump@ (class name, payload object)
| RData !BS.ByteString !RubyObject
-- ^ represents an object dumped via @_dump_data@ (class name, state object)
| Unsupported
-- ^ represents an invalid object
-- ^ represents an object whose bytes were consumed but whose Ruby semantics
-- this library does not (yet) model
deriving (Eq, Ord, Show)

-- | Transform plain Haskell values to RubyObjects and back.
Expand Down
39 changes: 39 additions & 0 deletions src/Data/Ruby/Marshal/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,19 @@ module Data.Ruby.Marshal.Types (
, pattern StringChar
, pattern SymbolChar
, pattern SymlinkChar
, pattern BignumChar
, pattern HashDefChar
, pattern RegexpChar
, pattern ObjectChar
, pattern StructChar
, pattern ClassChar
, pattern ModuleChar
, pattern OldModuleChar
, pattern ExtendedChar
, pattern UserDefChar
, pattern UserMarshalChar
, pattern UClassChar
, pattern DataChar
) where

import Data.Ruby.Marshal.Encoding
Expand Down Expand Up @@ -66,3 +79,29 @@ pattern StringChar = 34
pattern SymbolChar = 58
-- | Character that represents Symlink.
pattern SymlinkChar = 59
-- | Character that represents Bignum.
pattern BignumChar = 108
-- | Character that represents Hash with default value.
pattern HashDefChar = 125
-- | Character that represents Regexp.
pattern RegexpChar = 47
-- | Character that represents Object.
pattern ObjectChar = 111
-- | Character that represents Struct.
pattern StructChar = 83
-- | Character that represents Class reference.
pattern ClassChar = 99
-- | Character that represents Module reference.
pattern ModuleChar = 109
-- | Character that represents the legacy Module/Class reference.
pattern OldModuleChar = 77
-- | Character that represents an object extended with a module.
pattern ExtendedChar = 101
-- | Character that represents a user-defined dump (_dump).
pattern UserDefChar = 117
-- | Character that represents a user-defined marshal (marshal_dump).
pattern UserMarshalChar = 85
-- | Character that represents an object whose class is a user subclass of a builtin.
pattern UClassChar = 67
-- | Character that represents a Data object (_dump_data).
pattern DataChar = 100
Loading