2 Commits

Author SHA1 Message Date
2b16a38a66 fix: improve performance 2024-05-13 15:06:23 -05:00
e87d81cc1d fix: generalize to MonadAff 2024-05-13 11:52:09 -05:00
5 changed files with 73 additions and 58 deletions

BIN
bun.lockb

Binary file not shown.

View File

@@ -9,7 +9,7 @@ workspace:
- datetime: ">=6.1.0 <7.0.0"
- effect: ">=4.0.0 <5.0.0"
- exceptions: ">=6.0.0 <7.0.0"
- foldable-traversable
- foldable-traversable: ">=6.0.0 <7.0.0"
- foreign: ">=7.0.0 <8.0.0"
- foreign-object: ">=4.1.0 <5.0.0"
- integers: ">=6.0.0 <7.0.0"
@@ -18,7 +18,7 @@ workspace:
- newtype: ">=5.0.0 <6.0.0"
- node-buffer: ">=9.0.0 <10.0.0"
- node-event-emitter: ">=3.0.0 <4.0.0"
- node-stream-pipes
- node-stream-pipes: ">=1.3.0 <2.0.0"
- node-streams: ">=9.0.0 <10.0.0"
- nullable: ">=6.0.0 <7.0.0"
- numbers: ">=9.0.1 <10.0.0"
@@ -28,11 +28,11 @@ workspace:
- prelude: ">=6.0.1 <7.0.0"
- record: ">=4.0.0 <5.0.0"
- record-extra: ">=5.0.1 <6.0.0"
- st
- st: ">=6.2.0 <7.0.0"
- strings: ">=6.0.1 <7.0.0"
- tailrec: ">=6.1.0 <7.0.0"
- transformers: ">=6.0.0 <7.0.0"
- tuples
- tuples: ">=7.0.0 <8.0.0"
- typelevel-prelude: ">=7.0.0 <8.0.0"
- unsafe-coerce: ">=6.0.0 <7.0.0"
test_dependencies:
@@ -120,12 +120,10 @@ workspace:
- typelevel-prelude
- unfoldable
- unicode
- unordered-collections
- unsafe-coerce
- variant
extra_packages:
node-stream-pipes:
git: https://git.orionkindel.com/orion/purescript-node-stream-pipes
ref: v1.0.5
extra_packages: {}
packages:
aff:
type: registry
@@ -608,9 +606,9 @@ packages:
dependencies:
- effect
node-stream-pipes:
type: git
url: https://git.orionkindel.com/orion/purescript-node-stream-pipes
rev: f2f18c3c13ae2f0f5787ccfb3832fc8c653e83ad
type: registry
version: 1.3.0
integrity: sha256-5Jpf0BLn0ExQWYxbTTewai4M8quEmEVHxihc9CM1Juo=
dependencies:
- aff
- arrays
@@ -618,6 +616,8 @@ packages:
- either
- exceptions
- foldable-traversable
- foreign-object
- lists
- maybe
- mmorph
- newtype
@@ -627,6 +627,7 @@ packages:
- node-path
- node-streams
- node-zlib
- ordered-collections
- parallel
- pipes
- prelude
@@ -634,6 +635,8 @@ packages:
- strings
- tailrec
- transformers
- tuples
- unordered-collections
- unsafe-coerce
node-streams:
type: registry
@@ -1038,6 +1041,21 @@ packages:
- foldable-traversable
- maybe
- strings
unordered-collections:
type: registry
version: 3.1.0
integrity: sha256-H2eQR+ylI+cljz4XzWfEbdF7ee+pnw2IZCeq69AuJ+Q=
dependencies:
- arrays
- enums
- functions
- integers
- lists
- prelude
- record
- tuples
- typelevel-prelude
- unfoldable
unsafe-coerce:
type: registry
version: 6.0.0

View File

@@ -10,16 +10,14 @@ package:
strict: true
pedanticPackages: true
dependencies:
- foldable-traversable
- node-stream-pipes
- st
- tuples
- node-stream-pipes: ">=1.3.0 <2.0.0"
- aff: ">=7.1.0 <8.0.0"
- arrays: ">=7.3.0 <8.0.0"
- bifunctors: ">=6.0.0 <7.0.0"
- datetime: ">=6.1.0 <7.0.0"
- effect: ">=4.0.0 <5.0.0"
- exceptions: ">=6.0.0 <7.0.0"
- foldable-traversable: ">=6.0.0 <7.0.0"
- foreign: ">=7.0.0 <8.0.0"
- foreign-object: ">=4.1.0 <5.0.0"
- integers: ">=6.0.0 <7.0.0"
@@ -37,9 +35,11 @@ package:
- prelude: ">=6.0.1 <7.0.0"
- record: ">=4.0.0 <5.0.0"
- record-extra: ">=5.0.1 <6.0.0"
- st: ">=6.2.0 <7.0.0"
- strings: ">=6.0.1 <7.0.0"
- tailrec: ">=6.1.0 <7.0.0"
- transformers: ">=6.0.0 <7.0.0"
- tuples: ">=7.0.0 <8.0.0"
- typelevel-prelude: ">=7.0.0 <8.0.0"
- unsafe-coerce: ">=6.0.0 <7.0.0"
test:
@@ -53,7 +53,4 @@ package:
- simple-json
- spec
workspace:
extraPackages:
node-stream-pipes:
git: 'https://git.orionkindel.com/orion/purescript-node-stream-pipes'
ref: 'v1.0.5'
extraPackages: {}

View File

@@ -10,5 +10,5 @@ import Test.Spec.Reporter (specReporter)
import Test.Spec.Runner (defaultConfig, runSpec')
main :: Effect Unit
main = launchAff_ $ runSpec' (defaultConfig { timeout = Nothing }) [ specReporter ] do
main = launchAff_ $ runSpec' (defaultConfig { failFast = true, timeout = Nothing }) [ specReporter ] do
Test.Pipes.CSV.spec

View File

@@ -6,27 +6,31 @@ import Control.Monad.Gen (chooseInt)
import Control.Monad.Rec.Class (Step(..), tailRecM)
import Data.Array as Array
import Data.DateTime (DateTime)
import Data.Foldable (fold)
import Data.Foldable (fold, sum)
import Data.Maybe (Maybe(..), fromJust)
import Data.Newtype (wrap)
import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy)
import Data.String.CodePoints as String.CodePoints
import Data.Tuple.Nested ((/\))
import Effect.Class (liftEffect)
import Effect.Console (log)
import Node.Encoding (Encoding(..))
import Partial.Unsafe (unsafePartial)
import Pipes (yield, (>->))
import Pipes (each) as Pipes
import Pipes.CSV as Pipes.CSV
import Pipes.Collect as Pipes.Collect
import Pipes.Construct as Pipes.Construct
import Pipes.Node.Buffer as Pipes.Buffer
import Pipes.Node.Stream as Pipes.Stream
import Pipes.Prelude (map, toListM) as Pipes
import Pipes.Prelude (chain, map, toListM) as Pipes
import Pipes.Util as Pipes.Util
import Test.QuickCheck.Gen (randomSample')
import Test.Spec (Spec, describe, it)
import Test.Spec (Spec, before, describe, it)
import Test.Spec.Assertions (shouldEqual)
csv :: String
csv = """created,flag,foo,id
csv =
"""created,flag,foo,id
2020-01-01T00:00:00.0Z,true,a,1
2024-02-02T08:00:00.0Z,false,apple,2
1970-01-01T00:00:00.0Z,true,hello,3
@@ -41,48 +45,44 @@ spec =
it "stringify" do
let
objs =
[ {id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z"}
, {id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z"}
, {id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z"}
[ { id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z" }
, { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
, { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
]
csv' <- map fold $ Pipes.Collect.collectArray $ Pipes.Stream.withEOS (Pipes.each objs) >-> Pipes.CSV.stringify >-> Pipes.Stream.unEOS
csv' <- map fold $ Pipes.Collect.toArray $ Pipes.Stream.withEOS (Pipes.Construct.eachArray objs) >-> Pipes.CSV.stringify >-> Pipes.Stream.unEOS
csv' `shouldEqual` csv
describe "parse" do
it "parses csv" do
rows <- map Array.fromFoldable
$ Pipes.toListM
$ Pipes.Stream.withEOS (yield csv)
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8)
>-> Pipes.CSV.parse
>-> Pipes.Stream.unEOS
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8)
>-> Pipes.CSV.parse
>-> Pipes.Stream.unEOS
rows `shouldEqual`
[ {id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z"}
, {id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z"}
, {id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z"}
[ { id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z" }
, { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
, { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
]
it "parses large csv" do
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
let
csvRows = ["id\n"] <> ((_ <> "\n") <$> show <$> nums)
csv' =
let
go ix
| Just a <- Array.index csvRows ix = yield a $> Loop (ix + 1)
| otherwise = pure $ Done unit
in
tailRecM go 0
in16kbChunks =
Pipes.Util.chunked 16000
>-> Pipes.Stream.inEOS (Pipes.map fold)
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8)
before
(do
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
let
chars = [ "i","d","\n" ] <> join ((\n -> [show n, "\n"]) <$> nums)
bufs <- Pipes.Collect.toArray
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray chars)
>-> Pipes.Util.chunked 1000
>-> Pipes.Stream.inEOS (Pipes.map fold >-> Pipes.Buffer.fromString UTF8)
>-> Pipes.Stream.unEOS
pure $ nums /\ bufs
)
$ it "parses large csv" \(nums /\ bufs) -> do
rows <-
Pipes.Collect.toArray
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray bufs)
>-> Pipes.CSV.parse @(id :: Int)
>-> Pipes.Stream.unEOS
rows <-
Pipes.Collect.collectArray
$ Pipes.Stream.withEOS csv'
>-> in16kbChunks
>-> Pipes.CSV.parse
>-> Pipes.Stream.unEOS
rows `shouldEqual` ((\id -> {id}) <$> nums)
rows `shouldEqual` ((\id -> { id }) <$> nums)