document.write("
'use strict'
/**
* Filename: /Users/apple/Public/Git_Bank/graphql-mongodb-example/src/jb51scraper1.js
* Path: /Users/apple/Public/Git_Bank/graphql-mongodb-example
* Created Date: Wednesday, January 24th 2018, 6:32:08 pm
* Author: apple
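* item: scrape page data from jb51
* item 2: refactor the fetching logic in a functional style
* item 3: get the page count, then call the fetch function for each page in order with forEach -- not working yet, shelved for now
* item 4: prepare the MongoDB configuration for inserting into the database -- done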
* Copyright (c) 2018 Your Company
*/
import { MongoClient, ObjectId } from 'mongodb'
import express from 'express'
// import bodyParser from 'body-parser'
import cors from 'cors'
import * as R from 'ramda'
import { request } from 'graphql-request'
// gdom GraphQL endpoint that the scraping queries are sent to.
const gDomApi = 'http://gdom.graphene-python.org/graphql';
// Host and port used for the local Express server (URL is only used in the log line).
const URL = 'http://localhost';
const PORT = 3001;
// MongoDB connection string. The MONGO_URL environment variable takes
// precedence so real credentials never need to be written into this file;
// the literal below is only a placeholder fallback (its embedded note says to
// put the password there and remove the surrounding asterisks).
const MONGO_URL = process.env.MONGO_URL || 'mongodb://php-smarter:***密码在这里,去掉两边的星号***@ds239097.mlab.com:39097/recompose';
// Module-level accumulator: one entry per scraped listing page.
const dataArray = [];
// Name of the MongoDB collection the scraped items belong to.
const collection = 'JB51';
// GraphQL query templates
// qu2 (operation getLastPage): for the page at $url, selects
// ".dxypage a:last-child" and returns its href attribute under the alias
// "herf". The alias spelling is part of the query text, so any consumer must
// read the result field as `herf`.
const qu2 = `query getLastPage($url:String!){
page(url: $url) {
query(selector:".dxypage a:last-child") {
herf:attr(name:"href")
}
}
}`
// qu3 (operation getPage): for the page at $url, selects every
// "div.artlist dt " element as `items` and, for each one, returns the link
// text (`title`), the link href (`url`) and the text of its span (`time`).
const qu3 = `query getPage($url:String!){
page(url:$url) {
items: query(selector:"div.artlist dt ") {
title:text(selector:"a")
url: attr(selector:"a", name:"href")
time: text(selector:"span")
}
}
}`
/**
 * Starts the static Express server, scrapes jb51 listing pages 21-30 one after
 * another, and stores every scraped item in the MongoDB collection named by
 * `collection`.
 *
 * Failures while scraping or storing are logged rather than rethrown. The
 * database connection is closed once the run finishes, whether it succeeded
 * or not.
 *
 * @returns {Promise<void>}
 */
export const start = async () => {
  let db;
  try {
    const app = express();
    app.use(cors());
    app.use(express.static(__dirname));
    // app.listen() does not return a promise, so there is nothing to await here.
    app.listen(PORT, () => {
      console.log(`Visit ${URL}:${PORT}`);
    });
    // Named startedAt so it does not shadow the exported `start` function.
    const startedAt = Date.now();
    db = await MongoClient.connect(MONGO_URL);
    // Pages are fetched sequentially; each result is an array of items.
    for (let page = 21; page <= 30; page++) {
      const singlePageData = await getDataFromJB51(page);
      dataArray.push(singlePageData);
    }
    // dataArray holds one array per page; flatten it into a single item list.
    const flattenData = R.flatten(dataArray);
    const target = db.collection(collection);
    // Wait for every write so that failures reach the catch block below and
    // the elapsed time covers the inserts as well.
    await Promise.all(flattenData.map((item) => target.save(item)));
    console.log(dataArray.length);
    const elapsed = Date.now() - startedAt;
    console.log('操作花费时间:', elapsed);
  } catch (e) {
    console.log(e);
  } finally {
    // Release the connection even when scraping or saving failed.
    if (db) {
      await db.close();
    }
  }
};
// Data-fetching helper.
/**
 * Curried GraphQL request. Accepts the endpoint, the query template and the
 * variables (all at once or one at a time) and, once all three are supplied,
 * returns a promise that resolves with whatever `request` resolves with.
 */
const handleGrqphcoolDataTemplate = R.curry((api, template, variables) => {
  const pending = request(api, template, variables);
  // Pass the response through unchanged.
  return pending.then((data) => data);
});
// Curried: waits for the data to be scraped
// const graphqlRequestMethodWaitForData = handleGrqphcoolDataTemplate(api, mu)
// Helpers that extract the page number from a listing href such as
// "list_243_35.htm".
// Split the href on underscores.
const s1 = (str) => str.split('_');
// Take the last underscore-separated piece (e.g. "35.htm") and split it on
// dots. The array itself cannot be split, only one of its string elements.
const s2 = (arr) => arr[arr.length - 1].split('.');
// Keep the part before the first dot.
const s3 = (arr) => arr[0];
/**
 * Returns the page number embedded in a listing href, as a string.
 *
 * @param {string} str - href or file name shaped like "list_243_35.htm"
 * @returns {string} the text between the last underscore and the next dot
 */
const getLastPage = (str) => s3(s2(s1(str)));
// getLastpage
// const getJb51Page = handleGrqphcoolDataTemplate(gDomApi, qu2)
// #############################################################################
// Partially applies the curried request helper with the gdom endpoint and the
// listing-page query (qu3); the resulting function still expects the GraphQL
// variables and returns the request promise.
const pageFactory = handleGrqphcoolDataTemplate(gDomApi, qu3)
// #############################################################################
// const queryPageWaitForData = R.map(pageFactory)
// const getDataFromMediumWaitForUrl = handleGrqphcoolDataTemplate(gDomApi, que)
// Unwraps the scraped item list from a query response.
const getArray = ({ page }) => page.items;
/**
 * Promise-aware, right-to-left function composition: the returned function
 * wraps its input in a resolved promise and chains every step with `.then`,
 * starting with the last function given.
 */
const compose = (...functions) => (input) => {
  let chain = Promise.resolve(input);
  for (let idx = functions.length - 1; idx >= 0; idx -= 1) {
    chain = chain.then(functions[idx]);
  }
  return chain;
};
// Promise pipeline: pass the variables to pageFactory, then unwrap
// page.items from the response (compose applies its functions right to left).
const queryData = compose(getArray, pageFactory)
// #############################################################################
// #############################################################################
/**
 * Builds the JSON text of the GraphQL variables for one listing page.
 * JSON.stringify is used so the result stays valid JSON even when `num`
 * contains characters that need escaping, such as quotes or backslashes.
 *
 * @param {number|string} num - page number placed into the listing URL
 * @returns {string} JSON text of the form {"url":"…list_243_<num>.htm"}
 */
const variablesTemp = (num) => JSON.stringify({ url: `http://www.jb51.net/list/list_243_${num}.htm` });
/**
 * Parses the JSON text produced by variablesTemp into a variables object.
 *
 * @param {string} variaTemp - JSON text
 * @returns {object} the parsed variables
 */
const variables = (variaTemp) => JSON.parse(variaTemp);
// Runs the listing query for an already-parsed variables object.
const queryPage = (queryStr) => queryData(queryStr) // query the data
// Full pipeline for one page, applied right to left by compose:
// page number -> JSON variables text -> variables object -> scraped items.
// Returns a promise.
const getDataFromJB51 = compose(queryPage, variables, variablesTemp)
scrapeFromJb51.js - Snippet hosted by \"Cacher\"
");