Skip to main content
POST https://api.aisa.one/apis/v1/dataforseo/on_page/content_parsing/live
Live OnPage API Content Parsing
curl --request POST \
  --url https://api.aisa.one/apis/v1/dataforseo/on_page/content_parsing/live \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "custom_user_agent": "<string>",
  "browser_preset": "<string>",
  "browser_screen_width": 123,
  "browser_screen_height": 123,
  "browser_screen_scale_factor": 123,
  "store_raw_html": true,
  "disable_cookie_popup": true,
  "accept_language": "<string>",
  "enable_javascript": true,
  "enable_browser_rendering": true,
  "enable_xhr": true,
  "switch_pool": true,
  "ip_pool_for_scan": "<string>",
  "markdown_view": true
}
'
{
  "version": "<string>",
  "status_code": 123,
  "status_message": "<string>",
  "time": "<string>",
  "cost": 123,
  "tasks_count": 123,
  "tasks_error": 123,
  "tasks": [
    "<string>"
  ],
  "tasks.id": "<string>",
  "tasks.status_code": 123,
  "tasks.status_message": "<string>",
  "tasks.time": "<string>",
  "tasks.cost": 123,
  "tasks.result_count": 123,
  "tasks.path": [
    "<string>"
  ],
  "tasks.data": {},
  "tasks.result": [
    "<string>"
  ],
  "tasks.result.crawl_progress": "<string>",
  "tasks.result.crawl_status": {},
  "tasks.result.items_count": 123,
  "tasks.result.items": [
    "<string>"
  ],
  "tasks.result.items.type": "<string>",
  "tasks.result.items.fetch_time": "<string>",
  "tasks.result.items.status_code": 123,
  "tasks.result.items.page_content": {},
  "tasks.result.items.page_content.header": {},
  "tasks.result.items.page_content.primary_content": [
    "<string>"
  ],
  "tasks.result.items.page_content.primary_content.text": "<string>",
  "tasks.result.items.page_content.primary_content.url": "<string>",
  "tasks.result.items.page_content.primary_content.urls": [
    "<string>"
  ],
  "tasks.result.items.page_content.primary_content.urls.url": "<string>",
  "tasks.result.items.page_content.primary_content.urls.anchor_text": "<string>",
  "tasks.result.items.page_content.secondary_content": [
    "<string>"
  ],
  "tasks.result.items.page_content.secondary_content.text": "<string>",
  "tasks.result.items.page_content.secondary_content.url": "<string>",
  "tasks.result.items.page_content.secondary_content.urls": [
    "<string>"
  ],
  "tasks.result.items.page_content.secondary_content.urls.url": "<string>",
  "tasks.result.items.page_content.secondary_content.urls.anchor_text": "<string>",
  "tasks.result.items.page_content.table_content": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.header": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.header.row_cells": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.header.row_cells.text": "<string>",
  "tasks.result.items.page_content.table_content.header.row_cells.urls": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.header.row_cells.urls.url": "<string>",
  "tasks.result.items.page_content.table_content.header.row_cells.urls.anchor_text": "<string>",
  "tasks.result.items.page_content.table_content.header.row_cells.is_header": true,
  "tasks.result.items.page_content.table_content.body": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.body.row_cells": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.body.row_cells.text": "<string>",
  "tasks.result.items.page_content.table_content.body.row_cells.urls": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.body.row_cells.urls.url": "<string>",
  "tasks.result.items.page_content.table_content.body.row_cells.urls.anchor_text": "<string>",
  "tasks.result.items.page_content.table_content.body.row_cells.is_header": true,
  "tasks.result.items.page_content.table_content.footer": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.footer.row_cells": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.footer.row_cells.text": "<string>",
  "tasks.result.items.page_content.table_content.footer.row_cells.urls": [
    "<string>"
  ],
  "tasks.result.items.page_content.table_content.footer.row_cells.urls.url": "<string>",
  "tasks.result.items.page_content.table_content.footer.row_cells.urls.anchor_text": "<string>",
  "tasks.result.items.page_content.table_content.footer.row_cells.is_header": true,
  "tasks.result.items.page_content.footer": {},
  "tasks.result.items.page_content.main_topic": [
    "<string>"
  ],
  "tasks.result.items.page_content.main_topic.h_title": "<string>",
  "tasks.result.items.page_content.main_topic.main_title": "<string>",
  "tasks.result.items.page_content.main_topic.author": "<string>",
  "tasks.result.items.page_content.main_topic.language": "<string>",
  "tasks.result.items.page_content.main_topic.level": "<string>",
  "tasks.result.items.page_content.main_topic.primary_content": [
    "<string>"
  ],
  "tasks.result.items.page_content.main_topic.text": "<string>",
  "tasks.result.items.page_content.main_topic.url": "<string>",
  "tasks.result.items.page_content.main_topic.urls": [
    "<string>"
  ],
  "tasks.result.items.page_content.main_topic.urls.url": "<string>",
  "tasks.result.items.page_content.main_topic.urls.anchor_text": "<string>",
  "tasks.result.items.page_content.main_topic.secondary_content": [
    "<string>"
  ],
  "tasks.result.items.page_content.secondary_topic": [
    "<string>"
  ],
  "tasks.result.items.page_content.secondary_topic.h_title": "<string>",
  "tasks.result.items.page_content.secondary_topic.main_title": "<string>",
  "tasks.result.items.page_content.secondary_topic.author": "<string>",
  "tasks.result.items.page_content.secondary_topic.language": "<string>",
  "tasks.result.items.page_content.secondary_topic.level": "<string>",
  "tasks.result.items.page_content.secondary_topic.primary_content": [
    "<string>"
  ],
  "tasks.result.items.page_content.secondary_topic.text": "<string>",
  "tasks.result.items.page_content.secondary_topic.url": "<string>",
  "tasks.result.items.page_content.secondary_topic.urls": [
    "<string>"
  ],
  "tasks.result.items.page_content.secondary_topic.urls.url": "<string>",
  "tasks.result.items.page_content.secondary_topic.urls.anchor_text": "<string>",
  "tasks.result.items.page_content.secondary_topic.secondary_content": [
    "<string>"
  ],
  "tasks.result.items.page_content.ratings": [
    "<string>"
  ],
  "tasks.result.items.page_content.ratings.name": "<string>",
  "tasks.result.items.page_content.ratings.rating_value": 123,
  "tasks.result.items.page_content.ratings.max_rating_value": 123,
  "tasks.result.items.page_content.ratings.rating_count": 123,
  "tasks.result.items.page_content.ratings.relative_rating": 123,
  "tasks.result.items.page_content.offers": [
    "<string>"
  ],
  "tasks.result.items.page_content.offers.name": "<string>",
  "tasks.result.items.page_content.offers.price": 123,
  "tasks.result.items.page_content.offers.price_currency": "<string>",
  "tasks.result.items.page_content.offers.price_valid_until": 123,
  "tasks.result.items.page_content.comments": [
    "<string>"
  ],
  "tasks.result.items.page_content.comments.rating": {},
  "tasks.result.items.page_content.comments.name": "<string>",
  "tasks.result.items.page_content.comments.rating_value": 123,
  "tasks.result.items.page_content.comments.max_rating_value": 123,
  "tasks.result.items.page_content.comments.rating_count": 123,
  "tasks.result.items.page_content.comments.relative_rating": 123,
  "tasks.result.items.page_content.comments.title": "<string>",
  "tasks.result.items.page_content.comments.publish_date": "<string>",
  "tasks.result.items.page_content.comments.author": "<string>",
  "tasks.result.items.page_content.comments.primary_content": [
    "<string>"
  ],
  "tasks.result.items.page_content.comments.primary_content.text": "<string>",
  "tasks.result.items.page_content.comments.primary_content.url": "<string>",
  "tasks.result.items.page_content.comments.primary_content.urls": [
    "<string>"
  ],
  "tasks.result.items.page_content.contacts": {},
  "tasks.result.items.page_content.contacts.telephones": [
    "<string>"
  ],
  "tasks.result.items.page_content.contacts.emails": [
    "<string>"
  ],
  "tasks.result.items.page_as_markdown": "<string>"
}

Documentation Index

Fetch the complete documentation index at: https://aisa.one/docs/llms.txt

Use this file to discover all available pages before exploring further.

Authorizations

Authorization
string
header
required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Body

application/json
url
string
required

URL of the content to parse required field URL of the page to parse example: https://www.fujielectric.com/

custom_user_agent
string

custom user agent optional field custom user agent for crawling a website example: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36 default value: Mozilla/5.0 (compatible; RSiteAuditor)

browser_preset
string

preset for browser screen parameters optional field if you use this field, you don’t need to indicate browser_screen_width, browser_screen_height, browser_screen_scale_factor possible values: desktop, mobile, tablet desktop preset will apply the following values: browser_screen_width: 1920 browser_screen_height: 1080 browser_screen_scale_factor: 1 mobile preset will apply the following values: browser_screen_width: 390 browser_screen_height: 844 browser_screen_scale_factor: 3 tablet preset will apply the following values: browser_screen_width: 1024 browser_screen_height: 1366 browser_screen_scale_factor: 2 Note: to use this parameter, set enable_javascript or enable_browser_rendering to true

browser_screen_width
integer

browser screen width optional field you can set a custom browser screen width to perform audit for a particular device; if you use this field, you don’t need to indicate browser_preset as it will be ignored; Note: to use this parameter, set enable_javascript or enable_browser_rendering to true minimum value, in pixels: 240 maximum value, in pixels: 9999

browser_screen_height
integer

browser screen height optional field you can set a custom browser screen height to perform audit for a particular device; if you use this field, you don’t need to indicate browser_preset as it will be ignored; Note: to use this parameter, set enable_javascript or enable_browser_rendering to true minimum value, in pixels: 240 maximum value, in pixels: 9999

browser_screen_scale_factor
number

browser screen scale factor optional field you can set a custom browser screen resolution ratio to perform audit for a particular device; if you use this field, you don’t need to indicate browser_preset as it will be ignored; Note: to use this parameter, set enable_javascript or enable_browser_rendering to true minimum value: 0.5 maximum value: 3

store_raw_html
boolean

store HTML of a crawled page optional field set to true if you want to get the HTML of the page using the OnPage Raw HTML endpoint default value: false

disable_cookie_popup
boolean

disable the cookie popup optional field set to true if you want to disable the popup requesting cookie consent from the user; default value: false

accept_language
string

language header for accessing the website optional field all locale formats are supported (xx, xx-XX, xxx-XX, etc.) Note: if you do not specify this parameter, some websites may deny access; in this case, pages will be returned with the "type":"broken" in the response array

enable_javascript
boolean

load javascript on a page optional field set to true if you want to load the scripts available on a page default value: false Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page

enable_browser_rendering
boolean

emulate browser rendering to measure Core Web Vitals optional field by using this parameter you will be able to emulate a browser when loading a web page; enable_browser_rendering loads styles, images, fonts, animations, videos, and other resources on a page; default value: false set to true to obtain Core Web Vitals (FID, CLS, LCP) metrics in the response; if you use this field, the enable_javascript and load_resources parameters must be set to true Note: if you use this parameter, additional charges will apply; learn more about the cost of tasks with this parameter in our help article; the cost can be calculated on the Pricing Page

enable_xhr
boolean

enable XMLHttpRequest on a page optional field set to true if you want our crawler to request data from a web server using the XMLHttpRequest object default value: false if you use this field, enable_javascript must be set to true;

switch_pool
boolean

switch proxy pool optional field if true, additional proxy pools will be used to obtain the requested data; the parameter can be used if a multitude of tasks is set simultaneously, resulting in occasional rate-limit and/or site_unreachable errors

ip_pool_for_scan
string

proxy pool optional field you can choose a location of the proxy pool that will be used to obtain the requested data; the parameter can be used if page content is inaccessible in one of the locations, resulting in occasional site_unreachable errors possible values: us, de

markdown_view
boolean

return page content as markdown optional field if set to true, the markdown-formatted content of the page will be returned in the page_as_markdown field of the response; default value: false

Response

Successful response

version
string

the current version of the API

status_code
integer

general status code you can find the full list of the response codes here Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions

status_message
string

general informational message you can find the full list of general informational messages here

time
string

execution time, seconds

cost
number

total tasks cost, USD

tasks_count
integer

the number of tasks in the tasks array

tasks_error
integer

the number of tasks in the tasks array returned with an error

tasks
string[]

array of tasks

tasks.id
string

task identifier unique task identifier in our system in the UUID format

tasks.status_code
integer

status code of the task generated by DataForSEO; can be within the following range: 10000-60000 you can find the full list of the response codes here

tasks.status_message
string

informational message of the task you can find the full list of general informational messages here

tasks.time
string

execution time, seconds

tasks.cost
number

cost of the task, USD

tasks.result_count
integer

number of elements in the result array

tasks.path
string[]

URL path

tasks.data
object

contains the same parameters that you specified in the POST request

tasks.result
string[]

array of results

tasks.result.crawl_progress
string

status of the crawling session possible values: in_progress, finished

tasks.result.crawl_status
object

details of the crawling session

tasks.result.items_count
integer

number of items in the results array

tasks.result.items
string[]

items array

tasks.result.items.type
string

type of the returned item = ‘content_parsing_element’

tasks.result.items.fetch_time
string

date and time when the content was fetched in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00” example: "2022-11-01 10:02:52 +00:00"

tasks.result.items.status_code
integer

status code of the page

tasks.result.items.page_content
object

parsed content of the page

tasks.result.items.page_content.header
object

parsed content of the header

tasks.result.items.page_content.primary_content
string[]

primary content on the page you can find more information about content priority calculation in this help center article

tasks.result.items.page_content.primary_content.text
string

content text

tasks.result.items.page_content.primary_content.url
string

page URL displayed in case the text is a link anchor

tasks.result.items.page_content.primary_content.urls
string[]

contains other URLs and anchors found in the content element

tasks.result.items.page_content.primary_content.urls.url
string

other URL found in the content element

tasks.result.items.page_content.primary_content.urls.anchor_text
string

text of the URL’s anchor

tasks.result.items.page_content.secondary_content
string[]

secondary content on the page you can find more information about content priority calculation in this help center article

tasks.result.items.page_content.secondary_content.text
string

content text

tasks.result.items.page_content.secondary_content.url
string

page URL displayed in case the text is a link anchor

tasks.result.items.page_content.secondary_content.urls
string[]

contains other URLs and anchors found in the content element

tasks.result.items.page_content.secondary_content.urls.url
string

other URL found in the content element

tasks.result.items.page_content.secondary_content.urls.anchor_text
string

text of the URL’s anchor

tasks.result.items.page_content.table_content
string[]

content of the table on the page

tasks.result.items.page_content.table_content.header
string[]

content of the header of the table

tasks.result.items.page_content.table_content.header.row_cells
string[]

content of the row cells of the header

tasks.result.items.page_content.table_content.header.row_cells.text
string

text in the row cell

tasks.result.items.page_content.table_content.header.row_cells.urls
string[]

contains other URLs and anchors found in the cell

tasks.result.items.page_content.table_content.header.row_cells.urls.url
string

URL found in the cell

tasks.result.items.page_content.table_content.header.row_cells.urls.anchor_text
string

text of the URL’s anchor

tasks.result.items.page_content.table_content.header.row_cells.is_header
boolean

indicates if the text belongs to the header

tasks.result.items.page_content.table_content.body
string[]

content of the body of the table

tasks.result.items.page_content.table_content.body.row_cells
string[]

content of the row cells of the body

tasks.result.items.page_content.table_content.body.row_cells.text
string

text in the row cell

tasks.result.items.page_content.table_content.body.row_cells.urls
string[]

contains other URLs and anchors found in the cell

tasks.result.items.page_content.table_content.body.row_cells.urls.url
string

URL found in the cell

tasks.result.items.page_content.table_content.body.row_cells.urls.anchor_text
string

text of the URL’s anchor

tasks.result.items.page_content.table_content.body.row_cells.is_header
boolean

indicates if the text belongs to the header

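tasks.result.items.page_content.table_content.footer
string[]

content of the footer of the table

tasks.result.items.page_content.table_content.footer.row_cells
string[]

content of the row cells of the footer

tasks.result.items.page_content.table_content.footer.row_cells.text
string

text in the row cell

tasks.result.items.page_content.table_content.footer.row_cells.urls
string[]

contains other URLs and anchors found in the cell

tasks.result.items.page_content.table_content.footer.row_cells.urls.url
string

URL found in the cell

tasks.result.items.page_content.table_content.footer.row_cells.urls.anchor_text
string

text of the URL’s anchor

tasks.result.items.page_content.table_content.footer.row_cells.is_header
boolean

indicates if the text belongs to the header

tasks.result.items.page_content.footer
object

parsed content of the footer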

tasks.result.items.page_content.main_topic
string[]

main topic on the page you can find more information about topic priority calculation in this help center article

tasks.result.items.page_content.main_topic.h_title
string

meta title

tasks.result.items.page_content.main_topic.main_title
string

main title of the block

tasks.result.items.page_content.main_topic.author
string

content author name

tasks.result.items.page_content.main_topic.language
string

content language

tasks.result.items.page_content.main_topic.level
string

HTML level

tasks.result.items.page_content.main_topic.primary_content
string[]

primary content on the page you can find more information about content priority calculation in this help center article

tasks.result.items.page_content.main_topic.text
string

content text

tasks.result.items.page_content.main_topic.url
string

page URL displayed in case the text is a link anchor

tasks.result.items.page_content.main_topic.urls
string[]

contains other URLs and anchors found in the content element

tasks.result.items.page_content.main_topic.urls.url
string

other URL found in the content element

tasks.result.items.page_content.main_topic.urls.anchor_text
string

text of the URL’s anchor

tasks.result.items.page_content.main_topic.secondary_content
string[]

secondary content on the page you can find more information about content priority calculation in this help center article

tasks.result.items.page_content.secondary_topic
string[]

secondary topic on the page you can find more information about topic priority calculation in this help center article

tasks.result.items.page_content.secondary_topic.h_title
string

meta title

tasks.result.items.page_content.secondary_topic.main_title
string

main title of the block

tasks.result.items.page_content.secondary_topic.author
string

content author name

tasks.result.items.page_content.secondary_topic.language
string

content language

tasks.result.items.page_content.secondary_topic.level
string

HTML level

tasks.result.items.page_content.secondary_topic.primary_content
string[]

primary content on the page you can find more information about content priority calculation in this help center article

tasks.result.items.page_content.secondary_topic.text
string

content text

tasks.result.items.page_content.secondary_topic.url
string

page URL displayed in case the text is a link anchor

tasks.result.items.page_content.secondary_topic.urls
string[]

contains other URLs and anchors found in the content element

tasks.result.items.page_content.secondary_topic.urls.url
string

other URL found in the content element

tasks.result.items.page_content.secondary_topic.urls.anchor_text
string

text of the URL’s anchor

tasks.result.items.page_content.secondary_topic.secondary_content
string[]

secondary content on the page you can find more information about content priority calculation in this help center article

tasks.result.items.page_content.ratings
string[]

contains objects with rating information for the products displayed on the page

tasks.result.items.page_content.ratings.name
string

rating name Note: this field is not used in this particular object, and its value is always set to null

tasks.result.items.page_content.ratings.rating_value
integer

the value of the rating

tasks.result.items.page_content.ratings.max_rating_value
integer

maximum value for the rating

tasks.result.items.page_content.ratings.rating_count
integer

the amount of feedback

tasks.result.items.page_content.ratings.relative_rating
number

relative rating can take values from 0 to 1

tasks.result.items.page_content.offers
string[]

array of products displayed on the page contains objects with information on products displayed on the page

tasks.result.items.page_content.offers.name
string

name of the product

tasks.result.items.page_content.offers.price
integer

price of the product

tasks.result.items.page_content.offers.price_currency
string

price currency

tasks.result.items.page_content.offers.price_valid_until
string

displays the date and time until which the price is valid in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00” example: "2022-11-01 10:02:52 +00:00"

tasks.result.items.page_content.comments
string[]

array of comments displayed on the page contains objects with information on comments related to displayed products

tasks.result.items.page_content.comments.rating
object

product’s rating contains information about the rating a customer has given to the product

tasks.result.items.page_content.comments.name
string

rating name Note: this field is not used in this particular object, and its value is always null

tasks.result.items.page_content.comments.rating_value
integer

the value of the rating

tasks.result.items.page_content.comments.max_rating_value
integer

maximum value for the rating

tasks.result.items.page_content.comments.rating_count
integer

the amount of feedback Note: this field is not used in this particular object, and its value is always null

tasks.result.items.page_content.comments.relative_rating
number

relative rating can take values from 0 to 1

tasks.result.items.page_content.comments.title
string

title of the customer’s comment

tasks.result.items.page_content.comments.publish_date
string

date when the comment was published

tasks.result.items.page_content.comments.author
string

author of the comment

tasks.result.items.page_content.comments.primary_content
string[]

primary content on the page you can find more information about content priority calculation in this help center article

tasks.result.items.page_content.comments.primary_content.text
string

text of the comment

tasks.result.items.page_content.comments.primary_content.url
string

displayed in case the text is a link anchor

tasks.result.items.page_content.comments.primary_content.urls
string[]

contains other URLs and anchors found in the content element

tasks.result.items.page_content.contacts
object

contact information contains contact information displayed on the page

tasks.result.items.page_content.contacts.telephones
string[]

array of telephone numbers

tasks.result.items.page_content.contacts.emails
string[]

array of emails

tasks.result.items.page_as_markdown
string

page content in the markdown format page content in the text-to-HTML markdown format specify markdown_view as true in the request to return the value